RubyGems - htm - Versions diffs - 0.0.1 → 0.0.10 - Mend

htm 0.0.1 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (184)

checksums.yaml +4 -4
data/.aigcm_msg +1 -0
data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
data/.claude/settings.local.json +92 -0
data/.envrc +1 -0
data/.irbrc +283 -80
data/.tbls.yml +31 -0
data/CHANGELOG.md +314 -16
data/CLAUDE.md +603 -0
data/README.md +76 -5
data/Rakefile +5 -0
data/SETUP.md +132 -101
data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
data/db/migrate/00002_create_robots.rb +11 -0
data/db/migrate/00003_create_file_sources.rb +20 -0
data/db/migrate/00004_create_nodes.rb +65 -0
data/db/migrate/00005_create_tags.rb +13 -0
data/db/migrate/00006_create_node_tags.rb +18 -0
data/db/migrate/00007_create_robot_nodes.rb +26 -0
data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
data/db/schema.sql +390 -36
data/docs/api/database.md +19 -232
data/docs/api/embedding-service.md +1 -7
data/docs/api/htm.md +305 -364
data/docs/api/index.md +1 -7
data/docs/api/long-term-memory.md +342 -590
data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
data/docs/api/yard/HTM/AuthorizationError.md +11 -0
data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
data/docs/api/yard/HTM/Configuration.md +175 -0
data/docs/api/yard/HTM/Database.md +99 -0
data/docs/api/yard/HTM/DatabaseError.md +14 -0
data/docs/api/yard/HTM/EmbeddingError.md +18 -0
data/docs/api/yard/HTM/EmbeddingService.md +58 -0
data/docs/api/yard/HTM/Error.md +11 -0
data/docs/api/yard/HTM/JobAdapter.md +39 -0
data/docs/api/yard/HTM/LongTermMemory.md +342 -0
data/docs/api/yard/HTM/NotFoundError.md +17 -0
data/docs/api/yard/HTM/Observability.md +107 -0
data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
data/docs/api/yard/HTM/Railtie.md +27 -0
data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
data/docs/api/yard/HTM/TagError.md +18 -0
data/docs/api/yard/HTM/TagService.md +67 -0
data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
data/docs/api/yard/HTM/Timeframe.md +40 -0
data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
data/docs/api/yard/HTM/ValidationError.md +20 -0
data/docs/api/yard/HTM/WorkingMemory.md +131 -0
data/docs/api/yard/HTM.md +80 -0
data/docs/api/yard/index.csv +179 -0
data/docs/api/yard-reference.md +51 -0
data/docs/architecture/adrs/001-postgresql-timescaledb.md +1 -1
data/docs/architecture/adrs/003-ollama-embeddings.md +1 -1
data/docs/architecture/adrs/010-redis-working-memory-rejected.md +2 -27
data/docs/architecture/adrs/index.md +2 -13
data/docs/architecture/hive-mind.md +165 -166
data/docs/architecture/index.md +2 -2
data/docs/architecture/overview.md +5 -171
data/docs/architecture/two-tier-memory.md +1 -35
data/docs/assets/images/adr-010-current-architecture.svg +37 -0
data/docs/assets/images/adr-010-proposed-architecture.svg +48 -0
data/docs/assets/images/adr-dependency-tree.svg +93 -0
data/docs/assets/images/class-hierarchy.svg +55 -0
data/docs/assets/images/exception-hierarchy.svg +45 -0
data/docs/assets/images/htm-architecture-overview.svg +83 -0
data/docs/assets/images/htm-complete-memory-flow.svg +160 -0
data/docs/assets/images/htm-context-assembly-flow.svg +148 -0
data/docs/assets/images/htm-eviction-process.svg +141 -0
data/docs/assets/images/htm-memory-addition-flow.svg +138 -0
data/docs/assets/images/htm-memory-recall-flow.svg +152 -0
data/docs/assets/images/htm-node-states.svg +123 -0
data/docs/assets/images/project-structure.svg +78 -0
data/docs/assets/images/test-directory-structure.svg +38 -0
data/{dbdoc → docs/database}/README.md +127 -125
data/docs/database/public.file_sources.md +42 -0
data/docs/database/public.file_sources.svg +211 -0
data/{dbdoc → docs/database}/public.node_tags.md +7 -8
data/docs/database/public.node_tags.svg +239 -0
data/{dbdoc → docs/database}/public.nodes.md +22 -17
data/docs/database/public.nodes.svg +271 -0
data/docs/database/public.robot_nodes.md +46 -0
data/docs/database/public.robot_nodes.svg +243 -0
data/{dbdoc → docs/database}/public.robots.md +2 -3
data/docs/database/public.robots.svg +161 -0
data/docs/database/public.tags.svg +139 -0
data/{dbdoc → docs/database}/schema.json +941 -630
data/docs/database/schema.svg +282 -0
data/docs/development/index.md +1 -29
data/docs/development/schema.md +134 -309
data/docs/development/testing.md +1 -9
data/docs/getting-started/index.md +47 -0
data/docs/{installation.md → getting-started/installation.md} +2 -2
data/docs/{quick-start.md → getting-started/quick-start.md} +5 -5
data/docs/guides/adding-memories.md +295 -643
data/docs/guides/recalling-memories.md +36 -1
data/docs/guides/search-strategies.md +85 -51
data/docs/images/htm-er-diagram.svg +156 -0
data/docs/index.md +16 -31
data/docs/multi_framework_support.md +4 -4
data/examples/README.md +280 -0
data/examples/basic_usage.rb +18 -16
data/examples/cli_app/htm_cli.rb +146 -8
data/examples/cli_app/temp.log +93 -0
data/examples/custom_llm_configuration.rb +1 -2
data/examples/example_app/app.rb +11 -14
data/examples/file_loader_usage.rb +177 -0
data/examples/robot_groups/lib/robot_group.rb +419 -0
data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
data/examples/robot_groups/multi_process.rb +286 -0
data/examples/robot_groups/robot_worker.rb +136 -0
data/examples/robot_groups/same_process.rb +229 -0
data/examples/sinatra_app/Gemfile +1 -0
data/examples/sinatra_app/Gemfile.lock +166 -0
data/examples/sinatra_app/app.rb +219 -24
data/examples/timeframe_demo.rb +276 -0
data/lib/htm/active_record_config.rb +10 -3
data/lib/htm/circuit_breaker.rb +202 -0
data/lib/htm/configuration.rb +313 -80
data/lib/htm/database.rb +67 -36
data/lib/htm/embedding_service.rb +39 -2
data/lib/htm/errors.rb +131 -11
data/lib/htm/{sinatra.rb → integrations/sinatra.rb} +87 -12
data/lib/htm/job_adapter.rb +10 -3
data/lib/htm/jobs/generate_embedding_job.rb +5 -4
data/lib/htm/jobs/generate_tags_job.rb +4 -0
data/lib/htm/loaders/markdown_loader.rb +263 -0
data/lib/htm/loaders/paragraph_chunker.rb +112 -0
data/lib/htm/long_term_memory.rb +601 -321
data/lib/htm/models/file_source.rb +99 -0
data/lib/htm/models/node.rb +116 -12
data/lib/htm/models/robot.rb +53 -4
data/lib/htm/models/robot_node.rb +51 -0
data/lib/htm/models/tag.rb +302 -0
data/lib/htm/observability.rb +395 -0
data/lib/htm/tag_service.rb +60 -3
data/lib/htm/tasks.rb +29 -0
data/lib/htm/timeframe.rb +194 -0
data/lib/htm/timeframe_extractor.rb +307 -0
data/lib/htm/version.rb +1 -1
data/lib/htm/working_memory.rb +165 -70
data/lib/htm.rb +352 -133
data/lib/tasks/doc.rake +300 -0
data/lib/tasks/files.rake +299 -0
data/lib/tasks/htm.rake +188 -2
data/lib/tasks/jobs.rake +10 -12
data/lib/tasks/tags.rake +194 -0
data/mkdocs.yml +91 -9
data/notes/ARCHITECTURE_REVIEW.md +1167 -0
data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
data/notes/next_steps.md +100 -0
data/notes/plan.md +627 -0
data/notes/tag_ontology_enhancement_ideas.md +222 -0
data/notes/timescaledb_removal_summary.md +200 -0
metadata +177 -37
data/db/migrate/20250101000002_create_robots.rb +0 -14
data/db/migrate/20250101000003_create_nodes.rb +0 -42
data/db/migrate/20250101000005_create_tags.rb +0 -38
data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
data/dbdoc/public.node_tags.svg +0 -112
data/dbdoc/public.nodes.svg +0 -118
data/dbdoc/public.robots.svg +0 -90
data/dbdoc/public.tags.svg +0 -60
data/dbdoc/schema.svg +0 -154
data/{dbdoc → docs/database}/public.node_stats.md +0 -0
data/{dbdoc → docs/database}/public.node_stats.svg +0 -0
data/{dbdoc → docs/database}/public.nodes_tags.md +0 -0
data/{dbdoc → docs/database}/public.nodes_tags.svg +0 -0
data/{dbdoc → docs/database}/public.ontology_structure.md +0 -0
data/{dbdoc → docs/database}/public.ontology_structure.svg +0 -0
data/{dbdoc → docs/database}/public.operations_log.md +0 -0
data/{dbdoc → docs/database}/public.operations_log.svg +0 -0
data/{dbdoc → docs/database}/public.relationships.md +0 -0
data/{dbdoc → docs/database}/public.relationships.svg +0 -0
data/{dbdoc → docs/database}/public.robot_activity.md +0 -0
data/{dbdoc → docs/database}/public.robot_activity.svg +0 -0
data/{dbdoc → docs/database}/public.schema_migrations.md +0 -0
data/{dbdoc → docs/database}/public.schema_migrations.svg +0 -0
data/{dbdoc → docs/database}/public.tags.md +3 -3
data/{dbdoc → docs/database}/public.topic_relationships.md +0 -0
data/{dbdoc → docs/database}/public.topic_relationships.svg +0 -0

data/README.md CHANGED Viewed

@@ -27,7 +27,7 @@
 - **Client-Side Embeddings**
     - Automatic embedding generation before database insertion
-    - Supports Ollama (local, default) and OpenAI
+    - Uses the [ruby_llm](https://ruby_llm.com) gem for LLM access
     - Configurable embedding providers and models
 - **Two-Tier Memory Architecture**
@@ -62,6 +62,12 @@
     - Tag-based categorization
     - Hierarchical tag structures
+- **File Loading**
+    - Load markdown files into long-term memory
+    - Automatic paragraph-based chunking
+    - Source file tracking with re-sync support
+    - YAML frontmatter extraction as metadata
 ## Installation
 Add this line to your application's Gemfile:
@@ -264,6 +270,42 @@ memories = htm.recall(
 htm.forget(node_id, confirm: :confirmed)
 ```
+### Loading Files
+HTM can load text-based files (currently markdown) into long-term memory with automatic chunking and source tracking.
+```ruby
+htm = HTM.new(robot_name: "Document Loader")
+# Load a single markdown file
+result = htm.load_file("docs/guide.md")
+# => { file_source_id: 1, chunks_created: 5, chunks_updated: 0, chunks_deleted: 0 }
+# Load all markdown files in a directory
+results = htm.load_directory("docs/", pattern: "**/*.md")
+# Get nodes from a specific file
+nodes = htm.nodes_from_file("docs/guide.md")
+# Unload a file (soft deletes chunks)
+htm.unload_file("docs/guide.md")
+```
+**Features:**
+- **Paragraph chunking**: Text split by blank lines, code blocks preserved
+- **Source tracking**: Files tracked with mtime for automatic re-sync
+- **YAML frontmatter**: Extracted and stored as metadata
+- **Duplicate detection**: Content hash prevents duplicate nodes
+**Rake tasks:**
+```bash
+rake 'htm:files:load[docs/guide.md]'   # Load a single file
+rake 'htm:files:load_dir[docs/]'       # Load all markdown files from directory
+rake htm:files:list                     # List all loaded file sources
+rake htm:files:sync                     # Sync all files (reload changed)
+rake htm:files:stats                    # Show file loading statistics
+```
 ### Automatic Tag Extraction
 HTM automatically extracts hierarchical tags from content using LLM analysis. Tags are inferred from the content itself - you never specify them manually.
@@ -518,13 +560,14 @@ HTM provides a minimal, focused API with only 3 core instance methods for memory
 ### Core Memory Operations
-#### `remember(content, source: "")`
+#### `remember(content, source: "", metadata: {})`
 Store information in memory. Embeddings and tags are automatically generated asynchronously.
 **Parameters:**
 - `content` (String, required) - The information to remember. Converted to string if nil. Returns ID of last node if empty.
 - `source` (String, optional) - Where the content came from (e.g., "user", "assistant", "system"). Defaults to empty string.
+- `metadata` (Hash, optional) - Arbitrary key-value metadata stored as JSONB. Keys must be strings or symbols. Defaults to `{}`.
 **Returns:** Integer - The node ID of the stored memory
@@ -536,6 +579,12 @@ node_id = htm.remember("PostgreSQL is excellent for vector search with pgvector"
 # Store without source (uses default empty string)
 node_id = htm.remember("HTM uses two-tier memory architecture")
+# Store with metadata
+node_id = htm.remember(
+  "User prefers dark mode",
+  metadata: { category: "preference", priority: "high", version: 2 }
+)
 # Nil/empty handling
 node_id = htm.remember(nil)  # Returns ID of last node without creating duplicate
 node_id = htm.remember("")   # Returns ID of last node without creating duplicate
@@ -543,7 +592,7 @@ node_id = htm.remember("")   # Returns ID of last node without creating duplicat
 ---
-#### `recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [])`
+#### `recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [], metadata: {})`
 Retrieve memories using temporal filtering and semantic/keyword search.
@@ -559,8 +608,9 @@ Retrieve memories using temporal filtering and semantic/keyword search.
   - `:hybrid` - Weighted combination (70% vector, 30% full-text)
 - `with_relevance` (Boolean, optional) - Include dynamic relevance scores. Default: false
 - `query_tags` (Array<String>, optional) - Filter results by tags. Default: []
+- `metadata` (Hash, optional) - Filter results by metadata using JSONB containment (`@>`). Default: `{}`
-**Returns:** Array<Hash> - Matching memories with fields: `id`, `content`, `source`, `created_at`, `access_count`, (optionally `relevance`)
+**Returns:** Array<Hash> - Matching memories with fields: `id`, `content`, `source`, `created_at`, `access_count`, `metadata`, (optionally `relevance`)
 **Example:**
 ```ruby
@@ -586,6 +636,21 @@ memories = htm.recall(
   query_tags: ["architecture"]
 )
 # => [{ "id" => 123, "content" => "...", "relevance" => 0.92, ... }, ...]
+# Filter by metadata
+memories = htm.recall(
+  "user preferences",
+  metadata: { category: "preference" }
+)
+# => Returns only nodes with metadata containing { category: "preference" }
+# Combine metadata with other filters
+memories = htm.recall(
+  "settings",
+  timeframe: "last month",
+  strategy: :hybrid,
+  metadata: { priority: "high", version: 2 }
+)
 ```
 ---
@@ -1312,10 +1377,16 @@ See [htm_teamwork.md](htm_teamwork.md) for detailed design documentation and pla
 ### Database Schema
 - `robots`: Robot registry for all LLM agents using HTM
-- `nodes`: Main memory storage with vector embeddings (pgvector), full-text search (tsvector), metadata
+- `nodes`: Main memory storage with vector embeddings (pgvector), full-text search (tsvector), JSONB metadata
 - `tags`: Hierarchical tag ontology (format: `root:level1:level2:level3`)
 - `node_tags`: Join table implementing many-to-many relationship between nodes and tags
+**Nodes Table Key Columns:**
+- `content`: The memory content
+- `embedding`: Vector embedding for semantic search (up to 2000 dimensions)
+- `metadata`: JSONB column for arbitrary key-value data (filterable via `@>` containment operator)
+- `content_hash`: SHA-256 hash for deduplication
 ### Service Architecture
 HTM uses a layered architecture for LLM integration:

data/Rakefile CHANGED Viewed

@@ -28,6 +28,11 @@ task :example do
   ruby "examples/basic_usage.rb"
 end
+desc "Run timeframe demo"
+task :timeframe_demo do
+  ruby "examples/timeframe_demo.rb"
+end
 desc "Show gem stats"
 task :stats do
   puts "\nHTM Gem Statistics:"

data/SETUP.md CHANGED Viewed

@@ -3,122 +3,165 @@
 ## Prerequisites
 1. **Ruby** (version 3.0 or higher)
-2. **TimescaleDB Cloud Account** (already set up)
-3. **Database Environment Variables** (already configured)
-4. **Ollama** (for embeddings via RubyLLM)
+2. **PostgreSQL** (14+ with pgvector and pg_trgm extensions)
+3. **Ollama** (for embeddings via RubyLLM)
-## Ollama Setup
+## PostgreSQL Setup
-HTM uses RubyLLM with the Ollama provider for generating embeddings. You need to install and run Ollama locally.
+### 1. Install PostgreSQL
-### 1. Install Ollama
+**macOS (via Homebrew):**
+```bash
+brew install postgresql@17
+brew services start postgresql@17
+```
-**macOS:**
+**Ubuntu/Debian:**
 ```bash
-curl https://ollama.ai/install.sh | sh
+sudo apt install postgresql postgresql-contrib
+sudo systemctl start postgresql
 ```
-**Or download from:** https://ollama.ai/download
+### 2. Install pgvector Extension
-### 2. Start Ollama Service
+**macOS:**
+```bash
+brew install pgvector
+```
+**Ubuntu/Debian:**
 ```bash
-# Ollama typically starts automatically after installation
-# Verify it's running:
-curl http://localhost:11434/api/version
+sudo apt install postgresql-17-pgvector
 ```
-### 3. Pull the gpt-oss Model
+**From source:**
+```bash
+git clone https://github.com/pgvector/pgvector.git
+cd pgvector
+make
+sudo make install
+```
+### 3. Create Database and Enable Extensions
 ```bash
-# Pull the default model used by HTM
-ollama pull gpt-oss
+# Create the development database
+createdb htm_development
-# Verify the model is available
-ollama list
+# Enable required extensions
+psql htm_development -c "CREATE EXTENSION IF NOT EXISTS vector;"
+psql htm_development -c "CREATE EXTENSION IF NOT EXISTS pg_trgm;"
+# Verify extensions
+psql htm_development -c "SELECT extname, extversion FROM pg_extension;"
 ```
-### 4. Test Embedding Generation
+### 4. Set Environment Variable
 ```bash
-# Test that embeddings work
-ollama run gpt-oss "Hello, world!"
-```
+# Add to your ~/.bashrc or ~/.zshrc
+export HTM_DBURL="postgresql://postgres@localhost:5432/htm_development"
-### Optional: Custom Ollama URL
+# Or for a specific user with password
+export HTM_DBURL="postgresql://username:password@localhost:5432/htm_development"
+```
-If Ollama is running on a different host/port, set the environment variable:
+### 5. Verify Connection
 ```bash
-export OLLAMA_URL="http://custom-host:11434"
+cd /path/to/HTM
+ruby test_connection.rb
+```
+You should see:
+```
+✓ Connected successfully!
+✓ pgvector Extension: Version 0.8.x
+✓ pg_trgm Extension: Version 1.6
 ```
-## Database Setup
+## Ollama Setup
-### 1. Load Database Credentials
+HTM uses RubyLLM with the Ollama provider for generating embeddings. You need to install and run Ollama locally.
-The HTM project uses environment variables to manage database credentials. These are defined in `~/.bashrc__tiger`.
+### 1. Install Ollama
+**macOS:**
 ```bash
-# Load the Tiger database environment variables
-source ~/.bashrc__tiger
+curl https://ollama.ai/install.sh | sh
 ```
-To make these variables available automatically in new shell sessions, ensure `~/.bashrc__tiger` is sourced in your `~/.bashrc` or `~/.bash_profile`.
-### 2. Verify Connection
+**Or download from:** https://ollama.ai/download
-Test the database connection:
+### 2. Start Ollama Service
 ```bash
-cd /path/to/HTM
-ruby test_connection.rb
+# Ollama typically starts automatically after installation
+# Verify it's running:
+curl http://localhost:11434/api/version
 ```
-You should see:
+### 3. Pull Required Models
+```bash
+# Pull the embedding model
+ollama pull nomic-embed-text
+# Pull the chat model (for tag extraction)
+ollama pull llama3
+# Verify models are available
+ollama list
 ```
-✓ Connected successfully!
-✓ TimescaleDB Extension: Version 2.22.1
-✓ pgvector Extension: Version 0.8.1
-✓ pg_trgm Extension: Version 1.6
+### 4. Test Embedding Generation
+```bash
+# Test that embeddings work
+curl http://localhost:11434/api/embeddings -d '{
+  "model": "nomic-embed-text",
+  "prompt": "Hello, world!"
+}'
 ```
-### 3. Enable Extensions (One-time)
+### Optional: Custom Ollama URL
-Enable required PostgreSQL extensions (already done, but can be re-run safely):
+If Ollama is running on a different host/port, set the environment variable:
 ```bash
-ruby enable_extensions.rb
+export OLLAMA_URL="http://custom-host:11434"
 ```
 ## Environment Variables Reference
-After sourcing `~/.bashrc__tiger`, these variables are available:
 | Variable | Description | Example Value |
 |----------|-------------|---------------|
-| `HTM_SERVICE_NAME` | Service identifier | `db-67977` |
-| `HTM_DBNAME` | Database name | `tsdb` |
-| `HTM_DBUSER` | Database user | `tsdbadmin` |
-| `HTM_DBPASS` | Database password | `***` |
-| `HTM_DBURL` | Full connection URL (preferred) | `postgres://...` |
-| `HTM_DBPORT` | Database port | `37807` |
+| `HTM_DBURL` | Full PostgreSQL connection URL (preferred) | `postgresql://postgres@localhost:5432/htm_development` |
+| `HTM_DBNAME` | Database name (fallback) | `htm_development` |
+| `HTM_DBUSER` | Database user (fallback) | `postgres` |
+| `HTM_DBPASS` | Database password (fallback) | `` |
+| `HTM_DBHOST` | Database host (fallback) | `localhost` |
+| `HTM_DBPORT` | Database port (fallback) | `5432` |
+| `OLLAMA_URL` | Ollama server URL | `http://localhost:11434` |
 ## Development Workflow
 ### Quick Start
 ```bash
-# 1. Source environment variables (if not in .bashrc)
-source ~/.bashrc__tiger
+# 1. Set database URL (if not in shell config)
+export HTM_DBURL="postgresql://postgres@localhost:5432/htm_development"
-# 2. Install dependencies (when gem is created)
+# 2. Install dependencies
 bundle install
-# 3. Initialize database schema (when ready)
-ruby -r ./lib/htm -e "HTMDatabase.setup"
+# 3. Initialize database schema
+rake db_setup
+# 4. Run tests
+rake test
-# 4. Test HTM functionality (when implemented)
+# 5. Try the basic example
 ruby examples/basic_usage.rb
 ```
@@ -130,10 +173,8 @@ HTM uses Minitest for testing:
 # Run all tests
 rake test
-# Or run directly with Ruby
-ruby test/htm_test.rb
 # Run specific test file
+ruby test/htm_test.rb
 ruby test/embedding_service_test.rb
 # Run integration tests (requires database)
@@ -150,14 +191,18 @@ HTM/
 │   │   ├── database.rb           # Database setup and schema
 │   │   ├── long_term_memory.rb   # PostgreSQL-backed storage
 │   │   ├── working_memory.rb     # In-memory active context
-│   │   ├── embedding_service.rb  # RubyLLM embedding generation (Ollama/gpt-oss)
+│   │   ├── embedding_service.rb  # RubyLLM embedding generation
+│   │   ├── tag_service.rb        # Hierarchical tag extraction
+│   │   ├── configuration.rb      # Multi-provider LLM config
 │   │   └── version.rb            # Version constant
-├── sql/
+├── config/
+│   └── database.yml              # Database configuration
+├── db/
 │   └── schema.sql                # Database schema
 ├── test/
 │   ├── test_helper.rb            # Minitest configuration
 │   ├── htm_test.rb               # Basic HTM tests
-│   ├── embedding_service_test.rb # Embedding tests (RubyLLM/Ollama)
+│   ├── embedding_service_test.rb # Embedding tests
 │   └── integration_test.rb       # Full integration tests
 ├── examples/
 │   └── basic_usage.rb            # Basic usage example
@@ -165,24 +210,12 @@ HTM/
 ├── enable_extensions.rb          # Enable PostgreSQL extensions
 ├── SETUP.md                      # This file
 ├── README.md                     # Project overview
-├── htm_teamwork.md               # Planning and design doc
+├── CLAUDE.md                     # AI assistant instructions
 ├── Gemfile
 ├── htm.gemspec
 └── Rakefile                      # Rake tasks
 ```
-## Next Steps
-1. **Phase 1**: Create basic gem structure
-2. **Phase 2**: Implement database schema
-3. **Phase 3**: Implement LongTermMemory class
-4. **Phase 4**: Implement WorkingMemory class
-5. **Phase 5**: Implement HTM main class
-6. **Phase 6**: Add tests
-7. **Phase 7**: Create examples
-See `htm_teamwork.md` for detailed roadmap.
 ## Troubleshooting
 ### Ollama Issues
@@ -193,24 +226,20 @@ If you encounter embedding errors:
 # Verify Ollama is running
 curl http://localhost:11434/api/version
-# Check if gpt-oss model is available
-ollama list | grep gpt-oss
+# Check if models are available
+ollama list
 # Test embedding generation
-ollama run gpt-oss "Test embedding"
+curl http://localhost:11434/api/embeddings -d '{"model": "nomic-embed-text", "prompt": "Test"}'
-# View Ollama logs
-ollama logs
-# Restart Ollama service
-# On macOS, Ollama runs as a background service
-# Check Activity Monitor or restart from the menu bar
+# View Ollama logs (macOS)
+# Check Console.app or Activity Monitor
 ```
 **Common Ollama Errors:**
 - **"connection refused"**: Ollama service is not running. Start Ollama from Applications or via CLI.
-- **"model not found"**: Run `ollama pull gpt-oss` to download the model.
+- **"model not found"**: Run `ollama pull nomic-embed-text` to download the model.
 - **Custom URL not working**: Ensure `OLLAMA_URL` environment variable is set correctly.
 ### Database Connection Issues
@@ -218,14 +247,15 @@ ollama logs
 If you get connection errors:
 ```bash
-# Verify environment variables are set
+# Verify environment variable is set
 echo $HTM_DBURL
 # Test connection manually
-psql $HTM_DBURL
+psql $HTM_DBURL -c "SELECT 1"
-# Check if ~/.bashrc__tiger is sourced
-grep "bashrc__tiger" ~/.bashrc
+# Check PostgreSQL is running
+brew services list | grep postgresql  # macOS
+systemctl status postgresql           # Linux
 ```
 ### Extension Issues
@@ -233,31 +263,32 @@ grep "bashrc__tiger" ~/.bashrc
 If extensions aren't available:
 ```bash
+# Check if pgvector is installed
+psql htm_development -c "SELECT * FROM pg_available_extensions WHERE name = 'vector';"
 # Re-run extension setup
 ruby enable_extensions.rb
-# Check extension status manually
-psql $HTM_DBURL -c "SELECT extname, extversion FROM pg_extension ORDER BY extname"
+# Check extension status
+psql htm_development -c "SELECT extname, extversion FROM pg_extension ORDER BY extname"
 ```
-### SSL Issues
+### Test Database
-The TimescaleDB Cloud instance requires SSL. If you see SSL errors:
+For running tests, create a separate test database:
 ```bash
-# Ensure sslmode is set in connection URL
-echo $HTM_DBURL | grep sslmode
-# Should show: sslmode=require
+createdb htm_development_test
+psql htm_development_test -c "CREATE EXTENSION IF NOT EXISTS vector; CREATE EXTENSION IF NOT EXISTS pg_trgm;"
 ```
 ## Resources
 - **Ollama**: https://ollama.ai/
-- **RubyLLM**: https://github.com/madbomber/ruby_llm
-- **TimescaleDB Docs**: https://docs.timescale.com/
+- **RubyLLM**: https://github.com/crmne/ruby_llm
 - **pgvector Docs**: https://github.com/pgvector/pgvector
-- **Planning Document**: `htm_teamwork.md`
 - **PostgreSQL Docs**: https://www.postgresql.org/docs/
+- **Planning Document**: `htm_teamwork.md`
 ## Support

data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} RENAMED Viewed

@@ -2,7 +2,6 @@
 class EnableExtensions < ActiveRecord::Migration[7.1]
   def up
-    # Note: On TimescaleDB Cloud, the extension is named 'vector' not 'pgvector'
     enable_extension 'vector'
     enable_extension 'pg_trgm'
   end

data/db/migrate/00002_create_robots.rb ADDED Viewed

@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+class CreateRobots < ActiveRecord::Migration[7.1]
+  def change
+    create_table :robots, comment: 'Registry of all LLM robots using the HTM system' do |t|
+      t.text :name, comment: 'Human-readable name for the robot'
+      t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When the robot was first registered'
+      t.timestamptz :last_active, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'Last time the robot accessed the system'
+    end
+  end
+end

data/db/migrate/00003_create_file_sources.rb ADDED Viewed

@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+class CreateFileSources < ActiveRecord::Migration[7.1]
+  def change
+    create_table :file_sources, comment: 'Source file metadata for loaded documents' do |t|
+      t.text :file_path, null: false, comment: 'Absolute path to source file'
+      t.string :file_hash, limit: 64, comment: 'SHA-256 hash of file content'
+      t.timestamptz :mtime, comment: 'File modification time'
+      t.integer :file_size, comment: 'File size in bytes'
+      t.jsonb :frontmatter, default: {}, comment: 'Parsed YAML frontmatter'
+      t.timestamptz :last_synced_at, comment: 'When file was last synced to HTM'
+      t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }
+      t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }
+    end
+    add_index :file_sources, :file_path, unique: true, name: 'idx_file_sources_path_unique'
+    add_index :file_sources, :file_hash, name: 'idx_file_sources_hash'
+    add_index :file_sources, :last_synced_at, name: 'idx_file_sources_last_synced'
+  end
+end

data/db/migrate/00004_create_nodes.rb ADDED Viewed

@@ -0,0 +1,65 @@
+# frozen_string_literal: true
+class CreateNodes < ActiveRecord::Migration[7.1]
+  def change
+    create_table :nodes, comment: 'Core memory storage for conversation messages and context' do |t|
+      t.text :content, null: false, comment: 'The conversation message/utterance content'
+      t.integer :access_count, default: 0, null: false, comment: 'Number of times this node has been accessed/retrieved'
+      t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was created'
+      t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last modified'
+      t.timestamptz :last_accessed, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last accessed'
+      t.integer :token_count, comment: 'Number of tokens in the content (for context budget management)'
+      t.vector :embedding, limit: 2000, comment: 'Vector embedding (max 2000 dimensions) for semantic search'
+      t.integer :embedding_dimension, comment: 'Actual number of dimensions used in the embedding vector (max 2000)'
+      t.string :content_hash, limit: 64, comment: 'SHA-256 hash of content for deduplication'
+      t.timestamptz :deleted_at, comment: 'Soft delete timestamp - node is considered deleted when set'
+      t.bigint :source_id, comment: 'Reference to source file (for file-loaded nodes)'
+      t.integer :chunk_position, comment: 'Position within source file (0-indexed)'
+      t.jsonb :metadata, default: {}, null: false, comment: 'Flexible metadata storage (memory_type, importance, source, etc.)'
+    end
+    # Basic indexes for common queries
+    add_index :nodes, :created_at, name: 'idx_nodes_created_at'
+    add_index :nodes, :updated_at, name: 'idx_nodes_updated_at'
+    add_index :nodes, :last_accessed, name: 'idx_nodes_last_accessed'
+    add_index :nodes, :access_count, name: 'idx_nodes_access_count'
+    add_index :nodes, :content_hash, unique: true, name: 'idx_nodes_content_hash_unique'
+    add_index :nodes, :deleted_at, name: 'idx_nodes_deleted_at'
+    add_index :nodes, :source_id, name: 'idx_nodes_source_id'
+    add_index :nodes, [:source_id, :chunk_position], name: 'idx_nodes_source_chunk_position'
+    # Partial index for efficiently querying non-deleted nodes
+    add_index :nodes, :created_at, name: 'idx_nodes_not_deleted_created_at', where: 'deleted_at IS NULL'
+    # GIN index for JSONB metadata queries
+    add_index :nodes, :metadata, using: :gin, name: 'idx_nodes_metadata'
+    # Vector similarity search index (HNSW for better performance)
+    execute <<-SQL
+      CREATE INDEX idx_nodes_embedding ON nodes
+        USING hnsw (embedding vector_cosine_ops)
+        WITH (m = 16, ef_construction = 64)
+    SQL
+    # Full-text search on conversation content
+    execute <<-SQL
+      CREATE INDEX idx_nodes_content_gin ON nodes
+        USING gin(to_tsvector('english', content))
+    SQL
+    # Trigram indexes for fuzzy matching on conversation content
+    execute <<-SQL
+      CREATE INDEX idx_nodes_content_trgm ON nodes
+        USING gin(content gin_trgm_ops)
+    SQL
+    # Check constraint for embedding dimensions
+    execute <<-SQL
+      ALTER TABLE nodes ADD CONSTRAINT check_embedding_dimension
+        CHECK (embedding_dimension IS NULL OR (embedding_dimension > 0 AND embedding_dimension <= 2000))
+    SQL
+    # Foreign key to file_sources table
+    add_foreign_key :nodes, :file_sources, column: :source_id, on_delete: :nullify
+  end
+end

data/db/migrate/00005_create_tags.rb ADDED Viewed

@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+class CreateTags < ActiveRecord::Migration[7.1]
+  def change
+    create_table :tags, comment: 'Unique tag names for categorization' do |t|
+      t.text :name, null: false, comment: 'Hierarchical tag in format: root:level1:level2 (e.g., database:postgresql:timescaledb)'
+      t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this tag was created'
+    end
+    add_index :tags, :name, unique: true, name: 'idx_tags_name_unique'
+    add_index :tags, :name, using: :btree, opclass: :text_pattern_ops, name: 'idx_tags_name_pattern'
+  end
+end