htm 0.0.2 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. checksums.yaml +4 -4
  2. data/.aigcm_msg +1 -0
  3. data/.architecture/reviews/comprehensive-codebase-review.md +577 -0
  4. data/.claude/settings.local.json +92 -0
  5. data/.irbrc +283 -80
  6. data/.tbls.yml +2 -1
  7. data/CHANGELOG.md +294 -26
  8. data/CLAUDE.md +603 -0
  9. data/README.md +76 -5
  10. data/Rakefile +5 -0
  11. data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} +0 -1
  12. data/db/migrate/00002_create_robots.rb +11 -0
  13. data/db/migrate/00003_create_file_sources.rb +20 -0
  14. data/db/migrate/00004_create_nodes.rb +65 -0
  15. data/db/migrate/00005_create_tags.rb +13 -0
  16. data/db/migrate/00006_create_node_tags.rb +18 -0
  17. data/db/migrate/00007_create_robot_nodes.rb +26 -0
  18. data/db/migrate/00009_add_working_memory_to_robot_nodes.rb +12 -0
  19. data/db/schema.sql +172 -1
  20. data/docs/api/database.md +1 -2
  21. data/docs/api/htm.md +197 -2
  22. data/docs/api/yard/HTM/ActiveRecordConfig.md +23 -0
  23. data/docs/api/yard/HTM/AuthorizationError.md +11 -0
  24. data/docs/api/yard/HTM/CircuitBreaker.md +92 -0
  25. data/docs/api/yard/HTM/CircuitBreakerOpenError.md +34 -0
  26. data/docs/api/yard/HTM/Configuration.md +175 -0
  27. data/docs/api/yard/HTM/Database.md +99 -0
  28. data/docs/api/yard/HTM/DatabaseError.md +14 -0
  29. data/docs/api/yard/HTM/EmbeddingError.md +18 -0
  30. data/docs/api/yard/HTM/EmbeddingService.md +58 -0
  31. data/docs/api/yard/HTM/Error.md +11 -0
  32. data/docs/api/yard/HTM/JobAdapter.md +39 -0
  33. data/docs/api/yard/HTM/LongTermMemory.md +342 -0
  34. data/docs/api/yard/HTM/NotFoundError.md +17 -0
  35. data/docs/api/yard/HTM/Observability.md +107 -0
  36. data/docs/api/yard/HTM/QueryTimeoutError.md +19 -0
  37. data/docs/api/yard/HTM/Railtie.md +27 -0
  38. data/docs/api/yard/HTM/ResourceExhaustedError.md +13 -0
  39. data/docs/api/yard/HTM/TagError.md +18 -0
  40. data/docs/api/yard/HTM/TagService.md +67 -0
  41. data/docs/api/yard/HTM/Timeframe/Result.md +24 -0
  42. data/docs/api/yard/HTM/Timeframe.md +40 -0
  43. data/docs/api/yard/HTM/TimeframeExtractor/Result.md +24 -0
  44. data/docs/api/yard/HTM/TimeframeExtractor.md +45 -0
  45. data/docs/api/yard/HTM/ValidationError.md +20 -0
  46. data/docs/api/yard/HTM/WorkingMemory.md +131 -0
  47. data/docs/api/yard/HTM.md +80 -0
  48. data/docs/api/yard/index.csv +179 -0
  49. data/docs/api/yard-reference.md +51 -0
  50. data/docs/database/README.md +128 -128
  51. data/docs/database/public.file_sources.md +42 -0
  52. data/docs/database/public.file_sources.svg +211 -0
  53. data/docs/database/public.node_tags.md +4 -4
  54. data/docs/database/public.node_tags.svg +212 -79
  55. data/docs/database/public.nodes.md +22 -12
  56. data/docs/database/public.nodes.svg +246 -127
  57. data/docs/database/public.robot_nodes.md +11 -9
  58. data/docs/database/public.robot_nodes.svg +220 -98
  59. data/docs/database/public.robots.md +2 -2
  60. data/docs/database/public.robots.svg +136 -81
  61. data/docs/database/public.tags.md +3 -3
  62. data/docs/database/public.tags.svg +118 -39
  63. data/docs/database/schema.json +850 -771
  64. data/docs/database/schema.svg +256 -197
  65. data/docs/development/schema.md +67 -2
  66. data/docs/guides/adding-memories.md +93 -7
  67. data/docs/guides/recalling-memories.md +36 -1
  68. data/examples/README.md +280 -0
  69. data/examples/cli_app/htm_cli.rb +65 -5
  70. data/examples/cli_app/temp.log +93 -0
  71. data/examples/file_loader_usage.rb +177 -0
  72. data/examples/robot_groups/lib/robot_group.rb +419 -0
  73. data/examples/robot_groups/lib/working_memory_channel.rb +140 -0
  74. data/examples/robot_groups/multi_process.rb +286 -0
  75. data/examples/robot_groups/robot_worker.rb +136 -0
  76. data/examples/robot_groups/same_process.rb +229 -0
  77. data/examples/timeframe_demo.rb +276 -0
  78. data/lib/htm/active_record_config.rb +1 -1
  79. data/lib/htm/circuit_breaker.rb +202 -0
  80. data/lib/htm/configuration.rb +59 -13
  81. data/lib/htm/database.rb +67 -36
  82. data/lib/htm/embedding_service.rb +39 -2
  83. data/lib/htm/errors.rb +131 -11
  84. data/lib/htm/jobs/generate_embedding_job.rb +5 -4
  85. data/lib/htm/jobs/generate_tags_job.rb +4 -0
  86. data/lib/htm/loaders/markdown_loader.rb +263 -0
  87. data/lib/htm/loaders/paragraph_chunker.rb +112 -0
  88. data/lib/htm/long_term_memory.rb +460 -343
  89. data/lib/htm/models/file_source.rb +99 -0
  90. data/lib/htm/models/node.rb +80 -5
  91. data/lib/htm/models/robot.rb +24 -1
  92. data/lib/htm/models/robot_node.rb +1 -0
  93. data/lib/htm/models/tag.rb +254 -4
  94. data/lib/htm/observability.rb +395 -0
  95. data/lib/htm/tag_service.rb +60 -3
  96. data/lib/htm/tasks.rb +26 -1
  97. data/lib/htm/timeframe.rb +194 -0
  98. data/lib/htm/timeframe_extractor.rb +307 -0
  99. data/lib/htm/version.rb +1 -1
  100. data/lib/htm/working_memory.rb +165 -70
  101. data/lib/htm.rb +328 -130
  102. data/lib/tasks/doc.rake +300 -0
  103. data/lib/tasks/files.rake +299 -0
  104. data/lib/tasks/htm.rake +158 -3
  105. data/lib/tasks/jobs.rake +3 -9
  106. data/lib/tasks/tags.rake +166 -6
  107. data/mkdocs.yml +36 -1
  108. data/notes/ARCHITECTURE_REVIEW.md +1167 -0
  109. data/notes/IMPLEMENTATION_SUMMARY.md +606 -0
  110. data/notes/MULTI_FRAMEWORK_IMPLEMENTATION.md +451 -0
  111. data/notes/next_steps.md +100 -0
  112. data/notes/plan.md +627 -0
  113. data/notes/tag_ontology_enhancement_ideas.md +222 -0
  114. data/notes/timescaledb_removal_summary.md +200 -0
  115. metadata +125 -15
  116. data/db/migrate/20250101000002_create_robots.rb +0 -14
  117. data/db/migrate/20250101000003_create_nodes.rb +0 -42
  118. data/db/migrate/20250101000005_create_tags.rb +0 -38
  119. data/db/migrate/20250101000007_add_node_vector_indexes.rb +0 -30
  120. data/db/migrate/20250125000001_add_content_hash_to_nodes.rb +0 -14
  121. data/db/migrate/20250125000002_create_robot_nodes.rb +0 -35
  122. data/db/migrate/20250125000003_remove_source_and_robot_id_from_nodes.rb +0 -28
  123. data/db/migrate/20250126000001_create_working_memories.rb +0 -19
  124. data/db/migrate/20250126000002_remove_unused_columns.rb +0 -12
  125. data/docs/database/public.working_memories.md +0 -40
  126. data/docs/database/public.working_memories.svg +0 -112
  127. data/lib/htm/models/working_memory_entry.rb +0 -88
data/README.md CHANGED
@@ -27,7 +27,7 @@
 
  - **Client-Side Embeddings**
  - Automatic embedding generation before database insertion
- - Supports Ollama (local, default) and OpenAI
+ - Uses the [ruby_llm](https://ruby_llm.com) gem for LLM access
  - Configurable embedding providers and models
 
  - **Two-Tier Memory Architecture**
@@ -62,6 +62,12 @@
  - Tag-based categorization
  - Hierarchical tag structures
 
+ - **File Loading**
+ - Load markdown files into long-term memory
+ - Automatic paragraph-based chunking
+ - Source file tracking with re-sync support
+ - YAML frontmatter extraction as metadata
+
  ## Installation
 
  Add this line to your application's Gemfile:
@@ -264,6 +270,42 @@ memories = htm.recall(
  htm.forget(node_id, confirm: :confirmed)
  ```
 
+ ### Loading Files
+
+ HTM can load text-based files (currently markdown) into long-term memory with automatic chunking and source tracking.
+
+ ```ruby
+ htm = HTM.new(robot_name: "Document Loader")
+
+ # Load a single markdown file
+ result = htm.load_file("docs/guide.md")
+ # => { file_source_id: 1, chunks_created: 5, chunks_updated: 0, chunks_deleted: 0 }
+
+ # Load all markdown files in a directory
+ results = htm.load_directory("docs/", pattern: "**/*.md")
+
+ # Get nodes from a specific file
+ nodes = htm.nodes_from_file("docs/guide.md")
+
+ # Unload a file (soft deletes chunks)
+ htm.unload_file("docs/guide.md")
+ ```
+
+ **Features:**
+ - **Paragraph chunking**: Text split by blank lines, code blocks preserved
+ - **Source tracking**: Files tracked with mtime for automatic re-sync
+ - **YAML frontmatter**: Extracted and stored as metadata
+ - **Duplicate detection**: Content hash prevents duplicate nodes
+
+ **Rake tasks:**
+ ```bash
+ rake 'htm:files:load[docs/guide.md]'   # Load a single file
+ rake 'htm:files:load_dir[docs/]'       # Load all markdown files from directory
+ rake htm:files:list                    # List all loaded file sources
+ rake htm:files:sync                    # Sync all files (reload changed)
+ rake htm:files:stats                   # Show file loading statistics
+ ```
+
  ### Automatic Tag Extraction
 
  HTM automatically extracts hierarchical tags from content using LLM analysis. Tags are inferred from the content itself - you never specify them manually.
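The file-loading calls documented in the README excerpt above lend themselves to a simple re-sync check. A minimal editorial sketch, assuming `load_file` may be called again on an already-loaded path and that each call returns the counts hash shown above; this workflow is an illustration, not code taken from the package:

```ruby
require "htm"

htm = HTM.new(robot_name: "Document Loader")

first = htm.load_file("docs/guide.md")
puts "Initial load created #{first[:chunks_created]} chunks"

# ... docs/guide.md is edited on disk ...

# Assumed behaviour: reloading a tracked file updates changed chunks and
# reports per-call counts, mirroring the return hash documented above.
second = htm.load_file("docs/guide.md")
puts "Re-sync: #{second[:chunks_updated]} updated, #{second[:chunks_deleted]} deleted"

puts "#{htm.nodes_from_file('docs/guide.md').size} chunks currently loaded"
```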
@@ -518,13 +560,14 @@ HTM provides a minimal, focused API with only 3 core instance methods for memory
 
  ### Core Memory Operations
 
- #### `remember(content, source: "")`
+ #### `remember(content, source: "", metadata: {})`
 
  Store information in memory. Embeddings and tags are automatically generated asynchronously.
 
  **Parameters:**
  - `content` (String, required) - The information to remember. Converted to string if nil. Returns ID of last node if empty.
  - `source` (String, optional) - Where the content came from (e.g., "user", "assistant", "system"). Defaults to empty string.
+ - `metadata` (Hash, optional) - Arbitrary key-value metadata stored as JSONB. Keys must be strings or symbols. Defaults to `{}`.
 
  **Returns:** Integer - The node ID of the stored memory
 
@@ -536,6 +579,12 @@ node_id = htm.remember("PostgreSQL is excellent for vector search with pgvector"
  # Store without source (uses default empty string)
  node_id = htm.remember("HTM uses two-tier memory architecture")
 
+ # Store with metadata
+ node_id = htm.remember(
+ "User prefers dark mode",
+ metadata: { category: "preference", priority: "high", version: 2 }
+ )
+
  # Nil/empty handling
  node_id = htm.remember(nil) # Returns ID of last node without creating duplicate
  node_id = htm.remember("") # Returns ID of last node without creating duplicate
@@ -543,7 +592,7 @@
 
  ---
 
- #### `recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [])`
+ #### `recall(topic, timeframe: nil, limit: 20, strategy: :vector, with_relevance: false, query_tags: [], metadata: {})`
  Retrieve memories using temporal filtering and semantic/keyword search.
 
 
@@ -559,8 +608,9 @@ Retrieve memories using temporal filtering and semantic/keyword search.
  - `:hybrid` - Weighted combination (70% vector, 30% full-text)
  - `with_relevance` (Boolean, optional) - Include dynamic relevance scores. Default: false
  - `query_tags` (Array<String>, optional) - Filter results by tags. Default: []
+ - `metadata` (Hash, optional) - Filter results by metadata using JSONB containment (`@>`). Default: `{}`
 
- **Returns:** Array<Hash> - Matching memories with fields: `id`, `content`, `source`, `created_at`, `access_count`, (optionally `relevance`)
+ **Returns:** Array<Hash> - Matching memories with fields: `id`, `content`, `source`, `created_at`, `access_count`, `metadata`, (optionally `relevance`)
 
  **Example:**
  ```ruby
@@ -586,6 +636,21 @@ memories = htm.recall(
  query_tags: ["architecture"]
  )
  # => [{ "id" => 123, "content" => "...", "relevance" => 0.92, ... }, ...]
+
+ # Filter by metadata
+ memories = htm.recall(
+ "user preferences",
+ metadata: { category: "preference" }
+ )
+ # => Returns only nodes with metadata containing { category: "preference" }
+
+ # Combine metadata with other filters
+ memories = htm.recall(
+ "settings",
+ timeframe: "last month",
+ strategy: :hybrid,
+ metadata: { priority: "high", version: 2 }
+ )
  ```
 
  ---
@@ -1312,10 +1377,16 @@ See [htm_teamwork.md](htm_teamwork.md) for detailed design documentation and pla
  ### Database Schema
 
  - `robots`: Robot registry for all LLM agents using HTM
- - `nodes`: Main memory storage with vector embeddings (pgvector), full-text search (tsvector), metadata
+ - `nodes`: Main memory storage with vector embeddings (pgvector), full-text search (tsvector), JSONB metadata
  - `tags`: Hierarchical tag ontology (format: `root:level1:level2:level3`)
  - `node_tags`: Join table implementing many-to-many relationship between nodes and tags
 
+ **Nodes Table Key Columns:**
+ - `content`: The memory content
+ - `embedding`: Vector embedding for semantic search (up to 2000 dimensions)
+ - `metadata`: JSONB column for arbitrary key-value data (filterable via `@>` containment operator)
+ - `content_hash`: SHA-256 hash for deduplication
+
  ### Service Architecture
 
  HTM uses a layered architecture for LLM integration:
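The JSONB `metadata` column and its GIN index, noted in the schema summary above, are what back the `recall(metadata:)` filter. A brief editorial sketch of the documented call and, for orientation, the kind of containment predicate it corresponds to; the raw SQL here is illustrative, not code from the gem:

```ruby
require "htm"

htm = HTM.new(robot_name: "Preferences Bot")
htm.remember("User prefers dark mode", metadata: { category: "preference" })

# Documented API: JSONB containment filter on the metadata column
hits = htm.recall("user preferences", metadata: { category: "preference" })

# Roughly the predicate shape the GIN-indexed column supports (illustrative SQL):
sql = <<~SQL
  SELECT id, content, metadata
  FROM nodes
  WHERE deleted_at IS NULL
    AND metadata @> '{"category": "preference"}'::jsonb
SQL
```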
data/Rakefile CHANGED
@@ -28,6 +28,11 @@ task :example do
  ruby "examples/basic_usage.rb"
  end
 
+ desc "Run timeframe demo"
+ task :timeframe_demo do
+ ruby "examples/timeframe_demo.rb"
+ end
+
  desc "Show gem stats"
  task :stats do
  puts "\nHTM Gem Statistics:"
data/db/migrate/{20250101000001_enable_extensions.rb → 00001_enable_extensions.rb} RENAMED
@@ -2,7 +2,6 @@
 
  class EnableExtensions < ActiveRecord::Migration[7.1]
  def up
- # Note: On TimescaleDB Cloud, the extension is named 'vector' not 'pgvector'
  enable_extension 'vector'
  enable_extension 'pg_trgm'
  end
data/db/migrate/00002_create_robots.rb ADDED
@@ -0,0 +1,11 @@
+ # frozen_string_literal: true
+
+ class CreateRobots < ActiveRecord::Migration[7.1]
+ def change
+ create_table :robots, comment: 'Registry of all LLM robots using the HTM system' do |t|
+ t.text :name, comment: 'Human-readable name for the robot'
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When the robot was first registered'
+ t.timestamptz :last_active, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'Last time the robot accessed the system'
+ end
+ end
+ end
data/db/migrate/00003_create_file_sources.rb ADDED
@@ -0,0 +1,20 @@
+ # frozen_string_literal: true
+
+ class CreateFileSources < ActiveRecord::Migration[7.1]
+ def change
+ create_table :file_sources, comment: 'Source file metadata for loaded documents' do |t|
+ t.text :file_path, null: false, comment: 'Absolute path to source file'
+ t.string :file_hash, limit: 64, comment: 'SHA-256 hash of file content'
+ t.timestamptz :mtime, comment: 'File modification time'
+ t.integer :file_size, comment: 'File size in bytes'
+ t.jsonb :frontmatter, default: {}, comment: 'Parsed YAML frontmatter'
+ t.timestamptz :last_synced_at, comment: 'When file was last synced to HTM'
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }
+ t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }
+ end
+
+ add_index :file_sources, :file_path, unique: true, name: 'idx_file_sources_path_unique'
+ add_index :file_sources, :file_hash, name: 'idx_file_sources_hash'
+ add_index :file_sources, :last_synced_at, name: 'idx_file_sources_last_synced'
+ end
+ end
data/db/migrate/00004_create_nodes.rb ADDED
@@ -0,0 +1,65 @@
+ # frozen_string_literal: true
+
+ class CreateNodes < ActiveRecord::Migration[7.1]
+ def change
+ create_table :nodes, comment: 'Core memory storage for conversation messages and context' do |t|
+ t.text :content, null: false, comment: 'The conversation message/utterance content'
+ t.integer :access_count, default: 0, null: false, comment: 'Number of times this node has been accessed/retrieved'
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was created'
+ t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last modified'
+ t.timestamptz :last_accessed, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this memory was last accessed'
+ t.integer :token_count, comment: 'Number of tokens in the content (for context budget management)'
+ t.vector :embedding, limit: 2000, comment: 'Vector embedding (max 2000 dimensions) for semantic search'
+ t.integer :embedding_dimension, comment: 'Actual number of dimensions used in the embedding vector (max 2000)'
+ t.string :content_hash, limit: 64, comment: 'SHA-256 hash of content for deduplication'
+ t.timestamptz :deleted_at, comment: 'Soft delete timestamp - node is considered deleted when set'
+ t.bigint :source_id, comment: 'Reference to source file (for file-loaded nodes)'
+ t.integer :chunk_position, comment: 'Position within source file (0-indexed)'
+ t.jsonb :metadata, default: {}, null: false, comment: 'Flexible metadata storage (memory_type, importance, source, etc.)'
+ end
+
+ # Basic indexes for common queries
+ add_index :nodes, :created_at, name: 'idx_nodes_created_at'
+ add_index :nodes, :updated_at, name: 'idx_nodes_updated_at'
+ add_index :nodes, :last_accessed, name: 'idx_nodes_last_accessed'
+ add_index :nodes, :access_count, name: 'idx_nodes_access_count'
+ add_index :nodes, :content_hash, unique: true, name: 'idx_nodes_content_hash_unique'
+ add_index :nodes, :deleted_at, name: 'idx_nodes_deleted_at'
+ add_index :nodes, :source_id, name: 'idx_nodes_source_id'
+ add_index :nodes, [:source_id, :chunk_position], name: 'idx_nodes_source_chunk_position'
+
+ # Partial index for efficiently querying non-deleted nodes
+ add_index :nodes, :created_at, name: 'idx_nodes_not_deleted_created_at', where: 'deleted_at IS NULL'
+
+ # GIN index for JSONB metadata queries
+ add_index :nodes, :metadata, using: :gin, name: 'idx_nodes_metadata'
+
+ # Vector similarity search index (HNSW for better performance)
+ execute <<-SQL
+ CREATE INDEX idx_nodes_embedding ON nodes
+ USING hnsw (embedding vector_cosine_ops)
+ WITH (m = 16, ef_construction = 64)
+ SQL
+
+ # Full-text search on conversation content
+ execute <<-SQL
+ CREATE INDEX idx_nodes_content_gin ON nodes
+ USING gin(to_tsvector('english', content))
+ SQL
+
+ # Trigram indexes for fuzzy matching on conversation content
+ execute <<-SQL
+ CREATE INDEX idx_nodes_content_trgm ON nodes
+ USING gin(content gin_trgm_ops)
+ SQL
+
+ # Check constraint for embedding dimensions
+ execute <<-SQL
+ ALTER TABLE nodes ADD CONSTRAINT check_embedding_dimension
+ CHECK (embedding_dimension IS NULL OR (embedding_dimension > 0 AND embedding_dimension <= 2000))
+ SQL
+
+ # Foreign key to file_sources table
+ add_foreign_key :nodes, :file_sources, column: :source_id, on_delete: :nullify
+ end
+ end
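The HNSW index created in this migration exists to accelerate nearest-neighbour ordering on the `embedding` column. An editorial sketch of the query shape it serves, using pgvector's cosine-distance operator (`<=>`, the one `vector_cosine_ops` indexes); the vector literal is a placeholder, and in the gem this path is reached through `htm.recall(strategy: :vector)` rather than hand-written SQL:

```ruby
# Illustrative only: the embedding below is a stand-in for a real vector
# produced by the embedding service (up to 2000 dimensions).
query_vector = "[0.013, -0.207, 0.114]"

# Order by cosine distance to the query vector and keep the closest matches.
sql = <<~SQL
  SELECT id, content, embedding <=> '#{query_vector}' AS cosine_distance
  FROM nodes
  WHERE deleted_at IS NULL AND embedding IS NOT NULL
  ORDER BY embedding <=> '#{query_vector}'
  LIMIT 20
SQL
```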
data/db/migrate/00005_create_tags.rb ADDED
@@ -0,0 +1,13 @@
+ # frozen_string_literal: true
+
+ class CreateTags < ActiveRecord::Migration[7.1]
+ def change
+ create_table :tags, comment: 'Unique tag names for categorization' do |t|
+ t.text :name, null: false, comment: 'Hierarchical tag in format: root:level1:level2 (e.g., database:postgresql:timescaledb)'
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this tag was created'
+ end
+
+ add_index :tags, :name, unique: true, name: 'idx_tags_name_unique'
+ add_index :tags, :name, using: :btree, opclass: :text_pattern_ops, name: 'idx_tags_name_pattern'
+ end
+ end
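Because tag names are hierarchical (`root:level1:level2`), the `text_pattern_ops` index above is well suited to prefix matching. A short editorial sketch of the kind of lookup it enables; illustrative SQL, not an API exposed by the gem:

```ruby
# Find every tag under the "database" root, e.g. database:postgresql:timescaledb.
sql = <<~SQL
  SELECT name
  FROM tags
  WHERE name LIKE 'database:%'
  ORDER BY name
SQL
```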
data/db/migrate/00006_create_node_tags.rb ADDED
@@ -0,0 +1,18 @@
+ # frozen_string_literal: true
+
+ class CreateNodeTags < ActiveRecord::Migration[7.1]
+ def change
+ create_table :node_tags, comment: 'Join table connecting nodes to tags (many-to-many)' do |t|
+ t.bigint :node_id, null: false, comment: 'ID of the node being tagged'
+ t.bigint :tag_id, null: false, comment: 'ID of the tag being applied'
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }, comment: 'When this association was created'
+ end
+
+ add_index :node_tags, [:node_id, :tag_id], unique: true, name: 'idx_node_tags_unique'
+ add_index :node_tags, :node_id, name: 'idx_node_tags_node_id'
+ add_index :node_tags, :tag_id, name: 'idx_node_tags_tag_id'
+
+ add_foreign_key :node_tags, :nodes, column: :node_id, on_delete: :cascade
+ add_foreign_key :node_tags, :tags, column: :tag_id, on_delete: :cascade
+ end
+ end
data/db/migrate/00007_create_robot_nodes.rb ADDED
@@ -0,0 +1,26 @@
+ # frozen_string_literal: true
+
+ class CreateRobotNodes < ActiveRecord::Migration[7.1]
+ def change
+ create_table :robot_nodes, comment: 'Join table connecting robots to nodes (many-to-many)' do |t|
+ t.bigint :robot_id, null: false, comment: 'ID of the robot that remembered this node'
+ t.bigint :node_id, null: false, comment: 'ID of the node being remembered'
+ t.timestamptz :first_remembered_at, default: -> { 'CURRENT_TIMESTAMP' },
+ comment: 'When this robot first remembered this content'
+ t.timestamptz :last_remembered_at, default: -> { 'CURRENT_TIMESTAMP' },
+ comment: 'When this robot last tried to remember this content'
+ t.integer :remember_count, default: 1, null: false,
+ comment: 'Number of times this robot has tried to remember this content'
+ t.timestamptz :created_at, default: -> { 'CURRENT_TIMESTAMP' }
+ t.timestamptz :updated_at, default: -> { 'CURRENT_TIMESTAMP' }
+ end
+
+ add_index :robot_nodes, [:robot_id, :node_id], unique: true, name: 'idx_robot_nodes_unique'
+ add_index :robot_nodes, :robot_id, name: 'idx_robot_nodes_robot_id'
+ add_index :robot_nodes, :node_id, name: 'idx_robot_nodes_node_id'
+ add_index :robot_nodes, :last_remembered_at, name: 'idx_robot_nodes_last_remembered_at'
+
+ add_foreign_key :robot_nodes, :robots, column: :robot_id, on_delete: :cascade
+ add_foreign_key :robot_nodes, :nodes, column: :node_id, on_delete: :cascade
+ end
+ end
data/db/migrate/00009_add_working_memory_to_robot_nodes.rb ADDED
@@ -0,0 +1,12 @@
+ # frozen_string_literal: true
+
+ class AddWorkingMemoryToRobotNodes < ActiveRecord::Migration[7.1]
+ def change
+ add_column :robot_nodes, :working_memory, :boolean, default: false, null: false,
+ comment: 'True if this node is currently in the robot working memory'
+
+ add_index :robot_nodes, [:robot_id, :working_memory],
+ where: 'working_memory = true',
+ name: 'idx_robot_nodes_working_memory'
+ end
+ end
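The partial index added above targets one lookup: which nodes are currently flagged as working memory for a given robot. An editorial sketch of that query; inside the gem this is presumably mediated by `HTM::WorkingMemory`, so the SQL is for orientation only:

```ruby
robot_id = 1 # placeholder

sql = <<~SQL
  SELECT n.id, n.content, rn.last_remembered_at
  FROM robot_nodes rn
  JOIN nodes n ON n.id = rn.node_id
  WHERE rn.robot_id = #{robot_id}
    AND rn.working_memory = TRUE
    AND n.deleted_at IS NULL
  ORDER BY rn.last_remembered_at DESC
SQL
```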
data/db/schema.sql CHANGED
@@ -23,6 +23,81 @@ CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public;
  -- Name: EXTENSION vector; Type: COMMENT; Schema: -; Owner: -
  --
 
+ --
+ -- Name: file_sources; Type: TABLE; Schema: public; Owner: -
+ --
+
+ CREATE TABLE public.file_sources (
+ id bigint NOT NULL,
+ file_path text NOT NULL,
+ file_hash character varying(64),
+ mtime timestamp with time zone,
+ file_size integer,
+ frontmatter jsonb DEFAULT '{}'::jsonb,
+ last_synced_at timestamp with time zone,
+ created_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP,
+ updated_at timestamp with time zone DEFAULT CURRENT_TIMESTAMP
+ );
+
+ --
+ -- Name: TABLE file_sources; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON TABLE public.file_sources IS 'Source file metadata for loaded documents';
+
+ --
+ -- Name: COLUMN file_sources.file_path; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.file_sources.file_path IS 'Absolute path to source file';
+
+ --
+ -- Name: COLUMN file_sources.file_hash; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.file_sources.file_hash IS 'SHA-256 hash of file content';
+
+ --
+ -- Name: COLUMN file_sources.mtime; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.file_sources.mtime IS 'File modification time';
+
+ --
+ -- Name: COLUMN file_sources.file_size; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.file_sources.file_size IS 'File size in bytes';
+
+ --
+ -- Name: COLUMN file_sources.frontmatter; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.file_sources.frontmatter IS 'Parsed YAML frontmatter';
+
+ --
+ -- Name: COLUMN file_sources.last_synced_at; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.file_sources.last_synced_at IS 'When file was last synced to HTM';
+
+ --
+ -- Name: file_sources_id_seq; Type: SEQUENCE; Schema: public; Owner: -
+ --
+
+ CREATE SEQUENCE public.file_sources_id_seq
+ START WITH 1
+ INCREMENT BY 1
+ NO MINVALUE
+ NO MAXVALUE
+ CACHE 1;
+
+ --
+ -- Name: file_sources_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
+ --
+
+ ALTER SEQUENCE public.file_sources_id_seq OWNED BY public.file_sources.id;
+
  --
  -- Name: node_tags; Type: TABLE; Schema: public; Owner: -
  --
@@ -90,6 +165,10 @@ CREATE TABLE public.nodes (
  embedding public.vector(2000),
  embedding_dimension integer,
  content_hash character varying(64),
+ deleted_at timestamp with time zone,
+ source_id bigint,
+ chunk_position integer,
+ metadata jsonb DEFAULT '{}'::jsonb NOT NULL,
  CONSTRAINT check_embedding_dimension CHECK (((embedding_dimension IS NULL) OR ((embedding_dimension > 0) AND (embedding_dimension <= 2000))))
  );
 
@@ -153,6 +232,30 @@ COMMENT ON COLUMN public.nodes.embedding_dimension IS 'Actual number of dimensio
 
  COMMENT ON COLUMN public.nodes.content_hash IS 'SHA-256 hash of content for deduplication';
 
+ --
+ -- Name: COLUMN nodes.deleted_at; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.nodes.deleted_at IS 'Soft delete timestamp - node is considered deleted when set';
+
+ --
+ -- Name: COLUMN nodes.source_id; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.nodes.source_id IS 'Reference to source file (for file-loaded nodes)';
+
+ --
+ -- Name: COLUMN nodes.chunk_position; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.nodes.chunk_position IS 'Position within source file (0-indexed)';
+
+ --
+ -- Name: COLUMN nodes.metadata; Type: COMMENT; Schema: public; Owner: -
+ --
+
+ COMMENT ON COLUMN public.nodes.metadata IS 'Flexible metadata storage (memory_type, importance, source, etc.)';
+
  --
  -- Name: nodes_id_seq; Type: SEQUENCE; Schema: public; Owner: -
  --
@@ -402,6 +505,12 @@ CREATE SEQUENCE public.working_memories_id_seq
 
  ALTER SEQUENCE public.working_memories_id_seq OWNED BY public.working_memories.id;
 
+ --
+ -- Name: file_sources id; Type: DEFAULT; Schema: public; Owner: -
+ --
+
+ ALTER TABLE ONLY public.file_sources ALTER COLUMN id SET DEFAULT nextval('public.file_sources_id_seq'::regclass);
+
  --
  -- Name: node_tags id; Type: DEFAULT; Schema: public; Owner: -
  --
@@ -438,6 +547,13 @@ ALTER TABLE ONLY public.tags ALTER COLUMN id SET DEFAULT nextval('public.tags_id
 
  ALTER TABLE ONLY public.working_memories ALTER COLUMN id SET DEFAULT nextval('public.working_memories_id_seq'::regclass);
 
+ --
+ -- Name: file_sources file_sources_pkey; Type: CONSTRAINT; Schema: public; Owner: -
+ --
+
+ ALTER TABLE ONLY public.file_sources
+ ADD CONSTRAINT file_sources_pkey PRIMARY KEY (id);
+
  --
  -- Name: node_tags node_tags_pkey; Type: CONSTRAINT; Schema: public; Owner: -
  --
@@ -487,6 +603,24 @@ ALTER TABLE ONLY public.tags
  ALTER TABLE ONLY public.working_memories
  ADD CONSTRAINT working_memories_pkey PRIMARY KEY (id);
 
+ --
+ -- Name: idx_file_sources_hash; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE INDEX idx_file_sources_hash ON public.file_sources USING btree (file_hash);
+
+ --
+ -- Name: idx_file_sources_last_synced; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE INDEX idx_file_sources_last_synced ON public.file_sources USING btree (last_synced_at);
+
+ --
+ -- Name: idx_file_sources_path_unique; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE UNIQUE INDEX idx_file_sources_path_unique ON public.file_sources USING btree (file_path);
+
  --
  -- Name: idx_node_tags_node_id; Type: INDEX; Schema: public; Owner: -
  --
@@ -535,6 +669,12 @@ CREATE INDEX idx_nodes_content_trgm ON public.nodes USING gin (content public.gi
 
  CREATE INDEX idx_nodes_created_at ON public.nodes USING btree (created_at);
 
+ --
+ -- Name: idx_nodes_deleted_at; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE INDEX idx_nodes_deleted_at ON public.nodes USING btree (deleted_at);
+
  --
  -- Name: idx_nodes_embedding; Type: INDEX; Schema: public; Owner: -
  --
@@ -547,6 +687,30 @@ CREATE INDEX idx_nodes_embedding ON public.nodes USING hnsw (embedding public.ve
 
  CREATE INDEX idx_nodes_last_accessed ON public.nodes USING btree (last_accessed);
 
+ --
+ -- Name: idx_nodes_metadata; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE INDEX idx_nodes_metadata ON public.nodes USING gin (metadata);
+
+ --
+ -- Name: idx_nodes_not_deleted_created_at; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE INDEX idx_nodes_not_deleted_created_at ON public.nodes USING btree (created_at) WHERE (deleted_at IS NULL);
+
+ --
+ -- Name: idx_nodes_source_chunk_position; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE INDEX idx_nodes_source_chunk_position ON public.nodes USING btree (source_id, chunk_position);
+
+ --
+ -- Name: idx_nodes_source_id; Type: INDEX; Schema: public; Owner: -
+ --
+
+ CREATE INDEX idx_nodes_source_id ON public.nodes USING btree (source_id);
+
  --
  -- Name: idx_nodes_updated_at; Type: INDEX; Schema: public; Owner: -
  --
@@ -621,6 +785,13 @@ ALTER TABLE ONLY public.working_memories
  ALTER TABLE ONLY public.working_memories
  ADD CONSTRAINT fk_rails_4b7c3eb07b FOREIGN KEY (robot_id) REFERENCES public.robots(id) ON DELETE CASCADE;
 
+ --
+ -- Name: nodes fk_rails_920ad16d08; Type: FK CONSTRAINT; Schema: public; Owner: -
+ --
+
+ ALTER TABLE ONLY public.nodes
+ ADD CONSTRAINT fk_rails_920ad16d08 FOREIGN KEY (source_id) REFERENCES public.file_sources(id) ON DELETE SET NULL;
+
  --
  -- Name: robot_nodes fk_rails_9b003078a8; Type: FK CONSTRAINT; Schema: public; Owner: -
  --
@@ -653,4 +824,4 @@ ALTER TABLE ONLY public.robot_nodes
  -- PostgreSQL database dump complete
  --
 
- \unrestrict 6qynyffXXn5BTZM7u0DVZKV2Nc24dPezkY3OOwzriuYfchXNsoQuf114yBOqrIb
+ \unrestrict DUrF24Zrve4qSBwlDrJ4qAzzZhvhX5s2S57oHYVJ0ZPbaDC4ItMZ29Pv9oI3Q9d
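The `file_sources` columns in the dump above (unique `file_path`, `file_hash`, `mtime`, `last_synced_at`) are what make re-sync decisions possible. An editorial sketch of the comparison they support; this is an assumed workflow, not the markdown loader's actual code, and the SQL is illustrative only:

```ruby
require "digest"

path  = File.expand_path("docs/guide.md")
hash  = Digest::SHA256.hexdigest(File.read(path))
mtime = File.mtime(path)

# Look up the stored row via the unique file_path index, then reload
# (e.g. with htm.load_file(path)) only when the hash or mtime has moved on.
sql = <<~SQL
  SELECT id, file_hash, mtime, last_synced_at
  FROM file_sources
  WHERE file_path = '#{path}'
SQL
```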
data/docs/api/database.md CHANGED
@@ -276,9 +276,8 @@ For detailed database schema documentation, see:
  | [robots](../database/public.robots.md) | Robot registry for multi-robot tracking |
  | [nodes](../database/public.nodes.md) | Primary memory storage with vector embeddings |
  | [tags](../database/public.tags.md) | Hierarchical tag names for categorization |
- | [robot_nodes](../database/public.robot_nodes.md) | Robot-to-node associations (hive mind) |
+ | [robot_nodes](../database/public.robot_nodes.md) | Robot-to-node associations (hive mind, working memory) |
  | [node_tags](../database/public.node_tags.md) | Node-to-tag associations |
- | [working_memories](../database/public.working_memories.md) | Per-robot working memory state |
 
  ### Required Extensions