hazo_files 1.4.2 → 1.4.3

This diff compares the contents of two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -185,6 +185,8 @@ interface FileMetadataInput {
185
185
  uploaded_by?: string;
186
186
  /** Original filename at upload time (V2) */
187
187
  original_filename?: string;
188
+ /** Content tag classifying the document type (V3) */
189
+ content_tag?: string;
188
190
  }
189
191
  /**
190
192
  * Input for updating an existing metadata record
@@ -258,6 +260,23 @@ interface RemoveExtractionOptions {
258
260
  /** Merge strategy to use when recalculating (default: 'shallow') */
259
261
  mergeStrategy?: 'shallow' | 'deep';
260
262
  }
263
+ /**
264
+ * Configuration for LLM-based content tagging.
265
+ * When enabled, calls the LLM with a specific prompt and writes
266
+ * the extracted field value to the content_tag column.
267
+ */
268
+ interface ContentTagConfig {
269
+ /** Whether to enable LLM-based content tagging */
270
+ content_tag_set_by_llm: boolean;
271
+ /** Prompt area for hazo_llm_api lookup */
272
+ content_tag_prompt_area: string;
273
+ /** Prompt key within the area */
274
+ content_tag_prompt_key: string;
275
+ /** Optional variables to substitute in the prompt template */
276
+ content_tag_prompt_variables?: Record<string, string>;
277
+ /** Field name to extract from the LLM response as the content tag */
278
+ content_tag_prompt_return_fieldname: string;
279
+ }
261
280
 
262
281
  /**
263
282
  * Naming convention types for hazo_files
@@ -412,6 +431,8 @@ interface FileMetadataRecordV2 extends FileMetadataRecord {
412
431
  storage_verified_at?: string | null;
413
432
  /** ISO timestamp when file was soft-deleted */
414
433
  deleted_at?: string | null;
434
+ /** Content tag classifying the document type (V3) */
435
+ content_tag?: string | null;
415
436
  }
416
437
  /**
417
438
  * Options for adding a reference to a file
@@ -1004,7 +1025,7 @@ declare class FileMetadataService {
1004
1025
  /**
1005
1026
  * Update specific V2 fields on a record
1006
1027
  */
1007
- updateFields(fileId: string, fields: Partial<Pick<FileMetadataRecordV2, 'scope_id' | 'uploaded_by' | 'original_filename' | 'storage_verified_at' | 'status'>>): Promise<boolean>;
1028
+ updateFields(fileId: string, fields: Partial<Pick<FileMetadataRecordV2, 'scope_id' | 'uploaded_by' | 'original_filename' | 'storage_verified_at' | 'status' | 'content_tag'>>): Promise<boolean>;
1008
1029
  /**
1009
1030
  * Find orphaned files (zero references)
1010
1031
  */
@@ -1532,6 +1553,11 @@ interface UploadExtractOptions extends TrackedUploadOptions {
1532
1553
  * Whether to create the folder path if it doesn't exist
1533
1554
  */
1534
1555
  createFolders?: boolean;
1556
+ /**
1557
+ * Content tag configuration for this upload.
1558
+ * Overrides the default config set on the service.
1559
+ */
1560
+ contentTagConfig?: ContentTagConfig;
1535
1561
  }
1536
1562
  /**
1537
1563
  * Result of upload with extraction
@@ -1551,6 +1577,8 @@ interface UploadExtractResult {
1551
1577
  generatedFolderPath?: string;
1552
1578
  /** Original file name before renaming */
1553
1579
  originalFileName?: string;
1580
+ /** Content tag assigned by LLM (if content tagging was performed) */
1581
+ contentTag?: string;
1554
1582
  }
1555
1583
  /**
1556
1584
  * Options for creating folders from naming convention
@@ -1608,7 +1636,8 @@ declare class UploadExtractService {
1608
1636
  private fileManager;
1609
1637
  private namingService?;
1610
1638
  private extractionService?;
1611
- constructor(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService);
1639
+ private defaultContentTagConfig?;
1640
+ constructor(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService, defaultContentTagConfig?: ContentTagConfig);
1612
1641
  /**
1613
1642
  * Upload a file with optional extraction and naming convention
1614
1643
  */
@@ -1633,6 +1662,21 @@ declare class UploadExtractService {
1633
1662
  fullPath?: string;
1634
1663
  folderPath?: string;
1635
1664
  }>;
1665
+ /**
1666
+ * Perform content tagging via LLM extraction.
1667
+ * Calls the LLM with the configured prompt, extracts the specified field,
1668
+ * and writes it to the content_tag column.
1669
+ */
1670
+ private performContentTagging;
1671
+ /**
1672
+ * Manually tag a file's content via LLM.
1673
+ * Works with existing DB records, resolving the file path internally.
1674
+ *
1675
+ * @param fileId - Database record ID of the file
1676
+ * @param config - Content tag config (falls back to default if not provided)
1677
+ * @returns OperationResult with the tag value
1678
+ */
1679
+ tagFileContent(fileId: string, config?: ContentTagConfig): Promise<OperationResult<string>>;
1636
1680
  /**
1637
1681
  * Get the file manager
1638
1682
  */
@@ -1649,7 +1693,7 @@ declare class UploadExtractService {
1649
1693
  /**
1650
1694
  * Create an UploadExtractService instance
1651
1695
  */
1652
- declare function createUploadExtractService(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService): UploadExtractService;
1696
+ declare function createUploadExtractService(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService, defaultContentTagConfig?: ContentTagConfig): UploadExtractService;
1653
1697
 
1654
1698
  /**
1655
1699
  * Server Factory
@@ -1723,6 +1767,11 @@ interface HazoFilesServerOptions {
1723
1767
  * Track download/access operations
1724
1768
  */
1725
1769
  trackDownloads?: boolean;
1770
+ /**
1771
+ * Default content tag configuration for LLM-based content classification.
1772
+ * When set, uploads can automatically classify document content.
1773
+ */
1774
+ defaultContentTagConfig?: ContentTagConfig;
1726
1775
  }
1727
1776
  /**
1728
1777
  * Result of createHazoFilesServer
@@ -1863,6 +1912,8 @@ interface HazoFilesColumnDefinitions {
1863
1912
  deleted_at: 'TEXT' | 'TIMESTAMP';
1864
1913
  /** Original filename at upload time (V2) */
1865
1914
  original_filename: 'TEXT';
1915
+ /** Content tag classifying the document type (V3) */
1916
+ content_tag: 'TEXT';
1866
1917
  }
1867
1918
  /**
1868
1919
  * Schema definition for a specific database type
@@ -2045,6 +2096,46 @@ declare const HAZO_FILES_NAMING_TABLE_SCHEMA: HazoFilesNamingTableSchema;
2045
2096
  * Get DDL for a custom naming table name
2046
2097
  */
2047
2098
  declare function getNamingSchemaForTable(tableName: string, dbType: 'sqlite' | 'postgres'): DatabaseSchemaDefinition;
2099
+ /**
2100
+ * Migration schema for adding V3 content tagging column to existing tables.
2101
+ * Idempotent — safe to run multiple times.
2102
+ *
2103
+ * @example
2104
+ * ```typescript
2105
+ * import { HAZO_FILES_MIGRATION_V3 } from 'hazo_files';
2106
+ *
2107
+ * // SQLite
2108
+ * for (const stmt of HAZO_FILES_MIGRATION_V3.sqlite.alterStatements) {
2109
+ * try { await db.run(stmt); } catch { /* column already exists *\/ }
2110
+ * }
2111
+ * for (const idx of HAZO_FILES_MIGRATION_V3.sqlite.indexes) {
2112
+ * await db.run(idx);
2113
+ * }
2114
+ *
2115
+ * // PostgreSQL
2116
+ * for (const stmt of HAZO_FILES_MIGRATION_V3.postgres.alterStatements) {
2117
+ * await client.query(stmt);
2118
+ * }
2119
+ * for (const idx of HAZO_FILES_MIGRATION_V3.postgres.indexes) {
2120
+ * await client.query(idx);
2121
+ * }
2122
+ * ```
2123
+ */
2124
+ interface HazoFilesMigrationV3 {
2125
+ /** Default table name */
2126
+ tableName: string;
2127
+ /** SQLite migration statements */
2128
+ sqlite: MigrationSchemaDefinition;
2129
+ /** PostgreSQL migration statements */
2130
+ postgres: MigrationSchemaDefinition;
2131
+ /** New column names added in V3 */
2132
+ newColumns: readonly string[];
2133
+ }
2134
+ declare const HAZO_FILES_MIGRATION_V3: HazoFilesMigrationV3;
2135
+ /**
2136
+ * Get V3 migration statements for a custom table name
2137
+ */
2138
+ declare function getMigrationV3ForTable(tableName: string, dbType: 'sqlite' | 'postgres'): MigrationSchemaDefinition;
2048
2139
 
2049
2140
  /**
2050
2141
  * Migration: Add Reference Tracking (V2)
@@ -2087,6 +2178,33 @@ declare function migrateToV2(executor: MigrationExecutor, dbType: 'sqlite' | 'po
2087
2178
  */
2088
2179
  declare function backfillV2Defaults(executor: MigrationExecutor, dbType: 'sqlite' | 'postgres', tableName?: string): Promise<void>;
2089
2180
 
2181
+ /**
2182
+ * Migration: Add Content Tag (V3)
2183
+ *
2184
+ * Adds content_tag column to an existing hazo_files table.
2185
+ * Idempotent — safe to run multiple times.
2186
+ */
2187
+
2188
+ /**
2189
+ * Run the V3 migration: add content_tag column and index.
2190
+ *
2191
+ * @param executor - Object with a `run(sql)` method
2192
+ * @param dbType - Database type ('sqlite' | 'postgres')
2193
+ * @param tableName - Custom table name (defaults to 'hazo_files')
2194
+ *
2195
+ * @example
2196
+ * ```typescript
2197
+ * import { migrateToV3 } from 'hazo_files';
2198
+ *
2199
+ * // SQLite with better-sqlite3
2200
+ * await migrateToV3({ run: (sql) => db.exec(sql) }, 'sqlite');
2201
+ *
2202
+ * // PostgreSQL with pg
2203
+ * await migrateToV3({ run: (sql) => client.query(sql) }, 'postgres');
2204
+ * ```
2205
+ */
2206
+ declare function migrateToV3(executor: MigrationExecutor, dbType: 'sqlite' | 'postgres', tableName?: string): Promise<void>;
2207
+
2090
2208
  /**
2091
2209
  * Common utility functions
2092
2210
  */
@@ -2929,4 +3047,4 @@ declare function toV2Record(record: FileMetadataRecord): FileMetadataRecordV2;
2929
3047
  */
2930
3048
  declare function buildFileWithStatus(record: FileMetadataRecord): FileWithStatus;
2931
3049
 
2932
- export { ALL_SYSTEM_VARIABLES, type AddExtractionOptions, type AddRefOptions, type AuthCallbacks, AuthenticationError, type CleanupOrphanedOptions, ConfigurationError, type CreateFolderOptions, type CrudServiceLike, DEFAULT_DATE_FORMATS, type DatabaseSchemaDefinition, type DatabaseTrackingConfig, DirectoryExistsError, DirectoryNotEmptyError, DirectoryNotFoundError, type DownloadOptions, type ExtractionData, type ExtractionOptions, type ExtractionResult, type FileBrowserState, type FileDataStructure, FileExistsError, type FileInfo, type FileItem, FileManager, type FileManagerOptions, type FileMetadataInput, type FileMetadataRecord, type FileMetadataRecordV2, FileMetadataService, type FileMetadataServiceOptions, type FileMetadataUpdate, FileNotFoundError, type FileRef, type FileRefVisibility, type FileStatus, type FileSystemItem, FileTooLargeError, type FileWithStatus, type FindOrphanedOptions, type FolderItem, type GeneratedNameResult, type GoogleAuthConfig, GoogleDriveAuth, type GoogleDriveConfig, GoogleDriveModule, HAZO_FILES_DEFAULT_TABLE_NAME, HAZO_FILES_MIGRATION_V2, HAZO_FILES_NAMING_DEFAULT_TABLE_NAME, HAZO_FILES_NAMING_TABLE_SCHEMA, HAZO_FILES_TABLE_SCHEMA, type HazoFilesColumnDefinitions, type HazoFilesConfig, HazoFilesError, type HazoFilesMigrationV2, type HazoFilesNamingColumnDefinitions, type HazoFilesNamingTableSchema, type HazoFilesServerInstance, type HazoFilesServerOptions, type HazoFilesTableSchema, type HazoLLMInstance, type HazoLogger, InvalidExtensionError, InvalidPathError, LLMExtractionService, type LLMFactory, type LLMProvider, type ListNamingConventionsOptions, type ListOptions, type LocalStorageConfig, LocalStorageModule, type MetadataLogger, type MigrationExecutor, type MigrationSchemaDefinition, type MoveOptions, type NameGenerationOptions, type NamingConventionInput, type NamingConventionRecord, NamingConventionService, type NamingConventionServiceOptions, type NamingConventionType, type NamingConventionUpdate, type 
NamingRuleConfiguratorProps, type NamingRuleHistoryEntry, type NamingRuleSchema, type NamingVariable, OperationError, type OperationResult, type ParsedNamingConvention, type PatternSegment, PermissionDeniedError, type ProgressCallback, type RemoveExtractionOptions, type RemoveRefsCriteria, type RenameOptions, SYSTEM_COUNTER_VARIABLES, SYSTEM_DATE_VARIABLES, SYSTEM_FILE_VARIABLES, type StorageModule, type StorageProvider, type TokenData, TrackedFileManager, type TrackedFileManagerFullOptions, type TrackedFileManagerOptions, type TrackedUploadOptions, type TreeNode, type UploadExtractOptions, type UploadExtractResult, UploadExtractService, type UploadOptions, type UploadWithRefOptions, type UseNamingRuleActions, type UseNamingRuleReturn, type UseNamingRuleState, type VariableCategory, addExtractionToFileData, backfillV2Defaults, buildFileWithStatus, clearExtractions, clonePattern, computeFileHash, computeFileHashFromStream, computeFileHashSync, computeFileInfo, createAndInitializeModule, createBasicFileManager, createEmptyFileDataStructure, createEmptyNamingRuleSchema, createFileItem, createFileManager, createFileMetadataService, createFileRef, createFolderItem, createGoogleDriveAuth, createGoogleDriveModule, createHazoFilesServer, createInitializedFileManager, createInitializedTrackedFileManager, createLLMExtractionService, createLiteralSegment, createLocalModule, createModule, createNamingConventionService, createTrackedFileManager, createUploadExtractService, createVariableSegment, deepMerge, errorResult, filterItems, formatBytes, formatCounter, formatDateToken, generateExtractionId, generateId, generatePreviewName, generateRefId, generateSampleConfig, generateSegmentId, getBaseName, getBreadcrumbs, getDirName, getExtension, getExtensionFromMime, getExtractionById, getExtractionCount, getExtractions, getFileCategory, getFileMetadataValues, getMergedData, getMigrationForTable, getMimeType, getNameWithoutExtension, getNamingSchemaForTable, getParentPath, 
getPathSegments, getRegisteredProviders, getRelativePath, getSchemaForTable, getSystemVariablePreviewValues, hasExtension, hasExtractionStructure, hasFileContentChanged, hashesEqual, hazo_files_generate_file_name, hazo_files_generate_folder_name, isAudio, isChildPath, isCounterVariable, isDateVariable, isDocument, isFile, isFileMetadataVariable, isFolder, isImage, isPreviewable, isProviderRegistered, isText, isVideo, joinPath, loadConfig, loadConfigAsync, migrateToV2, normalizePath, parseConfig, parseFileData, parseFileRefs, parsePatternString, patternToString, recalculateMergedData, registerModule, removeExtractionById, removeExtractionByIndex, removeRefFromArray, removeRefsByCriteriaFromArray, sanitizeFilename, saveConfig, sortItems, stringifyFileData, stringifyFileRefs, successResult, toV2Record, updateExtractionById, validateExtractionData, validateFileDataStructure, validateNamingRuleSchema, validatePath };
3050
+ export { ALL_SYSTEM_VARIABLES, type AddExtractionOptions, type AddRefOptions, type AuthCallbacks, AuthenticationError, type CleanupOrphanedOptions, ConfigurationError, type ContentTagConfig, type CreateFolderOptions, type CrudServiceLike, DEFAULT_DATE_FORMATS, type DatabaseSchemaDefinition, type DatabaseTrackingConfig, DirectoryExistsError, DirectoryNotEmptyError, DirectoryNotFoundError, type DownloadOptions, type ExtractionData, type ExtractionOptions, type ExtractionResult, type FileBrowserState, type FileDataStructure, FileExistsError, type FileInfo, type FileItem, FileManager, type FileManagerOptions, type FileMetadataInput, type FileMetadataRecord, type FileMetadataRecordV2, FileMetadataService, type FileMetadataServiceOptions, type FileMetadataUpdate, FileNotFoundError, type FileRef, type FileRefVisibility, type FileStatus, type FileSystemItem, FileTooLargeError, type FileWithStatus, type FindOrphanedOptions, type FolderItem, type GeneratedNameResult, type GoogleAuthConfig, GoogleDriveAuth, type GoogleDriveConfig, GoogleDriveModule, HAZO_FILES_DEFAULT_TABLE_NAME, HAZO_FILES_MIGRATION_V2, HAZO_FILES_MIGRATION_V3, HAZO_FILES_NAMING_DEFAULT_TABLE_NAME, HAZO_FILES_NAMING_TABLE_SCHEMA, HAZO_FILES_TABLE_SCHEMA, type HazoFilesColumnDefinitions, type HazoFilesConfig, HazoFilesError, type HazoFilesMigrationV2, type HazoFilesMigrationV3, type HazoFilesNamingColumnDefinitions, type HazoFilesNamingTableSchema, type HazoFilesServerInstance, type HazoFilesServerOptions, type HazoFilesTableSchema, type HazoLLMInstance, type HazoLogger, InvalidExtensionError, InvalidPathError, LLMExtractionService, type LLMFactory, type LLMProvider, type ListNamingConventionsOptions, type ListOptions, type LocalStorageConfig, LocalStorageModule, type MetadataLogger, type MigrationExecutor, type MigrationSchemaDefinition, type MoveOptions, type NameGenerationOptions, type NamingConventionInput, type NamingConventionRecord, NamingConventionService, type NamingConventionServiceOptions, type 
NamingConventionType, type NamingConventionUpdate, type NamingRuleConfiguratorProps, type NamingRuleHistoryEntry, type NamingRuleSchema, type NamingVariable, OperationError, type OperationResult, type ParsedNamingConvention, type PatternSegment, PermissionDeniedError, type ProgressCallback, type RemoveExtractionOptions, type RemoveRefsCriteria, type RenameOptions, SYSTEM_COUNTER_VARIABLES, SYSTEM_DATE_VARIABLES, SYSTEM_FILE_VARIABLES, type StorageModule, type StorageProvider, type TokenData, TrackedFileManager, type TrackedFileManagerFullOptions, type TrackedFileManagerOptions, type TrackedUploadOptions, type TreeNode, type UploadExtractOptions, type UploadExtractResult, UploadExtractService, type UploadOptions, type UploadWithRefOptions, type UseNamingRuleActions, type UseNamingRuleReturn, type UseNamingRuleState, type VariableCategory, addExtractionToFileData, backfillV2Defaults, buildFileWithStatus, clearExtractions, clonePattern, computeFileHash, computeFileHashFromStream, computeFileHashSync, computeFileInfo, createAndInitializeModule, createBasicFileManager, createEmptyFileDataStructure, createEmptyNamingRuleSchema, createFileItem, createFileManager, createFileMetadataService, createFileRef, createFolderItem, createGoogleDriveAuth, createGoogleDriveModule, createHazoFilesServer, createInitializedFileManager, createInitializedTrackedFileManager, createLLMExtractionService, createLiteralSegment, createLocalModule, createModule, createNamingConventionService, createTrackedFileManager, createUploadExtractService, createVariableSegment, deepMerge, errorResult, filterItems, formatBytes, formatCounter, formatDateToken, generateExtractionId, generateId, generatePreviewName, generateRefId, generateSampleConfig, generateSegmentId, getBaseName, getBreadcrumbs, getDirName, getExtension, getExtensionFromMime, getExtractionById, getExtractionCount, getExtractions, getFileCategory, getFileMetadataValues, getMergedData, getMigrationForTable, getMigrationV3ForTable, 
getMimeType, getNameWithoutExtension, getNamingSchemaForTable, getParentPath, getPathSegments, getRegisteredProviders, getRelativePath, getSchemaForTable, getSystemVariablePreviewValues, hasExtension, hasExtractionStructure, hasFileContentChanged, hashesEqual, hazo_files_generate_file_name, hazo_files_generate_folder_name, isAudio, isChildPath, isCounterVariable, isDateVariable, isDocument, isFile, isFileMetadataVariable, isFolder, isImage, isPreviewable, isProviderRegistered, isText, isVideo, joinPath, loadConfig, loadConfigAsync, migrateToV2, migrateToV3, normalizePath, parseConfig, parseFileData, parseFileRefs, parsePatternString, patternToString, recalculateMergedData, registerModule, removeExtractionById, removeExtractionByIndex, removeRefFromArray, removeRefsByCriteriaFromArray, sanitizeFilename, saveConfig, sortItems, stringifyFileData, stringifyFileRefs, successResult, toV2Record, updateExtractionById, validateExtractionData, validateFileDataStructure, validateNamingRuleSchema, validatePath };
@@ -185,6 +185,8 @@ interface FileMetadataInput {
185
185
  uploaded_by?: string;
186
186
  /** Original filename at upload time (V2) */
187
187
  original_filename?: string;
188
+ /** Content tag classifying the document type (V3) */
189
+ content_tag?: string;
188
190
  }
189
191
  /**
190
192
  * Input for updating an existing metadata record
@@ -258,6 +260,23 @@ interface RemoveExtractionOptions {
258
260
  /** Merge strategy to use when recalculating (default: 'shallow') */
259
261
  mergeStrategy?: 'shallow' | 'deep';
260
262
  }
263
+ /**
264
+ * Configuration for LLM-based content tagging.
265
+ * When enabled, calls the LLM with a specific prompt and writes
266
+ * the extracted field value to the content_tag column.
267
+ */
268
+ interface ContentTagConfig {
269
+ /** Whether to enable LLM-based content tagging */
270
+ content_tag_set_by_llm: boolean;
271
+ /** Prompt area for hazo_llm_api lookup */
272
+ content_tag_prompt_area: string;
273
+ /** Prompt key within the area */
274
+ content_tag_prompt_key: string;
275
+ /** Optional variables to substitute in the prompt template */
276
+ content_tag_prompt_variables?: Record<string, string>;
277
+ /** Field name to extract from the LLM response as the content tag */
278
+ content_tag_prompt_return_fieldname: string;
279
+ }
261
280
 
262
281
  /**
263
282
  * Naming convention types for hazo_files
@@ -412,6 +431,8 @@ interface FileMetadataRecordV2 extends FileMetadataRecord {
412
431
  storage_verified_at?: string | null;
413
432
  /** ISO timestamp when file was soft-deleted */
414
433
  deleted_at?: string | null;
434
+ /** Content tag classifying the document type (V3) */
435
+ content_tag?: string | null;
415
436
  }
416
437
  /**
417
438
  * Options for adding a reference to a file
@@ -1004,7 +1025,7 @@ declare class FileMetadataService {
1004
1025
  /**
1005
1026
  * Update specific V2 fields on a record
1006
1027
  */
1007
- updateFields(fileId: string, fields: Partial<Pick<FileMetadataRecordV2, 'scope_id' | 'uploaded_by' | 'original_filename' | 'storage_verified_at' | 'status'>>): Promise<boolean>;
1028
+ updateFields(fileId: string, fields: Partial<Pick<FileMetadataRecordV2, 'scope_id' | 'uploaded_by' | 'original_filename' | 'storage_verified_at' | 'status' | 'content_tag'>>): Promise<boolean>;
1008
1029
  /**
1009
1030
  * Find orphaned files (zero references)
1010
1031
  */
@@ -1532,6 +1553,11 @@ interface UploadExtractOptions extends TrackedUploadOptions {
1532
1553
  * Whether to create the folder path if it doesn't exist
1533
1554
  */
1534
1555
  createFolders?: boolean;
1556
+ /**
1557
+ * Content tag configuration for this upload.
1558
+ * Overrides the default config set on the service.
1559
+ */
1560
+ contentTagConfig?: ContentTagConfig;
1535
1561
  }
1536
1562
  /**
1537
1563
  * Result of upload with extraction
@@ -1551,6 +1577,8 @@ interface UploadExtractResult {
1551
1577
  generatedFolderPath?: string;
1552
1578
  /** Original file name before renaming */
1553
1579
  originalFileName?: string;
1580
+ /** Content tag assigned by LLM (if content tagging was performed) */
1581
+ contentTag?: string;
1554
1582
  }
1555
1583
  /**
1556
1584
  * Options for creating folders from naming convention
@@ -1608,7 +1636,8 @@ declare class UploadExtractService {
1608
1636
  private fileManager;
1609
1637
  private namingService?;
1610
1638
  private extractionService?;
1611
- constructor(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService);
1639
+ private defaultContentTagConfig?;
1640
+ constructor(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService, defaultContentTagConfig?: ContentTagConfig);
1612
1641
  /**
1613
1642
  * Upload a file with optional extraction and naming convention
1614
1643
  */
@@ -1633,6 +1662,21 @@ declare class UploadExtractService {
1633
1662
  fullPath?: string;
1634
1663
  folderPath?: string;
1635
1664
  }>;
1665
+ /**
1666
+ * Perform content tagging via LLM extraction.
1667
+ * Calls the LLM with the configured prompt, extracts the specified field,
1668
+ * and writes it to the content_tag column.
1669
+ */
1670
+ private performContentTagging;
1671
+ /**
1672
+ * Manually tag a file's content via LLM.
1673
+ * Works with existing DB records, resolving the file path internally.
1674
+ *
1675
+ * @param fileId - Database record ID of the file
1676
+ * @param config - Content tag config (falls back to default if not provided)
1677
+ * @returns OperationResult with the tag value
1678
+ */
1679
+ tagFileContent(fileId: string, config?: ContentTagConfig): Promise<OperationResult<string>>;
1636
1680
  /**
1637
1681
  * Get the file manager
1638
1682
  */
@@ -1649,7 +1693,7 @@ declare class UploadExtractService {
1649
1693
  /**
1650
1694
  * Create an UploadExtractService instance
1651
1695
  */
1652
- declare function createUploadExtractService(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService): UploadExtractService;
1696
+ declare function createUploadExtractService(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService, defaultContentTagConfig?: ContentTagConfig): UploadExtractService;
1653
1697
 
1654
1698
  /**
1655
1699
  * Server Factory
@@ -1723,6 +1767,11 @@ interface HazoFilesServerOptions {
1723
1767
  * Track download/access operations
1724
1768
  */
1725
1769
  trackDownloads?: boolean;
1770
+ /**
1771
+ * Default content tag configuration for LLM-based content classification.
1772
+ * When set, uploads can automatically classify document content.
1773
+ */
1774
+ defaultContentTagConfig?: ContentTagConfig;
1726
1775
  }
1727
1776
  /**
1728
1777
  * Result of createHazoFilesServer
@@ -1863,6 +1912,8 @@ interface HazoFilesColumnDefinitions {
1863
1912
  deleted_at: 'TEXT' | 'TIMESTAMP';
1864
1913
  /** Original filename at upload time (V2) */
1865
1914
  original_filename: 'TEXT';
1915
+ /** Content tag classifying the document type (V3) */
1916
+ content_tag: 'TEXT';
1866
1917
  }
1867
1918
  /**
1868
1919
  * Schema definition for a specific database type
@@ -2045,6 +2096,46 @@ declare const HAZO_FILES_NAMING_TABLE_SCHEMA: HazoFilesNamingTableSchema;
2045
2096
  * Get DDL for a custom naming table name
2046
2097
  */
2047
2098
  declare function getNamingSchemaForTable(tableName: string, dbType: 'sqlite' | 'postgres'): DatabaseSchemaDefinition;
2099
+ /**
2100
+ * Migration schema for adding V3 content tagging column to existing tables.
2101
+ * Idempotent — safe to run multiple times.
2102
+ *
2103
+ * @example
2104
+ * ```typescript
2105
+ * import { HAZO_FILES_MIGRATION_V3 } from 'hazo_files';
2106
+ *
2107
+ * // SQLite
2108
+ * for (const stmt of HAZO_FILES_MIGRATION_V3.sqlite.alterStatements) {
2109
+ * try { await db.run(stmt); } catch { /* column already exists *\/ }
2110
+ * }
2111
+ * for (const idx of HAZO_FILES_MIGRATION_V3.sqlite.indexes) {
2112
+ * await db.run(idx);
2113
+ * }
2114
+ *
2115
+ * // PostgreSQL
2116
+ * for (const stmt of HAZO_FILES_MIGRATION_V3.postgres.alterStatements) {
2117
+ * await client.query(stmt);
2118
+ * }
2119
+ * for (const idx of HAZO_FILES_MIGRATION_V3.postgres.indexes) {
2120
+ * await client.query(idx);
2121
+ * }
2122
+ * ```
2123
+ */
2124
+ interface HazoFilesMigrationV3 {
2125
+ /** Default table name */
2126
+ tableName: string;
2127
+ /** SQLite migration statements */
2128
+ sqlite: MigrationSchemaDefinition;
2129
+ /** PostgreSQL migration statements */
2130
+ postgres: MigrationSchemaDefinition;
2131
+ /** New column names added in V3 */
2132
+ newColumns: readonly string[];
2133
+ }
2134
+ declare const HAZO_FILES_MIGRATION_V3: HazoFilesMigrationV3;
2135
+ /**
2136
+ * Get V3 migration statements for a custom table name
2137
+ */
2138
+ declare function getMigrationV3ForTable(tableName: string, dbType: 'sqlite' | 'postgres'): MigrationSchemaDefinition;
2048
2139
 
2049
2140
  /**
2050
2141
  * Migration: Add Reference Tracking (V2)
@@ -2087,6 +2178,33 @@ declare function migrateToV2(executor: MigrationExecutor, dbType: 'sqlite' | 'po
2087
2178
  */
2088
2179
  declare function backfillV2Defaults(executor: MigrationExecutor, dbType: 'sqlite' | 'postgres', tableName?: string): Promise<void>;
2089
2180
 
2181
+ /**
2182
+ * Migration: Add Content Tag (V3)
2183
+ *
2184
+ * Adds content_tag column to an existing hazo_files table.
2185
+ * Idempotent — safe to run multiple times.
2186
+ */
2187
+
2188
+ /**
2189
+ * Run the V3 migration: add content_tag column and index.
2190
+ *
2191
+ * @param executor - Object with a `run(sql)` method
2192
+ * @param dbType - Database type ('sqlite' | 'postgres')
2193
+ * @param tableName - Custom table name (defaults to 'hazo_files')
2194
+ *
2195
+ * @example
2196
+ * ```typescript
2197
+ * import { migrateToV3 } from 'hazo_files';
2198
+ *
2199
+ * // SQLite with better-sqlite3
2200
+ * await migrateToV3({ run: (sql) => db.exec(sql) }, 'sqlite');
2201
+ *
2202
+ * // PostgreSQL with pg
2203
+ * await migrateToV3({ run: (sql) => client.query(sql) }, 'postgres');
2204
+ * ```
2205
+ */
2206
+ declare function migrateToV3(executor: MigrationExecutor, dbType: 'sqlite' | 'postgres', tableName?: string): Promise<void>;
2207
+
2090
2208
  /**
2091
2209
  * Common utility functions
2092
2210
  */
@@ -2929,4 +3047,4 @@ declare function toV2Record(record: FileMetadataRecord): FileMetadataRecordV2;
2929
3047
  */
2930
3048
  declare function buildFileWithStatus(record: FileMetadataRecord): FileWithStatus;
2931
3049
 
2932
- export { ALL_SYSTEM_VARIABLES, type AddExtractionOptions, type AddRefOptions, type AuthCallbacks, AuthenticationError, type CleanupOrphanedOptions, ConfigurationError, type CreateFolderOptions, type CrudServiceLike, DEFAULT_DATE_FORMATS, type DatabaseSchemaDefinition, type DatabaseTrackingConfig, DirectoryExistsError, DirectoryNotEmptyError, DirectoryNotFoundError, type DownloadOptions, type ExtractionData, type ExtractionOptions, type ExtractionResult, type FileBrowserState, type FileDataStructure, FileExistsError, type FileInfo, type FileItem, FileManager, type FileManagerOptions, type FileMetadataInput, type FileMetadataRecord, type FileMetadataRecordV2, FileMetadataService, type FileMetadataServiceOptions, type FileMetadataUpdate, FileNotFoundError, type FileRef, type FileRefVisibility, type FileStatus, type FileSystemItem, FileTooLargeError, type FileWithStatus, type FindOrphanedOptions, type FolderItem, type GeneratedNameResult, type GoogleAuthConfig, GoogleDriveAuth, type GoogleDriveConfig, GoogleDriveModule, HAZO_FILES_DEFAULT_TABLE_NAME, HAZO_FILES_MIGRATION_V2, HAZO_FILES_NAMING_DEFAULT_TABLE_NAME, HAZO_FILES_NAMING_TABLE_SCHEMA, HAZO_FILES_TABLE_SCHEMA, type HazoFilesColumnDefinitions, type HazoFilesConfig, HazoFilesError, type HazoFilesMigrationV2, type HazoFilesNamingColumnDefinitions, type HazoFilesNamingTableSchema, type HazoFilesServerInstance, type HazoFilesServerOptions, type HazoFilesTableSchema, type HazoLLMInstance, type HazoLogger, InvalidExtensionError, InvalidPathError, LLMExtractionService, type LLMFactory, type LLMProvider, type ListNamingConventionsOptions, type ListOptions, type LocalStorageConfig, LocalStorageModule, type MetadataLogger, type MigrationExecutor, type MigrationSchemaDefinition, type MoveOptions, type NameGenerationOptions, type NamingConventionInput, type NamingConventionRecord, NamingConventionService, type NamingConventionServiceOptions, type NamingConventionType, type NamingConventionUpdate, type 
NamingRuleConfiguratorProps, type NamingRuleHistoryEntry, type NamingRuleSchema, type NamingVariable, OperationError, type OperationResult, type ParsedNamingConvention, type PatternSegment, PermissionDeniedError, type ProgressCallback, type RemoveExtractionOptions, type RemoveRefsCriteria, type RenameOptions, SYSTEM_COUNTER_VARIABLES, SYSTEM_DATE_VARIABLES, SYSTEM_FILE_VARIABLES, type StorageModule, type StorageProvider, type TokenData, TrackedFileManager, type TrackedFileManagerFullOptions, type TrackedFileManagerOptions, type TrackedUploadOptions, type TreeNode, type UploadExtractOptions, type UploadExtractResult, UploadExtractService, type UploadOptions, type UploadWithRefOptions, type UseNamingRuleActions, type UseNamingRuleReturn, type UseNamingRuleState, type VariableCategory, addExtractionToFileData, backfillV2Defaults, buildFileWithStatus, clearExtractions, clonePattern, computeFileHash, computeFileHashFromStream, computeFileHashSync, computeFileInfo, createAndInitializeModule, createBasicFileManager, createEmptyFileDataStructure, createEmptyNamingRuleSchema, createFileItem, createFileManager, createFileMetadataService, createFileRef, createFolderItem, createGoogleDriveAuth, createGoogleDriveModule, createHazoFilesServer, createInitializedFileManager, createInitializedTrackedFileManager, createLLMExtractionService, createLiteralSegment, createLocalModule, createModule, createNamingConventionService, createTrackedFileManager, createUploadExtractService, createVariableSegment, deepMerge, errorResult, filterItems, formatBytes, formatCounter, formatDateToken, generateExtractionId, generateId, generatePreviewName, generateRefId, generateSampleConfig, generateSegmentId, getBaseName, getBreadcrumbs, getDirName, getExtension, getExtensionFromMime, getExtractionById, getExtractionCount, getExtractions, getFileCategory, getFileMetadataValues, getMergedData, getMigrationForTable, getMimeType, getNameWithoutExtension, getNamingSchemaForTable, getParentPath, 
getPathSegments, getRegisteredProviders, getRelativePath, getSchemaForTable, getSystemVariablePreviewValues, hasExtension, hasExtractionStructure, hasFileContentChanged, hashesEqual, hazo_files_generate_file_name, hazo_files_generate_folder_name, isAudio, isChildPath, isCounterVariable, isDateVariable, isDocument, isFile, isFileMetadataVariable, isFolder, isImage, isPreviewable, isProviderRegistered, isText, isVideo, joinPath, loadConfig, loadConfigAsync, migrateToV2, normalizePath, parseConfig, parseFileData, parseFileRefs, parsePatternString, patternToString, recalculateMergedData, registerModule, removeExtractionById, removeExtractionByIndex, removeRefFromArray, removeRefsByCriteriaFromArray, sanitizeFilename, saveConfig, sortItems, stringifyFileData, stringifyFileRefs, successResult, toV2Record, updateExtractionById, validateExtractionData, validateFileDataStructure, validateNamingRuleSchema, validatePath };
3050
+ export { ALL_SYSTEM_VARIABLES, type AddExtractionOptions, type AddRefOptions, type AuthCallbacks, AuthenticationError, type CleanupOrphanedOptions, ConfigurationError, type ContentTagConfig, type CreateFolderOptions, type CrudServiceLike, DEFAULT_DATE_FORMATS, type DatabaseSchemaDefinition, type DatabaseTrackingConfig, DirectoryExistsError, DirectoryNotEmptyError, DirectoryNotFoundError, type DownloadOptions, type ExtractionData, type ExtractionOptions, type ExtractionResult, type FileBrowserState, type FileDataStructure, FileExistsError, type FileInfo, type FileItem, FileManager, type FileManagerOptions, type FileMetadataInput, type FileMetadataRecord, type FileMetadataRecordV2, FileMetadataService, type FileMetadataServiceOptions, type FileMetadataUpdate, FileNotFoundError, type FileRef, type FileRefVisibility, type FileStatus, type FileSystemItem, FileTooLargeError, type FileWithStatus, type FindOrphanedOptions, type FolderItem, type GeneratedNameResult, type GoogleAuthConfig, GoogleDriveAuth, type GoogleDriveConfig, GoogleDriveModule, HAZO_FILES_DEFAULT_TABLE_NAME, HAZO_FILES_MIGRATION_V2, HAZO_FILES_MIGRATION_V3, HAZO_FILES_NAMING_DEFAULT_TABLE_NAME, HAZO_FILES_NAMING_TABLE_SCHEMA, HAZO_FILES_TABLE_SCHEMA, type HazoFilesColumnDefinitions, type HazoFilesConfig, HazoFilesError, type HazoFilesMigrationV2, type HazoFilesMigrationV3, type HazoFilesNamingColumnDefinitions, type HazoFilesNamingTableSchema, type HazoFilesServerInstance, type HazoFilesServerOptions, type HazoFilesTableSchema, type HazoLLMInstance, type HazoLogger, InvalidExtensionError, InvalidPathError, LLMExtractionService, type LLMFactory, type LLMProvider, type ListNamingConventionsOptions, type ListOptions, type LocalStorageConfig, LocalStorageModule, type MetadataLogger, type MigrationExecutor, type MigrationSchemaDefinition, type MoveOptions, type NameGenerationOptions, type NamingConventionInput, type NamingConventionRecord, NamingConventionService, type NamingConventionServiceOptions, type 
NamingConventionType, type NamingConventionUpdate, type NamingRuleConfiguratorProps, type NamingRuleHistoryEntry, type NamingRuleSchema, type NamingVariable, OperationError, type OperationResult, type ParsedNamingConvention, type PatternSegment, PermissionDeniedError, type ProgressCallback, type RemoveExtractionOptions, type RemoveRefsCriteria, type RenameOptions, SYSTEM_COUNTER_VARIABLES, SYSTEM_DATE_VARIABLES, SYSTEM_FILE_VARIABLES, type StorageModule, type StorageProvider, type TokenData, TrackedFileManager, type TrackedFileManagerFullOptions, type TrackedFileManagerOptions, type TrackedUploadOptions, type TreeNode, type UploadExtractOptions, type UploadExtractResult, UploadExtractService, type UploadOptions, type UploadWithRefOptions, type UseNamingRuleActions, type UseNamingRuleReturn, type UseNamingRuleState, type VariableCategory, addExtractionToFileData, backfillV2Defaults, buildFileWithStatus, clearExtractions, clonePattern, computeFileHash, computeFileHashFromStream, computeFileHashSync, computeFileInfo, createAndInitializeModule, createBasicFileManager, createEmptyFileDataStructure, createEmptyNamingRuleSchema, createFileItem, createFileManager, createFileMetadataService, createFileRef, createFolderItem, createGoogleDriveAuth, createGoogleDriveModule, createHazoFilesServer, createInitializedFileManager, createInitializedTrackedFileManager, createLLMExtractionService, createLiteralSegment, createLocalModule, createModule, createNamingConventionService, createTrackedFileManager, createUploadExtractService, createVariableSegment, deepMerge, errorResult, filterItems, formatBytes, formatCounter, formatDateToken, generateExtractionId, generateId, generatePreviewName, generateRefId, generateSampleConfig, generateSegmentId, getBaseName, getBreadcrumbs, getDirName, getExtension, getExtensionFromMime, getExtractionById, getExtractionCount, getExtractions, getFileCategory, getFileMetadataValues, getMergedData, getMigrationForTable, getMigrationV3ForTable, 
getMimeType, getNameWithoutExtension, getNamingSchemaForTable, getParentPath, getPathSegments, getRegisteredProviders, getRelativePath, getSchemaForTable, getSystemVariablePreviewValues, hasExtension, hasExtractionStructure, hasFileContentChanged, hashesEqual, hazo_files_generate_file_name, hazo_files_generate_folder_name, isAudio, isChildPath, isCounterVariable, isDateVariable, isDocument, isFile, isFileMetadataVariable, isFolder, isImage, isPreviewable, isProviderRegistered, isText, isVideo, joinPath, loadConfig, loadConfigAsync, migrateToV2, migrateToV3, normalizePath, parseConfig, parseFileData, parseFileRefs, parsePatternString, patternToString, recalculateMergedData, registerModule, removeExtractionById, removeExtractionByIndex, removeRefFromArray, removeRefsByCriteriaFromArray, sanitizeFilename, saveConfig, sortItems, stringifyFileData, stringifyFileRefs, successResult, toV2Record, updateExtractionById, validateExtractionData, validateFileDataStructure, validateNamingRuleSchema, validatePath };