hazo_files 1.4.2 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -18,7 +18,8 @@ A powerful, modular file management package for Node.js and React applications w
18
18
  - **Upload + Extract Workflow**: Combined service for uploading files with automatic LLM extraction and naming
19
19
  - **File Reference Tracking**: Multi-entity file references with orphan detection, soft delete, and lifecycle management
20
20
  - **File Change Detection**: xxHash-based content hashing for efficient change detection
21
- - **Schema Migrations**: Built-in V2 migration utilities for adding reference tracking to existing databases
21
+ - **Content Tagging**: Optional LLM-based content classification, run at upload time or on demand, with the result stored in the `content_tag` field
22
+ - **Schema Migrations**: Built-in V2/V3 migration utilities for adding reference tracking and content tagging to existing databases
22
23
  - **TypeScript**: Full type safety and IntelliSense support
23
24
  - **OAuth Integration**: Built-in Google Drive OAuth authentication
24
25
  - **Progress Tracking**: Upload/download progress callbacks
@@ -1108,11 +1109,17 @@ const extractionService = new LLMExtractionService((provider, options) => {
1108
1109
  return createLLM({ provider, ...options });
1109
1110
  }, 'gemini');
1110
1111
 
1111
- // Create upload + extract service
1112
+ // Create upload + extract service (with optional content tag config)
1112
1113
  const uploadExtract = new UploadExtractService(
1113
1114
  trackedFileManager,
1114
1115
  namingService,
1115
- extractionService
1116
+ extractionService,
1117
+ {
1118
+ content_tag_set_by_llm: true,
1119
+ content_tag_prompt_area: 'classification',
1120
+ content_tag_prompt_key: 'classify_document',
1121
+ content_tag_prompt_return_fieldname: 'document_type',
1122
+ }
1116
1123
  );
1117
1124
 
1118
1125
  // Upload with extraction and naming convention
@@ -1140,6 +1147,8 @@ if (result.success) {
1140
1147
  console.log('Uploaded to:', result.generatedPath);
1141
1148
  // e.g., '/documents/2024/ACME/ACME_Q4_2024-12-09_001.pdf'
1142
1149
  console.log('Extracted data:', result.extraction?.data);
1150
+ console.log('Content tag:', result.contentTag);
1151
+ // e.g., 'invoice', 'report', 'contract'
1143
1152
  }
1144
1153
 
1145
1154
  // Generate path preview without uploading
@@ -1194,6 +1203,74 @@ const autoResult = await extractionService.extract(
1194
1203
  );
1195
1204
  ```
1196
1205
 
1206
+ ## Content Tagging
1207
+
1208
+ Automatically classify uploaded files using LLM-based content analysis. The `content_tag` field stores a classification string (e.g., "invoice", "report", "contract") determined by an LLM prompt.
1209
+
1210
+ ### Configuration
1211
+
1212
+ ```typescript
1213
+ import type { ContentTagConfig } from 'hazo_files';
1214
+
1215
+ const contentTagConfig: ContentTagConfig = {
1216
+ content_tag_set_by_llm: true,
1217
+ content_tag_prompt_area: 'classification',
1218
+ content_tag_prompt_key: 'classify_document',
1219
+ content_tag_prompt_return_fieldname: 'document_type',
1220
+ content_tag_prompt_variables: { language: 'en' }, // optional
1221
+ };
1222
+ ```
1223
+
1224
+ ### Automatic Tagging at Upload
1225
+
1226
+ Pass a `ContentTagConfig` to the `UploadExtractService` constructor (used as the default for all uploads), or override it per upload via the `contentTagConfig` option:
1227
+
1228
+ ```typescript
1229
+ // Per-upload override
1230
+ const result = await uploadExtract.uploadWithExtract(buffer, 'file.pdf', {
1231
+ basePath: '/docs',
1232
+ contentTagConfig: {
1233
+ content_tag_set_by_llm: true,
1234
+ content_tag_prompt_area: 'classification',
1235
+ content_tag_prompt_key: 'classify_document',
1236
+ content_tag_prompt_return_fieldname: 'document_type',
1237
+ },
1238
+ });
1239
+ console.log(result.contentTag); // e.g., 'invoice'
1240
+ ```
1241
+
1242
+ ### Manual Tagging
1243
+
1244
+ Tag existing files by their database record ID:
1245
+
1246
+ ```typescript
1247
+ const tagResult = await uploadExtract.tagFileContent('file-record-id');
1248
+ if (tagResult.success) {
1249
+ console.log('Tagged as:', tagResult.data);
1250
+ }
1251
+ ```
1252
+
1253
+ ### V3 Database Migration
1254
+
1255
+ If you have an existing `hazo_files` table, run the V3 migration to add the `content_tag` column and its index:
1256
+
1257
+ ```typescript
1258
+ import { migrateToV3, HAZO_FILES_MIGRATION_V3 } from 'hazo_files';
1259
+
1260
+ // Using the migration helper
1261
+ await migrateToV3(
1262
+ { run: (sql) => db.run(sql) },
1263
+ 'sqlite'
1264
+ );
1265
+
1266
+ // Or run statements manually
1267
+ for (const stmt of HAZO_FILES_MIGRATION_V3.sqlite.alterStatements) {
1268
+ try { await db.run(stmt); } catch { /* column exists */ }
1269
+ }
1270
+ ```
1271
+
1272
+ New tables created with `HAZO_FILES_TABLE_SCHEMA` already include the `content_tag` column.
1273
+
1197
1274
  ## File Reference Tracking
1198
1275
 
1199
1276
  Track which entities (form fields, chat messages, etc.) reference each file. Multiple entities can reference the same file, enabling shared files without duplication.
@@ -1274,7 +1351,7 @@ for (const idx of HAZO_FILES_MIGRATION_V2.sqlite.indexes) {
1274
1351
  }
1275
1352
  ```
1276
1353
 
1277
- New tables created with `HAZO_FILES_TABLE_SCHEMA` already include V2 columns.
1354
+ New tables created with `HAZO_FILES_TABLE_SCHEMA` already include V2 columns. For the V3 content tagging migration, see [Content Tagging](#content-tagging) above.
1278
1355
 
1279
1356
  ### Reference Tracking Types
1280
1357
 
@@ -1333,6 +1410,13 @@ const hazoFiles = await createHazoFilesServer({
1333
1410
  },
1334
1411
  enableTracking: true,
1335
1412
  llmFactory: (provider) => createLLM({ provider }),
1413
+ // Optional: enable automatic content tagging for all uploads
1414
+ defaultContentTagConfig: {
1415
+ content_tag_set_by_llm: true,
1416
+ content_tag_prompt_area: 'classification',
1417
+ content_tag_prompt_key: 'classify_document',
1418
+ content_tag_prompt_return_fieldname: 'document_type',
1419
+ },
1336
1420
  });
1337
1421
 
1338
1422
  // Access all services
package/dist/index.d.mts CHANGED
@@ -185,6 +185,8 @@ interface FileMetadataInput {
185
185
  uploaded_by?: string;
186
186
  /** Original filename at upload time (V2) */
187
187
  original_filename?: string;
188
+ /** Content tag classifying the document type (V3) */
189
+ content_tag?: string;
188
190
  }
189
191
  /**
190
192
  * Input for updating an existing metadata record
@@ -258,6 +260,23 @@ interface RemoveExtractionOptions {
258
260
  /** Merge strategy to use when recalculating (default: 'shallow') */
259
261
  mergeStrategy?: 'shallow' | 'deep';
260
262
  }
263
+ /**
264
+ * Configuration for LLM-based content tagging.
265
+ * When enabled, calls the LLM with a specific prompt and writes
266
+ * the extracted field value to the content_tag column.
267
+ */
268
+ interface ContentTagConfig {
269
+ /** Whether to enable LLM-based content tagging */
270
+ content_tag_set_by_llm: boolean;
271
+ /** Prompt area for hazo_llm_api lookup */
272
+ content_tag_prompt_area: string;
273
+ /** Prompt key within the area */
274
+ content_tag_prompt_key: string;
275
+ /** Optional variables to substitute in the prompt template */
276
+ content_tag_prompt_variables?: Record<string, string>;
277
+ /** Field name to extract from the LLM response as the content tag */
278
+ content_tag_prompt_return_fieldname: string;
279
+ }
261
280
 
262
281
  /**
263
282
  * Naming convention types for hazo_files
@@ -412,6 +431,8 @@ interface FileMetadataRecordV2 extends FileMetadataRecord {
412
431
  storage_verified_at?: string | null;
413
432
  /** ISO timestamp when file was soft-deleted */
414
433
  deleted_at?: string | null;
434
+ /** Content tag classifying the document type (V3) */
435
+ content_tag?: string | null;
415
436
  }
416
437
  /**
417
438
  * Options for adding a reference to a file
@@ -1004,7 +1025,7 @@ declare class FileMetadataService {
1004
1025
  /**
1005
1026
  * Update specific V2 fields on a record
1006
1027
  */
1007
- updateFields(fileId: string, fields: Partial<Pick<FileMetadataRecordV2, 'scope_id' | 'uploaded_by' | 'original_filename' | 'storage_verified_at' | 'status'>>): Promise<boolean>;
1028
+ updateFields(fileId: string, fields: Partial<Pick<FileMetadataRecordV2, 'scope_id' | 'uploaded_by' | 'original_filename' | 'storage_verified_at' | 'status' | 'content_tag'>>): Promise<boolean>;
1008
1029
  /**
1009
1030
  * Find orphaned files (zero references)
1010
1031
  */
@@ -1532,6 +1553,11 @@ interface UploadExtractOptions extends TrackedUploadOptions {
1532
1553
  * Whether to create the folder path if it doesn't exist
1533
1554
  */
1534
1555
  createFolders?: boolean;
1556
+ /**
1557
+ * Content tag configuration for this upload.
1558
+ * Overrides the default config set on the service.
1559
+ */
1560
+ contentTagConfig?: ContentTagConfig;
1535
1561
  }
1536
1562
  /**
1537
1563
  * Result of upload with extraction
@@ -1551,6 +1577,8 @@ interface UploadExtractResult {
1551
1577
  generatedFolderPath?: string;
1552
1578
  /** Original file name before renaming */
1553
1579
  originalFileName?: string;
1580
+ /** Content tag assigned by LLM (if content tagging was performed) */
1581
+ contentTag?: string;
1554
1582
  }
1555
1583
  /**
1556
1584
  * Options for creating folders from naming convention
@@ -1608,7 +1636,8 @@ declare class UploadExtractService {
1608
1636
  private fileManager;
1609
1637
  private namingService?;
1610
1638
  private extractionService?;
1611
- constructor(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService);
1639
+ private defaultContentTagConfig?;
1640
+ constructor(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService, defaultContentTagConfig?: ContentTagConfig);
1612
1641
  /**
1613
1642
  * Upload a file with optional extraction and naming convention
1614
1643
  */
@@ -1633,6 +1662,21 @@ declare class UploadExtractService {
1633
1662
  fullPath?: string;
1634
1663
  folderPath?: string;
1635
1664
  }>;
1665
+ /**
1666
+ * Perform content tagging via LLM extraction.
1667
+ * Calls the LLM with the configured prompt, extracts the specified field,
1668
+ * and writes it to the content_tag column.
1669
+ */
1670
+ private performContentTagging;
1671
+ /**
1672
+ * Manually tag a file's content via LLM.
1673
+ * Works with existing DB records, resolving the file path internally.
1674
+ *
1675
+ * @param fileId - Database record ID of the file
1676
+ * @param config - Content tag config (falls back to default if not provided)
1677
+ * @returns OperationResult with the tag value
1678
+ */
1679
+ tagFileContent(fileId: string, config?: ContentTagConfig): Promise<OperationResult<string>>;
1636
1680
  /**
1637
1681
  * Get the file manager
1638
1682
  */
@@ -1649,7 +1693,7 @@ declare class UploadExtractService {
1649
1693
  /**
1650
1694
  * Create an UploadExtractService instance
1651
1695
  */
1652
- declare function createUploadExtractService(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService): UploadExtractService;
1696
+ declare function createUploadExtractService(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService, defaultContentTagConfig?: ContentTagConfig): UploadExtractService;
1653
1697
 
1654
1698
  /**
1655
1699
  * Configuration loader for hazo_files
@@ -1743,6 +1787,8 @@ interface HazoFilesColumnDefinitions {
1743
1787
  deleted_at: 'TEXT' | 'TIMESTAMP';
1744
1788
  /** Original filename at upload time (V2) */
1745
1789
  original_filename: 'TEXT';
1790
+ /** Content tag classifying the document type (V3) */
1791
+ content_tag: 'TEXT';
1746
1792
  }
1747
1793
  /**
1748
1794
  * Schema definition for a specific database type
@@ -1925,6 +1971,46 @@ declare const HAZO_FILES_NAMING_TABLE_SCHEMA: HazoFilesNamingTableSchema;
1925
1971
  * Get DDL for a custom naming table name
1926
1972
  */
1927
1973
  declare function getNamingSchemaForTable(tableName: string, dbType: 'sqlite' | 'postgres'): DatabaseSchemaDefinition;
1974
+ /**
1975
+ * Migration schema for adding V3 content tagging column to existing tables.
1976
+ * Idempotent — safe to run multiple times.
1977
+ *
1978
+ * @example
1979
+ * ```typescript
1980
+ * import { HAZO_FILES_MIGRATION_V3 } from 'hazo_files';
1981
+ *
1982
+ * // SQLite
1983
+ * for (const stmt of HAZO_FILES_MIGRATION_V3.sqlite.alterStatements) {
1984
+ * try { await db.run(stmt); } catch { /* column already exists *\/ }
1985
+ * }
1986
+ * for (const idx of HAZO_FILES_MIGRATION_V3.sqlite.indexes) {
1987
+ * await db.run(idx);
1988
+ * }
1989
+ *
1990
+ * // PostgreSQL
1991
+ * for (const stmt of HAZO_FILES_MIGRATION_V3.postgres.alterStatements) {
1992
+ * await client.query(stmt);
1993
+ * }
1994
+ * for (const idx of HAZO_FILES_MIGRATION_V3.postgres.indexes) {
1995
+ * await client.query(idx);
1996
+ * }
1997
+ * ```
1998
+ */
1999
+ interface HazoFilesMigrationV3 {
2000
+ /** Default table name */
2001
+ tableName: string;
2002
+ /** SQLite migration statements */
2003
+ sqlite: MigrationSchemaDefinition;
2004
+ /** PostgreSQL migration statements */
2005
+ postgres: MigrationSchemaDefinition;
2006
+ /** New column names added in V3 */
2007
+ newColumns: readonly string[];
2008
+ }
2009
+ declare const HAZO_FILES_MIGRATION_V3: HazoFilesMigrationV3;
2010
+ /**
2011
+ * Get V3 migration statements for a custom table name
2012
+ */
2013
+ declare function getMigrationV3ForTable(tableName: string, dbType: 'sqlite' | 'postgres'): MigrationSchemaDefinition;
1928
2014
 
1929
2015
  /**
1930
2016
  * Migration: Add Reference Tracking (V2)
@@ -1967,6 +2053,33 @@ declare function migrateToV2(executor: MigrationExecutor, dbType: 'sqlite' | 'po
1967
2053
  */
1968
2054
  declare function backfillV2Defaults(executor: MigrationExecutor, dbType: 'sqlite' | 'postgres', tableName?: string): Promise<void>;
1969
2055
 
2056
+ /**
2057
+ * Migration: Add Content Tag (V3)
2058
+ *
2059
+ * Adds content_tag column to an existing hazo_files table.
2060
+ * Idempotent — safe to run multiple times.
2061
+ */
2062
+
2063
+ /**
2064
+ * Run the V3 migration: add content_tag column and index.
2065
+ *
2066
+ * @param executor - Object with a `run(sql)` method
2067
+ * @param dbType - Database type ('sqlite' | 'postgres')
2068
+ * @param tableName - Custom table name (defaults to 'hazo_files')
2069
+ *
2070
+ * @example
2071
+ * ```typescript
2072
+ * import { migrateToV3 } from 'hazo_files';
2073
+ *
2074
+ * // SQLite with better-sqlite3
2075
+ * await migrateToV3({ run: (sql) => db.exec(sql) }, 'sqlite');
2076
+ *
2077
+ * // PostgreSQL with pg
2078
+ * await migrateToV3({ run: (sql) => client.query(sql) }, 'postgres');
2079
+ * ```
2080
+ */
2081
+ declare function migrateToV3(executor: MigrationExecutor, dbType: 'sqlite' | 'postgres', tableName?: string): Promise<void>;
2082
+
1970
2083
  /**
1971
2084
  * Common utility functions
1972
2085
  */
@@ -2782,4 +2895,4 @@ declare function toV2Record(record: FileMetadataRecord): FileMetadataRecordV2;
2782
2895
  */
2783
2896
  declare function buildFileWithStatus(record: FileMetadataRecord): FileWithStatus;
2784
2897
 
2785
- export { ALL_SYSTEM_VARIABLES, type AddExtractionOptions, type AddRefOptions, type AuthCallbacks, AuthenticationError, type CleanupOrphanedOptions, ConfigurationError, type CreateFolderOptions, type CrudServiceLike, DEFAULT_DATE_FORMATS, type DatabaseSchemaDefinition, type DatabaseTrackingConfig, DirectoryExistsError, DirectoryNotEmptyError, DirectoryNotFoundError, type DownloadOptions, type ExtractionData, type ExtractionOptions, type ExtractionResult, type FileBrowserState, type FileDataStructure, FileExistsError, type FileInfo, type FileItem, FileManager, type FileManagerOptions, type FileMetadataInput, type FileMetadataRecord, type FileMetadataRecordV2, FileMetadataService, type FileMetadataServiceOptions, type FileMetadataUpdate, FileNotFoundError, type FileRef, type FileRefVisibility, type FileStatus, type FileSystemItem, FileTooLargeError, type FileWithStatus, type FindOrphanedOptions, type FolderItem, type GeneratedNameResult, type GoogleAuthConfig, GoogleDriveAuth, type GoogleDriveConfig, GoogleDriveModule, HAZO_FILES_DEFAULT_TABLE_NAME, HAZO_FILES_MIGRATION_V2, HAZO_FILES_NAMING_DEFAULT_TABLE_NAME, HAZO_FILES_NAMING_TABLE_SCHEMA, HAZO_FILES_TABLE_SCHEMA, type HazoFilesColumnDefinitions, type HazoFilesConfig, HazoFilesError, type HazoFilesMigrationV2, type HazoFilesNamingColumnDefinitions, type HazoFilesNamingTableSchema, type HazoFilesTableSchema, type HazoLLMInstance, InvalidExtensionError, InvalidPathError, LLMExtractionService, type LLMFactory, type LLMProvider, type ListNamingConventionsOptions, type ListOptions, type LocalStorageConfig, LocalStorageModule, type MetadataLogger, type MigrationExecutor, type MigrationSchemaDefinition, type MoveOptions, type NameGenerationOptions, type NamingConventionInput, type NamingConventionRecord, NamingConventionService, type NamingConventionServiceOptions, type NamingConventionType, type NamingConventionUpdate, type NamingRuleConfiguratorProps, type NamingRuleHistoryEntry, type NamingRuleSchema, type 
NamingVariable, OperationError, type OperationResult, type ParsedNamingConvention, type PatternSegment, PermissionDeniedError, type ProgressCallback, type RemoveExtractionOptions, type RemoveRefsCriteria, type RenameOptions, SYSTEM_COUNTER_VARIABLES, SYSTEM_DATE_VARIABLES, SYSTEM_FILE_VARIABLES, type StorageModule, type StorageProvider, type TokenData, TrackedFileManager, type TrackedFileManagerFullOptions, type TrackedFileManagerOptions, type TrackedUploadOptions, type TreeNode, type UploadExtractOptions, type UploadExtractResult, UploadExtractService, type UploadOptions, type UploadWithRefOptions, type UseNamingRuleActions, type UseNamingRuleReturn, type UseNamingRuleState, type VariableCategory, addExtractionToFileData, backfillV2Defaults, buildFileWithStatus, clearExtractions, clonePattern, computeFileHash, computeFileHashFromStream, computeFileHashSync, computeFileInfo, createAndInitializeModule, createEmptyFileDataStructure, createEmptyNamingRuleSchema, createFileItem, createFileManager, createFileMetadataService, createFileRef, createFolderItem, createGoogleDriveAuth, createGoogleDriveModule, createInitializedFileManager, createInitializedTrackedFileManager, createLLMExtractionService, createLiteralSegment, createLocalModule, createModule, createNamingConventionService, createTrackedFileManager, createUploadExtractService, createVariableSegment, deepMerge, errorResult, filterItems, formatBytes, formatCounter, formatDateToken, generateExtractionId, generateId, generatePreviewName, generateRefId, generateSampleConfig, generateSegmentId, getBaseName, getBreadcrumbs, getDirName, getExtension, getExtensionFromMime, getExtractionById, getExtractionCount, getExtractions, getFileCategory, getFileMetadataValues, getMergedData, getMigrationForTable, getMimeType, getNameWithoutExtension, getNamingSchemaForTable, getParentPath, getPathSegments, getRegisteredProviders, getRelativePath, getSchemaForTable, getSystemVariablePreviewValues, hasExtension, 
hasExtractionStructure, hasFileContentChanged, hashesEqual, hazo_files_generate_file_name, hazo_files_generate_folder_name, isAudio, isChildPath, isCounterVariable, isDateVariable, isDocument, isFile, isFileMetadataVariable, isFolder, isImage, isPreviewable, isProviderRegistered, isText, isVideo, joinPath, loadConfig, loadConfigAsync, migrateToV2, normalizePath, parseConfig, parseFileData, parseFileRefs, parsePatternString, patternToString, recalculateMergedData, registerModule, removeExtractionById, removeExtractionByIndex, removeRefFromArray, removeRefsByCriteriaFromArray, sanitizeFilename, saveConfig, sortItems, stringifyFileData, stringifyFileRefs, successResult, toV2Record, updateExtractionById, validateExtractionData, validateFileDataStructure, validateNamingRuleSchema, validatePath };
2898
+ export { ALL_SYSTEM_VARIABLES, type AddExtractionOptions, type AddRefOptions, type AuthCallbacks, AuthenticationError, type CleanupOrphanedOptions, ConfigurationError, type ContentTagConfig, type CreateFolderOptions, type CrudServiceLike, DEFAULT_DATE_FORMATS, type DatabaseSchemaDefinition, type DatabaseTrackingConfig, DirectoryExistsError, DirectoryNotEmptyError, DirectoryNotFoundError, type DownloadOptions, type ExtractionData, type ExtractionOptions, type ExtractionResult, type FileBrowserState, type FileDataStructure, FileExistsError, type FileInfo, type FileItem, FileManager, type FileManagerOptions, type FileMetadataInput, type FileMetadataRecord, type FileMetadataRecordV2, FileMetadataService, type FileMetadataServiceOptions, type FileMetadataUpdate, FileNotFoundError, type FileRef, type FileRefVisibility, type FileStatus, type FileSystemItem, FileTooLargeError, type FileWithStatus, type FindOrphanedOptions, type FolderItem, type GeneratedNameResult, type GoogleAuthConfig, GoogleDriveAuth, type GoogleDriveConfig, GoogleDriveModule, HAZO_FILES_DEFAULT_TABLE_NAME, HAZO_FILES_MIGRATION_V2, HAZO_FILES_MIGRATION_V3, HAZO_FILES_NAMING_DEFAULT_TABLE_NAME, HAZO_FILES_NAMING_TABLE_SCHEMA, HAZO_FILES_TABLE_SCHEMA, type HazoFilesColumnDefinitions, type HazoFilesConfig, HazoFilesError, type HazoFilesMigrationV2, type HazoFilesMigrationV3, type HazoFilesNamingColumnDefinitions, type HazoFilesNamingTableSchema, type HazoFilesTableSchema, type HazoLLMInstance, InvalidExtensionError, InvalidPathError, LLMExtractionService, type LLMFactory, type LLMProvider, type ListNamingConventionsOptions, type ListOptions, type LocalStorageConfig, LocalStorageModule, type MetadataLogger, type MigrationExecutor, type MigrationSchemaDefinition, type MoveOptions, type NameGenerationOptions, type NamingConventionInput, type NamingConventionRecord, NamingConventionService, type NamingConventionServiceOptions, type NamingConventionType, type NamingConventionUpdate, type 
NamingRuleConfiguratorProps, type NamingRuleHistoryEntry, type NamingRuleSchema, type NamingVariable, OperationError, type OperationResult, type ParsedNamingConvention, type PatternSegment, PermissionDeniedError, type ProgressCallback, type RemoveExtractionOptions, type RemoveRefsCriteria, type RenameOptions, SYSTEM_COUNTER_VARIABLES, SYSTEM_DATE_VARIABLES, SYSTEM_FILE_VARIABLES, type StorageModule, type StorageProvider, type TokenData, TrackedFileManager, type TrackedFileManagerFullOptions, type TrackedFileManagerOptions, type TrackedUploadOptions, type TreeNode, type UploadExtractOptions, type UploadExtractResult, UploadExtractService, type UploadOptions, type UploadWithRefOptions, type UseNamingRuleActions, type UseNamingRuleReturn, type UseNamingRuleState, type VariableCategory, addExtractionToFileData, backfillV2Defaults, buildFileWithStatus, clearExtractions, clonePattern, computeFileHash, computeFileHashFromStream, computeFileHashSync, computeFileInfo, createAndInitializeModule, createEmptyFileDataStructure, createEmptyNamingRuleSchema, createFileItem, createFileManager, createFileMetadataService, createFileRef, createFolderItem, createGoogleDriveAuth, createGoogleDriveModule, createInitializedFileManager, createInitializedTrackedFileManager, createLLMExtractionService, createLiteralSegment, createLocalModule, createModule, createNamingConventionService, createTrackedFileManager, createUploadExtractService, createVariableSegment, deepMerge, errorResult, filterItems, formatBytes, formatCounter, formatDateToken, generateExtractionId, generateId, generatePreviewName, generateRefId, generateSampleConfig, generateSegmentId, getBaseName, getBreadcrumbs, getDirName, getExtension, getExtensionFromMime, getExtractionById, getExtractionCount, getExtractions, getFileCategory, getFileMetadataValues, getMergedData, getMigrationForTable, getMigrationV3ForTable, getMimeType, getNameWithoutExtension, getNamingSchemaForTable, getParentPath, getPathSegments, 
getRegisteredProviders, getRelativePath, getSchemaForTable, getSystemVariablePreviewValues, hasExtension, hasExtractionStructure, hasFileContentChanged, hashesEqual, hazo_files_generate_file_name, hazo_files_generate_folder_name, isAudio, isChildPath, isCounterVariable, isDateVariable, isDocument, isFile, isFileMetadataVariable, isFolder, isImage, isPreviewable, isProviderRegistered, isText, isVideo, joinPath, loadConfig, loadConfigAsync, migrateToV2, migrateToV3, normalizePath, parseConfig, parseFileData, parseFileRefs, parsePatternString, patternToString, recalculateMergedData, registerModule, removeExtractionById, removeExtractionByIndex, removeRefFromArray, removeRefsByCriteriaFromArray, sanitizeFilename, saveConfig, sortItems, stringifyFileData, stringifyFileRefs, successResult, toV2Record, updateExtractionById, validateExtractionData, validateFileDataStructure, validateNamingRuleSchema, validatePath };
package/dist/index.d.ts CHANGED
@@ -185,6 +185,8 @@ interface FileMetadataInput {
185
185
  uploaded_by?: string;
186
186
  /** Original filename at upload time (V2) */
187
187
  original_filename?: string;
188
+ /** Content tag classifying the document type (V3) */
189
+ content_tag?: string;
188
190
  }
189
191
  /**
190
192
  * Input for updating an existing metadata record
@@ -258,6 +260,23 @@ interface RemoveExtractionOptions {
258
260
  /** Merge strategy to use when recalculating (default: 'shallow') */
259
261
  mergeStrategy?: 'shallow' | 'deep';
260
262
  }
263
+ /**
264
+ * Configuration for LLM-based content tagging.
265
+ * When enabled, calls the LLM with a specific prompt and writes
266
+ * the extracted field value to the content_tag column.
267
+ */
268
+ interface ContentTagConfig {
269
+ /** Whether to enable LLM-based content tagging */
270
+ content_tag_set_by_llm: boolean;
271
+ /** Prompt area for hazo_llm_api lookup */
272
+ content_tag_prompt_area: string;
273
+ /** Prompt key within the area */
274
+ content_tag_prompt_key: string;
275
+ /** Optional variables to substitute in the prompt template */
276
+ content_tag_prompt_variables?: Record<string, string>;
277
+ /** Field name to extract from the LLM response as the content tag */
278
+ content_tag_prompt_return_fieldname: string;
279
+ }
261
280
 
262
281
  /**
263
282
  * Naming convention types for hazo_files
@@ -412,6 +431,8 @@ interface FileMetadataRecordV2 extends FileMetadataRecord {
412
431
  storage_verified_at?: string | null;
413
432
  /** ISO timestamp when file was soft-deleted */
414
433
  deleted_at?: string | null;
434
+ /** Content tag classifying the document type (V3) */
435
+ content_tag?: string | null;
415
436
  }
416
437
  /**
417
438
  * Options for adding a reference to a file
@@ -1004,7 +1025,7 @@ declare class FileMetadataService {
1004
1025
  /**
1005
1026
  * Update specific V2 fields on a record
1006
1027
  */
1007
- updateFields(fileId: string, fields: Partial<Pick<FileMetadataRecordV2, 'scope_id' | 'uploaded_by' | 'original_filename' | 'storage_verified_at' | 'status'>>): Promise<boolean>;
1028
+ updateFields(fileId: string, fields: Partial<Pick<FileMetadataRecordV2, 'scope_id' | 'uploaded_by' | 'original_filename' | 'storage_verified_at' | 'status' | 'content_tag'>>): Promise<boolean>;
1008
1029
  /**
1009
1030
  * Find orphaned files (zero references)
1010
1031
  */
@@ -1532,6 +1553,11 @@ interface UploadExtractOptions extends TrackedUploadOptions {
1532
1553
  * Whether to create the folder path if it doesn't exist
1533
1554
  */
1534
1555
  createFolders?: boolean;
1556
+ /**
1557
+ * Content tag configuration for this upload.
1558
+ * Overrides the default config set on the service.
1559
+ */
1560
+ contentTagConfig?: ContentTagConfig;
1535
1561
  }
1536
1562
  /**
1537
1563
  * Result of upload with extraction
@@ -1551,6 +1577,8 @@ interface UploadExtractResult {
1551
1577
  generatedFolderPath?: string;
1552
1578
  /** Original file name before renaming */
1553
1579
  originalFileName?: string;
1580
+ /** Content tag assigned by LLM (if content tagging was performed) */
1581
+ contentTag?: string;
1554
1582
  }
1555
1583
  /**
1556
1584
  * Options for creating folders from naming convention
@@ -1608,7 +1636,8 @@ declare class UploadExtractService {
1608
1636
  private fileManager;
1609
1637
  private namingService?;
1610
1638
  private extractionService?;
1611
- constructor(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService);
1639
+ private defaultContentTagConfig?;
1640
+ constructor(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService, defaultContentTagConfig?: ContentTagConfig);
1612
1641
  /**
1613
1642
  * Upload a file with optional extraction and naming convention
1614
1643
  */
@@ -1633,6 +1662,21 @@ declare class UploadExtractService {
1633
1662
  fullPath?: string;
1634
1663
  folderPath?: string;
1635
1664
  }>;
1665
+ /**
1666
+ * Perform content tagging via LLM extraction.
1667
+ * Calls the LLM with the configured prompt, extracts the specified field,
1668
+ * and writes it to the content_tag column.
1669
+ */
1670
+ private performContentTagging;
1671
+ /**
1672
+ * Manually tag a file's content via LLM.
1673
+ * Works with existing DB records, resolving the file path internally.
1674
+ *
1675
+ * @param fileId - Database record ID of the file
1676
+ * @param config - Content tag config (falls back to default if not provided)
1677
+ * @returns OperationResult with the tag value
1678
+ */
1679
+ tagFileContent(fileId: string, config?: ContentTagConfig): Promise<OperationResult<string>>;
1636
1680
  /**
1637
1681
  * Get the file manager
1638
1682
  */
@@ -1649,7 +1693,7 @@ declare class UploadExtractService {
1649
1693
  /**
1650
1694
  * Create an UploadExtractService instance
1651
1695
  */
1652
- declare function createUploadExtractService(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService): UploadExtractService;
1696
+ declare function createUploadExtractService(fileManager: TrackedFileManager, namingService?: NamingConventionService, extractionService?: LLMExtractionService, defaultContentTagConfig?: ContentTagConfig): UploadExtractService;
1653
1697
 
1654
1698
  /**
1655
1699
  * Configuration loader for hazo_files
@@ -1743,6 +1787,8 @@ interface HazoFilesColumnDefinitions {
1743
1787
  deleted_at: 'TEXT' | 'TIMESTAMP';
1744
1788
  /** Original filename at upload time (V2) */
1745
1789
  original_filename: 'TEXT';
1790
+ /** Content tag classifying the document type (V3) */
1791
+ content_tag: 'TEXT';
1746
1792
  }
1747
1793
  /**
1748
1794
  * Schema definition for a specific database type
@@ -1925,6 +1971,46 @@ declare const HAZO_FILES_NAMING_TABLE_SCHEMA: HazoFilesNamingTableSchema;
1925
1971
  * Get DDL for a custom naming table name
1926
1972
  */
1927
1973
  declare function getNamingSchemaForTable(tableName: string, dbType: 'sqlite' | 'postgres'): DatabaseSchemaDefinition;
1974
+ /**
1975
+ * Migration schema for adding V3 content tagging column to existing tables.
1976
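+ * Safe to run multiple times when applied as in the example below (the SQLite loop deliberately ignores the error raised when the column already exists).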
1977
+ *
1978
+ * @example
1979
+ * ```typescript
1980
+ * import { HAZO_FILES_MIGRATION_V3 } from 'hazo_files';
1981
+ *
1982
+ * // SQLite
1983
+ * for (const stmt of HAZO_FILES_MIGRATION_V3.sqlite.alterStatements) {
1984
+ * try { await db.run(stmt); } catch { /* column already exists *\/ }
1985
+ * }
1986
+ * for (const idx of HAZO_FILES_MIGRATION_V3.sqlite.indexes) {
1987
+ * await db.run(idx);
1988
+ * }
1989
+ *
1990
+ * // PostgreSQL
1991
+ * for (const stmt of HAZO_FILES_MIGRATION_V3.postgres.alterStatements) {
1992
+ * await client.query(stmt);
1993
+ * }
1994
+ * for (const idx of HAZO_FILES_MIGRATION_V3.postgres.indexes) {
1995
+ * await client.query(idx);
1996
+ * }
1997
+ * ```
1998
+ */
1999
+ interface HazoFilesMigrationV3 {
2000
+ /** Default table name */
2001
+ tableName: string;
2002
+ /** SQLite migration statements */
2003
+ sqlite: MigrationSchemaDefinition;
2004
+ /** PostgreSQL migration statements */
2005
+ postgres: MigrationSchemaDefinition;
2006
+ /** New column names added in V3 */
2007
+ newColumns: readonly string[];
2008
+ }
2009
+ declare const HAZO_FILES_MIGRATION_V3: HazoFilesMigrationV3;
2010
+ /**
2011
+ * Get V3 migration statements for a custom table name
2012
+ */
2013
+ declare function getMigrationV3ForTable(tableName: string, dbType: 'sqlite' | 'postgres'): MigrationSchemaDefinition;
1928
2014
 
1929
2015
  /**
1930
2016
  * Migration: Add Reference Tracking (V2)
@@ -1967,6 +2053,33 @@ declare function migrateToV2(executor: MigrationExecutor, dbType: 'sqlite' | 'po
1967
2053
  */
1968
2054
  declare function backfillV2Defaults(executor: MigrationExecutor, dbType: 'sqlite' | 'postgres', tableName?: string): Promise<void>;
1969
2055
 
2056
+ /**
2057
+ * Migration: Add Content Tag (V3)
2058
+ *
2059
+ * Adds the content_tag column and its index to an existing hazo_files table.
2060
+ * Idempotent — safe to run multiple times.
2061
+ */
2062
+
2063
+ /**
2064
+ * Run the V3 migration: add content_tag column and index.
2065
+ *
2066
+ * @param executor - Object with a `run(sql)` method
2067
+ * @param dbType - Database type ('sqlite' | 'postgres')
2068
+ * @param tableName - Custom table name (defaults to 'hazo_files')
2069
+ *
2070
+ * @example
2071
+ * ```typescript
2072
+ * import { migrateToV3 } from 'hazo_files';
2073
+ *
2074
+ * // SQLite with better-sqlite3
2075
+ * await migrateToV3({ run: (sql) => db.exec(sql) }, 'sqlite');
2076
+ *
2077
+ * // PostgreSQL with pg
2078
+ * await migrateToV3({ run: (sql) => client.query(sql) }, 'postgres');
2079
+ * ```
2080
+ */
2081
+ declare function migrateToV3(executor: MigrationExecutor, dbType: 'sqlite' | 'postgres', tableName?: string): Promise<void>;
2082
+
1970
2083
  /**
1971
2084
  * Common utility functions
1972
2085
  */
@@ -2782,4 +2895,4 @@ declare function toV2Record(record: FileMetadataRecord): FileMetadataRecordV2;
2782
2895
  */
2783
2896
  declare function buildFileWithStatus(record: FileMetadataRecord): FileWithStatus;
2784
2897
 
2785
- export { ALL_SYSTEM_VARIABLES, type AddExtractionOptions, type AddRefOptions, type AuthCallbacks, AuthenticationError, type CleanupOrphanedOptions, ConfigurationError, type CreateFolderOptions, type CrudServiceLike, DEFAULT_DATE_FORMATS, type DatabaseSchemaDefinition, type DatabaseTrackingConfig, DirectoryExistsError, DirectoryNotEmptyError, DirectoryNotFoundError, type DownloadOptions, type ExtractionData, type ExtractionOptions, type ExtractionResult, type FileBrowserState, type FileDataStructure, FileExistsError, type FileInfo, type FileItem, FileManager, type FileManagerOptions, type FileMetadataInput, type FileMetadataRecord, type FileMetadataRecordV2, FileMetadataService, type FileMetadataServiceOptions, type FileMetadataUpdate, FileNotFoundError, type FileRef, type FileRefVisibility, type FileStatus, type FileSystemItem, FileTooLargeError, type FileWithStatus, type FindOrphanedOptions, type FolderItem, type GeneratedNameResult, type GoogleAuthConfig, GoogleDriveAuth, type GoogleDriveConfig, GoogleDriveModule, HAZO_FILES_DEFAULT_TABLE_NAME, HAZO_FILES_MIGRATION_V2, HAZO_FILES_NAMING_DEFAULT_TABLE_NAME, HAZO_FILES_NAMING_TABLE_SCHEMA, HAZO_FILES_TABLE_SCHEMA, type HazoFilesColumnDefinitions, type HazoFilesConfig, HazoFilesError, type HazoFilesMigrationV2, type HazoFilesNamingColumnDefinitions, type HazoFilesNamingTableSchema, type HazoFilesTableSchema, type HazoLLMInstance, InvalidExtensionError, InvalidPathError, LLMExtractionService, type LLMFactory, type LLMProvider, type ListNamingConventionsOptions, type ListOptions, type LocalStorageConfig, LocalStorageModule, type MetadataLogger, type MigrationExecutor, type MigrationSchemaDefinition, type MoveOptions, type NameGenerationOptions, type NamingConventionInput, type NamingConventionRecord, NamingConventionService, type NamingConventionServiceOptions, type NamingConventionType, type NamingConventionUpdate, type NamingRuleConfiguratorProps, type NamingRuleHistoryEntry, type NamingRuleSchema, type 
NamingVariable, OperationError, type OperationResult, type ParsedNamingConvention, type PatternSegment, PermissionDeniedError, type ProgressCallback, type RemoveExtractionOptions, type RemoveRefsCriteria, type RenameOptions, SYSTEM_COUNTER_VARIABLES, SYSTEM_DATE_VARIABLES, SYSTEM_FILE_VARIABLES, type StorageModule, type StorageProvider, type TokenData, TrackedFileManager, type TrackedFileManagerFullOptions, type TrackedFileManagerOptions, type TrackedUploadOptions, type TreeNode, type UploadExtractOptions, type UploadExtractResult, UploadExtractService, type UploadOptions, type UploadWithRefOptions, type UseNamingRuleActions, type UseNamingRuleReturn, type UseNamingRuleState, type VariableCategory, addExtractionToFileData, backfillV2Defaults, buildFileWithStatus, clearExtractions, clonePattern, computeFileHash, computeFileHashFromStream, computeFileHashSync, computeFileInfo, createAndInitializeModule, createEmptyFileDataStructure, createEmptyNamingRuleSchema, createFileItem, createFileManager, createFileMetadataService, createFileRef, createFolderItem, createGoogleDriveAuth, createGoogleDriveModule, createInitializedFileManager, createInitializedTrackedFileManager, createLLMExtractionService, createLiteralSegment, createLocalModule, createModule, createNamingConventionService, createTrackedFileManager, createUploadExtractService, createVariableSegment, deepMerge, errorResult, filterItems, formatBytes, formatCounter, formatDateToken, generateExtractionId, generateId, generatePreviewName, generateRefId, generateSampleConfig, generateSegmentId, getBaseName, getBreadcrumbs, getDirName, getExtension, getExtensionFromMime, getExtractionById, getExtractionCount, getExtractions, getFileCategory, getFileMetadataValues, getMergedData, getMigrationForTable, getMimeType, getNameWithoutExtension, getNamingSchemaForTable, getParentPath, getPathSegments, getRegisteredProviders, getRelativePath, getSchemaForTable, getSystemVariablePreviewValues, hasExtension, 
hasExtractionStructure, hasFileContentChanged, hashesEqual, hazo_files_generate_file_name, hazo_files_generate_folder_name, isAudio, isChildPath, isCounterVariable, isDateVariable, isDocument, isFile, isFileMetadataVariable, isFolder, isImage, isPreviewable, isProviderRegistered, isText, isVideo, joinPath, loadConfig, loadConfigAsync, migrateToV2, normalizePath, parseConfig, parseFileData, parseFileRefs, parsePatternString, patternToString, recalculateMergedData, registerModule, removeExtractionById, removeExtractionByIndex, removeRefFromArray, removeRefsByCriteriaFromArray, sanitizeFilename, saveConfig, sortItems, stringifyFileData, stringifyFileRefs, successResult, toV2Record, updateExtractionById, validateExtractionData, validateFileDataStructure, validateNamingRuleSchema, validatePath };
2898
+ export { ALL_SYSTEM_VARIABLES, type AddExtractionOptions, type AddRefOptions, type AuthCallbacks, AuthenticationError, type CleanupOrphanedOptions, ConfigurationError, type ContentTagConfig, type CreateFolderOptions, type CrudServiceLike, DEFAULT_DATE_FORMATS, type DatabaseSchemaDefinition, type DatabaseTrackingConfig, DirectoryExistsError, DirectoryNotEmptyError, DirectoryNotFoundError, type DownloadOptions, type ExtractionData, type ExtractionOptions, type ExtractionResult, type FileBrowserState, type FileDataStructure, FileExistsError, type FileInfo, type FileItem, FileManager, type FileManagerOptions, type FileMetadataInput, type FileMetadataRecord, type FileMetadataRecordV2, FileMetadataService, type FileMetadataServiceOptions, type FileMetadataUpdate, FileNotFoundError, type FileRef, type FileRefVisibility, type FileStatus, type FileSystemItem, FileTooLargeError, type FileWithStatus, type FindOrphanedOptions, type FolderItem, type GeneratedNameResult, type GoogleAuthConfig, GoogleDriveAuth, type GoogleDriveConfig, GoogleDriveModule, HAZO_FILES_DEFAULT_TABLE_NAME, HAZO_FILES_MIGRATION_V2, HAZO_FILES_MIGRATION_V3, HAZO_FILES_NAMING_DEFAULT_TABLE_NAME, HAZO_FILES_NAMING_TABLE_SCHEMA, HAZO_FILES_TABLE_SCHEMA, type HazoFilesColumnDefinitions, type HazoFilesConfig, HazoFilesError, type HazoFilesMigrationV2, type HazoFilesMigrationV3, type HazoFilesNamingColumnDefinitions, type HazoFilesNamingTableSchema, type HazoFilesTableSchema, type HazoLLMInstance, InvalidExtensionError, InvalidPathError, LLMExtractionService, type LLMFactory, type LLMProvider, type ListNamingConventionsOptions, type ListOptions, type LocalStorageConfig, LocalStorageModule, type MetadataLogger, type MigrationExecutor, type MigrationSchemaDefinition, type MoveOptions, type NameGenerationOptions, type NamingConventionInput, type NamingConventionRecord, NamingConventionService, type NamingConventionServiceOptions, type NamingConventionType, type NamingConventionUpdate, type 
NamingRuleConfiguratorProps, type NamingRuleHistoryEntry, type NamingRuleSchema, type NamingVariable, OperationError, type OperationResult, type ParsedNamingConvention, type PatternSegment, PermissionDeniedError, type ProgressCallback, type RemoveExtractionOptions, type RemoveRefsCriteria, type RenameOptions, SYSTEM_COUNTER_VARIABLES, SYSTEM_DATE_VARIABLES, SYSTEM_FILE_VARIABLES, type StorageModule, type StorageProvider, type TokenData, TrackedFileManager, type TrackedFileManagerFullOptions, type TrackedFileManagerOptions, type TrackedUploadOptions, type TreeNode, type UploadExtractOptions, type UploadExtractResult, UploadExtractService, type UploadOptions, type UploadWithRefOptions, type UseNamingRuleActions, type UseNamingRuleReturn, type UseNamingRuleState, type VariableCategory, addExtractionToFileData, backfillV2Defaults, buildFileWithStatus, clearExtractions, clonePattern, computeFileHash, computeFileHashFromStream, computeFileHashSync, computeFileInfo, createAndInitializeModule, createEmptyFileDataStructure, createEmptyNamingRuleSchema, createFileItem, createFileManager, createFileMetadataService, createFileRef, createFolderItem, createGoogleDriveAuth, createGoogleDriveModule, createInitializedFileManager, createInitializedTrackedFileManager, createLLMExtractionService, createLiteralSegment, createLocalModule, createModule, createNamingConventionService, createTrackedFileManager, createUploadExtractService, createVariableSegment, deepMerge, errorResult, filterItems, formatBytes, formatCounter, formatDateToken, generateExtractionId, generateId, generatePreviewName, generateRefId, generateSampleConfig, generateSegmentId, getBaseName, getBreadcrumbs, getDirName, getExtension, getExtensionFromMime, getExtractionById, getExtractionCount, getExtractions, getFileCategory, getFileMetadataValues, getMergedData, getMigrationForTable, getMigrationV3ForTable, getMimeType, getNameWithoutExtension, getNamingSchemaForTable, getParentPath, getPathSegments, 
getRegisteredProviders, getRelativePath, getSchemaForTable, getSystemVariablePreviewValues, hasExtension, hasExtractionStructure, hasFileContentChanged, hashesEqual, hazo_files_generate_file_name, hazo_files_generate_folder_name, isAudio, isChildPath, isCounterVariable, isDateVariable, isDocument, isFile, isFileMetadataVariable, isFolder, isImage, isPreviewable, isProviderRegistered, isText, isVideo, joinPath, loadConfig, loadConfigAsync, migrateToV2, migrateToV3, normalizePath, parseConfig, parseFileData, parseFileRefs, parsePatternString, patternToString, recalculateMergedData, registerModule, removeExtractionById, removeExtractionByIndex, removeRefFromArray, removeRefsByCriteriaFromArray, sanitizeFilename, saveConfig, sortItems, stringifyFileData, stringifyFileRefs, successResult, toV2Record, updateExtractionById, validateExtractionData, validateFileDataStructure, validateNamingRuleSchema, validatePath };