hazo_files 1.4.2 → 1.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -4
- package/dist/index.d.mts +117 -4
- package/dist/index.d.ts +117 -4
- package/dist/index.js +158 -11
- package/dist/index.mjs +155 -11
- package/dist/server/index.d.mts +122 -4
- package/dist/server/index.d.ts +122 -4
- package/dist/server/index.js +162 -13
- package/dist/server/index.mjs +159 -13
- package/package.json +2 -6
package/dist/index.js
CHANGED
|
@@ -46,6 +46,7 @@ __export(index_exports, {
|
|
|
46
46
|
GoogleDriveModule: () => GoogleDriveModule,
|
|
47
47
|
HAZO_FILES_DEFAULT_TABLE_NAME: () => HAZO_FILES_DEFAULT_TABLE_NAME,
|
|
48
48
|
HAZO_FILES_MIGRATION_V2: () => HAZO_FILES_MIGRATION_V2,
|
|
49
|
+
HAZO_FILES_MIGRATION_V3: () => HAZO_FILES_MIGRATION_V3,
|
|
49
50
|
HAZO_FILES_NAMING_DEFAULT_TABLE_NAME: () => HAZO_FILES_NAMING_DEFAULT_TABLE_NAME,
|
|
50
51
|
HAZO_FILES_NAMING_TABLE_SCHEMA: () => HAZO_FILES_NAMING_TABLE_SCHEMA,
|
|
51
52
|
HAZO_FILES_TABLE_SCHEMA: () => HAZO_FILES_TABLE_SCHEMA,
|
|
@@ -115,6 +116,7 @@ __export(index_exports, {
|
|
|
115
116
|
getFileMetadataValues: () => getFileMetadataValues,
|
|
116
117
|
getMergedData: () => getMergedData,
|
|
117
118
|
getMigrationForTable: () => getMigrationForTable,
|
|
119
|
+
getMigrationV3ForTable: () => getMigrationV3ForTable,
|
|
118
120
|
getMimeType: () => getMimeType,
|
|
119
121
|
getNameWithoutExtension: () => getNameWithoutExtension,
|
|
120
122
|
getNamingSchemaForTable: () => getNamingSchemaForTable,
|
|
@@ -147,6 +149,7 @@ __export(index_exports, {
|
|
|
147
149
|
loadConfig: () => loadConfig,
|
|
148
150
|
loadConfigAsync: () => loadConfigAsync,
|
|
149
151
|
migrateToV2: () => migrateToV2,
|
|
152
|
+
migrateToV3: () => migrateToV3,
|
|
150
153
|
normalizePath: () => normalizePath,
|
|
151
154
|
parseConfig: () => parseConfig,
|
|
152
155
|
parseFileData: () => parseFileData,
|
|
@@ -2395,6 +2398,7 @@ var FileMetadataService = class {
|
|
|
2395
2398
|
if (input.scope_id !== void 0) record.scope_id = input.scope_id;
|
|
2396
2399
|
if (input.uploaded_by !== void 0) record.uploaded_by = input.uploaded_by;
|
|
2397
2400
|
if (input.original_filename !== void 0) record.original_filename = input.original_filename;
|
|
2401
|
+
if (input.content_tag !== void 0) record.content_tag = input.content_tag;
|
|
2398
2402
|
const results = await this.crud.insert(record);
|
|
2399
2403
|
this.logger?.debug?.("Recorded file upload", { path: input.file_path });
|
|
2400
2404
|
return results[0] || null;
|
|
@@ -4229,10 +4233,11 @@ function generatePreviewName(pattern, userVariables, options = {}) {
|
|
|
4229
4233
|
|
|
4230
4234
|
// src/services/upload-extract-service.ts
|
|
4231
4235
|
var UploadExtractService = class {
|
|
4232
|
-
constructor(fileManager, namingService, extractionService) {
|
|
4236
|
+
constructor(fileManager, namingService, extractionService, defaultContentTagConfig) {
|
|
4233
4237
|
this.fileManager = fileManager;
|
|
4234
4238
|
this.namingService = namingService;
|
|
4235
4239
|
this.extractionService = extractionService;
|
|
4240
|
+
this.defaultContentTagConfig = defaultContentTagConfig;
|
|
4236
4241
|
}
|
|
4237
4242
|
/**
|
|
4238
4243
|
* Upload a file with optional extraction and naming convention
|
|
@@ -4309,11 +4314,12 @@ var UploadExtractService = class {
|
|
|
4309
4314
|
metadata.extraction_id = extractionData.id;
|
|
4310
4315
|
metadata.extraction_source = extractionData.source;
|
|
4311
4316
|
}
|
|
4317
|
+
const effectiveContentTagConfig = options.contentTagConfig ?? this.defaultContentTagConfig;
|
|
4318
|
+
const needsContentTagging = effectiveContentTagConfig?.content_tag_set_by_llm && this.extractionService && this.fileManager.isTrackingActive();
|
|
4312
4319
|
const uploadResult = await this.fileManager.uploadFile(source, fullPath, {
|
|
4313
4320
|
...options,
|
|
4314
4321
|
metadata,
|
|
4315
|
-
awaitRecording: !!extractionData
|
|
4316
|
-
// Await recording when extraction needs to be added
|
|
4322
|
+
awaitRecording: !!extractionData || !!needsContentTagging
|
|
4317
4323
|
});
|
|
4318
4324
|
if (!uploadResult.success) {
|
|
4319
4325
|
return {
|
|
@@ -4337,13 +4343,23 @@ var UploadExtractService = class {
|
|
|
4337
4343
|
);
|
|
4338
4344
|
}
|
|
4339
4345
|
}
|
|
4346
|
+
let contentTag;
|
|
4347
|
+
if (needsContentTagging && effectiveContentTagConfig) {
|
|
4348
|
+
contentTag = await this.performContentTagging(
|
|
4349
|
+
source,
|
|
4350
|
+
mimeType,
|
|
4351
|
+
effectiveContentTagConfig,
|
|
4352
|
+
fullPath
|
|
4353
|
+
);
|
|
4354
|
+
}
|
|
4340
4355
|
return {
|
|
4341
4356
|
success: true,
|
|
4342
4357
|
file: uploadResult.data,
|
|
4343
4358
|
extraction: extractionData,
|
|
4344
4359
|
generatedPath: fullPath,
|
|
4345
4360
|
generatedFolderPath: generatedFolderPath || void 0,
|
|
4346
|
-
originalFileName
|
|
4361
|
+
originalFileName,
|
|
4362
|
+
contentTag
|
|
4347
4363
|
};
|
|
4348
4364
|
} catch (error) {
|
|
4349
4365
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -4441,6 +4457,76 @@ var UploadExtractService = class {
|
|
|
4441
4457
|
folderPath: folderPath || void 0
|
|
4442
4458
|
};
|
|
4443
4459
|
}
|
|
4460
|
+
/**
|
|
4461
|
+
* Perform content tagging via LLM extraction.
|
|
4462
|
+
* Calls the LLM with the configured prompt, extracts the specified field,
|
|
4463
|
+
* and writes it to the content_tag column.
|
|
4464
|
+
*/
|
|
4465
|
+
async performContentTagging(buffer, mimeType, config, filePath) {
|
|
4466
|
+
try {
|
|
4467
|
+
if (!this.extractionService) return void 0;
|
|
4468
|
+
const result = await this.extractionService.extract(buffer, mimeType, {
|
|
4469
|
+
promptArea: config.content_tag_prompt_area,
|
|
4470
|
+
promptKey: config.content_tag_prompt_key,
|
|
4471
|
+
promptVariables: config.content_tag_prompt_variables
|
|
4472
|
+
});
|
|
4473
|
+
if (!result.success || !result.data) return void 0;
|
|
4474
|
+
const tagValue = result.data[config.content_tag_prompt_return_fieldname];
|
|
4475
|
+
if (typeof tagValue !== "string" || !tagValue) return void 0;
|
|
4476
|
+
const metadataService = this.fileManager.getMetadataService();
|
|
4477
|
+
if (metadataService) {
|
|
4478
|
+
const storageType = this.fileManager.getProvider() || "local";
|
|
4479
|
+
const record = await metadataService.findByPath(filePath, storageType);
|
|
4480
|
+
if (record) {
|
|
4481
|
+
await metadataService.updateFields(record.id, { content_tag: tagValue });
|
|
4482
|
+
}
|
|
4483
|
+
}
|
|
4484
|
+
return tagValue;
|
|
4485
|
+
} catch {
|
|
4486
|
+
return void 0;
|
|
4487
|
+
}
|
|
4488
|
+
}
|
|
4489
|
+
/**
|
|
4490
|
+
* Manually tag a file's content via LLM.
|
|
4491
|
+
* Works with existing DB records, resolving the file path internally.
|
|
4492
|
+
*
|
|
4493
|
+
* @param fileId - Database record ID of the file
|
|
4494
|
+
* @param config - Content tag config (falls back to default if not provided)
|
|
4495
|
+
* @returns OperationResult with the tag value
|
|
4496
|
+
*/
|
|
4497
|
+
async tagFileContent(fileId, config) {
|
|
4498
|
+
const effectiveConfig = config ?? this.defaultContentTagConfig;
|
|
4499
|
+
if (!effectiveConfig || !effectiveConfig.content_tag_set_by_llm) {
|
|
4500
|
+
return { success: false, error: "Content tagging is not configured or disabled" };
|
|
4501
|
+
}
|
|
4502
|
+
if (!this.extractionService) {
|
|
4503
|
+
return { success: false, error: "Extraction service not available" };
|
|
4504
|
+
}
|
|
4505
|
+
const metadataService = this.fileManager.getMetadataService();
|
|
4506
|
+
if (!metadataService) {
|
|
4507
|
+
return { success: false, error: "Metadata service not available (tracking not enabled)" };
|
|
4508
|
+
}
|
|
4509
|
+
const record = await metadataService.findById(fileId);
|
|
4510
|
+
if (!record) {
|
|
4511
|
+
return { success: false, error: `File record not found: ${fileId}` };
|
|
4512
|
+
}
|
|
4513
|
+
const downloadResult = await this.fileManager.downloadFile(record.file_path);
|
|
4514
|
+
if (!downloadResult.success || !downloadResult.data) {
|
|
4515
|
+
return { success: false, error: `Failed to download file: ${downloadResult.error}` };
|
|
4516
|
+
}
|
|
4517
|
+
const buffer = Buffer.isBuffer(downloadResult.data) ? downloadResult.data : Buffer.from(downloadResult.data);
|
|
4518
|
+
const mimeType = getMimeType(record.filename);
|
|
4519
|
+
const tagValue = await this.performContentTagging(
|
|
4520
|
+
buffer,
|
|
4521
|
+
mimeType,
|
|
4522
|
+
effectiveConfig,
|
|
4523
|
+
record.file_path
|
|
4524
|
+
);
|
|
4525
|
+
if (!tagValue) {
|
|
4526
|
+
return { success: false, error: "Content tagging did not produce a result" };
|
|
4527
|
+
}
|
|
4528
|
+
return { success: true, data: tagValue };
|
|
4529
|
+
}
|
|
4444
4530
|
/**
|
|
4445
4531
|
* Get the file manager
|
|
4446
4532
|
*/
|
|
@@ -4460,8 +4546,8 @@ var UploadExtractService = class {
|
|
|
4460
4546
|
return this.extractionService;
|
|
4461
4547
|
}
|
|
4462
4548
|
};
|
|
4463
|
-
function createUploadExtractService(fileManager, namingService, extractionService) {
|
|
4464
|
-
return new UploadExtractService(fileManager, namingService, extractionService);
|
|
4549
|
+
function createUploadExtractService(fileManager, namingService, extractionService, defaultContentTagConfig) {
|
|
4550
|
+
return new UploadExtractService(fileManager, namingService, extractionService, defaultContentTagConfig);
|
|
4465
4551
|
}
|
|
4466
4552
|
|
|
4467
4553
|
// src/schema/index.ts
|
|
@@ -4488,7 +4574,8 @@ var HAZO_FILES_TABLE_SCHEMA = {
|
|
|
4488
4574
|
uploaded_by TEXT,
|
|
4489
4575
|
storage_verified_at TEXT,
|
|
4490
4576
|
deleted_at TEXT,
|
|
4491
|
-
original_filename TEXT
|
|
4577
|
+
original_filename TEXT,
|
|
4578
|
+
content_tag TEXT
|
|
4492
4579
|
)`,
|
|
4493
4580
|
indexes: [
|
|
4494
4581
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_path ON hazo_files (file_path)",
|
|
@@ -4498,7 +4585,8 @@ var HAZO_FILES_TABLE_SCHEMA = {
|
|
|
4498
4585
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
|
|
4499
4586
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
|
|
4500
4587
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
|
|
4501
|
-
"CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
|
|
4588
|
+
"CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)",
|
|
4589
|
+
"CREATE INDEX IF NOT EXISTS idx_hazo_files_content_tag ON hazo_files (content_tag)"
|
|
4502
4590
|
]
|
|
4503
4591
|
},
|
|
4504
4592
|
postgres: {
|
|
@@ -4521,7 +4609,8 @@ var HAZO_FILES_TABLE_SCHEMA = {
|
|
|
4521
4609
|
uploaded_by UUID,
|
|
4522
4610
|
storage_verified_at TIMESTAMP WITH TIME ZONE,
|
|
4523
4611
|
deleted_at TIMESTAMP WITH TIME ZONE,
|
|
4524
|
-
original_filename TEXT
|
|
4612
|
+
original_filename TEXT,
|
|
4613
|
+
content_tag TEXT
|
|
4525
4614
|
)`,
|
|
4526
4615
|
indexes: [
|
|
4527
4616
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_path ON hazo_files (file_path)",
|
|
@@ -4531,7 +4620,8 @@ var HAZO_FILES_TABLE_SCHEMA = {
|
|
|
4531
4620
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
|
|
4532
4621
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
|
|
4533
4622
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
|
|
4534
|
-
"CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
|
|
4623
|
+
"CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)",
|
|
4624
|
+
"CREATE INDEX IF NOT EXISTS idx_hazo_files_content_tag ON hazo_files (content_tag)"
|
|
4535
4625
|
]
|
|
4536
4626
|
},
|
|
4537
4627
|
columns: [
|
|
@@ -4553,7 +4643,8 @@ var HAZO_FILES_TABLE_SCHEMA = {
|
|
|
4553
4643
|
"uploaded_by",
|
|
4554
4644
|
"storage_verified_at",
|
|
4555
4645
|
"deleted_at",
|
|
4556
|
-
"original_filename"
|
|
4646
|
+
"original_filename",
|
|
4647
|
+
"content_tag"
|
|
4557
4648
|
]
|
|
4558
4649
|
};
|
|
4559
4650
|
function getSchemaForTable(tableName, dbType) {
|
|
@@ -4694,6 +4785,45 @@ function getNamingSchemaForTable(tableName, dbType) {
|
|
|
4694
4785
|
)
|
|
4695
4786
|
};
|
|
4696
4787
|
}
|
|
4788
|
+
// Version 3 migration descriptor: adds the nullable `content_tag` column and
// an index on it. Statements are written against the default table name,
// which is recorded in `tableName`.
var HAZO_FILES_MIGRATION_V3 = {
  tableName: HAZO_FILES_DEFAULT_TABLE_NAME,
  sqlite: {
    // This statement has no IF NOT EXISTS guard, unlike the postgres one.
    alterStatements: ["ALTER TABLE hazo_files ADD COLUMN content_tag TEXT"],
    indexes: ["CREATE INDEX IF NOT EXISTS idx_hazo_files_content_tag ON hazo_files (content_tag)"],
    // Nothing to backfill: the new column is nullable and defaults to NULL.
    backfill: ""
  },
  postgres: {
    alterStatements: ["ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS content_tag TEXT"],
    indexes: ["CREATE INDEX IF NOT EXISTS idx_hazo_files_content_tag ON hazo_files (content_tag)"],
    // Nothing to backfill: the new column is nullable and defaults to NULL.
    backfill: ""
  },
  // Columns introduced by this migration.
  newColumns: ["content_tag"]
};
|
|
4814
|
+
/**
 * Build the V3 (content_tag) migration statements for a custom table name.
 *
 * Every occurrence of the default table name in the ALTER, index and
 * backfill statements is replaced by `tableName`. The index name embeds the
 * default table name, so it is renamed as well.
 *
 * The substitution is literal: characters such as `$&` or `$1` in
 * `tableName` are copied verbatim rather than being interpreted as
 * replacement patterns, and the default name is not treated as a regex.
 *
 * NOTE(review): `tableName` is spliced into SQL text unquoted; callers are
 * presumably expected to pass a trusted identifier. Confirm against callers.
 *
 * @param tableName - Table name to target instead of the default
 * @param dbType - Key of the database-specific section ("sqlite" or "postgres")
 * @returns `{ alterStatements, indexes, backfill }` rewritten for `tableName`
 * @throws {TypeError} When `dbType` does not name a migration section
 */
function getMigrationV3ForTable(tableName, dbType) {
  const migration = HAZO_FILES_MIGRATION_V3[dbType];
  if (!Array.isArray(migration?.alterStatements) || !Array.isArray(migration?.indexes)) {
    throw new TypeError(`Unsupported database type for V3 migration: ${String(dbType)}`);
  }
  const defaultName = HAZO_FILES_MIGRATION_V3.tableName;
  // split/join performs a plain-text global replacement with no special
  // handling of "$" sequences in the replacement.
  const retarget = (sql) => defaultName ? sql.split(defaultName).join(tableName) : sql;
  return {
    alterStatements: migration.alterStatements.map(retarget),
    indexes: migration.indexes.map(retarget),
    backfill: typeof migration.backfill === "string" ? retarget(migration.backfill) : migration.backfill
  };
}
|
|
4697
4827
|
|
|
4698
4828
|
// src/migrations/add-reference-tracking.ts
|
|
4699
4829
|
async function migrateToV2(executor, dbType, tableName) {
|
|
@@ -4712,6 +4842,20 @@ async function backfillV2Defaults(executor, dbType, tableName) {
|
|
|
4712
4842
|
const migration = tableName ? getMigrationForTable(tableName, dbType) : HAZO_FILES_MIGRATION_V2[dbType];
|
|
4713
4843
|
await executor.run(migration.backfill);
|
|
4714
4844
|
}
|
|
4845
|
+
|
|
4846
|
+
// src/migrations/add-content-tag.ts
|
|
4847
|
+
/**
 * Apply the V3 migration: add the `content_tag` column, then create its index.
 *
 * ALTER statements are best-effort so the migration can be re-run: the
 * SQLite statement has no IF NOT EXISTS guard and fails once the column is
 * already present. A failed ALTER is therefore tolerated as long as the
 * index statements succeed afterwards. If an index statement fails after an
 * ALTER failure, the thrown error names both and carries the ALTER error as
 * its `cause`, so the underlying problem is not lost.
 *
 * @param executor - Object exposing `run(sql)`, awaited for every statement
 * @param dbType - Key of the database-specific section ("sqlite" or "postgres")
 * @param tableName - Optional custom table name; when falsy the default statements are used
 * @returns Promise that resolves once all statements have run
 * @throws {TypeError} When `dbType` does not name a migration section
 * @throws The index statement's own error when no ALTER failed before it,
 *   otherwise an Error whose `cause` is the first ALTER failure
 */
async function migrateToV3(executor, dbType, tableName) {
  const migration = tableName ? getMigrationV3ForTable(tableName, dbType) : HAZO_FILES_MIGRATION_V3[dbType];
  if (!Array.isArray(migration?.alterStatements) || !Array.isArray(migration?.indexes)) {
    throw new TypeError(`Unsupported database type for V3 migration: ${String(dbType)}`);
  }
  const alterFailures = [];
  for (const stmt of migration.alterStatements) {
    try {
      await executor.run(stmt);
    } catch (error) {
      // Tolerated for idempotent re-runs, but kept for diagnostics below.
      alterFailures.push({ stmt, error });
    }
  }
  for (const idx of migration.indexes) {
    try {
      await executor.run(idx);
    } catch (error) {
      if (alterFailures.length === 0) {
        throw error;
      }
      const [{ stmt: failedStmt, error: alterError }] = alterFailures;
      const alterMessage = alterError instanceof Error ? alterError.message : String(alterError);
      const indexMessage = error instanceof Error ? error.message : String(error);
      throw new Error(
        `V3 migration failed: "${idx}" failed (${indexMessage}) after "${failedStmt}" failed (${alterMessage})`,
        { cause: alterError }
      );
    }
  }
}
|
|
4715
4859
|
// Annotate the CommonJS export names for ESM import in node:
|
|
4716
4860
|
0 && (module.exports = {
|
|
4717
4861
|
ALL_SYSTEM_VARIABLES,
|
|
@@ -4730,6 +4874,7 @@ async function backfillV2Defaults(executor, dbType, tableName) {
|
|
|
4730
4874
|
GoogleDriveModule,
|
|
4731
4875
|
HAZO_FILES_DEFAULT_TABLE_NAME,
|
|
4732
4876
|
HAZO_FILES_MIGRATION_V2,
|
|
4877
|
+
HAZO_FILES_MIGRATION_V3,
|
|
4733
4878
|
HAZO_FILES_NAMING_DEFAULT_TABLE_NAME,
|
|
4734
4879
|
HAZO_FILES_NAMING_TABLE_SCHEMA,
|
|
4735
4880
|
HAZO_FILES_TABLE_SCHEMA,
|
|
@@ -4799,6 +4944,7 @@ async function backfillV2Defaults(executor, dbType, tableName) {
|
|
|
4799
4944
|
getFileMetadataValues,
|
|
4800
4945
|
getMergedData,
|
|
4801
4946
|
getMigrationForTable,
|
|
4947
|
+
getMigrationV3ForTable,
|
|
4802
4948
|
getMimeType,
|
|
4803
4949
|
getNameWithoutExtension,
|
|
4804
4950
|
getNamingSchemaForTable,
|
|
@@ -4831,6 +4977,7 @@ async function backfillV2Defaults(executor, dbType, tableName) {
|
|
|
4831
4977
|
loadConfig,
|
|
4832
4978
|
loadConfigAsync,
|
|
4833
4979
|
migrateToV2,
|
|
4980
|
+
migrateToV3,
|
|
4834
4981
|
normalizePath,
|
|
4835
4982
|
parseConfig,
|
|
4836
4983
|
parseFileData,
|
package/dist/index.mjs
CHANGED
|
@@ -2219,6 +2219,7 @@ var FileMetadataService = class {
|
|
|
2219
2219
|
if (input.scope_id !== void 0) record.scope_id = input.scope_id;
|
|
2220
2220
|
if (input.uploaded_by !== void 0) record.uploaded_by = input.uploaded_by;
|
|
2221
2221
|
if (input.original_filename !== void 0) record.original_filename = input.original_filename;
|
|
2222
|
+
if (input.content_tag !== void 0) record.content_tag = input.content_tag;
|
|
2222
2223
|
const results = await this.crud.insert(record);
|
|
2223
2224
|
this.logger?.debug?.("Recorded file upload", { path: input.file_path });
|
|
2224
2225
|
return results[0] || null;
|
|
@@ -4053,10 +4054,11 @@ function generatePreviewName(pattern, userVariables, options = {}) {
|
|
|
4053
4054
|
|
|
4054
4055
|
// src/services/upload-extract-service.ts
|
|
4055
4056
|
var UploadExtractService = class {
|
|
4056
|
-
constructor(fileManager, namingService, extractionService) {
|
|
4057
|
+
constructor(fileManager, namingService, extractionService, defaultContentTagConfig) {
|
|
4057
4058
|
this.fileManager = fileManager;
|
|
4058
4059
|
this.namingService = namingService;
|
|
4059
4060
|
this.extractionService = extractionService;
|
|
4061
|
+
this.defaultContentTagConfig = defaultContentTagConfig;
|
|
4060
4062
|
}
|
|
4061
4063
|
/**
|
|
4062
4064
|
* Upload a file with optional extraction and naming convention
|
|
@@ -4133,11 +4135,12 @@ var UploadExtractService = class {
|
|
|
4133
4135
|
metadata.extraction_id = extractionData.id;
|
|
4134
4136
|
metadata.extraction_source = extractionData.source;
|
|
4135
4137
|
}
|
|
4138
|
+
const effectiveContentTagConfig = options.contentTagConfig ?? this.defaultContentTagConfig;
|
|
4139
|
+
const needsContentTagging = effectiveContentTagConfig?.content_tag_set_by_llm && this.extractionService && this.fileManager.isTrackingActive();
|
|
4136
4140
|
const uploadResult = await this.fileManager.uploadFile(source, fullPath, {
|
|
4137
4141
|
...options,
|
|
4138
4142
|
metadata,
|
|
4139
|
-
awaitRecording: !!extractionData
|
|
4140
|
-
// Await recording when extraction needs to be added
|
|
4143
|
+
awaitRecording: !!extractionData || !!needsContentTagging
|
|
4141
4144
|
});
|
|
4142
4145
|
if (!uploadResult.success) {
|
|
4143
4146
|
return {
|
|
@@ -4161,13 +4164,23 @@ var UploadExtractService = class {
|
|
|
4161
4164
|
);
|
|
4162
4165
|
}
|
|
4163
4166
|
}
|
|
4167
|
+
let contentTag;
|
|
4168
|
+
if (needsContentTagging && effectiveContentTagConfig) {
|
|
4169
|
+
contentTag = await this.performContentTagging(
|
|
4170
|
+
source,
|
|
4171
|
+
mimeType,
|
|
4172
|
+
effectiveContentTagConfig,
|
|
4173
|
+
fullPath
|
|
4174
|
+
);
|
|
4175
|
+
}
|
|
4164
4176
|
return {
|
|
4165
4177
|
success: true,
|
|
4166
4178
|
file: uploadResult.data,
|
|
4167
4179
|
extraction: extractionData,
|
|
4168
4180
|
generatedPath: fullPath,
|
|
4169
4181
|
generatedFolderPath: generatedFolderPath || void 0,
|
|
4170
|
-
originalFileName
|
|
4182
|
+
originalFileName,
|
|
4183
|
+
contentTag
|
|
4171
4184
|
};
|
|
4172
4185
|
} catch (error) {
|
|
4173
4186
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -4265,6 +4278,76 @@ var UploadExtractService = class {
|
|
|
4265
4278
|
folderPath: folderPath || void 0
|
|
4266
4279
|
};
|
|
4267
4280
|
}
|
|
4281
|
+
/**
|
|
4282
|
+
* Perform content tagging via LLM extraction.
|
|
4283
|
+
* Calls the LLM with the configured prompt, extracts the specified field,
|
|
4284
|
+
* and writes it to the content_tag column.
|
|
4285
|
+
*/
|
|
4286
|
+
async performContentTagging(buffer, mimeType, config, filePath) {
|
|
4287
|
+
try {
|
|
4288
|
+
if (!this.extractionService) return void 0;
|
|
4289
|
+
const result = await this.extractionService.extract(buffer, mimeType, {
|
|
4290
|
+
promptArea: config.content_tag_prompt_area,
|
|
4291
|
+
promptKey: config.content_tag_prompt_key,
|
|
4292
|
+
promptVariables: config.content_tag_prompt_variables
|
|
4293
|
+
});
|
|
4294
|
+
if (!result.success || !result.data) return void 0;
|
|
4295
|
+
const tagValue = result.data[config.content_tag_prompt_return_fieldname];
|
|
4296
|
+
if (typeof tagValue !== "string" || !tagValue) return void 0;
|
|
4297
|
+
const metadataService = this.fileManager.getMetadataService();
|
|
4298
|
+
if (metadataService) {
|
|
4299
|
+
const storageType = this.fileManager.getProvider() || "local";
|
|
4300
|
+
const record = await metadataService.findByPath(filePath, storageType);
|
|
4301
|
+
if (record) {
|
|
4302
|
+
await metadataService.updateFields(record.id, { content_tag: tagValue });
|
|
4303
|
+
}
|
|
4304
|
+
}
|
|
4305
|
+
return tagValue;
|
|
4306
|
+
} catch {
|
|
4307
|
+
return void 0;
|
|
4308
|
+
}
|
|
4309
|
+
}
|
|
4310
|
+
/**
|
|
4311
|
+
* Manually tag a file's content via LLM.
|
|
4312
|
+
* Works with existing DB records, resolving the file path internally.
|
|
4313
|
+
*
|
|
4314
|
+
* @param fileId - Database record ID of the file
|
|
4315
|
+
* @param config - Content tag config (falls back to default if not provided)
|
|
4316
|
+
* @returns OperationResult with the tag value
|
|
4317
|
+
*/
|
|
4318
|
+
async tagFileContent(fileId, config) {
|
|
4319
|
+
const effectiveConfig = config ?? this.defaultContentTagConfig;
|
|
4320
|
+
if (!effectiveConfig || !effectiveConfig.content_tag_set_by_llm) {
|
|
4321
|
+
return { success: false, error: "Content tagging is not configured or disabled" };
|
|
4322
|
+
}
|
|
4323
|
+
if (!this.extractionService) {
|
|
4324
|
+
return { success: false, error: "Extraction service not available" };
|
|
4325
|
+
}
|
|
4326
|
+
const metadataService = this.fileManager.getMetadataService();
|
|
4327
|
+
if (!metadataService) {
|
|
4328
|
+
return { success: false, error: "Metadata service not available (tracking not enabled)" };
|
|
4329
|
+
}
|
|
4330
|
+
const record = await metadataService.findById(fileId);
|
|
4331
|
+
if (!record) {
|
|
4332
|
+
return { success: false, error: `File record not found: ${fileId}` };
|
|
4333
|
+
}
|
|
4334
|
+
const downloadResult = await this.fileManager.downloadFile(record.file_path);
|
|
4335
|
+
if (!downloadResult.success || !downloadResult.data) {
|
|
4336
|
+
return { success: false, error: `Failed to download file: ${downloadResult.error}` };
|
|
4337
|
+
}
|
|
4338
|
+
const buffer = Buffer.isBuffer(downloadResult.data) ? downloadResult.data : Buffer.from(downloadResult.data);
|
|
4339
|
+
const mimeType = getMimeType(record.filename);
|
|
4340
|
+
const tagValue = await this.performContentTagging(
|
|
4341
|
+
buffer,
|
|
4342
|
+
mimeType,
|
|
4343
|
+
effectiveConfig,
|
|
4344
|
+
record.file_path
|
|
4345
|
+
);
|
|
4346
|
+
if (!tagValue) {
|
|
4347
|
+
return { success: false, error: "Content tagging did not produce a result" };
|
|
4348
|
+
}
|
|
4349
|
+
return { success: true, data: tagValue };
|
|
4350
|
+
}
|
|
4268
4351
|
/**
|
|
4269
4352
|
* Get the file manager
|
|
4270
4353
|
*/
|
|
@@ -4284,8 +4367,8 @@ var UploadExtractService = class {
|
|
|
4284
4367
|
return this.extractionService;
|
|
4285
4368
|
}
|
|
4286
4369
|
};
|
|
4287
|
-
function createUploadExtractService(fileManager, namingService, extractionService) {
|
|
4288
|
-
return new UploadExtractService(fileManager, namingService, extractionService);
|
|
4370
|
+
function createUploadExtractService(fileManager, namingService, extractionService, defaultContentTagConfig) {
|
|
4371
|
+
return new UploadExtractService(fileManager, namingService, extractionService, defaultContentTagConfig);
|
|
4289
4372
|
}
|
|
4290
4373
|
|
|
4291
4374
|
// src/schema/index.ts
|
|
@@ -4312,7 +4395,8 @@ var HAZO_FILES_TABLE_SCHEMA = {
|
|
|
4312
4395
|
uploaded_by TEXT,
|
|
4313
4396
|
storage_verified_at TEXT,
|
|
4314
4397
|
deleted_at TEXT,
|
|
4315
|
-
original_filename TEXT
|
|
4398
|
+
original_filename TEXT,
|
|
4399
|
+
content_tag TEXT
|
|
4316
4400
|
)`,
|
|
4317
4401
|
indexes: [
|
|
4318
4402
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_path ON hazo_files (file_path)",
|
|
@@ -4322,7 +4406,8 @@ var HAZO_FILES_TABLE_SCHEMA = {
|
|
|
4322
4406
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
|
|
4323
4407
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
|
|
4324
4408
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
|
|
4325
|
-
"CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
|
|
4409
|
+
"CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)",
|
|
4410
|
+
"CREATE INDEX IF NOT EXISTS idx_hazo_files_content_tag ON hazo_files (content_tag)"
|
|
4326
4411
|
]
|
|
4327
4412
|
},
|
|
4328
4413
|
postgres: {
|
|
@@ -4345,7 +4430,8 @@ var HAZO_FILES_TABLE_SCHEMA = {
|
|
|
4345
4430
|
uploaded_by UUID,
|
|
4346
4431
|
storage_verified_at TIMESTAMP WITH TIME ZONE,
|
|
4347
4432
|
deleted_at TIMESTAMP WITH TIME ZONE,
|
|
4348
|
-
original_filename TEXT
|
|
4433
|
+
original_filename TEXT,
|
|
4434
|
+
content_tag TEXT
|
|
4349
4435
|
)`,
|
|
4350
4436
|
indexes: [
|
|
4351
4437
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_path ON hazo_files (file_path)",
|
|
@@ -4355,7 +4441,8 @@ var HAZO_FILES_TABLE_SCHEMA = {
|
|
|
4355
4441
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_status ON hazo_files (status)",
|
|
4356
4442
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_scope ON hazo_files (scope_id)",
|
|
4357
4443
|
"CREATE INDEX IF NOT EXISTS idx_hazo_files_ref_count ON hazo_files (ref_count)",
|
|
4358
|
-
"CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)"
|
|
4444
|
+
"CREATE INDEX IF NOT EXISTS idx_hazo_files_deleted ON hazo_files (deleted_at)",
|
|
4445
|
+
"CREATE INDEX IF NOT EXISTS idx_hazo_files_content_tag ON hazo_files (content_tag)"
|
|
4359
4446
|
]
|
|
4360
4447
|
},
|
|
4361
4448
|
columns: [
|
|
@@ -4377,7 +4464,8 @@ var HAZO_FILES_TABLE_SCHEMA = {
|
|
|
4377
4464
|
"uploaded_by",
|
|
4378
4465
|
"storage_verified_at",
|
|
4379
4466
|
"deleted_at",
|
|
4380
|
-
"original_filename"
|
|
4467
|
+
"original_filename",
|
|
4468
|
+
"content_tag"
|
|
4381
4469
|
]
|
|
4382
4470
|
};
|
|
4383
4471
|
function getSchemaForTable(tableName, dbType) {
|
|
@@ -4518,6 +4606,45 @@ function getNamingSchemaForTable(tableName, dbType) {
|
|
|
4518
4606
|
)
|
|
4519
4607
|
};
|
|
4520
4608
|
}
|
|
4609
|
+
// Version 3 migration descriptor: adds the nullable `content_tag` column and
// an index on it. Statements are written against the default table name,
// which is recorded in `tableName`.
var HAZO_FILES_MIGRATION_V3 = {
  tableName: HAZO_FILES_DEFAULT_TABLE_NAME,
  sqlite: {
    // This statement has no IF NOT EXISTS guard, unlike the postgres one.
    alterStatements: ["ALTER TABLE hazo_files ADD COLUMN content_tag TEXT"],
    indexes: ["CREATE INDEX IF NOT EXISTS idx_hazo_files_content_tag ON hazo_files (content_tag)"],
    // Nothing to backfill: the new column is nullable and defaults to NULL.
    backfill: ""
  },
  postgres: {
    alterStatements: ["ALTER TABLE hazo_files ADD COLUMN IF NOT EXISTS content_tag TEXT"],
    indexes: ["CREATE INDEX IF NOT EXISTS idx_hazo_files_content_tag ON hazo_files (content_tag)"],
    // Nothing to backfill: the new column is nullable and defaults to NULL.
    backfill: ""
  },
  // Columns introduced by this migration.
  newColumns: ["content_tag"]
};
|
|
4635
|
+
/**
 * Build the V3 (content_tag) migration statements for a custom table name.
 *
 * Every occurrence of the default table name in the ALTER, index and
 * backfill statements is replaced by `tableName`. The index name embeds the
 * default table name, so it is renamed as well.
 *
 * The substitution is literal: characters such as `$&` or `$1` in
 * `tableName` are copied verbatim rather than being interpreted as
 * replacement patterns, and the default name is not treated as a regex.
 *
 * NOTE(review): `tableName` is spliced into SQL text unquoted; callers are
 * presumably expected to pass a trusted identifier. Confirm against callers.
 *
 * @param tableName - Table name to target instead of the default
 * @param dbType - Key of the database-specific section ("sqlite" or "postgres")
 * @returns `{ alterStatements, indexes, backfill }` rewritten for `tableName`
 * @throws {TypeError} When `dbType` does not name a migration section
 */
function getMigrationV3ForTable(tableName, dbType) {
  const migration = HAZO_FILES_MIGRATION_V3[dbType];
  if (!Array.isArray(migration?.alterStatements) || !Array.isArray(migration?.indexes)) {
    throw new TypeError(`Unsupported database type for V3 migration: ${String(dbType)}`);
  }
  const defaultName = HAZO_FILES_MIGRATION_V3.tableName;
  // split/join performs a plain-text global replacement with no special
  // handling of "$" sequences in the replacement.
  const retarget = (sql) => defaultName ? sql.split(defaultName).join(tableName) : sql;
  return {
    alterStatements: migration.alterStatements.map(retarget),
    indexes: migration.indexes.map(retarget),
    backfill: typeof migration.backfill === "string" ? retarget(migration.backfill) : migration.backfill
  };
}
|
|
4521
4648
|
|
|
4522
4649
|
// src/migrations/add-reference-tracking.ts
|
|
4523
4650
|
async function migrateToV2(executor, dbType, tableName) {
|
|
@@ -4536,6 +4663,20 @@ async function backfillV2Defaults(executor, dbType, tableName) {
|
|
|
4536
4663
|
const migration = tableName ? getMigrationForTable(tableName, dbType) : HAZO_FILES_MIGRATION_V2[dbType];
|
|
4537
4664
|
await executor.run(migration.backfill);
|
|
4538
4665
|
}
|
|
4666
|
+
|
|
4667
|
+
// src/migrations/add-content-tag.ts
|
|
4668
|
+
/**
 * Apply the V3 migration: add the `content_tag` column, then create its index.
 *
 * ALTER statements are best-effort so the migration can be re-run: the
 * SQLite statement has no IF NOT EXISTS guard and fails once the column is
 * already present. A failed ALTER is therefore tolerated as long as the
 * index statements succeed afterwards. If an index statement fails after an
 * ALTER failure, the thrown error names both and carries the ALTER error as
 * its `cause`, so the underlying problem is not lost.
 *
 * @param executor - Object exposing `run(sql)`, awaited for every statement
 * @param dbType - Key of the database-specific section ("sqlite" or "postgres")
 * @param tableName - Optional custom table name; when falsy the default statements are used
 * @returns Promise that resolves once all statements have run
 * @throws {TypeError} When `dbType` does not name a migration section
 * @throws The index statement's own error when no ALTER failed before it,
 *   otherwise an Error whose `cause` is the first ALTER failure
 */
async function migrateToV3(executor, dbType, tableName) {
  const migration = tableName ? getMigrationV3ForTable(tableName, dbType) : HAZO_FILES_MIGRATION_V3[dbType];
  if (!Array.isArray(migration?.alterStatements) || !Array.isArray(migration?.indexes)) {
    throw new TypeError(`Unsupported database type for V3 migration: ${String(dbType)}`);
  }
  const alterFailures = [];
  for (const stmt of migration.alterStatements) {
    try {
      await executor.run(stmt);
    } catch (error) {
      // Tolerated for idempotent re-runs, but kept for diagnostics below.
      alterFailures.push({ stmt, error });
    }
  }
  for (const idx of migration.indexes) {
    try {
      await executor.run(idx);
    } catch (error) {
      if (alterFailures.length === 0) {
        throw error;
      }
      const [{ stmt: failedStmt, error: alterError }] = alterFailures;
      const alterMessage = alterError instanceof Error ? alterError.message : String(alterError);
      const indexMessage = error instanceof Error ? error.message : String(error);
      throw new Error(
        `V3 migration failed: "${idx}" failed (${indexMessage}) after "${failedStmt}" failed (${alterMessage})`,
        { cause: alterError }
      );
    }
  }
}
|
|
4539
4680
|
export {
|
|
4540
4681
|
ALL_SYSTEM_VARIABLES,
|
|
4541
4682
|
AuthenticationError,
|
|
@@ -4553,6 +4694,7 @@ export {
|
|
|
4553
4694
|
GoogleDriveModule,
|
|
4554
4695
|
HAZO_FILES_DEFAULT_TABLE_NAME,
|
|
4555
4696
|
HAZO_FILES_MIGRATION_V2,
|
|
4697
|
+
HAZO_FILES_MIGRATION_V3,
|
|
4556
4698
|
HAZO_FILES_NAMING_DEFAULT_TABLE_NAME,
|
|
4557
4699
|
HAZO_FILES_NAMING_TABLE_SCHEMA,
|
|
4558
4700
|
HAZO_FILES_TABLE_SCHEMA,
|
|
@@ -4622,6 +4764,7 @@ export {
|
|
|
4622
4764
|
getFileMetadataValues,
|
|
4623
4765
|
getMergedData,
|
|
4624
4766
|
getMigrationForTable,
|
|
4767
|
+
getMigrationV3ForTable,
|
|
4625
4768
|
getMimeType,
|
|
4626
4769
|
getNameWithoutExtension,
|
|
4627
4770
|
getNamingSchemaForTable,
|
|
@@ -4654,6 +4797,7 @@ export {
|
|
|
4654
4797
|
loadConfig,
|
|
4655
4798
|
loadConfigAsync,
|
|
4656
4799
|
migrateToV2,
|
|
4800
|
+
migrateToV3,
|
|
4657
4801
|
normalizePath,
|
|
4658
4802
|
parseConfig,
|
|
4659
4803
|
parseFileData,
|