@igoruehara/canvas-flow 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +152 -0
- package/bin/canvas-flow.js +1132 -0
- package/package.json +68 -0
- package/public/assets/index-PCQkqMUe.css +1 -0
- package/public/assets/index-qV8twxcq.js +767 -0
- package/public/index.html +13 -0
- package/server/api-key/api-key-connect-provider.d.ts +104 -0
- package/server/api-key/api-key-connect-provider.js +14 -0
- package/server/api-key/api-key-connect-provider.js.map +1 -0
- package/server/api-key/api-key-constants-model.d.ts +2 -0
- package/server/api-key/api-key-constants-model.js +6 -0
- package/server/api-key/api-key-constants-model.js.map +1 -0
- package/server/api-key/api-key-controller.d.ts +12 -0
- package/server/api-key/api-key-controller.js +86 -0
- package/server/api-key/api-key-controller.js.map +1 -0
- package/server/api-key/api-key-module.d.ts +2 -0
- package/server/api-key/api-key-module.js +27 -0
- package/server/api-key/api-key-module.js.map +1 -0
- package/server/api-key/api-key-schema.d.ts +72 -0
- package/server/api-key/api-key-schema.js +98 -0
- package/server/api-key/api-key-schema.js.map +1 -0
- package/server/api-key/api-key-service.d.ts +45 -0
- package/server/api-key/api-key-service.js +151 -0
- package/server/api-key/api-key-service.js.map +1 -0
- package/server/api-key/dto/create-api-key.dto.d.ts +8 -0
- package/server/api-key/dto/create-api-key.dto.js +7 -0
- package/server/api-key/dto/create-api-key.dto.js.map +1 -0
- package/server/app.module.d.ts +2 -0
- package/server/app.module.js +53 -0
- package/server/app.module.js.map +1 -0
- package/server/auth/auth-connect-provider.d.ts +140 -0
- package/server/auth/auth-connect-provider.js +20 -0
- package/server/auth/auth-connect-provider.js.map +1 -0
- package/server/auth/auth-constants-model.d.ts +4 -0
- package/server/auth/auth-constants-model.js +8 -0
- package/server/auth/auth-constants-model.js.map +1 -0
- package/server/auth/auth-controller.d.ts +25 -0
- package/server/auth/auth-controller.js +96 -0
- package/server/auth/auth-controller.js.map +1 -0
- package/server/auth/auth-module.d.ts +2 -0
- package/server/auth/auth-module.js +26 -0
- package/server/auth/auth-module.js.map +1 -0
- package/server/auth/auth-organization-schema.d.ts +44 -0
- package/server/auth/auth-organization-schema.js +62 -0
- package/server/auth/auth-organization-schema.js.map +1 -0
- package/server/auth/auth-schema.d.ts +56 -0
- package/server/auth/auth-schema.js +77 -0
- package/server/auth/auth-schema.js.map +1 -0
- package/server/auth/auth-service.d.ts +64 -0
- package/server/auth/auth-service.js +343 -0
- package/server/auth/auth-service.js.map +1 -0
- package/server/canvas-flow/canvas-flow-connect-provider.d.ts +278 -0
- package/server/canvas-flow/canvas-flow-connect-provider.js +24 -0
- package/server/canvas-flow/canvas-flow-connect-provider.js.map +1 -0
- package/server/canvas-flow/canvas-flow-constants-model.d.ts +6 -0
- package/server/canvas-flow/canvas-flow-constants-model.js +10 -0
- package/server/canvas-flow/canvas-flow-constants-model.js.map +1 -0
- package/server/canvas-flow/canvas-flow-controller.d.ts +98 -0
- package/server/canvas-flow/canvas-flow-controller.js +423 -0
- package/server/canvas-flow/canvas-flow-controller.js.map +1 -0
- package/server/canvas-flow/canvas-flow-module.d.ts +2 -0
- package/server/canvas-flow/canvas-flow-module.js +27 -0
- package/server/canvas-flow/canvas-flow-module.js.map +1 -0
- package/server/canvas-flow/canvas-flow-schema.d.ts +192 -0
- package/server/canvas-flow/canvas-flow-schema.js +239 -0
- package/server/canvas-flow/canvas-flow-schema.js.map +1 -0
- package/server/canvas-flow/canvas-flow-service.d.ts +250 -0
- package/server/canvas-flow/canvas-flow-service.js +1681 -0
- package/server/canvas-flow/canvas-flow-service.js.map +1 -0
- package/server/canvas-flow/dto/create-canvas-flow.dto.d.ts +11 -0
- package/server/canvas-flow/dto/create-canvas-flow.dto.js +61 -0
- package/server/canvas-flow/dto/create-canvas-flow.dto.js.map +1 -0
- package/server/canvas-flow/dto/update-canvas-flow.dto.d.ts +10 -0
- package/server/canvas-flow/dto/update-canvas-flow.dto.js +56 -0
- package/server/canvas-flow/dto/update-canvas-flow.dto.js.map +1 -0
- package/server/constants-global.d.ts +1 -0
- package/server/constants-global.js +5 -0
- package/server/constants-global.js.map +1 -0
- package/server/database/database.module.d.ts +2 -0
- package/server/database/database.module.js +23 -0
- package/server/database/database.module.js.map +1 -0
- package/server/database/database.providers.d.ts +7 -0
- package/server/database/database.providers.js +26 -0
- package/server/database/database.providers.js.map +1 -0
- package/server/documents/documents-connect-provider.d.ts +140 -0
- package/server/documents/documents-connect-provider.js +14 -0
- package/server/documents/documents-connect-provider.js.map +1 -0
- package/server/documents/documents-constants-model.d.ts +2 -0
- package/server/documents/documents-constants-model.js +6 -0
- package/server/documents/documents-constants-model.js.map +1 -0
- package/server/documents/documents-controller.d.ts +16 -0
- package/server/documents/documents-controller.js +117 -0
- package/server/documents/documents-controller.js.map +1 -0
- package/server/documents/documents-module.d.ts +2 -0
- package/server/documents/documents-module.js +27 -0
- package/server/documents/documents-module.js.map +1 -0
- package/server/documents/documents-schema.d.ts +96 -0
- package/server/documents/documents-schema.js +38 -0
- package/server/documents/documents-schema.js.map +1 -0
- package/server/documents/documents-service.d.ts +164 -0
- package/server/documents/documents-service.js +1417 -0
- package/server/documents/documents-service.js.map +1 -0
- package/server/flow-tag/flow-tag-connect-provider.d.ts +146 -0
- package/server/flow-tag/flow-tag-connect-provider.js +14 -0
- package/server/flow-tag/flow-tag-connect-provider.js.map +1 -0
- package/server/flow-tag/flow-tag-constants-model.d.ts +2 -0
- package/server/flow-tag/flow-tag-constants-model.js +6 -0
- package/server/flow-tag/flow-tag-constants-model.js.map +1 -0
- package/server/flow-tag/flow-tag-module.d.ts +2 -0
- package/server/flow-tag/flow-tag-module.js +24 -0
- package/server/flow-tag/flow-tag-module.js.map +1 -0
- package/server/flow-tag/flow-tag-schema.d.ts +100 -0
- package/server/flow-tag/flow-tag-schema.js +131 -0
- package/server/flow-tag/flow-tag-schema.js.map +1 -0
- package/server/flow-tag/flow-tag-service.d.ts +77 -0
- package/server/flow-tag/flow-tag-service.js +156 -0
- package/server/flow-tag/flow-tag-service.js.map +1 -0
- package/server/health.controller.d.ts +7 -0
- package/server/health.controller.js +33 -0
- package/server/health.controller.js.map +1 -0
- package/server/http-batch/http-batch-controller.d.ts +345 -0
- package/server/http-batch/http-batch-controller.js +40 -0
- package/server/http-batch/http-batch-controller.js.map +1 -0
- package/server/http-batch/http-batch-module.d.ts +2 -0
- package/server/http-batch/http-batch-module.js +25 -0
- package/server/http-batch/http-batch-module.js.map +1 -0
- package/server/http-batch/http-batch-service.d.ts +381 -0
- package/server/http-batch/http-batch-service.js +268 -0
- package/server/http-batch/http-batch-service.js.map +1 -0
- package/server/lambda.d.ts +2 -0
- package/server/lambda.js +115 -0
- package/server/lambda.js.map +1 -0
- package/server/llm/openai-provider.d.ts +8 -0
- package/server/llm/openai-provider.js +256 -0
- package/server/llm/openai-provider.js.map +1 -0
- package/server/main.d.ts +1 -0
- package/server/main.js +80 -0
- package/server/main.js.map +1 -0
- package/server/mcp-oauth/mcp-oauth-connect-provider.d.ts +164 -0
- package/server/mcp-oauth/mcp-oauth-connect-provider.js +14 -0
- package/server/mcp-oauth/mcp-oauth-connect-provider.js.map +1 -0
- package/server/mcp-oauth/mcp-oauth-constants-model.d.ts +2 -0
- package/server/mcp-oauth/mcp-oauth-constants-model.js +6 -0
- package/server/mcp-oauth/mcp-oauth-constants-model.js.map +1 -0
- package/server/mcp-oauth/mcp-oauth-controller.d.ts +66 -0
- package/server/mcp-oauth/mcp-oauth-controller.js +166 -0
- package/server/mcp-oauth/mcp-oauth-controller.js.map +1 -0
- package/server/mcp-oauth/mcp-oauth-module.d.ts +2 -0
- package/server/mcp-oauth/mcp-oauth-module.js +27 -0
- package/server/mcp-oauth/mcp-oauth-module.js.map +1 -0
- package/server/mcp-oauth/mcp-oauth-schema.d.ts +112 -0
- package/server/mcp-oauth/mcp-oauth-schema.js +148 -0
- package/server/mcp-oauth/mcp-oauth-schema.js.map +1 -0
- package/server/mcp-oauth/mcp-oauth-service.d.ts +189 -0
- package/server/mcp-oauth/mcp-oauth-service.js +545 -0
- package/server/mcp-oauth/mcp-oauth-service.js.map +1 -0
- package/server/memory/memory-connect-provider.d.ts +200 -0
- package/server/memory/memory-connect-provider.js +26 -0
- package/server/memory/memory-connect-provider.js.map +1 -0
- package/server/memory/memory-constants-model.d.ts +6 -0
- package/server/memory/memory-constants-model.js +10 -0
- package/server/memory/memory-constants-model.js.map +1 -0
- package/server/memory/memory-controller.d.ts +15 -0
- package/server/memory/memory-controller.js +53 -0
- package/server/memory/memory-controller.js.map +1 -0
- package/server/memory/memory-history-schema.d.ts +48 -0
- package/server/memory/memory-history-schema.js +62 -0
- package/server/memory/memory-history-schema.js.map +1 -0
- package/server/memory/memory-module.d.ts +2 -0
- package/server/memory/memory-module.js +26 -0
- package/server/memory/memory-module.js.map +1 -0
- package/server/memory/memory-schema.d.ts +48 -0
- package/server/memory/memory-schema.js +62 -0
- package/server/memory/memory-schema.js.map +1 -0
- package/server/memory/memory-service.d.ts +134 -0
- package/server/memory/memory-service.js +317 -0
- package/server/memory/memory-service.js.map +1 -0
- package/server/memory/memory-trace-history-schema.d.ts +48 -0
- package/server/memory/memory-trace-history-schema.js +62 -0
- package/server/memory/memory-trace-history-schema.js.map +1 -0
- package/server/observability/observability.d.ts +3 -0
- package/server/observability/observability.js +62 -0
- package/server/observability/observability.js.map +1 -0
- package/server/production-guard.d.ts +9 -0
- package/server/production-guard.js +105 -0
- package/server/production-guard.js.map +1 -0
- package/server/provider-config/provider-config-connect-provider.d.ts +44 -0
- package/server/provider-config/provider-config-connect-provider.js +14 -0
- package/server/provider-config/provider-config-connect-provider.js.map +1 -0
- package/server/provider-config/provider-config-constants-model.d.ts +3 -0
- package/server/provider-config/provider-config-constants-model.js +7 -0
- package/server/provider-config/provider-config-constants-model.js.map +1 -0
- package/server/provider-config/provider-config-controller.d.ts +23 -0
- package/server/provider-config/provider-config-controller.js +80 -0
- package/server/provider-config/provider-config-controller.js.map +1 -0
- package/server/provider-config/provider-config-module.d.ts +2 -0
- package/server/provider-config/provider-config-module.js +27 -0
- package/server/provider-config/provider-config-module.js.map +1 -0
- package/server/provider-config/provider-config-schema.d.ts +32 -0
- package/server/provider-config/provider-config-schema.js +46 -0
- package/server/provider-config/provider-config-schema.js.map +1 -0
- package/server/provider-config/provider-config-service.d.ts +178 -0
- package/server/provider-config/provider-config-service.js +689 -0
- package/server/provider-config/provider-config-service.js.map +1 -0
- package/server/queue/queue-job-connect-provider.d.ts +128 -0
- package/server/queue/queue-job-connect-provider.js +14 -0
- package/server/queue/queue-job-connect-provider.js.map +1 -0
- package/server/queue/queue-job-constants-model.d.ts +2 -0
- package/server/queue/queue-job-constants-model.js +6 -0
- package/server/queue/queue-job-constants-model.js.map +1 -0
- package/server/queue/queue-job-schema.d.ts +88 -0
- package/server/queue/queue-job-schema.js +119 -0
- package/server/queue/queue-job-schema.js.map +1 -0
- package/server/queue/queue-lock-connect-provider.d.ts +44 -0
- package/server/queue/queue-lock-connect-provider.js +14 -0
- package/server/queue/queue-lock-connect-provider.js.map +1 -0
- package/server/queue/queue-lock-constants-model.d.ts +2 -0
- package/server/queue/queue-lock-constants-model.js +6 -0
- package/server/queue/queue-lock-constants-model.js.map +1 -0
- package/server/queue/queue-lock-schema.d.ts +32 -0
- package/server/queue/queue-lock-schema.js +47 -0
- package/server/queue/queue-lock-schema.js.map +1 -0
- package/server/queue/queue-message-dedupe-connect-provider.d.ts +116 -0
- package/server/queue/queue-message-dedupe-connect-provider.js +14 -0
- package/server/queue/queue-message-dedupe-connect-provider.js.map +1 -0
- package/server/queue/queue-message-dedupe-constants-model.d.ts +2 -0
- package/server/queue/queue-message-dedupe-constants-model.js +6 -0
- package/server/queue/queue-message-dedupe-constants-model.js.map +1 -0
- package/server/queue/queue-message-dedupe-schema.d.ts +80 -0
- package/server/queue/queue-message-dedupe-schema.js +108 -0
- package/server/queue/queue-message-dedupe-schema.js.map +1 -0
- package/server/queue/queue-module.d.ts +2 -0
- package/server/queue/queue-module.js +33 -0
- package/server/queue/queue-module.js.map +1 -0
- package/server/queue/queue-rate-limit-connect-provider.d.ts +56 -0
- package/server/queue/queue-rate-limit-connect-provider.js +14 -0
- package/server/queue/queue-rate-limit-connect-provider.js.map +1 -0
- package/server/queue/queue-rate-limit-constants-model.d.ts +2 -0
- package/server/queue/queue-rate-limit-constants-model.js +6 -0
- package/server/queue/queue-rate-limit-constants-model.js.map +1 -0
- package/server/queue/queue-rate-limit-schema.d.ts +40 -0
- package/server/queue/queue-rate-limit-schema.js +57 -0
- package/server/queue/queue-rate-limit-schema.js.map +1 -0
- package/server/queue/sqs-transition-service.d.ts +123 -0
- package/server/queue/sqs-transition-service.js +442 -0
- package/server/queue/sqs-transition-service.js.map +1 -0
- package/server/rag/rag-controller.d.ts +167 -0
- package/server/rag/rag-controller.js +232 -0
- package/server/rag/rag-controller.js.map +1 -0
- package/server/rag/rag-module.d.ts +2 -0
- package/server/rag/rag-module.js +30 -0
- package/server/rag/rag-module.js.map +1 -0
- package/server/rag/rag-service.d.ts +361 -0
- package/server/rag/rag-service.js +2864 -0
- package/server/rag/rag-service.js.map +1 -0
- package/server/runner/flow-templates.d.ts +55 -0
- package/server/runner/flow-templates.js +388 -0
- package/server/runner/flow-templates.js.map +1 -0
- package/server/runner/langgraph-runtime.service.d.ts +77 -0
- package/server/runner/langgraph-runtime.service.js +221 -0
- package/server/runner/langgraph-runtime.service.js.map +1 -0
- package/server/runner/runner-controller.d.ts +1044 -0
- package/server/runner/runner-controller.js +751 -0
- package/server/runner/runner-controller.js.map +1 -0
- package/server/runner/runner-module.d.ts +2 -0
- package/server/runner/runner-module.js +37 -0
- package/server/runner/runner-module.js.map +1 -0
- package/server/runner/runner-queue-processor.d.ts +29 -0
- package/server/runner/runner-queue-processor.js +259 -0
- package/server/runner/runner-queue-processor.js.map +1 -0
- package/server/runner/runner-service.d.ts +1761 -0
- package/server/runner/runner-service.js +14256 -0
- package/server/runner/runner-service.js.map +1 -0
- package/server/scripts/migrate-canvas-flow-versions.d.ts +1 -0
- package/server/scripts/migrate-canvas-flow-versions.js +72 -0
- package/server/scripts/migrate-canvas-flow-versions.js.map +1 -0
- package/server/scripts/migrate-mcp-oauth-user-scope.d.ts +1 -0
- package/server/scripts/migrate-mcp-oauth-user-scope.js +95 -0
- package/server/scripts/migrate-mcp-oauth-user-scope.js.map +1 -0
- package/templates/config.example.json +204 -0
- package/templates/config.production.example.json +206 -0
- package/templates/docker-compose.yml +60 -0
|
@@ -0,0 +1,2864 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * TypeScript decorator helper.
 *
 * Applies `decorators` from last to first. The call shape is inferred from
 * the number of arguments actually passed:
 *   - fewer than 3: each decorator receives the running result, which starts
 *     as `target`;
 *   - exactly 3: each decorator receives `(target, key)`;
 *   - more than 3: each decorator receives `(target, key, descriptor)` and
 *     the final truthy descriptor is defined on `target[key]`.
 * When `Reflect.decorate` exists it is used instead of the manual loop.
 */
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
    var argCount = arguments.length;
    var result;
    if (argCount < 3) {
        result = target;
    }
    else {
        if (desc === null) {
            // A literal `null` descriptor means "look up the current one".
            desc = Object.getOwnPropertyDescriptor(target, key);
        }
        result = desc;
    }
    if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
        result = Reflect.decorate(decorators, target, key, desc);
    }
    else {
        for (var index = decorators.length - 1; index >= 0; index--) {
            var decorator = decorators[index];
            if (!decorator) {
                continue;
            }
            var applied;
            if (argCount < 3) {
                applied = decorator(result);
            }
            else if (argCount > 3) {
                applied = decorator(target, key, result);
            }
            else {
                applied = decorator(target, key);
            }
            // A decorator that returns nothing leaves the running result untouched.
            result = applied || result;
        }
    }
    if (argCount > 3 && result) {
        Object.defineProperty(target, key, result);
    }
    return result;
};
|
|
8
|
+
/**
 * TypeScript metadata helper: delegates to `Reflect.metadata(k, v)` when that
 * function is available and returns `undefined` otherwise.
 */
var __metadata = (this && this.__metadata) || function (k, v) {
    var canReflect = typeof Reflect === "object" && typeof Reflect.metadata === "function";
    if (!canReflect) {
        return undefined;
    }
    return Reflect.metadata(k, v);
};
|
|
11
|
+
/**
 * TypeScript parameter-decorator helper: binds `paramIndex` so the returned
 * function can be invoked as `(target, key)`. The decorator's return value is
 * discarded.
 */
var __param = (this && this.__param) || function (paramIndex, decorator) {
    return (target, key) => {
        decorator(target, key, paramIndex);
    };
};
|
|
14
|
+
var RagService_1;
|
|
15
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
+
exports.RagService = void 0;
|
|
17
|
+
const common_1 = require("@nestjs/common");
|
|
18
|
+
const config_1 = require("@nestjs/config");
|
|
19
|
+
const milvus2_sdk_node_1 = require("@zilliz/milvus2-sdk-node");
|
|
20
|
+
const crypto_1 = require("crypto");
|
|
21
|
+
const mammoth = require("mammoth");
|
|
22
|
+
const ExcelJS = require("exceljs");
|
|
23
|
+
const storage_blob_1 = require("@azure/storage-blob");
|
|
24
|
+
const http_batch_service_1 = require("../http-batch/http-batch-service");
|
|
25
|
+
const memory_service_1 = require("../memory/memory-service");
|
|
26
|
+
const openai_provider_1 = require("../llm/openai-provider");
|
|
27
|
+
const provider_config_service_1 = require("../provider-config/provider-config-service");
|
|
28
|
+
const documents_service_1 = require("../documents/documents-service");
|
|
29
|
+
const pdfParseModule = require('pdf-parse');
|
|
30
|
+
let RagService = RagService_1 = class RagService {
|
|
31
|
+
constructor(configService, memoryService, httpBatchService, providerConfigService, documentsService) {
|
|
32
|
+
this.configService = configService;
|
|
33
|
+
this.memoryService = memoryService;
|
|
34
|
+
this.httpBatchService = httpBatchService;
|
|
35
|
+
this.providerConfigService = providerConfigService;
|
|
36
|
+
this.documentsService = documentsService;
|
|
37
|
+
this.logger = new common_1.Logger(RagService_1.name);
|
|
38
|
+
this.providerSignature = '';
|
|
39
|
+
this.azureSearchFieldCache = new Map();
|
|
40
|
+
this.runtimeSettings = this.providerConfigService.getEnvSettings();
|
|
41
|
+
this.openAIRuntimeConfig = this.providerConfigService.toOpenAIRuntimeConfig(this.runtimeSettings);
|
|
42
|
+
}
|
|
43
|
+
applyProviderSettings(settings) {
|
|
44
|
+
this.runtimeSettings = settings;
|
|
45
|
+
this.openAIRuntimeConfig = this.providerConfigService.toOpenAIRuntimeConfig(settings);
|
|
46
|
+
try {
|
|
47
|
+
this.openAIClient = (0, openai_provider_1.createOpenAIClient)(this.configService, this.openAIRuntimeConfig);
|
|
48
|
+
this.openAIClientError = undefined;
|
|
49
|
+
}
|
|
50
|
+
catch (error) {
|
|
51
|
+
this.openAIClient = undefined;
|
|
52
|
+
this.openAIClientError = error instanceof Error ? error : new Error(error?.message || String(error));
|
|
53
|
+
this.logger.warn(`LLM padrao nao esta pronto: ${this.openAIClientError.message}`);
|
|
54
|
+
}
|
|
55
|
+
const milvusAddressRaw = String(settings.milvus?.address || '').trim();
|
|
56
|
+
this.milvusClient = undefined;
|
|
57
|
+
if (milvusAddressRaw) {
|
|
58
|
+
const milvusAddress = milvusAddressRaw.replace(/^https?:\/\//i, '');
|
|
59
|
+
const milvusUseSsl = /^https:\/\//i.test(milvusAddressRaw) || /:(19530|19536|19544)$/i.test(milvusAddress);
|
|
60
|
+
const milvusOptions = { address: milvusAddress, ssl: milvusUseSsl };
|
|
61
|
+
if (settings.milvus?.token) {
|
|
62
|
+
milvusOptions.token = settings.milvus.token;
|
|
63
|
+
}
|
|
64
|
+
else if (settings.milvus?.username && settings.milvus?.password) {
|
|
65
|
+
milvusOptions.username = settings.milvus.username;
|
|
66
|
+
milvusOptions.password = settings.milvus.password;
|
|
67
|
+
}
|
|
68
|
+
this.milvusClient = new milvus2_sdk_node_1.MilvusClient(milvusOptions);
|
|
69
|
+
}
|
|
70
|
+
this.azureBlobContainer = undefined;
|
|
71
|
+
if (settings.azureBlob?.connectionString && settings.azureBlob?.containerName) {
|
|
72
|
+
this.azureBlobContainer = storage_blob_1.BlobServiceClient
|
|
73
|
+
.fromConnectionString(settings.azureBlob.connectionString)
|
|
74
|
+
.getContainerClient(settings.azureBlob.containerName);
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
async refreshProviderSettings() {
|
|
78
|
+
const settings = await this.providerConfigService.getEffectiveSettings();
|
|
79
|
+
const signature = JSON.stringify(settings);
|
|
80
|
+
if (signature !== this.providerSignature) {
|
|
81
|
+
this.applyProviderSettings(settings);
|
|
82
|
+
this.providerSignature = signature;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
async onModuleInit() {
|
|
86
|
+
await this.refreshProviderSettings();
|
|
87
|
+
const defaultCollection = this.getDefaultCollectionName();
|
|
88
|
+
if (!this.milvusClient || !defaultCollection)
|
|
89
|
+
return;
|
|
90
|
+
void this.milvusClient
|
|
91
|
+
.loadCollection({ collection_name: defaultCollection })
|
|
92
|
+
.then(() => this.logger.log(`Milvus collection loaded: ${defaultCollection}`))
|
|
93
|
+
.catch((error) => this.logger.warn(`Milvus startup load skipped: ${error?.message || String(error)}`));
|
|
94
|
+
}
|
|
95
|
+
getDefaultCollectionName() {
|
|
96
|
+
return this.runtimeSettings?.milvus?.collectionName || this.configService.get('COLLECTION_NAME') || 'canvas_flow_docs';
|
|
97
|
+
}
|
|
98
|
+
getEmbeddingModel() {
|
|
99
|
+
return (0, openai_provider_1.getOpenAIEmbeddingModel)(this.configService, undefined, this.openAIRuntimeConfig);
|
|
100
|
+
}
|
|
101
|
+
getChatModel(model) {
|
|
102
|
+
return (0, openai_provider_1.getOpenAIChatModel)(this.configService, model, this.openAIRuntimeConfig);
|
|
103
|
+
}
|
|
104
|
+
getOcrModel() {
|
|
105
|
+
return (0, openai_provider_1.getOpenAIOcrModel)(this.configService, undefined, this.openAIRuntimeConfig);
|
|
106
|
+
}
|
|
107
|
+
getEmbeddingDimensions() {
|
|
108
|
+
return Number(this.runtimeSettings?.azureOpenai?.embeddingDimensions ||
|
|
109
|
+
this.configService.get('AZURE_OPENAI_EMBEDDING_DIMENSIONS') ||
|
|
110
|
+
this.configService.get('OPENAI_EMBEDDING_DIMENSIONS') ||
|
|
111
|
+
3072);
|
|
112
|
+
}
|
|
113
|
+
normalizeOpenAIProvider(value) {
|
|
114
|
+
const provider = String(value || '').trim().toLowerCase();
|
|
115
|
+
if (provider === 'azure' || provider === 'azure_openai' || provider === 'azure-openai')
|
|
116
|
+
return 'azure';
|
|
117
|
+
if (provider === 'openai')
|
|
118
|
+
return 'openai';
|
|
119
|
+
if (provider === 'gemini')
|
|
120
|
+
return 'gemini';
|
|
121
|
+
if (provider === 'claude' || provider === 'anthropic')
|
|
122
|
+
return 'claude';
|
|
123
|
+
if (provider === 'grok' || provider === 'xai')
|
|
124
|
+
return 'grok';
|
|
125
|
+
if (provider === 'bedrock' || provider === 'aws_bedrock')
|
|
126
|
+
return 'bedrock';
|
|
127
|
+
return '';
|
|
128
|
+
}
|
|
129
|
+
getOpenAIClientForProvider(provider) {
|
|
130
|
+
const normalized = this.normalizeOpenAIProvider(provider);
|
|
131
|
+
if (!normalized) {
|
|
132
|
+
if (this.openAIClient)
|
|
133
|
+
return this.openAIClient;
|
|
134
|
+
throw this.openAIClientError || new Error('Provider LLM padrao nao configurado.');
|
|
135
|
+
}
|
|
136
|
+
const runtime = this.providerConfigService.toOpenAIRuntimeConfig(this.runtimeSettings || this.providerConfigService.getEnvSettings(), normalized);
|
|
137
|
+
return (0, openai_provider_1.createOpenAIClient)(this.configService, runtime);
|
|
138
|
+
}
|
|
139
|
+
getEmbeddingModelForProvider(provider, model) {
|
|
140
|
+
const normalized = this.normalizeOpenAIProvider(provider);
|
|
141
|
+
const runtime = normalized
|
|
142
|
+
? this.providerConfigService.toOpenAIRuntimeConfig(this.runtimeSettings || this.providerConfigService.getEnvSettings(), normalized)
|
|
143
|
+
: this.openAIRuntimeConfig;
|
|
144
|
+
return (0, openai_provider_1.getOpenAIEmbeddingModel)(this.configService, model, runtime);
|
|
145
|
+
}
|
|
146
|
+
getChatModelForProvider(provider, model) {
|
|
147
|
+
const normalized = this.normalizeOpenAIProvider(provider);
|
|
148
|
+
const runtime = normalized
|
|
149
|
+
? this.providerConfigService.toOpenAIRuntimeConfig(this.runtimeSettings || this.providerConfigService.getEnvSettings(), normalized)
|
|
150
|
+
: this.openAIRuntimeConfig;
|
|
151
|
+
return (0, openai_provider_1.getOpenAIChatModel)(this.configService, model, runtime);
|
|
152
|
+
}
|
|
153
|
+
getAzureSearchEndpoint() {
|
|
154
|
+
return String(this.runtimeSettings?.azureSearch?.endpoint ||
|
|
155
|
+
this.configService.get('AZURE_SEARCH_API_BASE_PATH') ||
|
|
156
|
+
this.configService.get('AZURE_SEARCH_ENDPOINT') ||
|
|
157
|
+
'').replace(/\/+$/, '');
|
|
158
|
+
}
|
|
159
|
+
getAzureSearchApiKey() {
|
|
160
|
+
return this.runtimeSettings?.azureSearch?.apiKey || this.configService.get('AZURE_SEARCH_API_KEY') || this.configService.get('AZURE_SEARCH_KEY') || '';
|
|
161
|
+
}
|
|
162
|
+
getAzureSearchIndexName(collectionName) {
|
|
163
|
+
return collectionName || this.runtimeSettings?.azureSearch?.indexName || this.configService.get('AZURE_SEARCH_INDEX_NAME') || '';
|
|
164
|
+
}
|
|
165
|
+
getAzureSearchApiVersion() {
|
|
166
|
+
return this.runtimeSettings?.azureSearch?.apiVersion || this.configService.get('AZURE_SEARCH_API_VERSION') || '2024-07-01';
|
|
167
|
+
}
|
|
168
|
+
isAzureSearchConfigured(collectionName) {
|
|
169
|
+
return Boolean(this.getAzureSearchEndpoint() && this.getAzureSearchApiKey() && this.getAzureSearchIndexName(collectionName));
|
|
170
|
+
}
|
|
171
|
+
collectAzureSearchFieldSchema(fields, schema, prefix = '') {
|
|
172
|
+
for (const field of fields || []) {
|
|
173
|
+
const name = String(field?.name || '').trim();
|
|
174
|
+
if (!name)
|
|
175
|
+
continue;
|
|
176
|
+
const path = prefix ? `${prefix}/${name}` : name;
|
|
177
|
+
schema.fields.add(path);
|
|
178
|
+
schema.fieldTypes.set(path, String(field?.type || ''));
|
|
179
|
+
if (Number.isFinite(Number(field?.dimensions))) {
|
|
180
|
+
schema.vectorDimensions.set(path, Number(field.dimensions));
|
|
181
|
+
}
|
|
182
|
+
if (field?.filterable === true)
|
|
183
|
+
schema.filterableFields.add(path);
|
|
184
|
+
if (field?.searchable === true)
|
|
185
|
+
schema.searchableFields.add(path);
|
|
186
|
+
if (Array.isArray(field?.fields)) {
|
|
187
|
+
this.collectAzureSearchFieldSchema(field.fields, schema, path);
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
async getAzureSearchIndexSchema(indexName) {
|
|
192
|
+
const cacheKey = `${this.getAzureSearchEndpoint()}::${indexName}`;
|
|
193
|
+
const cached = this.azureSearchFieldCache.get(cacheKey);
|
|
194
|
+
if (cached && cached.expiresAt > Date.now())
|
|
195
|
+
return cached.schema;
|
|
196
|
+
const url = `${this.getAzureSearchEndpoint()}/indexes/${encodeURIComponent(indexName)}?api-version=${this.getAzureSearchApiVersion()}`;
|
|
197
|
+
const response = await this.fetchWithRetry('azure search schema', url, {
|
|
198
|
+
headers: {
|
|
199
|
+
'api-key': this.getAzureSearchApiKey(),
|
|
200
|
+
},
|
|
201
|
+
});
|
|
202
|
+
const body = await response.json().catch(() => ({}));
|
|
203
|
+
if (!response.ok) {
|
|
204
|
+
throw new common_1.BadRequestException({
|
|
205
|
+
message: 'Azure AI Search schema failed',
|
|
206
|
+
status: response.status,
|
|
207
|
+
body,
|
|
208
|
+
});
|
|
209
|
+
}
|
|
210
|
+
const schema = {
|
|
211
|
+
fields: new Set(),
|
|
212
|
+
filterableFields: new Set(),
|
|
213
|
+
searchableFields: new Set(),
|
|
214
|
+
fieldTypes: new Map(),
|
|
215
|
+
vectorDimensions: new Map(),
|
|
216
|
+
semanticConfigurations: new Set((Array.isArray(body?.semantic?.configurations) ? body.semantic.configurations : [])
|
|
217
|
+
.map((configuration) => String(configuration?.name || '').trim())
|
|
218
|
+
.filter(Boolean)),
|
|
219
|
+
};
|
|
220
|
+
this.collectAzureSearchFieldSchema(Array.isArray(body?.fields) ? body.fields : [], schema);
|
|
221
|
+
this.azureSearchFieldCache.set(cacheKey, { schema, expiresAt: Date.now() + 60_000 });
|
|
222
|
+
return schema;
|
|
223
|
+
}
|
|
224
|
+
async getAzureSearchIndexFields(indexName) {
|
|
225
|
+
return (await this.getAzureSearchIndexSchema(indexName)).fields;
|
|
226
|
+
}
|
|
227
|
+
azureSearchSelectFields(fields) {
|
|
228
|
+
const preferred = [
|
|
229
|
+
'id',
|
|
230
|
+
'content',
|
|
231
|
+
'text',
|
|
232
|
+
'chunk',
|
|
233
|
+
'pageContent',
|
|
234
|
+
'body',
|
|
235
|
+
'metadata',
|
|
236
|
+
'embeddingName',
|
|
237
|
+
'embeddingId',
|
|
238
|
+
'title',
|
|
239
|
+
'name',
|
|
240
|
+
'documentId',
|
|
241
|
+
'agentId',
|
|
242
|
+
'contentHash',
|
|
243
|
+
'extraFieldsJson',
|
|
244
|
+
'blobName',
|
|
245
|
+
'blobUrl',
|
|
246
|
+
];
|
|
247
|
+
return preferred.filter((field) => fields.has(field)).join(',') || undefined;
|
|
248
|
+
}
|
|
249
|
+
azureSearchVectorField(fields) {
|
|
250
|
+
return ['content_vector', 'contentVector', 'vector', 'embedding', 'embeddingVector'].find((field) => fields.has(field)) || '';
|
|
251
|
+
}
|
|
252
|
+
azureSearchTextFields(fields) {
|
|
253
|
+
return ['content', 'text', 'chunk', 'pageContent', 'body'].filter((field) => fields.has(field));
|
|
254
|
+
}
|
|
255
|
+
pickAzureSearchDocumentField(document, fields, candidates, fallback) {
|
|
256
|
+
const field = candidates.find((candidate) => fields.has(candidate) && document?.[candidate] !== undefined && document?.[candidate] !== null);
|
|
257
|
+
return field ? document[field] : fallback;
|
|
258
|
+
}
|
|
259
|
+
azureSearchSemanticConfiguration(schema, params = {}) {
|
|
260
|
+
if (params?.semantic === false || params?.useSemantic === false)
|
|
261
|
+
return '';
|
|
262
|
+
const requested = String(params?.semanticConfigurationName || params?.semanticConfiguration || '').trim();
|
|
263
|
+
if (requested && schema.semanticConfigurations.has(requested))
|
|
264
|
+
return requested;
|
|
265
|
+
if (schema.semanticConfigurations.has('content'))
|
|
266
|
+
return 'content';
|
|
267
|
+
if (schema.semanticConfigurations.has('semantic-config'))
|
|
268
|
+
return 'semantic-config';
|
|
269
|
+
return Array.from(schema.semanticConfigurations)[0] || '';
|
|
270
|
+
}
|
|
271
|
+
async withTransientRetry(label, operation, attempts = 3) {
|
|
272
|
+
let lastError;
|
|
273
|
+
for (let attempt = 1; attempt <= attempts; attempt += 1) {
|
|
274
|
+
try {
|
|
275
|
+
return await operation();
|
|
276
|
+
}
|
|
277
|
+
catch (error) {
|
|
278
|
+
lastError = error;
|
|
279
|
+
const status = Number(error?.status || error?.statusCode || error?.response?.status || error?.cause?.status || 0);
|
|
280
|
+
const retryable = !status || status === 408 || status === 409 || status === 425 || status === 429 || status >= 500;
|
|
281
|
+
if (!retryable || attempt >= attempts)
|
|
282
|
+
break;
|
|
283
|
+
const delayMs = Math.min(1500 * attempt, 5000);
|
|
284
|
+
this.logger.warn(`${label} falhou na tentativa ${attempt}/${attempts}; tentando novamente em ${delayMs}ms: ${error?.message || String(error)}`);
|
|
285
|
+
await new Promise((resolve) => setTimeout(resolve, delayMs));
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
throw lastError;
|
|
289
|
+
}
|
|
290
|
+
async fetchWithRetry(label, url, init = {}) {
|
|
291
|
+
return await this.withTransientRetry(label, async () => {
|
|
292
|
+
const response = await fetch(url, init);
|
|
293
|
+
if (response.status === 408 || response.status === 409 || response.status === 425 || response.status === 429 || response.status >= 500) {
|
|
294
|
+
const error = new Error(`${label} retornou status ${response.status}`);
|
|
295
|
+
error.status = response.status;
|
|
296
|
+
throw error;
|
|
297
|
+
}
|
|
298
|
+
return response;
|
|
299
|
+
});
|
|
300
|
+
}
|
|
301
|
+
chunkArray(items, size) {
|
|
302
|
+
const chunks = [];
|
|
303
|
+
for (let index = 0; index < items.length; index += size) {
|
|
304
|
+
chunks.push(items.slice(index, index + size));
|
|
305
|
+
}
|
|
306
|
+
return chunks;
|
|
307
|
+
}
|
|
308
|
+
contentHash(value) {
|
|
309
|
+
return (0, crypto_1.createHash)('sha256').update(String(value || '')).digest('hex');
|
|
310
|
+
}
|
|
311
|
+
ensureVectorDimensions(vector, expected, label) {
|
|
312
|
+
if (!Array.isArray(vector) || !vector.length) {
|
|
313
|
+
throw new common_1.BadRequestException(`${label} gerou vetor vazio.`);
|
|
314
|
+
}
|
|
315
|
+
if (expected && vector.length !== expected) {
|
|
316
|
+
throw new common_1.BadRequestException(`${label} gerou vetor com ${vector.length} dimensoes, mas o destino espera ${expected}. Ajuste o modelo/deployment de embedding ou recrie o indice.`);
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
shouldUseAzureSearch(params, collectionName) {
|
|
320
|
+
const provider = String(params?.searchProvider || params?.vectorProvider || params?.provider || params?.ragProvider || this.configService.get('RAG_VECTOR_PROVIDER') || '').toLowerCase();
|
|
321
|
+
return (this.isAzureSearchConfigured(collectionName) &&
|
|
322
|
+
(provider === 'azure_search' || provider === 'azure-search' || provider === 'azure' || provider === 'hybrid' || (!this.milvusClient && provider !== 'milvus')));
|
|
323
|
+
}
|
|
324
|
+
shouldUseMilvusSearch(params) {
|
|
325
|
+
const provider = String(params?.searchProvider || params?.vectorProvider || params?.provider || params?.ragProvider || this.configService.get('RAG_VECTOR_PROVIDER') || '').toLowerCase();
|
|
326
|
+
if (provider === 'azure_search' || provider === 'azure-search' || provider === 'azure')
|
|
327
|
+
return false;
|
|
328
|
+
return Boolean(this.milvusClient);
|
|
329
|
+
}
|
|
330
|
+
shouldUseAzureBlob(params) {
|
|
331
|
+
const storageProvider = String(params?.storageProvider || '').toLowerCase();
|
|
332
|
+
if (storageProvider === 'none')
|
|
333
|
+
return false;
|
|
334
|
+
if (storageProvider === 'azure_blob' || storageProvider === 'azure-blob' || storageProvider === 'blob')
|
|
335
|
+
return true;
|
|
336
|
+
return Boolean(this.azureBlobContainer);
|
|
337
|
+
}
|
|
338
|
+
parseBoolean(value) {
|
|
339
|
+
if (typeof value === 'boolean')
|
|
340
|
+
return value;
|
|
341
|
+
return ['true', '1', 'yes', 'sim'].includes(String(value || '').toLowerCase());
|
|
342
|
+
}
|
|
343
|
+
parseJsonField(value, fallback) {
|
|
344
|
+
if (value === undefined || value === null || value === '')
|
|
345
|
+
return fallback;
|
|
346
|
+
if (typeof value === 'object')
|
|
347
|
+
return value;
|
|
348
|
+
try {
|
|
349
|
+
return JSON.parse(String(value));
|
|
350
|
+
}
|
|
351
|
+
catch {
|
|
352
|
+
return fallback;
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
escapeMilvusString(value) {
|
|
356
|
+
return String(value || '').replace(/\\/g, '\\\\').replace(/"/g, '\\"');
|
|
357
|
+
}
|
|
358
|
+
isBinaryDecodedText(value) {
|
|
359
|
+
const sample = String(value || '').slice(0, 2000);
|
|
360
|
+
const replacementCount = (sample.match(/\uFFFD/g) || []).length;
|
|
361
|
+
const controlCount = (sample.match(/[\x00-\x08\x0E-\x1F]/g) || []).length;
|
|
362
|
+
return (replacementCount > Math.max(8, sample.length * 0.03) ||
|
|
363
|
+
controlCount > Math.max(8, sample.length * 0.02));
|
|
364
|
+
}
|
|
365
|
+
buildDataUrl(buffer, mimeType) {
|
|
366
|
+
return `data:${mimeType || 'application/octet-stream'};base64,${buffer.toString('base64')}`;
|
|
367
|
+
}
|
|
368
|
+
sanitizeText(text) {
|
|
369
|
+
if (!text)
|
|
370
|
+
return '';
|
|
371
|
+
return String(text)
|
|
372
|
+
.replace(/\?\?o/g, 'ção')
|
|
373
|
+
.replace(/\?\?a/g, 'ção')
|
|
374
|
+
.replace(/\?\?/g, 'ç')
|
|
375
|
+
.replace(/n\?/g, 'nº')
|
|
376
|
+
.replace(/N\?/g, 'Nº')
|
|
377
|
+
.replace(/a\?o/g, 'ação')
|
|
378
|
+
.replace(/i\?o/g, 'ição')
|
|
379
|
+
.replace(/e\?o/g, 'eção')
|
|
380
|
+
.replace(/o\?o/g, 'oção')
|
|
381
|
+
.replace(/u\?o/g, 'ução')
|
|
382
|
+
.replace(/\s+/g, ' ')
|
|
383
|
+
.trim();
|
|
384
|
+
}
|
|
385
|
+
sanitizeObject(obj) {
|
|
386
|
+
if (typeof obj === 'string')
|
|
387
|
+
return this.sanitizeText(obj);
|
|
388
|
+
if (Array.isArray(obj))
|
|
389
|
+
return obj.map((item) => this.sanitizeObject(item));
|
|
390
|
+
if (obj && typeof obj === 'object') {
|
|
391
|
+
return Object.entries(obj).reduce((acc, [key, value]) => {
|
|
392
|
+
acc[key] = this.sanitizeObject(value);
|
|
393
|
+
return acc;
|
|
394
|
+
}, {});
|
|
395
|
+
}
|
|
396
|
+
return obj;
|
|
397
|
+
}
|
|
398
|
+
chunkText(text, chunkSize = 512, chunkOverlap = 70) {
|
|
399
|
+
const clean = String(text || '').trim();
|
|
400
|
+
if (!clean)
|
|
401
|
+
return [];
|
|
402
|
+
const size = Math.max(100, Math.floor(Number(chunkSize) || 512));
|
|
403
|
+
const overlap = Math.max(0, Math.min(Math.floor(Number(chunkOverlap) || 0), size - 1));
|
|
404
|
+
const chunks = [];
|
|
405
|
+
let cursor = 0;
|
|
406
|
+
while (cursor < clean.length) {
|
|
407
|
+
let end = Math.min(clean.length, cursor + size);
|
|
408
|
+
if (end < clean.length) {
|
|
409
|
+
const window = clean.slice(cursor, end);
|
|
410
|
+
const breakpoints = [window.lastIndexOf('\n\n'), window.lastIndexOf('\n'), window.lastIndexOf('. '), window.lastIndexOf(' ')]
|
|
411
|
+
.filter((index) => index > Math.floor(size * 0.55));
|
|
412
|
+
if (breakpoints.length)
|
|
413
|
+
end = cursor + Math.max(...breakpoints) + 1;
|
|
414
|
+
}
|
|
415
|
+
chunks.push(clean.slice(cursor, end).trim());
|
|
416
|
+
if (end === clean.length)
|
|
417
|
+
break;
|
|
418
|
+
cursor = Math.max(0, end - overlap);
|
|
419
|
+
}
|
|
420
|
+
return chunks.filter(Boolean);
|
|
421
|
+
}
|
|
422
|
+
normalizeMilvusResults(result, source) {
|
|
423
|
+
const raw = Array.isArray(result?.results)
|
|
424
|
+
? result.results
|
|
425
|
+
: Array.isArray(result?.data)
|
|
426
|
+
? result.data
|
|
427
|
+
: Array.isArray(result)
|
|
428
|
+
? result
|
|
429
|
+
: [];
|
|
430
|
+
const flat = raw.flat ? raw.flat() : raw;
|
|
431
|
+
return flat.map((hit) => {
|
|
432
|
+
const entity = hit?.entity || hit;
|
|
433
|
+
const id = String(entity?.id ?? hit?.id ?? hit?.pk ?? (0, crypto_1.randomUUID)());
|
|
434
|
+
return {
|
|
435
|
+
id,
|
|
436
|
+
source,
|
|
437
|
+
score: Number(hit?.score ?? hit?.distance ?? entity?.score ?? 0),
|
|
438
|
+
embeddingName: entity?.embeddingName,
|
|
439
|
+
agentId: entity?.agentId,
|
|
440
|
+
embeddingId: entity?.embeddingId,
|
|
441
|
+
extraFields: this.parseExtraFields(entity?.extraFields),
|
|
442
|
+
text: entity?.text || '',
|
|
443
|
+
};
|
|
444
|
+
});
|
|
445
|
+
}
|
|
446
|
+
normalizeMilvusData(response) {
|
|
447
|
+
const raw = Array.isArray(response?.data)
|
|
448
|
+
? response.data
|
|
449
|
+
: Array.isArray(response?.results)
|
|
450
|
+
? response.results
|
|
451
|
+
: Array.isArray(response)
|
|
452
|
+
? response
|
|
453
|
+
: [];
|
|
454
|
+
return raw.map((row) => ({
|
|
455
|
+
...row,
|
|
456
|
+
id: row?.id !== undefined && row?.id !== null ? String(row.id) : '',
|
|
457
|
+
embeddingId: row?.embeddingId !== undefined && row?.embeddingId !== null ? String(row.embeddingId) : '',
|
|
458
|
+
extraFields: this.parseJsonField(row?.extraFields, {}),
|
|
459
|
+
text: row?.text || '',
|
|
460
|
+
}));
|
|
461
|
+
}
|
|
462
|
+
getChunkIndex(row) {
|
|
463
|
+
const index = Number(row?.extraFields?.chunkIndex);
|
|
464
|
+
if (Number.isFinite(index))
|
|
465
|
+
return index;
|
|
466
|
+
const part = Number(row?.extraFields?.part);
|
|
467
|
+
return Number.isFinite(part) ? Math.max(0, part - 1) : 0;
|
|
468
|
+
}
|
|
469
|
+
stripChunkFields(extraFields = {}) {
|
|
470
|
+
const { chunkIndex, chunksCount, part, totalParts, ...cleanExtraFields } = extraFields || {};
|
|
471
|
+
return cleanExtraFields;
|
|
472
|
+
}
|
|
473
|
+
async streamToString(stream) {
|
|
474
|
+
if (!stream)
|
|
475
|
+
return '';
|
|
476
|
+
const chunks = [];
|
|
477
|
+
for await (const chunk of stream) {
|
|
478
|
+
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
|
|
479
|
+
}
|
|
480
|
+
return Buffer.concat(chunks).toString('utf-8');
|
|
481
|
+
}
|
|
482
|
+
normalizeBlobSearchTerms(value) {
|
|
483
|
+
const raw = String(value || '').trim().toLowerCase();
|
|
484
|
+
if (!raw)
|
|
485
|
+
return [];
|
|
486
|
+
return raw
|
|
487
|
+
.split(/[\s,;|]+/g)
|
|
488
|
+
.map((term) => term.trim())
|
|
489
|
+
.filter(Boolean);
|
|
490
|
+
}
|
|
491
|
+
blobMatchesTerms(haystack, terms) {
|
|
492
|
+
if (!terms.length)
|
|
493
|
+
return true;
|
|
494
|
+
return terms.every((term) => haystack.includes(term));
|
|
495
|
+
}
|
|
496
|
+
detectTextOverlap(left, right) {
|
|
497
|
+
const max = Math.min(600, left.length, right.length);
|
|
498
|
+
for (let size = max; size >= 20; size -= 1) {
|
|
499
|
+
if (left.slice(-size) === right.slice(0, size))
|
|
500
|
+
return size;
|
|
501
|
+
}
|
|
502
|
+
return 0;
|
|
503
|
+
}
|
|
504
|
+
joinChunkTexts(rows) {
|
|
505
|
+
const ordered = [...rows].sort((left, right) => this.getChunkIndex(left) - this.getChunkIndex(right));
|
|
506
|
+
return ordered.reduce((content, row) => {
|
|
507
|
+
const next = String(row?.text || '');
|
|
508
|
+
if (!content)
|
|
509
|
+
return next;
|
|
510
|
+
const overlap = this.detectTextOverlap(content, next);
|
|
511
|
+
return `${content}${next.slice(overlap)}`;
|
|
512
|
+
}, '');
|
|
513
|
+
}
|
|
514
|
+
buildAgentExpr(agentId) {
|
|
515
|
+
return agentId ? `agentId == "${this.escapeMilvusString(agentId)}"` : 'id >= 0';
|
|
516
|
+
}
|
|
517
|
+
buildDocumentExpr(idOrEmbeddingId, agentId) {
|
|
518
|
+
const value = String(idOrEmbeddingId || '').trim();
|
|
519
|
+
if (!value) {
|
|
520
|
+
throw new common_1.BadRequestException('id is required');
|
|
521
|
+
}
|
|
522
|
+
const parts = [`embeddingId == "${this.escapeMilvusString(value)}"`];
|
|
523
|
+
if (/^\d+$/.test(value))
|
|
524
|
+
parts.push(`id == ${value}`);
|
|
525
|
+
const documentExpr = `(${parts.join(' || ')})`;
|
|
526
|
+
return agentId ? `${documentExpr} && ${this.buildAgentExpr(agentId)}` : documentExpr;
|
|
527
|
+
}
|
|
528
|
+
groupDocumentRows(rows) {
|
|
529
|
+
const byKey = new Map();
|
|
530
|
+
rows.forEach((row) => {
|
|
531
|
+
const key = String(row?.embeddingId || row?.id || (0, crypto_1.randomUUID)());
|
|
532
|
+
const current = byKey.get(key) || [];
|
|
533
|
+
current.push(row);
|
|
534
|
+
byKey.set(key, current);
|
|
535
|
+
});
|
|
536
|
+
return Array.from(byKey.entries()).map(([key, group]) => {
|
|
537
|
+
const ordered = [...group].sort((left, right) => this.getChunkIndex(left) - this.getChunkIndex(right));
|
|
538
|
+
const first = ordered[0] || {};
|
|
539
|
+
const text = this.joinChunkTexts(ordered);
|
|
540
|
+
const extraFields = this.stripChunkFields(first.extraFields || {});
|
|
541
|
+
return {
|
|
542
|
+
id: first.id || key,
|
|
543
|
+
embeddingId: first.embeddingId || key,
|
|
544
|
+
embeddingName: first.embeddingName || extraFields?.title || 'Documento RAG',
|
|
545
|
+
agentId: first.agentId || '',
|
|
546
|
+
extraFields,
|
|
547
|
+
chunksCount: ordered.length,
|
|
548
|
+
ids: ordered.map((row) => row.id).filter(Boolean),
|
|
549
|
+
text,
|
|
550
|
+
textLength: text.length,
|
|
551
|
+
textPreview: text.slice(0, 260),
|
|
552
|
+
};
|
|
553
|
+
});
|
|
554
|
+
}
|
|
555
|
+
async queryRows(collectionName, filter, limit = 1000, offset = 0) {
|
|
556
|
+
if (!this.milvusClient) {
|
|
557
|
+
throw new common_1.BadRequestException('MILVUS_ADDRESS is not configured');
|
|
558
|
+
}
|
|
559
|
+
const response = await this.milvusClient.query({
|
|
560
|
+
collection_name: collectionName,
|
|
561
|
+
filter: filter || 'id >= 0',
|
|
562
|
+
output_fields: ['id', 'text', 'embeddingName', 'embeddingId', 'agentId', 'extraFields'],
|
|
563
|
+
limit,
|
|
564
|
+
offset,
|
|
565
|
+
timeout: 900000,
|
|
566
|
+
});
|
|
567
|
+
return this.normalizeMilvusData(response);
|
|
568
|
+
}
|
|
569
|
+
async flushCollection(collectionName) {
|
|
570
|
+
if (!this.milvusClient)
|
|
571
|
+
return;
|
|
572
|
+
try {
|
|
573
|
+
const client = this.milvusClient;
|
|
574
|
+
if (client.flushSync) {
|
|
575
|
+
await client.flushSync({ collection_names: [collectionName] });
|
|
576
|
+
}
|
|
577
|
+
else if (client.flush) {
|
|
578
|
+
await client.flush({ collection_names: [collectionName] });
|
|
579
|
+
}
|
|
580
|
+
}
|
|
581
|
+
catch {
|
|
582
|
+
return undefined;
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
mergeHybridResults(denseResults, sparseResults, params) {
|
|
586
|
+
const denseWeight = Number(params?.denseWeight ?? 0.7);
|
|
587
|
+
const sparseWeight = Number(params?.sparseWeight ?? 0.3);
|
|
588
|
+
const byId = new Map();
|
|
589
|
+
denseResults.forEach((item) => {
|
|
590
|
+
byId.set(item.id, {
|
|
591
|
+
...item,
|
|
592
|
+
denseScore: item.score,
|
|
593
|
+
sparseScore: 0,
|
|
594
|
+
score: item.score * denseWeight,
|
|
595
|
+
});
|
|
596
|
+
});
|
|
597
|
+
sparseResults.forEach((item) => {
|
|
598
|
+
const current = byId.get(item.id) || {
|
|
599
|
+
...item,
|
|
600
|
+
denseScore: 0,
|
|
601
|
+
sparseScore: 0,
|
|
602
|
+
score: 0,
|
|
603
|
+
};
|
|
604
|
+
current.sparseScore = item.score;
|
|
605
|
+
current.score = Number(current.score || 0) + item.score * sparseWeight;
|
|
606
|
+
byId.set(item.id, current);
|
|
607
|
+
});
|
|
608
|
+
return Array.from(byId.values()).sort((left, right) => right.score - left.score);
|
|
609
|
+
}
|
|
610
|
+
clampSearchInt(value, fallback, min, max) {
|
|
611
|
+
const parsed = Number(value);
|
|
612
|
+
if (!Number.isFinite(parsed))
|
|
613
|
+
return fallback;
|
|
614
|
+
return Math.max(min, Math.min(max, Math.floor(parsed)));
|
|
615
|
+
}
|
|
616
|
+
clampSearchFloat(value, fallback, min, max) {
|
|
617
|
+
const parsed = Number(value);
|
|
618
|
+
if (!Number.isFinite(parsed))
|
|
619
|
+
return fallback;
|
|
620
|
+
return Math.max(min, Math.min(max, parsed));
|
|
621
|
+
}
|
|
622
|
+
getSearchScore(item) {
|
|
623
|
+
const raw = Number(item?.combinedScore ?? item?.score ?? item?.distance ?? 0);
|
|
624
|
+
return Number.isFinite(raw) ? raw : 0;
|
|
625
|
+
}
|
|
626
|
+
parseExtraFields(extraFields) {
|
|
627
|
+
if (!extraFields)
|
|
628
|
+
return {};
|
|
629
|
+
if (typeof extraFields === 'string')
|
|
630
|
+
return this.parseJsonField(extraFields, {});
|
|
631
|
+
return typeof extraFields === 'object' ? extraFields : {};
|
|
632
|
+
}
|
|
633
|
+
checkRelevanceFlag(result) {
|
|
634
|
+
const extraFields = this.parseExtraFields(result?.extraFields);
|
|
635
|
+
return extraFields?.relevante === true || extraFields?.relevant === true;
|
|
636
|
+
}
|
|
637
|
+
applyRelevanceBoost(results, relevanceBoost = 1.5) {
|
|
638
|
+
if (!Array.isArray(results?.results) || relevanceBoost === 1)
|
|
639
|
+
return results;
|
|
640
|
+
const boostedResults = results.results
|
|
641
|
+
.map((result) => {
|
|
642
|
+
if (!this.checkRelevanceFlag(result))
|
|
643
|
+
return result;
|
|
644
|
+
const originalScore = this.getSearchScore(result);
|
|
645
|
+
return {
|
|
646
|
+
...result,
|
|
647
|
+
score: originalScore * relevanceBoost,
|
|
648
|
+
combinedScore: originalScore * relevanceBoost,
|
|
649
|
+
originalScore,
|
|
650
|
+
relevanceBoostApplied: true,
|
|
651
|
+
};
|
|
652
|
+
})
|
|
653
|
+
.sort((left, right) => this.getSearchScore(right) - this.getSearchScore(left));
|
|
654
|
+
return { ...results, results: boostedResults };
|
|
655
|
+
}
|
|
656
|
+
/**
 * Fuses dense and sparse search results into one ranked list.
 *
 * Two strategies are supported through `options.fusionStrategy`:
 * - 'weighted_score': weighted sum of each list's min-max normalized scores;
 * - anything else ('rrf'): weighted reciprocal rank fusion, 1 / (rrfK + rank),
 *   plus a tiny (1e-6) share of the weighted normalized scores to break ties.
 * Hits whose metadata flags them as relevant get their combined score
 * multiplied by `relevanceBoost`.
 *
 * @param {{results: object[]}} denseResults Dense search response.
 * @param {{results: object[]}} sparseResults Sparse search response.
 * @param {number} denseWeight Weight of the dense list (default 0.7 when invalid or negative).
 * @param {number} sparseWeight Weight of the sparse list (default 0.3 when invalid or negative).
 * @param {number} topK Maximum number of results returned.
 * @param {number} [relevanceBoost=1.5] Multiplier applied to flagged hits.
 * @param {object} [options] Optional `fusionStrategy`, `rrfK` (default 60, minimum 1),
 *   `maxChunksPerDocument` (0 disables diversification), and `candidateTopK`,
 *   `denseEfSearch`, `sparseDropRatioSearch`, which are only echoed in `searchDebug`.
 * @returns {{results: object[], status: object, searchDebug: object}} Ranked hits plus diagnostics.
 */
combineSearchResultsV2(denseResults, sparseResults, denseWeight, sparseWeight, topK, relevanceBoost = 1.5, options) {
    const denseItems = Array.isArray(denseResults?.results) ? denseResults.results : [];
    const sparseItems = Array.isArray(sparseResults?.results) ? sparseResults.results : [];
    const scoreMap = new Map();
    const dwRaw = Number(denseWeight);
    const swRaw = Number(sparseWeight);
    const dw = Number.isFinite(dwRaw) && dwRaw >= 0 ? dwRaw : 0.7;
    const sw = Number.isFinite(swRaw) && swRaw >= 0 ? swRaw : 0.3;
    // Normalize the weights so they sum to 1; fall back to 0.7 / 0.3 when both are 0.
    const weightSum = dw + sw;
    const denseW = weightSum > 0 ? dw / weightSum : 0.7;
    const sparseW = weightSum > 0 ? sw / weightSum : 0.3;
    const fusionStrategy = options?.fusionStrategy === 'weighted_score' ? 'weighted_score' : 'rrf';
    const rrfK = Number.isFinite(Number(options?.rrfK)) ? Math.max(1, Math.floor(Number(options?.rrfK))) : 60;
    const maxChunksPerDocument = Number.isFinite(Number(options?.maxChunksPerDocument))
        ? Math.max(0, Math.floor(Number(options?.maxChunksPerDocument)))
        : 0;
    // Min-max normalizes the scores of one list into [0, 1], keyed by string id.
    // When every score is (nearly) identical, all items get 1.
    const buildNormalizedScoreMap = (items) => {
        const normalized = new Map();
        const rows = items
            .map((item) => ({ id: String(item?.id ?? ''), score: this.getSearchScore(item) }))
            .filter((row) => row.id);
        if (!rows.length)
            return normalized;
        const values = rows.map((row) => row.score);
        const min = Math.min(...values);
        const max = Math.max(...values);
        const sameScore = Math.abs(max - min) < 1e-12;
        rows.forEach((row) => {
            normalized.set(row.id, sameScore ? 1 : (row.score - min) / (max - min));
        });
        return normalized;
    };
    const denseNormMap = buildNormalizedScoreMap(denseItems);
    const sparseNormMap = buildNormalizedScoreMap(sparseItems);
    // Creates or enriches the merged entry for a hit and returns it (null when the hit has no id).
    const upsertBase = (item) => {
        const idKey = String(item?.id ?? '');
        if (!idKey)
            return null;
        if (!scoreMap.has(idKey)) {
            scoreMap.set(idKey, {
                ...item,
                denseScore: 0,
                sparseScore: 0,
                denseRank: null,
                sparseRank: null,
                denseNormalizedScore: 0,
                sparseNormalizedScore: 0,
                combinedScore: 0,
            });
        }
        else {
            // The entry seen first wins; the new hit only fills in fields that are still empty.
            const existing = scoreMap.get(idKey);
            scoreMap.set(idKey, {
                ...item,
                ...existing,
                id: existing?.id ?? item?.id,
                text: existing?.text || item?.text,
                embeddingName: existing?.embeddingName || item?.embeddingName,
                embeddingId: existing?.embeddingId || item?.embeddingId,
                agentId: existing?.agentId || item?.agentId,
                extraFields: existing?.extraFields ?? item?.extraFields,
            });
        }
        return scoreMap.get(idKey);
    };
    // Ranks are 1-based positions in each input list.
    denseItems.forEach((item, index) => {
        const row = upsertBase(item);
        if (!row)
            return;
        const idKey = String(item?.id ?? '');
        row.denseScore = this.getSearchScore(item);
        row.denseRank = index + 1;
        row.denseNormalizedScore = denseNormMap.get(idKey) ?? 0;
    });
    sparseItems.forEach((item, index) => {
        const row = upsertBase(item);
        if (!row)
            return;
        const idKey = String(item?.id ?? '');
        row.sparseScore = this.getSearchScore(item);
        row.sparseRank = index + 1;
        row.sparseNormalizedScore = sparseNormMap.get(idKey) ?? 0;
    });
    for (const [idKey, row] of scoreMap.entries()) {
        const denseRank = typeof row.denseRank === 'number' ? row.denseRank : null;
        const sparseRank = typeof row.sparseRank === 'number' ? row.sparseRank : null;
        let combinedScore = 0;
        if (fusionStrategy === 'weighted_score') {
            combinedScore =
                (row.denseNormalizedScore || 0) * denseW +
                    (row.sparseNormalizedScore || 0) * sparseW;
        }
        else {
            if (denseRank !== null)
                combinedScore += denseW * (1 / (rrfK + denseRank));
            if (sparseRank !== null)
                combinedScore += sparseW * (1 / (rrfK + sparseRank));
            // Tiny score-based term so equal rank sums are ordered by normalized score.
            combinedScore += 1e-6 * (((row.denseNormalizedScore || 0) * denseW) + ((row.sparseNormalizedScore || 0) * sparseW));
        }
        row.combinedScore = combinedScore;
        row.score = combinedScore;
        if (this.checkRelevanceFlag(row)) {
            row.combinedScore *= relevanceBoost;
            row.score = row.combinedScore;
            row.relevanceBoostApplied = true;
            this.logger.debug(`Relevance boost applied to document ${idKey}: score multiplied by ${relevanceBoost}`);
        }
    }
    // Descending by combined score; ties fall back to dense, then sparse normalized score.
    const sorted = Array.from(scoreMap.values()).sort((left, right) => {
        const scoreDiff = this.getSearchScore(right) - this.getSearchScore(left);
        if (scoreDiff !== 0)
            return scoreDiff;
        const denseDiff = (right.denseNormalizedScore || 0) - (left.denseNormalizedScore || 0);
        if (denseDiff !== 0)
            return denseDiff;
        return (right.sparseNormalizedScore || 0) - (left.sparseNormalizedScore || 0);
    });
    let diversified = false;
    let finalResults = sorted;
    if (maxChunksPerDocument > 0) {
        // Cap the chunks taken per document (grouped by embeddingId, then embeddingName, then id).
        const perDocCounts = new Map();
        const selected = [];
        const overflow = [];
        for (const item of sorted) {
            const groupKey = String(item?.embeddingId || item?.embeddingName || item?.id || '');
            const current = perDocCounts.get(groupKey) || 0;
            if (current < maxChunksPerDocument) {
                selected.push(item);
                perDocCounts.set(groupKey, current + 1);
            }
            else {
                overflow.push(item);
            }
        }
        // Capped-out chunks are appended again only when the selection alone cannot fill topK.
        finalResults = selected.length < topK ? [...selected, ...overflow] : selected;
        diversified = true;
    }
    return {
        results: finalResults.slice(0, topK),
        status: { error_code: 'Success', reason: 'Combined search results' },
        searchDebug: {
            mode: 'hybrid',
            topK,
            candidateTopK: options?.candidateTopK ?? null,
            fusionStrategy,
            rrfK: fusionStrategy === 'rrf' ? rrfK : null,
            denseWeight: denseW,
            sparseWeight: sparseW,
            denseEfSearch: options?.denseEfSearch ?? null,
            sparseDropRatioSearch: options?.sparseDropRatioSearch ?? null,
            denseResults: denseItems.length,
            sparseResults: sparseItems.length,
            uniqueCandidates: scoreMap.size,
            maxChunksPerDocument: maxChunksPerDocument > 0 ? maxChunksPerDocument : null,
            diversified,
        },
    };
}
|
|
814
|
+
buildExtraFieldsExpr(extraFieldsFilter) {
|
|
815
|
+
if (typeof extraFieldsFilter === 'string') {
|
|
816
|
+
extraFieldsFilter = this.parseJsonField(extraFieldsFilter, null);
|
|
817
|
+
}
|
|
818
|
+
if (!extraFieldsFilter || typeof extraFieldsFilter !== 'object')
|
|
819
|
+
return '';
|
|
820
|
+
const parts = [];
|
|
821
|
+
for (const [rawKey, value] of Object.entries(extraFieldsFilter)) {
|
|
822
|
+
if (value === undefined || value === null || (typeof value === 'string' && value.trim() === ''))
|
|
823
|
+
continue;
|
|
824
|
+
const key = this.escapeMilvusString(rawKey);
|
|
825
|
+
const field = `extraFields["${key}"]`;
|
|
826
|
+
if (Array.isArray(value)) {
|
|
827
|
+
const choices = value
|
|
828
|
+
.filter((item) => item !== undefined && item !== null && String(item).trim() !== '')
|
|
829
|
+
.map((item) => this.formatMilvusValue(item));
|
|
830
|
+
if (choices.length)
|
|
831
|
+
parts.push(`(${choices.map((item) => `${field} == ${item}`).join(' || ')})`);
|
|
832
|
+
continue;
|
|
833
|
+
}
|
|
834
|
+
parts.push(`${field} == ${this.formatMilvusValue(value)}`);
|
|
835
|
+
}
|
|
836
|
+
return parts.join(' && ');
|
|
837
|
+
}
|
|
838
|
+
formatMilvusValue(value) {
|
|
839
|
+
if (typeof value === 'boolean' || typeof value === 'number')
|
|
840
|
+
return String(value);
|
|
841
|
+
return `"${this.escapeMilvusString(String(value))}"`;
|
|
842
|
+
}
|
|
843
|
+
mergeExtraFieldsFilters(base, override) {
|
|
844
|
+
if (typeof base === 'string')
|
|
845
|
+
base = this.parseJsonField(base, null);
|
|
846
|
+
if (typeof override === 'string')
|
|
847
|
+
override = this.parseJsonField(override, null);
|
|
848
|
+
const merged = {};
|
|
849
|
+
if (base && typeof base === 'object')
|
|
850
|
+
Object.assign(merged, base);
|
|
851
|
+
if (override && typeof override === 'object') {
|
|
852
|
+
Object.entries(override).forEach(([key, value]) => {
|
|
853
|
+
if (value === undefined || value === null || (typeof value === 'string' && value.trim() === '')) {
|
|
854
|
+
delete merged[key];
|
|
855
|
+
}
|
|
856
|
+
else {
|
|
857
|
+
merged[key] = value;
|
|
858
|
+
}
|
|
859
|
+
});
|
|
860
|
+
}
|
|
861
|
+
return merged;
|
|
862
|
+
}
|
|
863
|
+
isEmptyRoundFilter(round) {
|
|
864
|
+
if (!round || typeof round !== 'object')
|
|
865
|
+
return true;
|
|
866
|
+
return Object.entries(round).every(([, value]) => (value === undefined ||
|
|
867
|
+
value === null ||
|
|
868
|
+
(typeof value === 'string' && value.trim() === '')));
|
|
869
|
+
}
|
|
870
|
+
getExtraField(item, field) {
|
|
871
|
+
let extraFields = item?.extraFields;
|
|
872
|
+
if (typeof extraFields === 'string') {
|
|
873
|
+
try {
|
|
874
|
+
extraFields = JSON.parse(extraFields);
|
|
875
|
+
}
|
|
876
|
+
catch {
|
|
877
|
+
return undefined;
|
|
878
|
+
}
|
|
879
|
+
}
|
|
880
|
+
return extraFields?.[field];
|
|
881
|
+
}
|
|
882
|
+
toComparableMetadataValue(value) {
|
|
883
|
+
if (typeof value === 'number' && Number.isFinite(value))
|
|
884
|
+
return value;
|
|
885
|
+
if (typeof value === 'string') {
|
|
886
|
+
const numeric = Number(value.trim().replace(',', '.').replace(/[^\d.-]/g, ''));
|
|
887
|
+
if (Number.isFinite(numeric))
|
|
888
|
+
return numeric;
|
|
889
|
+
return value.toLowerCase();
|
|
890
|
+
}
|
|
891
|
+
return value;
|
|
892
|
+
}
|
|
893
|
+
applyMetadataOrdering(results, params) {
|
|
894
|
+
const field = Array.isArray(params?.extraFieldsFilterOrderBy)
|
|
895
|
+
? String(params.extraFieldsFilterOrderBy[0] || '').trim()
|
|
896
|
+
: String(params?.orderBy || params?.sortBy || '').trim();
|
|
897
|
+
if (!field)
|
|
898
|
+
return results;
|
|
899
|
+
const order = String(params?.order || params?.sortOrder || 'desc').toLowerCase() === 'asc' ? 'asc' : 'desc';
|
|
900
|
+
return [...results].sort((left, right) => {
|
|
901
|
+
const leftValue = this.toComparableMetadataValue(this.getExtraField(left, field));
|
|
902
|
+
const rightValue = this.toComparableMetadataValue(this.getExtraField(right, field));
|
|
903
|
+
const leftMissing = leftValue === undefined || leftValue === null || leftValue === '';
|
|
904
|
+
const rightMissing = rightValue === undefined || rightValue === null || rightValue === '';
|
|
905
|
+
if (leftMissing && rightMissing)
|
|
906
|
+
return this.getSearchScore(right) - this.getSearchScore(left);
|
|
907
|
+
if (leftMissing)
|
|
908
|
+
return 1;
|
|
909
|
+
if (rightMissing)
|
|
910
|
+
return -1;
|
|
911
|
+
if (typeof leftValue === 'number' && typeof rightValue === 'number') {
|
|
912
|
+
return order === 'asc' ? leftValue - rightValue : rightValue - leftValue;
|
|
913
|
+
}
|
|
914
|
+
return order === 'asc'
|
|
915
|
+
? String(leftValue).localeCompare(String(rightValue))
|
|
916
|
+
: String(rightValue).localeCompare(String(leftValue));
|
|
917
|
+
}).slice(0, this.clampSearchInt(params?.k || params?.topK || results.length, results.length, 1, Math.max(results.length, 1)));
|
|
918
|
+
}
|
|
919
|
+
async createCollection(collectionName) {
|
|
920
|
+
await this.refreshProviderSettings();
|
|
921
|
+
if (!this.milvusClient) {
|
|
922
|
+
throw new common_1.BadRequestException('MILVUS_ADDRESS is not configured');
|
|
923
|
+
}
|
|
924
|
+
const targetCollection = collectionName || this.getDefaultCollectionName();
|
|
925
|
+
return await this.milvusClient.createCollection({
|
|
926
|
+
collection_name: targetCollection,
|
|
927
|
+
fields: [
|
|
928
|
+
{
|
|
929
|
+
name: 'id',
|
|
930
|
+
data_type: milvus2_sdk_node_1.DataType.Int64,
|
|
931
|
+
is_primary_key: true,
|
|
932
|
+
autoID: true,
|
|
933
|
+
},
|
|
934
|
+
{
|
|
935
|
+
name: 'embeddingName',
|
|
936
|
+
data_type: milvus2_sdk_node_1.DataType.VarChar,
|
|
937
|
+
max_length: 1000,
|
|
938
|
+
enable_analyzer: true,
|
|
939
|
+
},
|
|
940
|
+
{
|
|
941
|
+
name: 'agentId',
|
|
942
|
+
data_type: milvus2_sdk_node_1.DataType.VarChar,
|
|
943
|
+
max_length: 500,
|
|
944
|
+
enable_analyzer: true,
|
|
945
|
+
},
|
|
946
|
+
{
|
|
947
|
+
name: 'embeddingId',
|
|
948
|
+
data_type: milvus2_sdk_node_1.DataType.VarChar,
|
|
949
|
+
max_length: 500,
|
|
950
|
+
enable_analyzer: true,
|
|
951
|
+
},
|
|
952
|
+
{
|
|
953
|
+
name: 'extraFields',
|
|
954
|
+
data_type: milvus2_sdk_node_1.DataType.JSON,
|
|
955
|
+
enable_analyzer: true,
|
|
956
|
+
},
|
|
957
|
+
{
|
|
958
|
+
name: 'text',
|
|
959
|
+
data_type: milvus2_sdk_node_1.DataType.VarChar,
|
|
960
|
+
max_length: 10000,
|
|
961
|
+
enable_analyzer: true,
|
|
962
|
+
},
|
|
963
|
+
{
|
|
964
|
+
name: 'dense',
|
|
965
|
+
data_type: milvus2_sdk_node_1.DataType.FloatVector,
|
|
966
|
+
dim: this.getEmbeddingDimensions(),
|
|
967
|
+
},
|
|
968
|
+
{
|
|
969
|
+
name: 'sparse',
|
|
970
|
+
data_type: milvus2_sdk_node_1.DataType.SparseFloatVector,
|
|
971
|
+
},
|
|
972
|
+
],
|
|
973
|
+
functions: [
|
|
974
|
+
{
|
|
975
|
+
name: 'text_bm25_emb',
|
|
976
|
+
description: 'BM25 sparse vector from text',
|
|
977
|
+
type: milvus2_sdk_node_1.FunctionType.BM25,
|
|
978
|
+
input_field_names: ['text'],
|
|
979
|
+
output_field_names: ['sparse'],
|
|
980
|
+
params: {},
|
|
981
|
+
},
|
|
982
|
+
],
|
|
983
|
+
});
|
|
984
|
+
}
|
|
985
|
+
async createIndex(collectionName) {
|
|
986
|
+
await this.refreshProviderSettings();
|
|
987
|
+
const targetCollection = collectionName || this.getDefaultCollectionName();
|
|
988
|
+
const azureSearchConfigured = this.isAzureSearchConfigured(targetCollection);
|
|
989
|
+
if (!this.milvusClient && !azureSearchConfigured) {
|
|
990
|
+
throw new common_1.BadRequestException('Milvus ou Azure AI Search precisa estar configurado.');
|
|
991
|
+
}
|
|
992
|
+
let dense = null;
|
|
993
|
+
let sparse = null;
|
|
994
|
+
if (this.milvusClient) {
|
|
995
|
+
dense = await this.milvusClient.createIndex({
|
|
996
|
+
collection_name: targetCollection,
|
|
997
|
+
field_name: 'dense',
|
|
998
|
+
index_name: 'hnsw_index',
|
|
999
|
+
index_type: 'HNSW',
|
|
1000
|
+
metric_type: 'COSINE',
|
|
1001
|
+
params: { M: 16, efConstruction: 200 },
|
|
1002
|
+
});
|
|
1003
|
+
try {
|
|
1004
|
+
sparse = await this.milvusClient.createIndex({
|
|
1005
|
+
collection_name: targetCollection,
|
|
1006
|
+
field_name: 'sparse',
|
|
1007
|
+
index_name: 'bm25_index',
|
|
1008
|
+
index_type: 'SPARSE_INVERTED_INDEX',
|
|
1009
|
+
metric_type: 'BM25',
|
|
1010
|
+
params: {
|
|
1011
|
+
drop_ratio_build: 0.2,
|
|
1012
|
+
bm25_k1: 1.2,
|
|
1013
|
+
bm25_b: 0.75,
|
|
1014
|
+
},
|
|
1015
|
+
});
|
|
1016
|
+
}
|
|
1017
|
+
catch (error) {
|
|
1018
|
+
this.logger.warn(`Sparse BM25 index creation skipped: ${error?.message || String(error)}`);
|
|
1019
|
+
}
|
|
1020
|
+
}
|
|
1021
|
+
const azureSearch = azureSearchConfigured ? await this.createAzureSearchIndex(targetCollection) : null;
|
|
1022
|
+
return { collectionName: targetCollection, dense, sparse, azureSearch };
|
|
1023
|
+
}
|
|
1024
|
+
async embeddingCreate(text, provider, model) {
|
|
1025
|
+
const vectors = await this.embeddingCreateBatch([text], provider, model);
|
|
1026
|
+
return vectors[0];
|
|
1027
|
+
}
|
|
1028
|
+
async embeddingCreateBatch(texts, provider, model) {
|
|
1029
|
+
await this.refreshProviderSettings();
|
|
1030
|
+
const cleanTexts = (texts || []).map((text) => String(text || ''));
|
|
1031
|
+
if (!cleanTexts.length)
|
|
1032
|
+
return [];
|
|
1033
|
+
const normalizedProvider = this.normalizeOpenAIProvider(provider);
|
|
1034
|
+
const embeddingProvider = normalizedProvider === 'azure' || normalizedProvider === 'openai' ? provider : 'openai';
|
|
1035
|
+
const client = this.getOpenAIClientForProvider(embeddingProvider);
|
|
1036
|
+
const embeddingModel = this.getEmbeddingModelForProvider(embeddingProvider, model);
|
|
1037
|
+
const response = await this.withTransientRetry('embedding batch', () => client.embeddings.create({
|
|
1038
|
+
model: embeddingModel,
|
|
1039
|
+
input: cleanTexts,
|
|
1040
|
+
}));
|
|
1041
|
+
const vectors = (response.data || [])
|
|
1042
|
+
.sort((left, right) => Number(left.index || 0) - Number(right.index || 0))
|
|
1043
|
+
.map((item) => item.embedding);
|
|
1044
|
+
if (vectors.length !== cleanTexts.length) {
|
|
1045
|
+
throw new common_1.BadRequestException(`Embedding retornou ${vectors.length} vetores para ${cleanTexts.length} textos.`);
|
|
1046
|
+
}
|
|
1047
|
+
return vectors;
|
|
1048
|
+
}
|
|
1049
|
+
toAzureSafeId(value) {
|
|
1050
|
+
return Buffer.from(String(value || (0, crypto_1.randomUUID)())).toString('base64url').slice(0, 900);
|
|
1051
|
+
}
|
|
1052
|
+
escapeODataString(value) {
|
|
1053
|
+
return String(value || '').replace(/'/g, "''");
|
|
1054
|
+
}
|
|
1055
|
+
isAzureFilterable(schema, field) {
|
|
1056
|
+
if (!schema)
|
|
1057
|
+
return true;
|
|
1058
|
+
return schema.filterableFields.has(field);
|
|
1059
|
+
}
|
|
1060
|
+
pushAzureFilter(parts, schema, field, value) {
|
|
1061
|
+
if (value === undefined || value === null || value === '')
|
|
1062
|
+
return false;
|
|
1063
|
+
if (!this.isAzureFilterable(schema, field))
|
|
1064
|
+
return false;
|
|
1065
|
+
parts.push(`${field} eq '${this.escapeODataString(String(value))}'`);
|
|
1066
|
+
return true;
|
|
1067
|
+
}
|
|
1068
|
+
buildAzureFilter(agentId, extraFieldsFilter, schema) {
|
|
1069
|
+
const parts = [];
|
|
1070
|
+
if (agentId) {
|
|
1071
|
+
this.pushAzureFilter(parts, schema, 'agentId', agentId) ||
|
|
1072
|
+
this.pushAzureFilter(parts, schema, 'metadata/agentId', agentId);
|
|
1073
|
+
}
|
|
1074
|
+
const extra = typeof extraFieldsFilter === 'string' ? this.parseJsonField(extraFieldsFilter, null) : extraFieldsFilter;
|
|
1075
|
+
if (extra && typeof extra === 'object') {
|
|
1076
|
+
for (const [key, value] of Object.entries(extra)) {
|
|
1077
|
+
if (value === undefined || value === null || value === '')
|
|
1078
|
+
continue;
|
|
1079
|
+
if (key === 'embeddingName') {
|
|
1080
|
+
this.pushAzureFilter(parts, schema, 'embeddingName', value) ||
|
|
1081
|
+
this.pushAzureFilter(parts, schema, 'metadata/nomeEmbedding', value);
|
|
1082
|
+
}
|
|
1083
|
+
else if (key === 'embeddingId') {
|
|
1084
|
+
this.pushAzureFilter(parts, schema, 'embeddingId', value) ||
|
|
1085
|
+
this.pushAzureFilter(parts, schema, 'metadata/embeddingId', value);
|
|
1086
|
+
}
|
|
1087
|
+
else if (key === 'source' || key === 'marca') {
|
|
1088
|
+
this.pushAzureFilter(parts, schema, 'source', value) ||
|
|
1089
|
+
this.pushAzureFilter(parts, schema, 'metadata/source', value);
|
|
1090
|
+
}
|
|
1091
|
+
else if (key === 'attributes' || key === 'origem') {
|
|
1092
|
+
this.pushAzureFilter(parts, schema, 'attributes', value) ||
|
|
1093
|
+
this.pushAzureFilter(parts, schema, 'metadata/attributes', value);
|
|
1094
|
+
}
|
|
1095
|
+
else if (key === 'contentHash') {
|
|
1096
|
+
this.pushAzureFilter(parts, schema, 'contentHash', value) ||
|
|
1097
|
+
this.pushAzureFilter(parts, schema, 'metadata/contentHash', value);
|
|
1098
|
+
}
|
|
1099
|
+
else if (String(key).includes('/')) {
|
|
1100
|
+
this.pushAzureFilter(parts, schema, String(key), value);
|
|
1101
|
+
}
|
|
1102
|
+
else {
|
|
1103
|
+
this.pushAzureFilter(parts, schema, String(key), value);
|
|
1104
|
+
}
|
|
1105
|
+
}
|
|
1106
|
+
}
|
|
1107
|
+
return parts.join(' and ');
|
|
1108
|
+
}
|
|
1109
|
+
buildAzureRagMetadata(row, blob) {
|
|
1110
|
+
const extraFields = row?.extraFields || {};
|
|
1111
|
+
return {
|
|
1112
|
+
source: String(extraFields.source ?? extraFields.marca ?? ''),
|
|
1113
|
+
attributes: String(extraFields.attributes ?? extraFields.origem ?? ''),
|
|
1114
|
+
embeddingId: String(row?.embeddingId || ''),
|
|
1115
|
+
agentId: String(row?.agentId || ''),
|
|
1116
|
+
nomeEmbedding: String(row?.embeddingName || ''),
|
|
1117
|
+
filename: String(extraFields.filename || extraFields.arquivo || ''),
|
|
1118
|
+
chunkIndex: String(extraFields.chunkIndex ?? ''),
|
|
1119
|
+
chunksCount: String(extraFields.chunksCount ?? ''),
|
|
1120
|
+
part: String(extraFields.part ?? ''),
|
|
1121
|
+
totalParts: String(extraFields.totalParts ?? ''),
|
|
1122
|
+
contentHash: String(extraFields.contentHash || this.contentHash(row?.text || '')),
|
|
1123
|
+
blobName: String(blob?.blobName || ''),
|
|
1124
|
+
blobUrl: String(blob?.blobUrl || ''),
|
|
1125
|
+
};
|
|
1126
|
+
}
|
|
1127
|
+
buildAzureDocumentMetadata(schema, row, blob) {
|
|
1128
|
+
const metadata = this.buildAzureRagMetadata(row, blob);
|
|
1129
|
+
const complexMetadata = Object.entries(metadata).reduce((acc, [key, value]) => {
|
|
1130
|
+
if (schema.fields.has(`metadata/${key}`))
|
|
1131
|
+
acc[key] = value;
|
|
1132
|
+
return acc;
|
|
1133
|
+
}, {});
|
|
1134
|
+
if (Object.keys(complexMetadata).length)
|
|
1135
|
+
return complexMetadata;
|
|
1136
|
+
return JSON.stringify({
|
|
1137
|
+
...row?.extraFields,
|
|
1138
|
+
embeddingId: row?.embeddingId || '',
|
|
1139
|
+
agentId: row?.agentId || '',
|
|
1140
|
+
nomeEmbedding: row?.embeddingName || '',
|
|
1141
|
+
blobName: blob?.blobName || '',
|
|
1142
|
+
blobUrl: blob?.blobUrl || '',
|
|
1143
|
+
});
|
|
1144
|
+
}
|
|
1145
|
+
buildAzureBlobChunkPayload(row) {
|
|
1146
|
+
return {
|
|
1147
|
+
content: String(row?.text || ''),
|
|
1148
|
+
text: String(row?.text || ''),
|
|
1149
|
+
content_vector: Array.isArray(row?.dense) ? row.dense : undefined,
|
|
1150
|
+
vectorDimensions: Array.isArray(row?.dense) ? row.dense.length : 0,
|
|
1151
|
+
contentHash: row?.extraFields?.contentHash || this.contentHash(row?.text || ''),
|
|
1152
|
+
embeddingProvider: row?.embeddingProvider || '',
|
|
1153
|
+
embeddingModel: row?.embeddingModel || '',
|
|
1154
|
+
metadata: {
|
|
1155
|
+
...this.buildAzureRagMetadata(row),
|
|
1156
|
+
...(row?.extraFields || {}),
|
|
1157
|
+
},
|
|
1158
|
+
extraFields: row?.extraFields || {},
|
|
1159
|
+
agenteId: row?.agentId || '',
|
|
1160
|
+
agentId: row?.agentId || '',
|
|
1161
|
+
nomeEmbedding: row?.embeddingName || '',
|
|
1162
|
+
embeddingName: row?.embeddingName || '',
|
|
1163
|
+
embeddingId: row?.embeddingId || '',
|
|
1164
|
+
createdAt: new Date().toISOString(),
|
|
1165
|
+
};
|
|
1166
|
+
}
|
|
1167
|
+
normalizeAzureBlobJsonPayload(value) {
|
|
1168
|
+
const parsed = this.parseJsonField(value, null);
|
|
1169
|
+
if (!parsed || typeof parsed !== 'object')
|
|
1170
|
+
return null;
|
|
1171
|
+
const vector = parsed.content_vector || parsed.contentVector || parsed.vector || parsed.embedding;
|
|
1172
|
+
return {
|
|
1173
|
+
parsed,
|
|
1174
|
+
text: String(parsed.content ?? parsed.text ?? parsed.pageContent ?? parsed.chunk ?? parsed.body ?? ''),
|
|
1175
|
+
vectorDimensions: Array.isArray(vector) ? vector.length : 0,
|
|
1176
|
+
metadata: parsed.metadata || parsed.extraFields || {},
|
|
1177
|
+
};
|
|
1178
|
+
}
|
|
1179
|
+
async uploadRowsToAzureBlob(rows, collectionName) {
|
|
1180
|
+
if (!this.azureBlobContainer)
|
|
1181
|
+
return [];
|
|
1182
|
+
await this.azureBlobContainer.createIfNotExists().catch(() => undefined);
|
|
1183
|
+
const uploaded = [];
|
|
1184
|
+
for (const row of rows) {
|
|
1185
|
+
const id = this.toAzureSafeId(`${row.embeddingId}-${row.extraFields?.chunkIndex ?? (0, crypto_1.randomUUID)()}`);
|
|
1186
|
+
const blobName = `${collectionName}/${row.agentId || 'global'}/${row.embeddingId || id}/${row.extraFields?.chunkIndex ?? 0}.json`;
|
|
1187
|
+
const blob = this.azureBlobContainer.getBlockBlobClient(blobName);
|
|
1188
|
+
const payload = this.buildAzureBlobChunkPayload(row);
|
|
1189
|
+
const content = JSON.stringify(payload);
|
|
1190
|
+
await this.withTransientRetry('azure blob upload', () => blob.upload(content, Buffer.byteLength(content, 'utf-8'), {
|
|
1191
|
+
blobHTTPHeaders: { blobContentType: 'application/json; charset=utf-8' },
|
|
1192
|
+
metadata: {
|
|
1193
|
+
embeddingId: String(row.embeddingId || '').slice(0, 1024),
|
|
1194
|
+
agentId: String(row.agentId || '').slice(0, 1024),
|
|
1195
|
+
source: String(row.extraFields?.source || row.extraFields?.marca || '').slice(0, 1024),
|
|
1196
|
+
attributes: String(row.extraFields?.attributes || row.extraFields?.origem || '').slice(0, 1024),
|
|
1197
|
+
nomeEmbedding: String(row.embeddingName || '').slice(0, 1024),
|
|
1198
|
+
chunkIndex: String(row.extraFields?.chunkIndex ?? '').slice(0, 1024),
|
|
1199
|
+
contentHash: String(row.extraFields?.contentHash || this.contentHash(row.text || '')).slice(0, 1024),
|
|
1200
|
+
},
|
|
1201
|
+
}));
|
|
1202
|
+
uploaded.push({
|
|
1203
|
+
id,
|
|
1204
|
+
blobName,
|
|
1205
|
+
blobUrl: blob.url,
|
|
1206
|
+
text: String(row.text || ''),
|
|
1207
|
+
embeddingName: row.embeddingName || '',
|
|
1208
|
+
embeddingId: row.embeddingId || '',
|
|
1209
|
+
agentId: row.agentId || '',
|
|
1210
|
+
extraFields: row.extraFields || {},
|
|
1211
|
+
});
|
|
1212
|
+
}
|
|
1213
|
+
return uploaded;
|
|
1214
|
+
}
|
|
1215
|
+
async uploadTextToAzureBlob(blobName, content, contentType = 'text/plain') {
|
|
1216
|
+
await this.refreshProviderSettings();
|
|
1217
|
+
if (!this.azureBlobContainer) {
|
|
1218
|
+
throw new common_1.BadRequestException('Azure Blob Storage nao esta configurado.');
|
|
1219
|
+
}
|
|
1220
|
+
const safeBlobName = String(blobName || `${(0, crypto_1.randomUUID)()}.txt`)
|
|
1221
|
+
.replace(/^\/+/, '')
|
|
1222
|
+
.replace(/\\/g, '/')
|
|
1223
|
+
.replace(/\.\./g, '.');
|
|
1224
|
+
await this.azureBlobContainer.createIfNotExists().catch(() => undefined);
|
|
1225
|
+
const blob = this.azureBlobContainer.getBlockBlobClient(safeBlobName);
|
|
1226
|
+
await this.withTransientRetry('azure blob upload', () => blob.upload(content || '', Buffer.byteLength(content || '', 'utf-8'), {
|
|
1227
|
+
blobHTTPHeaders: { blobContentType: contentType || 'text/plain' },
|
|
1228
|
+
}));
|
|
1229
|
+
return {
|
|
1230
|
+
blobName: safeBlobName,
|
|
1231
|
+
blobUrl: blob.url,
|
|
1232
|
+
contentType: contentType || 'text/plain',
|
|
1233
|
+
bytes: Buffer.byteLength(content || '', 'utf-8'),
|
|
1234
|
+
};
|
|
1235
|
+
}
|
|
1236
|
+
async uploadChunksToAzureBlob(params) {
|
|
1237
|
+
await this.refreshProviderSettings();
|
|
1238
|
+
if (!this.azureBlobContainer) {
|
|
1239
|
+
throw new common_1.BadRequestException('Azure Blob Storage nao esta configurado.');
|
|
1240
|
+
}
|
|
1241
|
+
const text = this.sanitizeText(params?.text || '');
|
|
1242
|
+
if (!text) {
|
|
1243
|
+
throw new common_1.BadRequestException('text is required');
|
|
1244
|
+
}
|
|
1245
|
+
const chunkSize = this.clampSearchInt(params?.chunkSize, 512, 100, 10000);
|
|
1246
|
+
const chunkOverlap = this.clampSearchInt(params?.chunkOverlap, 70, 0, Math.max(0, chunkSize - 1));
|
|
1247
|
+
const embeddingBatchSize = this.clampSearchInt(params?.embeddingBatchSize, 64, 1, 256);
|
|
1248
|
+
const chunks = this.chunkText(text, chunkSize, chunkOverlap);
|
|
1249
|
+
const embeddingId = params?.embeddingId || (0, crypto_1.randomUUID)();
|
|
1250
|
+
const baseExtraFields = this.sanitizeObject(params?.extraFields || {});
|
|
1251
|
+
const rows = [];
|
|
1252
|
+
for (let index = 0; index < chunks.length; index += 1) {
|
|
1253
|
+
const chunk = chunks[index];
|
|
1254
|
+
const hash = this.contentHash(chunk);
|
|
1255
|
+
rows.push({
|
|
1256
|
+
embeddingName: this.sanitizeText(params?.embeddingName || baseExtraFields?.title || 'document'),
|
|
1257
|
+
embeddingId,
|
|
1258
|
+
agentId: params?.agentId || '',
|
|
1259
|
+
extraFields: {
|
|
1260
|
+
...baseExtraFields,
|
|
1261
|
+
chunkIndex: index,
|
|
1262
|
+
chunksCount: chunks.length,
|
|
1263
|
+
part: index + 1,
|
|
1264
|
+
totalParts: chunks.length,
|
|
1265
|
+
contentHash: hash,
|
|
1266
|
+
},
|
|
1267
|
+
text: chunk,
|
|
1268
|
+
embeddingProvider: params?.embeddingProvider || '',
|
|
1269
|
+
embeddingModel: params?.embeddingModel || '',
|
|
1270
|
+
});
|
|
1271
|
+
}
|
|
1272
|
+
for (const batch of this.chunkArray(rows, embeddingBatchSize)) {
|
|
1273
|
+
const vectors = await this.embeddingCreateBatch(batch.map((row) => row.text), params?.embeddingProvider, params?.embeddingModel);
|
|
1274
|
+
vectors.forEach((vector, index) => {
|
|
1275
|
+
batch[index].dense = vector;
|
|
1276
|
+
});
|
|
1277
|
+
}
|
|
1278
|
+
const collectionName = params?.collectionName || this.getDefaultCollectionName();
|
|
1279
|
+
const blobs = await this.uploadRowsToAzureBlob(rows, collectionName);
|
|
1280
|
+
return {
|
|
1281
|
+
collectionName,
|
|
1282
|
+
embeddingName: rows[0]?.embeddingName,
|
|
1283
|
+
embeddingId,
|
|
1284
|
+
chunks: rows.length,
|
|
1285
|
+
blobs,
|
|
1286
|
+
};
|
|
1287
|
+
}
|
|
1288
|
+
async listAzureBlobDocuments(prefix = '', options = {}) {
|
|
1289
|
+
await this.refreshProviderSettings();
|
|
1290
|
+
if (!this.azureBlobContainer) {
|
|
1291
|
+
throw new common_1.BadRequestException('Azure Blob Storage nao esta configurado.');
|
|
1292
|
+
}
|
|
1293
|
+
const safePrefix = String(prefix || '').replace(/^\/+/, '').replace(/\\/g, '/').replace(/\.\./g, '.');
|
|
1294
|
+
const limit = this.clampSearchInt(options?.limit, 100, 1, 1000);
|
|
1295
|
+
const includeText = options?.includeText === true;
|
|
1296
|
+
const query = String(options?.query || '').trim().toLowerCase();
|
|
1297
|
+
const queryTerms = this.normalizeBlobSearchTerms(query);
|
|
1298
|
+
const contentTypeFilter = String(options?.contentType || '').trim().toLowerCase();
|
|
1299
|
+
const modifiedAfter = options?.modifiedAfter ? new Date(options.modifiedAfter) : null;
|
|
1300
|
+
const modifiedBefore = options?.modifiedBefore ? new Date(options.modifiedBefore) : null;
|
|
1301
|
+
const minBytes = Number(options?.minBytes);
|
|
1302
|
+
const maxBytes = Number(options?.maxBytes);
|
|
1303
|
+
const maxTextBytes = this.clampSearchInt(options?.maxTextBytes, 2_000_000, 1, 10_000_000);
|
|
1304
|
+
const blobs = [];
|
|
1305
|
+
const debug = {
|
|
1306
|
+
scanned: 0,
|
|
1307
|
+
skippedByContentType: 0,
|
|
1308
|
+
skippedBySize: 0,
|
|
1309
|
+
skippedByDate: 0,
|
|
1310
|
+
skippedByQuery: 0,
|
|
1311
|
+
textDownloads: 0,
|
|
1312
|
+
textDownloadErrors: 0,
|
|
1313
|
+
textSkippedBySize: 0,
|
|
1314
|
+
};
|
|
1315
|
+
for await (const item of this.azureBlobContainer.listBlobsFlat({
|
|
1316
|
+
prefix: safePrefix || undefined,
|
|
1317
|
+
includeMetadata: true,
|
|
1318
|
+
})) {
|
|
1319
|
+
debug.scanned += 1;
|
|
1320
|
+
const blob = this.azureBlobContainer.getBlockBlobClient(item.name);
|
|
1321
|
+
const row = {
|
|
1322
|
+
blobName: item.name,
|
|
1323
|
+
blobUrl: blob.url,
|
|
1324
|
+
contentType: item.properties.contentType || '',
|
|
1325
|
+
size: item.properties.contentLength || 0,
|
|
1326
|
+
lastModified: item.properties.lastModified?.toISOString?.() || item.properties.lastModified || null,
|
|
1327
|
+
metadata: item.metadata || {},
|
|
1328
|
+
};
|
|
1329
|
+
const rowModified = row.lastModified ? new Date(row.lastModified) : null;
|
|
1330
|
+
if (contentTypeFilter && !String(row.contentType || '').toLowerCase().includes(contentTypeFilter)) {
|
|
1331
|
+
debug.skippedByContentType += 1;
|
|
1332
|
+
continue;
|
|
1333
|
+
}
|
|
1334
|
+
if (Number.isFinite(minBytes) && row.size < minBytes) {
|
|
1335
|
+
debug.skippedBySize += 1;
|
|
1336
|
+
continue;
|
|
1337
|
+
}
|
|
1338
|
+
if (Number.isFinite(maxBytes) && row.size > maxBytes) {
|
|
1339
|
+
debug.skippedBySize += 1;
|
|
1340
|
+
continue;
|
|
1341
|
+
}
|
|
1342
|
+
if (modifiedAfter && !Number.isNaN(modifiedAfter.getTime()) && rowModified && rowModified < modifiedAfter) {
|
|
1343
|
+
debug.skippedByDate += 1;
|
|
1344
|
+
continue;
|
|
1345
|
+
}
|
|
1346
|
+
if (modifiedBefore && !Number.isNaN(modifiedBefore.getTime()) && rowModified && rowModified > modifiedBefore) {
|
|
1347
|
+
debug.skippedByDate += 1;
|
|
1348
|
+
continue;
|
|
1349
|
+
}
|
|
1350
|
+
let haystack = [
|
|
1351
|
+
row.blobName,
|
|
1352
|
+
row.contentType,
|
|
1353
|
+
JSON.stringify(row.metadata || {}),
|
|
1354
|
+
].join('\n').toLowerCase();
|
|
1355
|
+
const metadataMatchesQuery = this.blobMatchesTerms(haystack, queryTerms);
|
|
1356
|
+
if (includeText && (!queryTerms.length || !metadataMatchesQuery)) {
|
|
1357
|
+
if (row.size <= maxTextBytes) {
|
|
1358
|
+
try {
|
|
1359
|
+
const download = await blob.download(0);
|
|
1360
|
+
row.text = await this.streamToString(download.readableStreamBody);
|
|
1361
|
+
const jsonPayload = this.normalizeAzureBlobJsonPayload(row.text);
|
|
1362
|
+
if (jsonPayload) {
|
|
1363
|
+
row.payload = jsonPayload.parsed;
|
|
1364
|
+
row.text = jsonPayload.text;
|
|
1365
|
+
row.metadata = { ...(row.metadata || {}), ...(jsonPayload.metadata || {}) };
|
|
1366
|
+
row.vectorDimensions = jsonPayload.vectorDimensions;
|
|
1367
|
+
row.hasVector = jsonPayload.vectorDimensions > 0;
|
|
1368
|
+
}
|
|
1369
|
+
row.textPreview = String(row.text || '').slice(0, 500);
|
|
1370
|
+
debug.textDownloads += 1;
|
|
1371
|
+
haystack = `${haystack}\n${String(row.text || '').toLowerCase()}`;
|
|
1372
|
+
}
|
|
1373
|
+
catch (error) {
|
|
1374
|
+
row.text = '';
|
|
1375
|
+
row.textError = error?.message || String(error);
|
|
1376
|
+
debug.textDownloadErrors += 1;
|
|
1377
|
+
}
|
|
1378
|
+
}
|
|
1379
|
+
else {
|
|
1380
|
+
row.text = '';
|
|
1381
|
+
row.textSkipped = `Blob com ${row.size} bytes maior que o limite de leitura ${maxTextBytes}.`;
|
|
1382
|
+
debug.textSkippedBySize += 1;
|
|
1383
|
+
}
|
|
1384
|
+
}
|
|
1385
|
+
if (queryTerms.length && !this.blobMatchesTerms(haystack, queryTerms)) {
|
|
1386
|
+
debug.skippedByQuery += 1;
|
|
1387
|
+
continue;
|
|
1388
|
+
}
|
|
1389
|
+
blobs.push(row);
|
|
1390
|
+
if (blobs.length >= limit)
|
|
1391
|
+
break;
|
|
1392
|
+
}
|
|
1393
|
+
return {
|
|
1394
|
+
prefix: safePrefix,
|
|
1395
|
+
filters: {
|
|
1396
|
+
query,
|
|
1397
|
+
contentType: contentTypeFilter,
|
|
1398
|
+
modifiedAfter: options?.modifiedAfter || '',
|
|
1399
|
+
modifiedBefore: options?.modifiedBefore || '',
|
|
1400
|
+
minBytes: Number.isFinite(minBytes) ? minBytes : null,
|
|
1401
|
+
maxBytes: Number.isFinite(maxBytes) ? maxBytes : null,
|
|
1402
|
+
maxTextBytes,
|
|
1403
|
+
},
|
|
1404
|
+
debug,
|
|
1405
|
+
total: blobs.length,
|
|
1406
|
+
blobs,
|
|
1407
|
+
};
|
|
1408
|
+
}
|
|
1409
|
+
async readAzureBlobDocument(blobName) {
|
|
1410
|
+
await this.refreshProviderSettings();
|
|
1411
|
+
if (!this.azureBlobContainer) {
|
|
1412
|
+
throw new common_1.BadRequestException('Azure Blob Storage nao esta configurado.');
|
|
1413
|
+
}
|
|
1414
|
+
const safeBlobName = String(blobName || '').replace(/^\/+/, '').replace(/\\/g, '/').replace(/\.\./g, '.');
|
|
1415
|
+
if (!safeBlobName) {
|
|
1416
|
+
throw new common_1.BadRequestException('blobName is required');
|
|
1417
|
+
}
|
|
1418
|
+
const blob = this.azureBlobContainer.getBlockBlobClient(safeBlobName);
|
|
1419
|
+
const exists = await blob.exists();
|
|
1420
|
+
if (!exists) {
|
|
1421
|
+
throw new common_1.BadRequestException('Blob nao encontrado.');
|
|
1422
|
+
}
|
|
1423
|
+
const properties = await blob.getProperties();
|
|
1424
|
+
const download = await blob.download(0);
|
|
1425
|
+
const rawText = await this.streamToString(download.readableStreamBody);
|
|
1426
|
+
const jsonPayload = this.normalizeAzureBlobJsonPayload(rawText);
|
|
1427
|
+
return {
|
|
1428
|
+
blobName: safeBlobName,
|
|
1429
|
+
blobUrl: blob.url,
|
|
1430
|
+
contentType: properties.contentType || '',
|
|
1431
|
+
size: properties.contentLength || Buffer.byteLength(rawText, 'utf-8'),
|
|
1432
|
+
lastModified: properties.lastModified?.toISOString?.() || properties.lastModified || null,
|
|
1433
|
+
metadata: { ...(properties.metadata || {}), ...(jsonPayload?.metadata || {}) },
|
|
1434
|
+
text: jsonPayload?.text ?? rawText,
|
|
1435
|
+
rawText: jsonPayload ? undefined : rawText,
|
|
1436
|
+
payload: jsonPayload?.parsed,
|
|
1437
|
+
hasVector: Boolean(jsonPayload?.vectorDimensions),
|
|
1438
|
+
vectorDimensions: jsonPayload?.vectorDimensions || 0,
|
|
1439
|
+
};
|
|
1440
|
+
}
|
|
1441
|
+
async indexRowsInAzureSearch(collectionName, rows, uploadedBlobs = []) {
|
|
1442
|
+
if (!this.isAzureSearchConfigured(collectionName) || !rows.length)
|
|
1443
|
+
return null;
|
|
1444
|
+
const endpoint = this.getAzureSearchEndpoint();
|
|
1445
|
+
const indexName = this.getAzureSearchIndexName(collectionName);
|
|
1446
|
+
const schema = await this.getAzureSearchIndexSchema(indexName);
|
|
1447
|
+
const fields = schema.fields;
|
|
1448
|
+
const vectorField = this.azureSearchVectorField(fields);
|
|
1449
|
+
const textFields = this.azureSearchTextFields(fields);
|
|
1450
|
+
if (!vectorField) {
|
|
1451
|
+
throw new common_1.BadRequestException('Azure AI Search precisa de um campo vetorial no indice: content_vector, contentVector, vector, embedding ou embeddingVector.');
|
|
1452
|
+
}
|
|
1453
|
+
if (!textFields.length) {
|
|
1454
|
+
throw new common_1.BadRequestException('Azure AI Search precisa de um campo textual pesquisavel: content, text, chunk, pageContent ou body.');
|
|
1455
|
+
}
|
|
1456
|
+
const expectedDimensions = schema.vectorDimensions.get(vectorField);
|
|
1457
|
+
const url = `${endpoint}/indexes/${encodeURIComponent(indexName)}/docs/index?api-version=${this.getAzureSearchApiVersion()}`;
|
|
1458
|
+
const blobByIndex = new Map(uploadedBlobs.map((blob, index) => [index, blob]));
|
|
1459
|
+
const value = rows.map((row, index) => {
|
|
1460
|
+
const blob = blobByIndex.get(index);
|
|
1461
|
+
const id = blob?.id || this.toAzureSafeId(`${row.embeddingId}-${row.extraFields?.chunkIndex ?? index}`);
|
|
1462
|
+
this.ensureVectorDimensions(row.dense, expectedDimensions, `Azure AI Search/${vectorField}`);
|
|
1463
|
+
const document = {
|
|
1464
|
+
'@search.action': 'mergeOrUpload',
|
|
1465
|
+
id,
|
|
1466
|
+
embeddingName: row.embeddingName || '',
|
|
1467
|
+
embeddingId: row.embeddingId || '',
|
|
1468
|
+
agentId: row.agentId || '',
|
|
1469
|
+
source: row.extraFields?.source || row.extraFields?.marca || '',
|
|
1470
|
+
attributes: row.extraFields?.attributes || row.extraFields?.origem || '',
|
|
1471
|
+
contentHash: row.extraFields?.contentHash || this.contentHash(row.text || ''),
|
|
1472
|
+
extraFieldsJson: JSON.stringify(row.extraFields || {}),
|
|
1473
|
+
blobName: blob?.blobName || '',
|
|
1474
|
+
blobUrl: blob?.blobUrl || '',
|
|
1475
|
+
[vectorField]: row.dense,
|
|
1476
|
+
};
|
|
1477
|
+
if (fields.has('metadata')) {
|
|
1478
|
+
document.metadata = this.buildAzureDocumentMetadata(schema, row, blob);
|
|
1479
|
+
}
|
|
1480
|
+
Object.entries(row.extraFields || {}).forEach(([key, value]) => {
|
|
1481
|
+
if (!fields.has(key) || Object.prototype.hasOwnProperty.call(document, key))
|
|
1482
|
+
return;
|
|
1483
|
+
if (value === undefined || value === null)
|
|
1484
|
+
return;
|
|
1485
|
+
document[key] = typeof value === 'object' ? JSON.stringify(value) : value;
|
|
1486
|
+
});
|
|
1487
|
+
textFields.forEach((field) => {
|
|
1488
|
+
document[field] = row.text;
|
|
1489
|
+
});
|
|
1490
|
+
Object.keys(document).forEach((key) => {
|
|
1491
|
+
if (key !== '@search.action' && !fields.has(key))
|
|
1492
|
+
delete document[key];
|
|
1493
|
+
});
|
|
1494
|
+
return document;
|
|
1495
|
+
});
|
|
1496
|
+
const batches = this.chunkArray(value, 500);
|
|
1497
|
+
const responses = [];
|
|
1498
|
+
for (const batch of batches) {
|
|
1499
|
+
const response = await this.fetchWithRetry('azure search index', url, {
|
|
1500
|
+
method: 'POST',
|
|
1501
|
+
headers: {
|
|
1502
|
+
'api-key': this.getAzureSearchApiKey(),
|
|
1503
|
+
'Content-Type': 'application/json',
|
|
1504
|
+
},
|
|
1505
|
+
body: JSON.stringify({ value: batch }),
|
|
1506
|
+
});
|
|
1507
|
+
const body = await response.json().catch(() => ({}));
|
|
1508
|
+
if (!response.ok) {
|
|
1509
|
+
throw new common_1.BadRequestException({
|
|
1510
|
+
message: 'Azure AI Search index failed',
|
|
1511
|
+
status: response.status,
|
|
1512
|
+
body,
|
|
1513
|
+
});
|
|
1514
|
+
}
|
|
1515
|
+
responses.push(body);
|
|
1516
|
+
}
|
|
1517
|
+
return {
|
|
1518
|
+
indexed: value.length,
|
|
1519
|
+
batches: batches.length,
|
|
1520
|
+
vectorField,
|
|
1521
|
+
textFields,
|
|
1522
|
+
expectedDimensions: expectedDimensions || null,
|
|
1523
|
+
responses,
|
|
1524
|
+
};
|
|
1525
|
+
}
|
|
1526
|
+
async createAzureSearchIndex(collectionName) {
|
|
1527
|
+
if (!this.getAzureSearchEndpoint() || !this.getAzureSearchApiKey())
|
|
1528
|
+
return null;
|
|
1529
|
+
const indexName = this.getAzureSearchIndexName(collectionName);
|
|
1530
|
+
if (!indexName)
|
|
1531
|
+
return null;
|
|
1532
|
+
const dimensions = this.getEmbeddingDimensions();
|
|
1533
|
+
const url = `${this.getAzureSearchEndpoint()}/indexes/${encodeURIComponent(indexName)}?api-version=${this.getAzureSearchApiVersion()}`;
|
|
1534
|
+
const response = await this.fetchWithRetry('azure search create index', url, {
|
|
1535
|
+
method: 'PUT',
|
|
1536
|
+
headers: {
|
|
1537
|
+
'api-key': this.getAzureSearchApiKey(),
|
|
1538
|
+
'Content-Type': 'application/json',
|
|
1539
|
+
},
|
|
1540
|
+
body: JSON.stringify({
|
|
1541
|
+
name: indexName,
|
|
1542
|
+
fields: [
|
|
1543
|
+
{ name: 'id', type: 'Edm.String', key: true, filterable: true },
|
|
1544
|
+
{ name: 'content', type: 'Edm.String', searchable: true },
|
|
1545
|
+
{ name: 'text', type: 'Edm.String', searchable: true },
|
|
1546
|
+
{ name: 'embeddingName', type: 'Edm.String', searchable: true, filterable: true, sortable: true },
|
|
1547
|
+
{ name: 'embeddingId', type: 'Edm.String', filterable: true },
|
|
1548
|
+
{ name: 'agentId', type: 'Edm.String', filterable: true },
|
|
1549
|
+
{ name: 'source', type: 'Edm.String', searchable: true, filterable: true },
|
|
1550
|
+
{ name: 'attributes', type: 'Edm.String', searchable: true, filterable: true },
|
|
1551
|
+
{ name: 'contentHash', type: 'Edm.String', filterable: true },
|
|
1552
|
+
{ name: 'extraFieldsJson', type: 'Edm.String', searchable: true },
|
|
1553
|
+
{
|
|
1554
|
+
name: 'metadata',
|
|
1555
|
+
type: 'Edm.ComplexType',
|
|
1556
|
+
fields: [
|
|
1557
|
+
{ name: 'source', type: 'Edm.String', searchable: true, filterable: true },
|
|
1558
|
+
{ name: 'attributes', type: 'Edm.String', searchable: true, filterable: true },
|
|
1559
|
+
{ name: 'embeddingId', type: 'Edm.String', filterable: true },
|
|
1560
|
+
{ name: 'agentId', type: 'Edm.String', filterable: true },
|
|
1561
|
+
{ name: 'nomeEmbedding', type: 'Edm.String', searchable: true, filterable: true },
|
|
1562
|
+
{ name: 'filename', type: 'Edm.String', searchable: true, filterable: true },
|
|
1563
|
+
{ name: 'chunkIndex', type: 'Edm.String', filterable: true },
|
|
1564
|
+
{ name: 'chunksCount', type: 'Edm.String', filterable: true },
|
|
1565
|
+
{ name: 'part', type: 'Edm.String', filterable: true },
|
|
1566
|
+
{ name: 'totalParts', type: 'Edm.String', filterable: true },
|
|
1567
|
+
{ name: 'contentHash', type: 'Edm.String', filterable: true },
|
|
1568
|
+
{ name: 'blobName', type: 'Edm.String', filterable: true },
|
|
1569
|
+
{ name: 'blobUrl', type: 'Edm.String' },
|
|
1570
|
+
],
|
|
1571
|
+
},
|
|
1572
|
+
{ name: 'blobName', type: 'Edm.String', filterable: true },
|
|
1573
|
+
{ name: 'blobUrl', type: 'Edm.String' },
|
|
1574
|
+
{
|
|
1575
|
+
name: 'content_vector',
|
|
1576
|
+
type: 'Collection(Edm.Single)',
|
|
1577
|
+
searchable: true,
|
|
1578
|
+
dimensions,
|
|
1579
|
+
vectorSearchProfile: 'vector-profile',
|
|
1580
|
+
},
|
|
1581
|
+
{
|
|
1582
|
+
name: 'contentVector',
|
|
1583
|
+
type: 'Collection(Edm.Single)',
|
|
1584
|
+
searchable: true,
|
|
1585
|
+
dimensions,
|
|
1586
|
+
vectorSearchProfile: 'vector-profile',
|
|
1587
|
+
},
|
|
1588
|
+
],
|
|
1589
|
+
vectorSearch: {
|
|
1590
|
+
algorithms: [
|
|
1591
|
+
{
|
|
1592
|
+
name: 'hnsw',
|
|
1593
|
+
kind: 'hnsw',
|
|
1594
|
+
hnswParameters: {
|
|
1595
|
+
metric: 'cosine',
|
|
1596
|
+
m: 16,
|
|
1597
|
+
efConstruction: 200,
|
|
1598
|
+
efSearch: 128,
|
|
1599
|
+
},
|
|
1600
|
+
},
|
|
1601
|
+
],
|
|
1602
|
+
profiles: [
|
|
1603
|
+
{
|
|
1604
|
+
name: 'vector-profile',
|
|
1605
|
+
algorithm: 'hnsw',
|
|
1606
|
+
},
|
|
1607
|
+
],
|
|
1608
|
+
},
|
|
1609
|
+
semantic: {
|
|
1610
|
+
configurations: [
|
|
1611
|
+
{
|
|
1612
|
+
name: 'semantic-config',
|
|
1613
|
+
prioritizedFields: {
|
|
1614
|
+
contentFields: [{ fieldName: 'content' }],
|
|
1615
|
+
titleField: { fieldName: 'embeddingName' },
|
|
1616
|
+
},
|
|
1617
|
+
},
|
|
1618
|
+
{
|
|
1619
|
+
name: 'content',
|
|
1620
|
+
prioritizedFields: {
|
|
1621
|
+
contentFields: [{ fieldName: 'content' }],
|
|
1622
|
+
titleField: { fieldName: 'embeddingName' },
|
|
1623
|
+
},
|
|
1624
|
+
},
|
|
1625
|
+
],
|
|
1626
|
+
},
|
|
1627
|
+
}),
|
|
1628
|
+
});
|
|
1629
|
+
const body = await response.json().catch(() => ({}));
|
|
1630
|
+
if (!response.ok) {
|
|
1631
|
+
throw new common_1.BadRequestException({
|
|
1632
|
+
message: 'Azure AI Search create index failed',
|
|
1633
|
+
status: response.status,
|
|
1634
|
+
body,
|
|
1635
|
+
});
|
|
1636
|
+
}
|
|
1637
|
+
this.azureSearchFieldCache.delete(`${this.getAzureSearchEndpoint()}::${indexName}`);
|
|
1638
|
+
return body;
|
|
1639
|
+
}
|
|
1640
|
+
async searchAzureSearch(query, collectionName, agentId, params = {}) {
|
|
1641
|
+
if (!this.isAzureSearchConfigured(collectionName)) {
|
|
1642
|
+
return { results: [], warning: 'Azure AI Search is not configured' };
|
|
1643
|
+
}
|
|
1644
|
+
const denseVector = await this.embeddingCreate(query, params?.embeddingProvider, params?.embeddingModel);
|
|
1645
|
+
const topK = this.clampSearchInt(params?.k ?? params?.topK, 15, 1, 100);
|
|
1646
|
+
const candidateTopK = this.clampSearchInt(params?.candidateTopK, Math.max(topK, topK * 4), topK, 200);
|
|
1647
|
+
const endpoint = this.getAzureSearchEndpoint();
|
|
1648
|
+
const indexName = this.getAzureSearchIndexName(collectionName);
|
|
1649
|
+
const schema = await this.getAzureSearchIndexSchema(indexName);
|
|
1650
|
+
const fields = schema.fields;
|
|
1651
|
+
const select = this.azureSearchSelectFields(fields);
|
|
1652
|
+
const vectorField = this.azureSearchVectorField(fields);
|
|
1653
|
+
if (!vectorField) {
|
|
1654
|
+
throw new common_1.BadRequestException('Azure AI Search precisa de um campo vetorial no indice: content_vector, contentVector, vector, embedding ou embeddingVector.');
|
|
1655
|
+
}
|
|
1656
|
+
this.ensureVectorDimensions(denseVector, schema.vectorDimensions.get(vectorField), `Azure AI Search/${vectorField}`);
|
|
1657
|
+
const semanticConfiguration = this.azureSearchSemanticConfiguration(schema, params);
|
|
1658
|
+
const url = `${endpoint}/indexes/${encodeURIComponent(indexName)}/docs/search?api-version=${this.getAzureSearchApiVersion()}`;
|
|
1659
|
+
const baseExtraFieldsFilter = params?.extraFieldsFilter || params?.metadataFilter || params?.meta || null;
|
|
1660
|
+
const perRoundFilters = Array.isArray(params?.extraFieldsFilterPerRound) ? params.extraFieldsFilterPerRound : null;
|
|
1661
|
+
const perRoundLimitValues = Array.isArray(params?.extraFieldsFilterPerRoundLimits)
|
|
1662
|
+
? params.extraFieldsFilterPerRoundLimits.map((value) => {
|
|
1663
|
+
if (value === undefined || value === null || value === '')
|
|
1664
|
+
return null;
|
|
1665
|
+
const parsed = Number(value);
|
|
1666
|
+
return Number.isFinite(parsed) ? Math.max(0, Math.floor(parsed)) : null;
|
|
1667
|
+
})
|
|
1668
|
+
: null;
|
|
1669
|
+
const hasPerRoundLimits = Boolean(perRoundLimitValues?.some((value) => value !== null));
|
|
1670
|
+
const roundStopFind = params?.roundStopFind !== false;
|
|
1671
|
+
const roundMixHalf = params?.roundMixHalf === true;
|
|
1672
|
+
const rawFilter = String(params?.filterExpression || params?.filterExpr || '').trim();
|
|
1673
|
+
const runRound = async (extraFieldsFilter) => {
|
|
1674
|
+
const structuredFilter = this.buildAzureFilter(agentId, extraFieldsFilter, schema);
|
|
1675
|
+
const filter = [structuredFilter, rawFilter].filter(Boolean).join(' and ');
|
|
1676
|
+
const payload = {
|
|
1677
|
+
search: query,
|
|
1678
|
+
top: topK,
|
|
1679
|
+
filter: filter || undefined,
|
|
1680
|
+
select,
|
|
1681
|
+
vectorQueries: [
|
|
1682
|
+
{
|
|
1683
|
+
kind: 'vector',
|
|
1684
|
+
vector: denseVector,
|
|
1685
|
+
fields: vectorField,
|
|
1686
|
+
k: candidateTopK,
|
|
1687
|
+
},
|
|
1688
|
+
],
|
|
1689
|
+
};
|
|
1690
|
+
if (semanticConfiguration) {
|
|
1691
|
+
payload.queryType = 'semantic';
|
|
1692
|
+
payload.semanticConfiguration = semanticConfiguration;
|
|
1693
|
+
payload.captions = 'extractive';
|
|
1694
|
+
payload.answers = 'extractive|count-3';
|
|
1695
|
+
}
|
|
1696
|
+
if (params?.exhaustive === true) {
|
|
1697
|
+
payload.vectorQueries[0].exhaustive = true;
|
|
1698
|
+
}
|
|
1699
|
+
const response = await this.fetchWithRetry('azure search query', url, {
|
|
1700
|
+
method: 'POST',
|
|
1701
|
+
headers: {
|
|
1702
|
+
'api-key': this.getAzureSearchApiKey(),
|
|
1703
|
+
'Content-Type': 'application/json',
|
|
1704
|
+
},
|
|
1705
|
+
body: JSON.stringify(payload),
|
|
1706
|
+
});
|
|
1707
|
+
const body = await response.json().catch(() => ({}));
|
|
1708
|
+
if (!response.ok) {
|
|
1709
|
+
throw new common_1.BadRequestException({
|
|
1710
|
+
message: 'Azure AI Search query failed',
|
|
1711
|
+
status: response.status,
|
|
1712
|
+
body,
|
|
1713
|
+
});
|
|
1714
|
+
}
|
|
1715
|
+
const results = (Array.isArray(body?.value) ? body.value : []).map((item) => {
|
|
1716
|
+
const metadata = this.pickAzureSearchDocumentField(item, fields, ['metadata'], {});
|
|
1717
|
+
const extraFields = this.parseJsonField(this.pickAzureSearchDocumentField(item, fields, ['extraFieldsJson'], {}), {});
|
|
1718
|
+
return {
|
|
1719
|
+
id: String(this.pickAzureSearchDocumentField(item, fields, ['id'], (0, crypto_1.randomUUID)())),
|
|
1720
|
+
source: 'azure_search',
|
|
1721
|
+
score: Number(item['@search.score'] || item.score || 0),
|
|
1722
|
+
rerankerScore: Number(item['@search.rerankerScore'] || 0),
|
|
1723
|
+
embeddingName: this.pickAzureSearchDocumentField(item, fields, ['embeddingName', 'title', 'name'], metadata?.nomeEmbedding || ''),
|
|
1724
|
+
embeddingId: this.pickAzureSearchDocumentField(item, fields, ['embeddingId', 'documentId'], metadata?.embeddingId || ''),
|
|
1725
|
+
agentId: this.pickAzureSearchDocumentField(item, fields, ['agentId'], metadata?.agentId || ''),
|
|
1726
|
+
extraFields: { ...(typeof metadata === 'object' ? metadata : {}), ...(extraFields || {}) },
|
|
1727
|
+
text: this.pickAzureSearchDocumentField(item, fields, ['content', 'text', 'chunk', 'pageContent', 'body'], ''),
|
|
1728
|
+
blobName: this.pickAzureSearchDocumentField(item, fields, ['blobName'], metadata?.blobName || ''),
|
|
1729
|
+
blobUrl: this.pickAzureSearchDocumentField(item, fields, ['blobUrl'], metadata?.blobUrl || ''),
|
|
1730
|
+
contentHash: this.pickAzureSearchDocumentField(item, fields, ['contentHash'], metadata?.contentHash || ''),
|
|
1731
|
+
captions: item['@search.captions'] || undefined,
|
|
1732
|
+
};
|
|
1733
|
+
});
|
|
1734
|
+
return {
|
|
1735
|
+
results,
|
|
1736
|
+
denseCount: results.length,
|
|
1737
|
+
sparseCount: 0,
|
|
1738
|
+
searchDebug: {
|
|
1739
|
+
mode: 'azure_search_hybrid',
|
|
1740
|
+
topK,
|
|
1741
|
+
candidateTopK,
|
|
1742
|
+
indexName,
|
|
1743
|
+
select,
|
|
1744
|
+
vectorField,
|
|
1745
|
+
semanticConfiguration: semanticConfiguration || null,
|
|
1746
|
+
answers: body?.['@search.answers'] || undefined,
|
|
1747
|
+
filter: filter || null,
|
|
1748
|
+
},
|
|
1749
|
+
};
|
|
1750
|
+
};
|
|
1751
|
+
const filtersToTry = perRoundFilters?.length
|
|
1752
|
+
? perRoundFilters.map((round) => (this.isEmptyRoundFilter(round) ? null : this.mergeExtraFieldsFilters(baseExtraFieldsFilter, round)))
|
|
1753
|
+
: [baseExtraFieldsFilter];
|
|
1754
|
+
if (filtersToTry.length <= 1) {
|
|
1755
|
+
const single = await runRound(filtersToTry[0]);
|
|
1756
|
+
return {
|
|
1757
|
+
...single,
|
|
1758
|
+
results: this.applyMetadataOrdering(single.results || [], params).slice(0, topK),
|
|
1759
|
+
};
|
|
1760
|
+
}
|
|
1761
|
+
let lastResult = null;
|
|
1762
|
+
const collected = [];
|
|
1763
|
+
const baseRoundLimit = filtersToTry.length ? Math.floor(topK / filtersToTry.length) : topK;
|
|
1764
|
+
const remainder = filtersToTry.length ? topK - baseRoundLimit * filtersToTry.length : 0;
|
|
1765
|
+
for (let roundIndex = 0; roundIndex < filtersToTry.length; roundIndex += 1) {
|
|
1766
|
+
const result = await runRound(filtersToTry[roundIndex]);
|
|
1767
|
+
lastResult = result;
|
|
1768
|
+
if (result.results.length && roundStopFind) {
|
|
1769
|
+
return {
|
|
1770
|
+
...result,
|
|
1771
|
+
results: this.applyMetadataOrdering(result.results, params).slice(0, topK),
|
|
1772
|
+
roundIndex,
|
|
1773
|
+
rounds: filtersToTry.length,
|
|
1774
|
+
};
|
|
1775
|
+
}
|
|
1776
|
+
if (result.results.length) {
|
|
1777
|
+
const configuredLimit = hasPerRoundLimits ? perRoundLimitValues?.[roundIndex] : null;
|
|
1778
|
+
const roundLimit = typeof configuredLimit === 'number'
|
|
1779
|
+
? configuredLimit
|
|
1780
|
+
: roundMixHalf
|
|
1781
|
+
? Math.max(1, baseRoundLimit + (roundIndex < remainder ? 1 : 0))
|
|
1782
|
+
: result.results.length;
|
|
1783
|
+
result.results.slice(0, roundLimit).forEach((item) => collected.push({ item, roundIndex }));
|
|
1784
|
+
}
|
|
1785
|
+
}
|
|
1786
|
+
if (collected.length) {
|
|
1787
|
+
const byId = new Map();
|
|
1788
|
+
collected.forEach((entry) => {
|
|
1789
|
+
const id = String(entry.item?.id || entry.item?.embeddingId || entry.item?.text || '');
|
|
1790
|
+
const previous = byId.get(id);
|
|
1791
|
+
if (!previous || entry.roundIndex < previous.roundIndex || this.getSearchScore(entry.item) > this.getSearchScore(previous.item)) {
|
|
1792
|
+
byId.set(id, entry);
|
|
1793
|
+
}
|
|
1794
|
+
});
|
|
1795
|
+
const mergedRounds = Array.from(byId.values())
|
|
1796
|
+
.sort((left, right) => {
|
|
1797
|
+
if (left.roundIndex !== right.roundIndex)
|
|
1798
|
+
return left.roundIndex - right.roundIndex;
|
|
1799
|
+
return this.getSearchScore(right.item) - this.getSearchScore(left.item);
|
|
1800
|
+
})
|
|
1801
|
+
.map((entry) => entry.item);
|
|
1802
|
+
return {
|
|
1803
|
+
...(lastResult || {}),
|
|
1804
|
+
results: this.applyMetadataOrdering(mergedRounds, params).slice(0, topK),
|
|
1805
|
+
rounds: filtersToTry.length,
|
|
1806
|
+
};
|
|
1807
|
+
}
|
|
1808
|
+
return {
|
|
1809
|
+
...(lastResult || { denseCount: 0, sparseCount: 0 }),
|
|
1810
|
+
results: this.applyMetadataOrdering(lastResult?.results || [], params).slice(0, topK),
|
|
1811
|
+
rounds: filtersToTry.length,
|
|
1812
|
+
};
|
|
1813
|
+
}
|
|
1814
|
+
async addDocuments(collectionName, documents, options = {}) {
|
|
1815
|
+
await this.refreshProviderSettings();
|
|
1816
|
+
const targetCollection = collectionName || this.getDefaultCollectionName();
|
|
1817
|
+
const azureSearchConfigured = this.isAzureSearchConfigured(targetCollection);
|
|
1818
|
+
const requestedSearchProvider = String(options?.searchProvider || options?.vectorProvider || options?.provider || options?.ragProvider || '').toLowerCase();
|
|
1819
|
+
if (requestedSearchProvider === 'milvus' && !this.milvusClient) {
|
|
1820
|
+
throw new common_1.BadRequestException('Milvus foi selecionado, mas nao esta configurado.');
|
|
1821
|
+
}
|
|
1822
|
+
if ((requestedSearchProvider === 'azure_search' || requestedSearchProvider === 'azure-search' || requestedSearchProvider === 'azure') && !azureSearchConfigured) {
|
|
1823
|
+
throw new common_1.BadRequestException('Azure AI Search foi selecionado, mas nao esta configurado.');
|
|
1824
|
+
}
|
|
1825
|
+
if (!this.milvusClient && !azureSearchConfigured) {
|
|
1826
|
+
throw new common_1.BadRequestException('MILVUS_ADDRESS or Azure AI Search must be configured');
|
|
1827
|
+
}
|
|
1828
|
+
const chunkSize = this.clampSearchInt(options?.chunkSize, 512, 100, 10000);
|
|
1829
|
+
const chunkOverlap = this.clampSearchInt(options?.chunkOverlap, 70, 0, Math.max(0, chunkSize - 1));
|
|
1830
|
+
const batchSize = this.clampSearchInt(options?.batchSize, 100, 1, 500);
|
|
1831
|
+
const embeddingBatchSize = this.clampSearchInt(options?.embeddingBatchSize, 64, 1, 256);
|
|
1832
|
+
const deduplicate = options?.deduplicate === true;
|
|
1833
|
+
const noSplit = options?.noSplit === true;
|
|
1834
|
+
const noHeader = options?.noHeader === true;
|
|
1835
|
+
const rows = [];
|
|
1836
|
+
const processedTexts = new Set();
|
|
1837
|
+
const sanitizedDocuments = (documents || []).map((document) => ({
|
|
1838
|
+
...document,
|
|
1839
|
+
text: this.sanitizeText(document?.text || ''),
|
|
1840
|
+
embeddingName: this.sanitizeText(document?.embeddingName || document?.extraFields?.title || 'document'),
|
|
1841
|
+
extraFields: this.sanitizeObject(document?.extraFields || {}),
|
|
1842
|
+
}));
|
|
1843
|
+
for (const document of sanitizedDocuments) {
|
|
1844
|
+
if (!document.text)
|
|
1845
|
+
continue;
|
|
1846
|
+
const embeddingId = document.embeddingId || (0, crypto_1.randomUUID)();
|
|
1847
|
+
const baseExtraFields = document.extraFields || {};
|
|
1848
|
+
const chunks = noSplit ? [document.text] : this.chunkText(document.text, chunkSize, chunkOverlap);
|
|
1849
|
+
const chunksCount = chunks.length;
|
|
1850
|
+
const extraFieldsHeader = !noHeader
|
|
1851
|
+
? Object.entries(baseExtraFields)
|
|
1852
|
+
.filter(([, value]) => value !== null && value !== undefined && value !== '')
|
|
1853
|
+
.map(([key, value]) => `${key}: ${String(value)}`)
|
|
1854
|
+
.join('\n')
|
|
1855
|
+
: '';
|
|
1856
|
+
for (let index = 0; index < chunks.length; index += 1) {
|
|
1857
|
+
const rawChunk = chunks[index];
|
|
1858
|
+
const part = index + 1;
|
|
1859
|
+
const partHeader = noHeader ? '' : `part: ${part}/${chunksCount}`;
|
|
1860
|
+
const header = [extraFieldsHeader, partHeader].filter(Boolean).join('\n');
|
|
1861
|
+
const enrichedText = header ? `${header}\ntexto: ${rawChunk}` : rawChunk;
|
|
1862
|
+
const hash = this.contentHash(rawChunk);
|
|
1863
|
+
if (deduplicate) {
|
|
1864
|
+
const uniqueIdentifier = `${document.agentId || ''}_${hash}`;
|
|
1865
|
+
if (processedTexts.has(uniqueIdentifier))
|
|
1866
|
+
continue;
|
|
1867
|
+
processedTexts.add(uniqueIdentifier);
|
|
1868
|
+
}
|
|
1869
|
+
rows.push({
|
|
1870
|
+
embeddingName: document.embeddingName || baseExtraFields?.title || 'document',
|
|
1871
|
+
embeddingId,
|
|
1872
|
+
agentId: document.agentId || '',
|
|
1873
|
+
extraFields: {
|
|
1874
|
+
...baseExtraFields,
|
|
1875
|
+
chunkIndex: index,
|
|
1876
|
+
chunksCount,
|
|
1877
|
+
part,
|
|
1878
|
+
totalParts: chunksCount,
|
|
1879
|
+
contentHash: hash,
|
|
1880
|
+
},
|
|
1881
|
+
text: enrichedText,
|
|
1882
|
+
embeddingProvider: options?.embeddingProvider || '',
|
|
1883
|
+
embeddingModel: options?.embeddingModel || '',
|
|
1884
|
+
});
|
|
1885
|
+
}
|
|
1886
|
+
}
|
|
1887
|
+
if (!rows.length) {
|
|
1888
|
+
return {
|
|
1889
|
+
success: true,
|
|
1890
|
+
inserted: 0,
|
|
1891
|
+
totalChunks: 0,
|
|
1892
|
+
batches: 0,
|
|
1893
|
+
message: 'No documents to insert after processing',
|
|
1894
|
+
};
|
|
1895
|
+
}
|
|
1896
|
+
for (const batch of this.chunkArray(rows, embeddingBatchSize)) {
|
|
1897
|
+
const vectors = await this.embeddingCreateBatch(batch.map((row) => row.text), options?.embeddingProvider, options?.embeddingModel);
|
|
1898
|
+
vectors.forEach((vector, index) => {
|
|
1899
|
+
batch[index].dense = vector;
|
|
1900
|
+
});
|
|
1901
|
+
}
|
|
1902
|
+
const responses = [];
|
|
1903
|
+
const searchProvider = requestedSearchProvider;
|
|
1904
|
+
const writeMilvus = this.milvusClient && searchProvider !== 'azure_search' && searchProvider !== 'azure-search' && searchProvider !== 'azure';
|
|
1905
|
+
const writeAzureSearch = azureSearchConfigured && searchProvider !== 'milvus';
|
|
1906
|
+
if (writeMilvus) {
|
|
1907
|
+
const expectedMilvusDimensions = this.getEmbeddingDimensions();
|
|
1908
|
+
rows.forEach((row) => this.ensureVectorDimensions(row.dense, expectedMilvusDimensions, 'Milvus/dense'));
|
|
1909
|
+
}
|
|
1910
|
+
if (writeMilvus) {
|
|
1911
|
+
for (let index = 0; index < rows.length; index += batchSize) {
|
|
1912
|
+
const batch = rows.slice(index, index + batchSize);
|
|
1913
|
+
this.logger.log(`Inserting RAG batch ${Math.floor(index / batchSize) + 1}/${Math.ceil(rows.length / batchSize)} (${batch.length} chunks)`);
|
|
1914
|
+
responses.push(await this.milvusClient.insert({
|
|
1915
|
+
collection_name: targetCollection,
|
|
1916
|
+
fields_data: batch,
|
|
1917
|
+
}));
|
|
1918
|
+
}
|
|
1919
|
+
await this.flushCollection(targetCollection);
|
|
1920
|
+
}
|
|
1921
|
+
const uploadedBlobs = this.shouldUseAzureBlob(options)
|
|
1922
|
+
? await this.uploadRowsToAzureBlob(rows, targetCollection)
|
|
1923
|
+
: [];
|
|
1924
|
+
const azureSearch = writeAzureSearch
|
|
1925
|
+
? await this.indexRowsInAzureSearch(targetCollection, rows, uploadedBlobs)
|
|
1926
|
+
: null;
|
|
1927
|
+
return {
|
|
1928
|
+
success: true,
|
|
1929
|
+
inserted: rows.length,
|
|
1930
|
+
totalChunks: rows.length,
|
|
1931
|
+
batches: responses.length,
|
|
1932
|
+
response: responses[responses.length - 1],
|
|
1933
|
+
responses,
|
|
1934
|
+
azureSearch,
|
|
1935
|
+
azureBlobs: uploadedBlobs.length,
|
|
1936
|
+
message: `Successfully inserted ${rows.length} chunks in ${responses.length} batches`,
|
|
1937
|
+
};
|
|
1938
|
+
}
|
|
1939
|
+
formatWorksheetCell(value) {
|
|
1940
|
+
if (value === null || value === undefined)
|
|
1941
|
+
return '';
|
|
1942
|
+
if (value instanceof Date)
|
|
1943
|
+
return value.toISOString();
|
|
1944
|
+
if (typeof value === 'object') {
|
|
1945
|
+
if ('text' in value)
|
|
1946
|
+
return String(value.text || '');
|
|
1947
|
+
if ('result' in value)
|
|
1948
|
+
return String(value.result || '');
|
|
1949
|
+
if ('richText' in value && Array.isArray(value.richText)) {
|
|
1950
|
+
return value.richText.map((item) => item?.text || '').join('');
|
|
1951
|
+
}
|
|
1952
|
+
return JSON.stringify(value);
|
|
1953
|
+
}
|
|
1954
|
+
return String(value);
|
|
1955
|
+
}
|
|
1956
|
+
async extractXlsxText(buffer) {
|
|
1957
|
+
return (await this.extractXlsxStructure(buffer)).text;
|
|
1958
|
+
}
|
|
1959
|
+
async extractPdfText(buffer) {
|
|
1960
|
+
if (pdfParseModule.PDFParse) {
|
|
1961
|
+
const parser = new pdfParseModule.PDFParse({ data: buffer });
|
|
1962
|
+
try {
|
|
1963
|
+
return String((await parser.getText())?.text || '').trim();
|
|
1964
|
+
}
|
|
1965
|
+
finally {
|
|
1966
|
+
await parser.destroy().catch(() => undefined);
|
|
1967
|
+
}
|
|
1968
|
+
}
|
|
1969
|
+
if (typeof pdfParseModule === 'function') {
|
|
1970
|
+
return String((await pdfParseModule(buffer))?.text || '').trim();
|
|
1971
|
+
}
|
|
1972
|
+
throw new common_1.BadRequestException('Leitor de PDF indisponivel.');
|
|
1973
|
+
}
|
|
1974
|
+
async extractXlsxStructure(buffer) {
|
|
1975
|
+
const workbook = new ExcelJS.Workbook();
|
|
1976
|
+
await workbook.xlsx.load(buffer);
|
|
1977
|
+
const sheets = [];
|
|
1978
|
+
const worksheets = [];
|
|
1979
|
+
workbook.worksheets.forEach((worksheet) => {
|
|
1980
|
+
const rows = [];
|
|
1981
|
+
const structuredRows = [];
|
|
1982
|
+
worksheet.eachRow({ includeEmpty: false }, (row, rowNumber) => {
|
|
1983
|
+
const values = Array.isArray(row.values) ? row.values.slice(1) : [];
|
|
1984
|
+
const structured = values.map((value) => this.formatWorksheetCell(value));
|
|
1985
|
+
const line = structured.join(',');
|
|
1986
|
+
if (line.trim())
|
|
1987
|
+
rows.push(line);
|
|
1988
|
+
if (structured.some((value) => value.trim()))
|
|
1989
|
+
structuredRows.push({ rowNumber, values: structured });
|
|
1990
|
+
});
|
|
1991
|
+
if (rows.length) {
|
|
1992
|
+
sheets.push(`--- Aba: ${worksheet.name} ---\n${rows.join('\n')}`);
|
|
1993
|
+
worksheets.push({ name: worksheet.name, rows: structuredRows });
|
|
1994
|
+
}
|
|
1995
|
+
});
|
|
1996
|
+
return { text: sheets.join('\n\n').trim(), sheets: worksheets };
|
|
1997
|
+
}
|
|
1998
|
+
stripHtml(value) {
|
|
1999
|
+
return String(value || '')
|
|
2000
|
+
.replace(/<br\s*\/?>/gi, '\n')
|
|
2001
|
+
.replace(/<\/(p|h[1-6]|li|tr)>/gi, '\n')
|
|
2002
|
+
.replace(/<\/(td|th)>/gi, ' | ')
|
|
2003
|
+
.replace(/<[^>]+>/g, '')
|
|
2004
|
+
.replace(/ /gi, ' ')
|
|
2005
|
+
.replace(/&/gi, '&')
|
|
2006
|
+
.replace(/</gi, '<')
|
|
2007
|
+
.replace(/>/gi, '>')
|
|
2008
|
+
.replace(/\n{3,}/g, '\n\n')
|
|
2009
|
+
.trim();
|
|
2010
|
+
}
|
|
2011
|
+
extractPlaceholders(value) {
|
|
2012
|
+
return Array.from(new Set(Array.from(String(value || '').matchAll(/\{\{\s*([^{}]+?)\s*\}\}/g))
|
|
2013
|
+
.map((match) => String(match[1] || '').trim())
|
|
2014
|
+
.filter(Boolean)));
|
|
2015
|
+
}
|
|
2016
|
+
textSections(text) {
|
|
2017
|
+
return String(text || '')
|
|
2018
|
+
.split(/\r?\n/)
|
|
2019
|
+
.map((line) => line.trim())
|
|
2020
|
+
.filter((line) => line.length > 0 && line.length <= 180)
|
|
2021
|
+
.filter((line) => /^(\d+(?:\.\d+)*[.)-]?\s+|clausula\b|se[cç][aã]o\b|anexo\b|[A-ZÀ-Ý0-9][A-ZÀ-Ý0-9\s._-]{4,})/i.test(line))
|
|
2022
|
+
.slice(0, 200);
|
|
2023
|
+
}
|
|
2024
|
+
async extractFileStructure(file, text) {
|
|
2025
|
+
const filename = String(file.originalname || 'documento').toLowerCase();
|
|
2026
|
+
const mimeType = String(file.mimetype || '').toLowerCase();
|
|
2027
|
+
const base = {
|
|
2028
|
+
placeholders: this.extractPlaceholders(text),
|
|
2029
|
+
sections: this.textSections(text),
|
|
2030
|
+
};
|
|
2031
|
+
if (mimeType.includes('wordprocessingml') || filename.endsWith('.docx')) {
|
|
2032
|
+
const html = String((await mammoth.convertToHtml({ buffer: file.buffer }))?.value || '');
|
|
2033
|
+
return {
|
|
2034
|
+
...base,
|
|
2035
|
+
type: 'docx',
|
|
2036
|
+
html,
|
|
2037
|
+
tables: Array.from(html.matchAll(/<table[\s\S]*?<\/table>/gi)).map((match) => this.stripHtml(match[0])).slice(0, 100),
|
|
2038
|
+
};
|
|
2039
|
+
}
|
|
2040
|
+
if (mimeType.includes('spreadsheetml') || filename.endsWith('.xlsx')) {
|
|
2041
|
+
const workbook = await this.extractXlsxStructure(file.buffer);
|
|
2042
|
+
return { ...base, type: 'xlsx', sheets: workbook.sheets };
|
|
2043
|
+
}
|
|
2044
|
+
return { ...base, type: filename.split('.').pop() || mimeType || 'binary' };
|
|
2045
|
+
}
|
|
2046
|
+
async extractTextDirect(file) {
|
|
2047
|
+
const buffer = file.buffer;
|
|
2048
|
+
const filename = String(file.originalname || 'documento').toLowerCase();
|
|
2049
|
+
const mimeType = String(file.mimetype || '').toLowerCase();
|
|
2050
|
+
if (mimeType.includes('pdf') || filename.endsWith('.pdf') || buffer.subarray(0, 4).toString('latin1') === '%PDF') {
|
|
2051
|
+
return await this.extractPdfText(buffer);
|
|
2052
|
+
}
|
|
2053
|
+
if (mimeType.includes('wordprocessingml') ||
|
|
2054
|
+
mimeType.includes('msword') ||
|
|
2055
|
+
filename.endsWith('.docx')) {
|
|
2056
|
+
const parsed = await mammoth.extractRawText({ buffer });
|
|
2057
|
+
return String(parsed?.value || '').trim();
|
|
2058
|
+
}
|
|
2059
|
+
if (filename.endsWith('.xls')) {
|
|
2060
|
+
throw new common_1.BadRequestException('Arquivos .xls legados nao sao aceitos por seguranca. Converta para .xlsx ou .csv.');
|
|
2061
|
+
}
|
|
2062
|
+
if (filename.endsWith('.csv') || mimeType.includes('csv')) {
|
|
2063
|
+
const decoded = buffer.toString('utf-8');
|
|
2064
|
+
return this.isBinaryDecodedText(decoded) ? '' : decoded.trim();
|
|
2065
|
+
}
|
|
2066
|
+
if (mimeType.includes('spreadsheetml') ||
|
|
2067
|
+
mimeType.includes('excel') ||
|
|
2068
|
+
filename.endsWith('.xlsx')) {
|
|
2069
|
+
return await this.extractXlsxText(buffer);
|
|
2070
|
+
}
|
|
2071
|
+
if (mimeType.startsWith('text/') || filename.match(/\.(txt|md|json|csv)$/)) {
|
|
2072
|
+
const decoded = buffer.toString('utf-8');
|
|
2073
|
+
return this.isBinaryDecodedText(decoded) ? '' : decoded.trim();
|
|
2074
|
+
}
|
|
2075
|
+
return '';
|
|
2076
|
+
}
|
|
2077
|
+
async extractTextWithOpenAI(file) {
|
|
2078
|
+
const buffer = file.buffer;
|
|
2079
|
+
const filename = file.originalname || 'documento';
|
|
2080
|
+
const mimeType = file.mimetype || 'application/octet-stream';
|
|
2081
|
+
const isImage = mimeType.startsWith('image/') || /\.(jpg|jpeg|png|webp|gif)$/i.test(filename);
|
|
2082
|
+
const instruction = [
|
|
2083
|
+
'Extraia por OCR o texto do arquivo anexado.',
|
|
2084
|
+
'Transcreva fielmente títulos, seções, datas, números e tabelas quando possível.',
|
|
2085
|
+
'Retorne somente o texto extraido, sem resumo.',
|
|
2086
|
+
].join('\n');
|
|
2087
|
+
const responseContent = [
|
|
2088
|
+
{ type: 'input_text', text: instruction },
|
|
2089
|
+
isImage
|
|
2090
|
+
? { type: 'input_image', image_url: this.buildDataUrl(buffer, mimeType), detail: 'high' }
|
|
2091
|
+
: { type: 'input_file', filename, file_data: this.buildDataUrl(buffer, mimeType) },
|
|
2092
|
+
];
|
|
2093
|
+
const openAIClient = this.getOpenAIClientForProvider();
|
|
2094
|
+
const responsesApi = openAIClient.responses;
|
|
2095
|
+
if (responsesApi?.create) {
|
|
2096
|
+
const response = await responsesApi.create({
|
|
2097
|
+
model: this.getOcrModel(),
|
|
2098
|
+
input: [
|
|
2099
|
+
{
|
|
2100
|
+
role: 'system',
|
|
2101
|
+
content: [{ type: 'input_text', text: 'Você e um motor de OCR. Extraia texto fielmente de documentos enviados.' }],
|
|
2102
|
+
},
|
|
2103
|
+
{ role: 'user', content: responseContent },
|
|
2104
|
+
],
|
|
2105
|
+
temperature: 0,
|
|
2106
|
+
});
|
|
2107
|
+
return String(response?.output_text || '').trim();
|
|
2108
|
+
}
|
|
2109
|
+
const chatContent = [
|
|
2110
|
+
{
|
|
2111
|
+
type: 'text',
|
|
2112
|
+
text: instruction,
|
|
2113
|
+
},
|
|
2114
|
+
];
|
|
2115
|
+
if (isImage) {
|
|
2116
|
+
chatContent.push({
|
|
2117
|
+
type: 'image_url',
|
|
2118
|
+
image_url: { url: this.buildDataUrl(buffer, mimeType), detail: 'high' },
|
|
2119
|
+
});
|
|
2120
|
+
}
|
|
2121
|
+
else {
|
|
2122
|
+
chatContent.push({
|
|
2123
|
+
type: 'file',
|
|
2124
|
+
file: {
|
|
2125
|
+
filename,
|
|
2126
|
+
file_data: this.buildDataUrl(buffer, mimeType),
|
|
2127
|
+
},
|
|
2128
|
+
});
|
|
2129
|
+
}
|
|
2130
|
+
const response = await openAIClient.chat.completions.create({
|
|
2131
|
+
model: this.getOcrModel(),
|
|
2132
|
+
messages: [
|
|
2133
|
+
{ role: 'system', content: 'Você e um motor de OCR. Extraia texto fielmente de documentos enviados.' },
|
|
2134
|
+
{ role: 'user', content: chatContent },
|
|
2135
|
+
],
|
|
2136
|
+
temperature: 0,
|
|
2137
|
+
});
|
|
2138
|
+
return String(response?.choices?.[0]?.message?.content || '').trim();
|
|
2139
|
+
}
|
|
2140
|
+
async extractTextFromUploadedFile(file, preferOcr) {
|
|
2141
|
+
const isImage = String(file.mimetype || '').startsWith('image/');
|
|
2142
|
+
const directFirst = !preferOcr && !isImage;
|
|
2143
|
+
const errors = [];
|
|
2144
|
+
if (directFirst) {
|
|
2145
|
+
try {
|
|
2146
|
+
const directText = await this.extractTextDirect(file);
|
|
2147
|
+
if (directText)
|
|
2148
|
+
return { text: directText, strategy: 'direct', errors };
|
|
2149
|
+
}
|
|
2150
|
+
catch (error) {
|
|
2151
|
+
errors.push(error?.message || String(error));
|
|
2152
|
+
}
|
|
2153
|
+
}
|
|
2154
|
+
try {
|
|
2155
|
+
const ocrText = await this.extractTextWithOpenAI(file);
|
|
2156
|
+
if (ocrText)
|
|
2157
|
+
return { text: ocrText, strategy: 'ocr-openai', errors };
|
|
2158
|
+
}
|
|
2159
|
+
catch (error) {
|
|
2160
|
+
errors.push(error?.message || String(error));
|
|
2161
|
+
}
|
|
2162
|
+
if (!directFirst) {
|
|
2163
|
+
try {
|
|
2164
|
+
const directText = await this.extractTextDirect(file);
|
|
2165
|
+
if (directText)
|
|
2166
|
+
return { text: directText, strategy: 'direct', errors };
|
|
2167
|
+
}
|
|
2168
|
+
catch (error) {
|
|
2169
|
+
errors.push(error?.message || String(error));
|
|
2170
|
+
}
|
|
2171
|
+
}
|
|
2172
|
+
return { text: '', strategy: 'none', errors };
|
|
2173
|
+
}
|
|
2174
|
+
async extractFiles(files, body = {}) {
|
|
2175
|
+
await this.refreshProviderSettings();
|
|
2176
|
+
if (!files?.length) {
|
|
2177
|
+
throw new common_1.BadRequestException('arquivos is required');
|
|
2178
|
+
}
|
|
2179
|
+
const preferOcr = this.parseBoolean(body?.ocr);
|
|
2180
|
+
const maxTextChars = Math.max(0, Number(body?.maxTextChars || 0) || 0);
|
|
2181
|
+
const extractedFiles = [];
|
|
2182
|
+
for (const file of files) {
|
|
2183
|
+
const extracted = await this.extractTextFromUploadedFile(file, preferOcr);
|
|
2184
|
+
const filename = file.originalname || 'documento';
|
|
2185
|
+
const rawText = String(extracted.text || '');
|
|
2186
|
+
const text = maxTextChars > 0 ? rawText.slice(0, maxTextChars) : rawText;
|
|
2187
|
+
const structure = await this.extractFileStructure(file, text).catch((error) => ({
|
|
2188
|
+
placeholders: this.extractPlaceholders(text),
|
|
2189
|
+
sections: this.textSections(text),
|
|
2190
|
+
structureError: error?.message || String(error),
|
|
2191
|
+
}));
|
|
2192
|
+
const stored = this.documentsService
|
|
2193
|
+
? await this.documentsService.storeOriginal({
|
|
2194
|
+
buffer: file.buffer,
|
|
2195
|
+
filename,
|
|
2196
|
+
mimeType: file.mimetype || 'application/octet-stream',
|
|
2197
|
+
source: body?.source === 'url' ? 'url' : 'upload',
|
|
2198
|
+
text,
|
|
2199
|
+
structure,
|
|
2200
|
+
scope: {
|
|
2201
|
+
organizationId: body?.organizationId,
|
|
2202
|
+
agentId: body?.agentId,
|
|
2203
|
+
flowId: body?.flowId,
|
|
2204
|
+
conversationId: body?.conversationId,
|
|
2205
|
+
},
|
|
2206
|
+
metadata: {
|
|
2207
|
+
extractionStrategy: extracted.strategy,
|
|
2208
|
+
textLength: rawText.length,
|
|
2209
|
+
truncated: maxTextChars > 0 && rawText.length > maxTextChars,
|
|
2210
|
+
},
|
|
2211
|
+
})
|
|
2212
|
+
: null;
|
|
2213
|
+
extractedFiles.push({
|
|
2214
|
+
id: stored?.documentId || stored?.id || (0, crypto_1.randomUUID)(),
|
|
2215
|
+
documentId: stored?.documentId || stored?.id,
|
|
2216
|
+
filename,
|
|
2217
|
+
title: filename,
|
|
2218
|
+
mimeType: file.mimetype || '',
|
|
2219
|
+
size: file.size || file.buffer?.length || 0,
|
|
2220
|
+
ok: Boolean(rawText),
|
|
2221
|
+
strategy: extracted.strategy,
|
|
2222
|
+
text,
|
|
2223
|
+
textLength: rawText.length,
|
|
2224
|
+
truncated: maxTextChars > 0 && rawText.length > maxTextChars,
|
|
2225
|
+
errors: extracted.errors,
|
|
2226
|
+
structure,
|
|
2227
|
+
storage: stored?.storage,
|
|
2228
|
+
storageKey: stored?.key,
|
|
2229
|
+
downloadPath: stored?.downloadPath,
|
|
2230
|
+
});
|
|
2231
|
+
}
|
|
2232
|
+
return {
|
|
2233
|
+
files: extractedFiles,
|
|
2234
|
+
documents: extractedFiles.filter((file) => file.ok).length,
|
|
2235
|
+
};
|
|
2236
|
+
}
|
|
2237
|
+
async addDocumentsFromFiles(files, body = {}) {
|
|
2238
|
+
await this.refreshProviderSettings();
|
|
2239
|
+
if (!files?.length) {
|
|
2240
|
+
throw new common_1.BadRequestException('arquivos is required');
|
|
2241
|
+
}
|
|
2242
|
+
const collectionName = body?.collectionName || this.getDefaultCollectionName();
|
|
2243
|
+
const options = this.parseJsonField(body?.options, {});
|
|
2244
|
+
const baseExtraFields = this.parseJsonField(body?.extraFields, {});
|
|
2245
|
+
const preferOcr = this.parseBoolean(body?.ocr);
|
|
2246
|
+
const documents = [];
|
|
2247
|
+
const extractedFiles = [];
|
|
2248
|
+
for (const file of files) {
|
|
2249
|
+
const extracted = await this.extractTextFromUploadedFile(file, preferOcr);
|
|
2250
|
+
const filename = file.originalname || 'documento';
|
|
2251
|
+
if (!extracted.text) {
|
|
2252
|
+
extractedFiles.push({
|
|
2253
|
+
filename,
|
|
2254
|
+
ok: false,
|
|
2255
|
+
strategy: extracted.strategy,
|
|
2256
|
+
errors: extracted.errors,
|
|
2257
|
+
});
|
|
2258
|
+
continue;
|
|
2259
|
+
}
|
|
2260
|
+
documents.push({
|
|
2261
|
+
text: extracted.text,
|
|
2262
|
+
agentId: body?.agentId || '',
|
|
2263
|
+
embeddingName: body?.embeddingName || filename,
|
|
2264
|
+
embeddingId: body?.embeddingId || (0, crypto_1.randomUUID)(),
|
|
2265
|
+
extraFields: {
|
|
2266
|
+
...baseExtraFields,
|
|
2267
|
+
source: baseExtraFields.source || 'canvas-flow-rag-upload',
|
|
2268
|
+
filename,
|
|
2269
|
+
mimeType: file.mimetype || '',
|
|
2270
|
+
size: file.size || file.buffer?.length || 0,
|
|
2271
|
+
extractionStrategy: extracted.strategy,
|
|
2272
|
+
},
|
|
2273
|
+
});
|
|
2274
|
+
extractedFiles.push({
|
|
2275
|
+
filename,
|
|
2276
|
+
ok: true,
|
|
2277
|
+
strategy: extracted.strategy,
|
|
2278
|
+
textLength: extracted.text.length,
|
|
2279
|
+
errors: extracted.errors,
|
|
2280
|
+
});
|
|
2281
|
+
}
|
|
2282
|
+
if (!documents.length) {
|
|
2283
|
+
throw new common_1.BadRequestException({
|
|
2284
|
+
message: 'Não foi possível extrair texto dos arquivos enviados',
|
|
2285
|
+
files: extractedFiles,
|
|
2286
|
+
});
|
|
2287
|
+
}
|
|
2288
|
+
const added = await this.addDocuments(collectionName, documents, options);
|
|
2289
|
+
return {
|
|
2290
|
+
...added,
|
|
2291
|
+
collectionName,
|
|
2292
|
+
files: extractedFiles,
|
|
2293
|
+
documents: documents.length,
|
|
2294
|
+
};
|
|
2295
|
+
}
|
|
2296
|
+
async listDocuments(collectionName, agentId, query, options = {}) {
|
|
2297
|
+
await this.refreshProviderSettings();
|
|
2298
|
+
if (!this.milvusClient) {
|
|
2299
|
+
throw new common_1.BadRequestException('MILVUS_ADDRESS is not configured');
|
|
2300
|
+
}
|
|
2301
|
+
const targetCollection = collectionName || this.getDefaultCollectionName();
|
|
2302
|
+
const limit = Math.min(Math.max(Number(options?.limit || 200), 1), 1000);
|
|
2303
|
+
const offset = Math.max(Number(options?.offset || 0), 0);
|
|
2304
|
+
const scanLimit = Math.min(Math.max(limit + offset, 300), 2000);
|
|
2305
|
+
const rows = await this.queryRows(targetCollection, this.buildAgentExpr(agentId), scanLimit, 0);
|
|
2306
|
+
const q = String(query || '').trim().toLowerCase();
|
|
2307
|
+
const documents = this.groupDocumentRows(rows)
|
|
2308
|
+
.filter((document) => {
|
|
2309
|
+
if (!q)
|
|
2310
|
+
return true;
|
|
2311
|
+
const haystack = [
|
|
2312
|
+
document.embeddingName,
|
|
2313
|
+
document.embeddingId,
|
|
2314
|
+
document.agentId,
|
|
2315
|
+
document.text,
|
|
2316
|
+
JSON.stringify(document.extraFields || {}),
|
|
2317
|
+
].join('\n').toLowerCase();
|
|
2318
|
+
return haystack.includes(q);
|
|
2319
|
+
})
|
|
2320
|
+
.sort((left, right) => String(left.embeddingName).localeCompare(String(right.embeddingName)));
|
|
2321
|
+
return {
|
|
2322
|
+
collectionName: targetCollection,
|
|
2323
|
+
total: documents.length,
|
|
2324
|
+
documents: documents.slice(offset, offset + limit),
|
|
2325
|
+
};
|
|
2326
|
+
}
|
|
2327
|
+
async getDocument(collectionName, idOrEmbeddingId, agentId) {
|
|
2328
|
+
await this.refreshProviderSettings();
|
|
2329
|
+
const targetCollection = collectionName || this.getDefaultCollectionName();
|
|
2330
|
+
const rows = await this.queryRows(targetCollection, this.buildDocumentExpr(idOrEmbeddingId, agentId), 1000, 0);
|
|
2331
|
+
const document = this.groupDocumentRows(rows)[0];
|
|
2332
|
+
if (!document) {
|
|
2333
|
+
throw new common_1.BadRequestException('Documento RAG não encontrado');
|
|
2334
|
+
}
|
|
2335
|
+
return {
|
|
2336
|
+
collectionName: targetCollection,
|
|
2337
|
+
document,
|
|
2338
|
+
};
|
|
2339
|
+
}
|
|
2340
|
+
async deleteDocument(collectionName, idOrEmbeddingId, agentId) {
|
|
2341
|
+
await this.refreshProviderSettings();
|
|
2342
|
+
if (!this.milvusClient) {
|
|
2343
|
+
throw new common_1.BadRequestException('MILVUS_ADDRESS is not configured');
|
|
2344
|
+
}
|
|
2345
|
+
const targetCollection = collectionName || this.getDefaultCollectionName();
|
|
2346
|
+
const filter = this.buildDocumentExpr(idOrEmbeddingId, agentId);
|
|
2347
|
+
const response = await this.milvusClient.delete({
|
|
2348
|
+
collection_name: targetCollection,
|
|
2349
|
+
filter,
|
|
2350
|
+
});
|
|
2351
|
+
await this.flushCollection(targetCollection);
|
|
2352
|
+
return {
|
|
2353
|
+
collectionName: targetCollection,
|
|
2354
|
+
deleted: true,
|
|
2355
|
+
filter,
|
|
2356
|
+
response,
|
|
2357
|
+
};
|
|
2358
|
+
}
|
|
2359
|
+
async updateDocument(collectionName, idOrEmbeddingId, payload = {}) {
|
|
2360
|
+
await this.refreshProviderSettings();
|
|
2361
|
+
const targetCollection = collectionName || this.getDefaultCollectionName();
|
|
2362
|
+
const existing = await this.getDocument(targetCollection, idOrEmbeddingId, payload?.agentId);
|
|
2363
|
+
const document = existing.document;
|
|
2364
|
+
const incomingExtraFields = payload?.extraFields && typeof payload.extraFields === 'object'
|
|
2365
|
+
? payload.extraFields
|
|
2366
|
+
: {};
|
|
2367
|
+
const extraFields = payload?.mergeExtraFields === false
|
|
2368
|
+
? incomingExtraFields
|
|
2369
|
+
: { ...(document.extraFields || {}), ...incomingExtraFields };
|
|
2370
|
+
const text = String(payload?.text ?? document.text ?? '').trim();
|
|
2371
|
+
if (!text) {
|
|
2372
|
+
throw new common_1.BadRequestException('text is required');
|
|
2373
|
+
}
|
|
2374
|
+
await this.deleteDocument(targetCollection, document.embeddingId || document.id, payload?.agentId);
|
|
2375
|
+
const added = await this.addDocuments(targetCollection, [
|
|
2376
|
+
{
|
|
2377
|
+
text,
|
|
2378
|
+
embeddingName: payload?.embeddingName || document.embeddingName,
|
|
2379
|
+
embeddingId: document.embeddingId || idOrEmbeddingId,
|
|
2380
|
+
agentId: payload?.agentId ?? document.agentId,
|
|
2381
|
+
extraFields: this.stripChunkFields(extraFields),
|
|
2382
|
+
},
|
|
2383
|
+
], payload?.options || {});
|
|
2384
|
+
return {
|
|
2385
|
+
collectionName: targetCollection,
|
|
2386
|
+
embeddingId: document.embeddingId || idOrEmbeddingId,
|
|
2387
|
+
updated: true,
|
|
2388
|
+
...added,
|
|
2389
|
+
};
|
|
2390
|
+
}
|
|
2391
|
+
async listCollections() {
|
|
2392
|
+
await this.refreshProviderSettings();
|
|
2393
|
+
if (!this.milvusClient)
|
|
2394
|
+
return [];
|
|
2395
|
+
const response = await this.milvusClient.showCollections();
|
|
2396
|
+
return response?.data || [];
|
|
2397
|
+
}
|
|
2398
|
+
combineProviderSearchResults(primary, secondary, topK, params) {
|
|
2399
|
+
if (!secondary?.results?.length)
|
|
2400
|
+
return primary;
|
|
2401
|
+
if (!primary?.results?.length)
|
|
2402
|
+
return {
|
|
2403
|
+
...secondary,
|
|
2404
|
+
results: this.applyMetadataOrdering(secondary.results || [], params).slice(0, topK),
|
|
2405
|
+
searchDebug: {
|
|
2406
|
+
...(secondary.searchDebug || {}),
|
|
2407
|
+
mode: 'provider_hybrid',
|
|
2408
|
+
primaryResults: 0,
|
|
2409
|
+
secondaryResults: secondary.results.length,
|
|
2410
|
+
},
|
|
2411
|
+
};
|
|
2412
|
+
const byKey = new Map();
|
|
2413
|
+
[...(primary.results || []), ...(secondary.results || [])].forEach((item) => {
|
|
2414
|
+
const key = String(item?.source || 'milvus') + ':' + String(item?.id || item?.embeddingId || item?.text || (0, crypto_1.randomUUID)());
|
|
2415
|
+
const existing = byKey.get(key);
|
|
2416
|
+
if (!existing || this.getSearchScore(item) > this.getSearchScore(existing)) {
|
|
2417
|
+
byKey.set(key, item);
|
|
2418
|
+
}
|
|
2419
|
+
});
|
|
2420
|
+
const merged = Array.from(byKey.values()).sort((left, right) => this.getSearchScore(right) - this.getSearchScore(left));
|
|
2421
|
+
return {
|
|
2422
|
+
...primary,
|
|
2423
|
+
results: this.applyMetadataOrdering(merged, params).slice(0, topK),
|
|
2424
|
+
providerResults: {
|
|
2425
|
+
milvus: primary.results?.length || 0,
|
|
2426
|
+
azureSearch: secondary.results?.length || 0,
|
|
2427
|
+
},
|
|
2428
|
+
searchDebug: {
|
|
2429
|
+
...(primary.searchDebug || {}),
|
|
2430
|
+
mode: 'provider_hybrid',
|
|
2431
|
+
primaryMode: primary.searchDebug?.mode || 'milvus',
|
|
2432
|
+
secondaryMode: secondary.searchDebug?.mode || 'azure_search',
|
|
2433
|
+
primaryResults: primary.results?.length || 0,
|
|
2434
|
+
secondaryResults: secondary.results?.length || 0,
|
|
2435
|
+
uniqueCandidates: byKey.size,
|
|
2436
|
+
},
|
|
2437
|
+
};
|
|
2438
|
+
}
|
|
2439
|
+
async searchHybrid(query, collectionName, agentId, params = {}) {
|
|
2440
|
+
await this.refreshProviderSettings();
|
|
2441
|
+
if (!query || typeof query !== 'string') {
|
|
2442
|
+
throw new common_1.BadRequestException('query is required');
|
|
2443
|
+
}
|
|
2444
|
+
const targetCollection = collectionName || this.getDefaultCollectionName();
|
|
2445
|
+
const searchProvider = String(params?.searchProvider || params?.vectorProvider || params?.provider || params?.ragProvider || '').toLowerCase();
|
|
2446
|
+
const providerHybrid = ['hybrid', 'milvus_azure_search', 'milvus+azure_search', 'milvus+azure'].includes(searchProvider);
|
|
2447
|
+
const effectiveTopK = this.clampSearchInt(params?.k ?? params?.topK, 15, 1, 100);
|
|
2448
|
+
if (this.shouldUseAzureSearch(params, targetCollection) && !providerHybrid) {
|
|
2449
|
+
return await this.searchAzureSearch(query, targetCollection, agentId, params);
|
|
2450
|
+
}
|
|
2451
|
+
if (!this.milvusClient) {
|
|
2452
|
+
if (this.isAzureSearchConfigured(targetCollection)) {
|
|
2453
|
+
return await this.searchAzureSearch(query, targetCollection, agentId, params);
|
|
2454
|
+
}
|
|
2455
|
+
return { results: [], warning: 'MILVUS_ADDRESS is not configured' };
|
|
2456
|
+
}
|
|
2457
|
+
const azureSearchPromise = providerHybrid && this.isAzureSearchConfigured(targetCollection)
|
|
2458
|
+
? this.searchAzureSearch(query, targetCollection, agentId, params).catch((error) => ({
|
|
2459
|
+
results: [],
|
|
2460
|
+
warning: error?.message || String(error),
|
|
2461
|
+
searchDebug: { mode: 'azure_search_error' },
|
|
2462
|
+
}))
|
|
2463
|
+
: null;
|
|
2464
|
+
const useHybrid = params?.useHybrid !== false;
|
|
2465
|
+
const candidateMultiplier = this.clampSearchInt(params?.candidateMultiplier, useHybrid ? 4 : 2, 1, 10);
|
|
2466
|
+
const candidateTopK = this.clampSearchInt(params?.candidateTopK, Math.max(effectiveTopK, effectiveTopK * candidateMultiplier), effectiveTopK, 200);
|
|
2467
|
+
const denseEfSearch = this.clampSearchInt(params?.denseEfSearch ?? params?.efSearch, Math.max(128, candidateTopK * 8), Math.max(16, candidateTopK), 4096);
|
|
2468
|
+
const sparseDropRatioSearch = this.clampSearchFloat(params?.sparseDropRatioSearch, 0.15, 0, 0.95);
|
|
2469
|
+
const denseWeight = this.clampSearchFloat(params?.denseWeight, 0.7, 0, 1);
|
|
2470
|
+
const sparseWeight = this.clampSearchFloat(params?.sparseWeight, 0.3, 0, 1);
|
|
2471
|
+
const relevanceBoost = this.clampSearchFloat(params?.relevanceBoost, 1.5, 0.1, 10);
|
|
2472
|
+
const fusionStrategyRaw = String(params?.fusionStrategy || 'rrf').toLowerCase().trim();
|
|
2473
|
+
const fusionStrategy = fusionStrategyRaw === 'weighted_score' || fusionStrategyRaw === 'score'
|
|
2474
|
+
? 'weighted_score'
|
|
2475
|
+
: 'rrf';
|
|
2476
|
+
const rrfK = this.clampSearchInt(params?.rrfK, 60, 1, 1000);
|
|
2477
|
+
const maxChunksPerDocument = this.clampSearchInt(params?.maxChunksPerDocument, 0, 0, 20);
|
|
2478
|
+
const baseExtraFieldsFilter = params?.extraFieldsFilter || params?.metadataFilter || params?.meta || null;
|
|
2479
|
+
const perRoundFilters = Array.isArray(params?.extraFieldsFilterPerRound) ? params.extraFieldsFilterPerRound : null;
|
|
2480
|
+
const perRoundLimitValues = Array.isArray(params?.extraFieldsFilterPerRoundLimits)
|
|
2481
|
+
? params.extraFieldsFilterPerRoundLimits.map((value) => {
|
|
2482
|
+
if (value === undefined || value === null || value === '')
|
|
2483
|
+
return null;
|
|
2484
|
+
const parsed = Number(value);
|
|
2485
|
+
return Number.isFinite(parsed) ? Math.max(0, Math.floor(parsed)) : null;
|
|
2486
|
+
})
|
|
2487
|
+
: null;
|
|
2488
|
+
const hasPerRoundLimits = Boolean(perRoundLimitValues?.some((value) => value !== null));
|
|
2489
|
+
const roundStopFind = params?.roundStopFind !== false;
|
|
2490
|
+
const roundMixHalf = params?.roundMixHalf === true;
|
|
2491
|
+
const denseVector = await this.embeddingCreate(query, params?.embeddingProvider, params?.embeddingModel);
|
|
2492
|
+
this.ensureVectorDimensions(denseVector, this.getEmbeddingDimensions(), 'Milvus/dense');
|
|
2493
|
+
const buildExpr = (extraFieldsFilter) => {
|
|
2494
|
+
const exprParts = [];
|
|
2495
|
+
if (agentId)
|
|
2496
|
+
exprParts.push(`agentId == "${this.escapeMilvusString(agentId)}"`);
|
|
2497
|
+
const extraFieldsExpr = this.buildExtraFieldsExpr(extraFieldsFilter);
|
|
2498
|
+
if (extraFieldsExpr)
|
|
2499
|
+
exprParts.push(`(${extraFieldsExpr})`);
|
|
2500
|
+
if (params?.filterExpr)
|
|
2501
|
+
exprParts.push(`(${params.filterExpr})`);
|
|
2502
|
+
return exprParts.length ? exprParts.join(' && ') : undefined;
|
|
2503
|
+
};
|
|
2504
|
+
const runRound = async (extraFieldsFilter) => {
|
|
2505
|
+
const expr = buildExpr(extraFieldsFilter);
|
|
2506
|
+
const densePromise = this.milvusClient.search({
|
|
2507
|
+
collection_name: targetCollection,
|
|
2508
|
+
vector_type: milvus2_sdk_node_1.DataType.FloatVector,
|
|
2509
|
+
vectors: [denseVector],
|
|
2510
|
+
search_params: {
|
|
2511
|
+
anns_field: 'dense',
|
|
2512
|
+
topk: candidateTopK,
|
|
2513
|
+
metric_type: 'COSINE',
|
|
2514
|
+
params: JSON.stringify({ ef: denseEfSearch }),
|
|
2515
|
+
},
|
|
2516
|
+
expr,
|
|
2517
|
+
output_fields: ['id', 'extraFields', 'embeddingName', 'embeddingId', 'agentId', 'text'],
|
|
2518
|
+
timeout: 900000,
|
|
2519
|
+
});
|
|
2520
|
+
if (!useHybrid) {
|
|
2521
|
+
const denseRaw = await densePromise;
|
|
2522
|
+
const denseResults = this.normalizeMilvusResults(denseRaw, 'dense');
|
|
2523
|
+
const boosted = this.applyRelevanceBoost({ results: denseResults }, relevanceBoost);
|
|
2524
|
+
return {
|
|
2525
|
+
...boosted,
|
|
2526
|
+
results: (boosted.results || []).slice(0, effectiveTopK),
|
|
2527
|
+
denseCount: denseResults.length,
|
|
2528
|
+
sparseCount: 0,
|
|
2529
|
+
expr,
|
|
2530
|
+
searchDebug: {
|
|
2531
|
+
mode: 'dense',
|
|
2532
|
+
topK: effectiveTopK,
|
|
2533
|
+
candidateTopK,
|
|
2534
|
+
denseEfSearch,
|
|
2535
|
+
relevanceBoost,
|
|
2536
|
+
denseResults: denseResults.length,
|
|
2537
|
+
},
|
|
2538
|
+
};
|
|
2539
|
+
}
|
|
2540
|
+
const sparsePromise = this.milvusClient.search({
|
|
2541
|
+
collection_name: targetCollection,
|
|
2542
|
+
data: { text: query },
|
|
2543
|
+
search_params: {
|
|
2544
|
+
anns_field: 'sparse',
|
|
2545
|
+
topk: candidateTopK,
|
|
2546
|
+
metric_type: 'BM25',
|
|
2547
|
+
params: JSON.stringify({ drop_ratio_search: sparseDropRatioSearch }),
|
|
2548
|
+
},
|
|
2549
|
+
expr,
|
|
2550
|
+
output_fields: ['id', 'extraFields', 'embeddingName', 'embeddingId', 'agentId', 'text'],
|
|
2551
|
+
timeout: 900000,
|
|
2552
|
+
}).catch((error) => {
|
|
2553
|
+
this.logger.warn(`Sparse search skipped: ${error?.message || String(error)}`);
|
|
2554
|
+
return { results: [] };
|
|
2555
|
+
});
|
|
2556
|
+
const [denseRaw, sparseRaw] = await Promise.all([densePromise, sparsePromise]);
|
|
2557
|
+
const denseResults = this.normalizeMilvusResults(denseRaw, 'dense');
|
|
2558
|
+
const sparseResults = this.normalizeMilvusResults(sparseRaw, 'sparse');
|
|
2559
|
+
if (!sparseResults.length) {
|
|
2560
|
+
const boosted = this.applyRelevanceBoost({ results: denseResults }, relevanceBoost);
|
|
2561
|
+
return {
|
|
2562
|
+
...boosted,
|
|
2563
|
+
results: (boosted.results || []).slice(0, effectiveTopK),
|
|
2564
|
+
denseCount: denseResults.length,
|
|
2565
|
+
sparseCount: 0,
|
|
2566
|
+
expr,
|
|
2567
|
+
searchDebug: {
|
|
2568
|
+
mode: 'dense_fallback',
|
|
2569
|
+
topK: effectiveTopK,
|
|
2570
|
+
candidateTopK,
|
|
2571
|
+
denseEfSearch,
|
|
2572
|
+
sparseDropRatioSearch,
|
|
2573
|
+
relevanceBoost,
|
|
2574
|
+
denseResults: denseResults.length,
|
|
2575
|
+
sparseResults: 0,
|
|
2576
|
+
},
|
|
2577
|
+
};
|
|
2578
|
+
}
|
|
2579
|
+
const combined = this.combineSearchResultsV2({ results: denseResults }, { results: sparseResults }, denseWeight, sparseWeight, effectiveTopK, relevanceBoost, {
|
|
2580
|
+
candidateTopK,
|
|
2581
|
+
fusionStrategy,
|
|
2582
|
+
rrfK,
|
|
2583
|
+
maxChunksPerDocument,
|
|
2584
|
+
denseEfSearch,
|
|
2585
|
+
sparseDropRatioSearch,
|
|
2586
|
+
});
|
|
2587
|
+
return {
|
|
2588
|
+
...combined,
|
|
2589
|
+
denseCount: denseResults.length,
|
|
2590
|
+
sparseCount: sparseResults.length,
|
|
2591
|
+
expr,
|
|
2592
|
+
};
|
|
2593
|
+
};
|
|
2594
|
+
const filtersToTry = perRoundFilters?.length
|
|
2595
|
+
? perRoundFilters.map((round) => (this.isEmptyRoundFilter(round) ? null : this.mergeExtraFieldsFilters(baseExtraFieldsFilter, round)))
|
|
2596
|
+
: [baseExtraFieldsFilter];
|
|
2597
|
+
let lastResult = null;
|
|
2598
|
+
const collected = [];
|
|
2599
|
+
const baseRoundLimit = filtersToTry.length ? Math.floor(effectiveTopK / filtersToTry.length) : effectiveTopK;
|
|
2600
|
+
const remainder = filtersToTry.length ? effectiveTopK - baseRoundLimit * filtersToTry.length : 0;
|
|
2601
|
+
for (let roundIndex = 0; roundIndex < filtersToTry.length; roundIndex += 1) {
|
|
2602
|
+
const result = await runRound(filtersToTry[roundIndex]);
|
|
2603
|
+
lastResult = result;
|
|
2604
|
+
if (result.results.length && roundStopFind) {
|
|
2605
|
+
const ordered = this.applyMetadataOrdering(result.results, params).slice(0, effectiveTopK);
|
|
2606
|
+
this.logger.log(`RAG topK result (round ${roundIndex + 1}/${filtersToTry.length}): count=${result.results.length}, topK=${effectiveTopK}, ids=${JSON.stringify(ordered.map((item) => item?.id).filter(Boolean))}`);
|
|
2607
|
+
const milvusResult = {
|
|
2608
|
+
...result,
|
|
2609
|
+
results: ordered,
|
|
2610
|
+
roundIndex,
|
|
2611
|
+
rounds: filtersToTry.length,
|
|
2612
|
+
};
|
|
2613
|
+
return azureSearchPromise
|
|
2614
|
+
? this.combineProviderSearchResults(milvusResult, await azureSearchPromise, effectiveTopK, params)
|
|
2615
|
+
: milvusResult;
|
|
2616
|
+
}
|
|
2617
|
+
if (result.results.length) {
|
|
2618
|
+
const configuredLimit = hasPerRoundLimits ? perRoundLimitValues?.[roundIndex] : null;
|
|
2619
|
+
const roundLimit = typeof configuredLimit === 'number'
|
|
2620
|
+
? configuredLimit
|
|
2621
|
+
: roundMixHalf
|
|
2622
|
+
? Math.max(1, baseRoundLimit + (roundIndex < remainder ? 1 : 0))
|
|
2623
|
+
: result.results.length;
|
|
2624
|
+
result.results.slice(0, roundLimit).forEach((item) => collected.push({ item, roundIndex }));
|
|
2625
|
+
this.logger.log(`RAG topK result (round ${roundIndex + 1}/${filtersToTry.length}): count=${result.results.length}, topK=${effectiveTopK}`);
|
|
2626
|
+
}
|
|
2627
|
+
}
|
|
2628
|
+
if (collected.length) {
|
|
2629
|
+
const byId = new Map();
|
|
2630
|
+
collected.forEach((entry) => {
|
|
2631
|
+
const id = String(entry.item?.id || '');
|
|
2632
|
+
const previous = byId.get(id);
|
|
2633
|
+
if (!previous || entry.roundIndex < previous.roundIndex || this.getSearchScore(entry.item) > this.getSearchScore(previous.item)) {
|
|
2634
|
+
byId.set(id, entry);
|
|
2635
|
+
}
|
|
2636
|
+
});
|
|
2637
|
+
const mergedRounds = Array.from(byId.values())
|
|
2638
|
+
.sort((left, right) => {
|
|
2639
|
+
if (left.roundIndex !== right.roundIndex)
|
|
2640
|
+
return left.roundIndex - right.roundIndex;
|
|
2641
|
+
return this.getSearchScore(right.item) - this.getSearchScore(left.item);
|
|
2642
|
+
})
|
|
2643
|
+
.map((entry) => entry.item);
|
|
2644
|
+
const milvusResult = {
|
|
2645
|
+
...(lastResult || {}),
|
|
2646
|
+
results: this.applyMetadataOrdering(mergedRounds, params).slice(0, effectiveTopK),
|
|
2647
|
+
rounds: filtersToTry.length,
|
|
2648
|
+
};
|
|
2649
|
+
return azureSearchPromise
|
|
2650
|
+
? this.combineProviderSearchResults(milvusResult, await azureSearchPromise, effectiveTopK, params)
|
|
2651
|
+
: milvusResult;
|
|
2652
|
+
}
|
|
2653
|
+
const milvusResult = {
|
|
2654
|
+
...(lastResult || { denseCount: 0, sparseCount: 0 }),
|
|
2655
|
+
results: this.applyMetadataOrdering(lastResult?.results || [], params).slice(0, effectiveTopK),
|
|
2656
|
+
rounds: filtersToTry.length,
|
|
2657
|
+
};
|
|
2658
|
+
return azureSearchPromise
|
|
2659
|
+
? this.combineProviderSearchResults(milvusResult, await azureSearchPromise, effectiveTopK, params)
|
|
2660
|
+
: milvusResult;
|
|
2661
|
+
}
|
|
2662
|
+
buildHttpBatchTool() {
|
|
2663
|
+
return {
|
|
2664
|
+
type: 'function',
|
|
2665
|
+
function: {
|
|
2666
|
+
name: 'httpBatch',
|
|
2667
|
+
description: 'Execute one or more HTTP requests and return their status/body.',
|
|
2668
|
+
parameters: {
|
|
2669
|
+
type: 'object',
|
|
2670
|
+
properties: {
|
|
2671
|
+
requests: {
|
|
2672
|
+
type: 'array',
|
|
2673
|
+
items: {
|
|
2674
|
+
type: 'object',
|
|
2675
|
+
properties: {
|
|
2676
|
+
url: { type: 'string' },
|
|
2677
|
+
method: { type: 'string', enum: ['GET', 'POST', 'PUT', 'PATCH', 'DELETE'] },
|
|
2678
|
+
headers: { type: 'object', additionalProperties: true },
|
|
2679
|
+
body: { type: 'object', additionalProperties: true },
|
|
2680
|
+
},
|
|
2681
|
+
required: ['url'],
|
|
2682
|
+
},
|
|
2683
|
+
},
|
|
2684
|
+
},
|
|
2685
|
+
required: ['requests'],
|
|
2686
|
+
},
|
|
2687
|
+
},
|
|
2688
|
+
};
|
|
2689
|
+
}
|
|
2690
|
+
toOpenAIHistory(turns) {
|
|
2691
|
+
return (turns || [])
|
|
2692
|
+
.filter((turn) => ['system', 'user', 'assistant', 'tool'].includes(turn.role))
|
|
2693
|
+
.filter((turn) => !turn?.metadata?.kind || turn.metadata.kind === 'message')
|
|
2694
|
+
.map((turn) => ({
|
|
2695
|
+
role: turn.role,
|
|
2696
|
+
content: turn.content || '',
|
|
2697
|
+
}));
|
|
2698
|
+
}
|
|
2699
|
+
extractTextFromCompletion(completion) {
|
|
2700
|
+
return completion?.choices?.[0]?.message?.content || '';
|
|
2701
|
+
}
|
|
2702
|
+
async chatLlmRag(text, agentId, params = {}) {
|
|
2703
|
+
await this.refreshProviderSettings();
|
|
2704
|
+
if (!text || typeof text !== 'string') {
|
|
2705
|
+
throw new common_1.BadRequestException('text is required');
|
|
2706
|
+
}
|
|
2707
|
+
const conversationId = params?.conversationId || (0, crypto_1.randomUUID)();
|
|
2708
|
+
const turnHistoricMessages = Number(params?.turnHistoricMessages ?? 20);
|
|
2709
|
+
const collectionName = params?.collectionName || this.getDefaultCollectionName();
|
|
2710
|
+
const k = Number(params?.k ?? 8);
|
|
2711
|
+
const searchAgentId = Object.prototype.hasOwnProperty.call(params || {}, 'ragAgentId')
|
|
2712
|
+
? params.ragAgentId
|
|
2713
|
+
: agentId;
|
|
2714
|
+
const model = this.getChatModelForProvider(params?.llmProvider, params?.model);
|
|
2715
|
+
const prompt = params?.prompt ||
|
|
2716
|
+
'Você é uma IA RAG. Responda em pt-BR, use o contexto quando relevante e seja objetivo.';
|
|
2717
|
+
const history = await this.memoryService.findRecent(agentId, conversationId, turnHistoricMessages);
|
|
2718
|
+
const providedDocs = Array.isArray(params?.docs)
|
|
2719
|
+
? params.docs
|
|
2720
|
+
: Array.isArray(params?.documents)
|
|
2721
|
+
? params.documents
|
|
2722
|
+
: null;
|
|
2723
|
+
const ragResults = providedDocs
|
|
2724
|
+
? { results: providedDocs, searchDebug: { mode: 'provided_docs', count: providedDocs.length } }
|
|
2725
|
+
: k > 0
|
|
2726
|
+
? await this.searchHybrid(text, collectionName, searchAgentId, { ...params, k }).catch((error) => ({
|
|
2727
|
+
results: [],
|
|
2728
|
+
warning: error?.message || String(error),
|
|
2729
|
+
}))
|
|
2730
|
+
: { results: [] };
|
|
2731
|
+
const docsContextText = (ragResults.results || [])
|
|
2732
|
+
.map((item, index) => {
|
|
2733
|
+
const extra = {
|
|
2734
|
+
...this.parseExtraFields(item.extraFields),
|
|
2735
|
+
...this.parseExtraFields(item.metadata),
|
|
2736
|
+
};
|
|
2737
|
+
const title = item.embeddingName || item.title || item.filename || extra?.title || extra?.filename || `doc_${index + 1}`;
|
|
2738
|
+
return [
|
|
2739
|
+
`Documento ${index + 1}`,
|
|
2740
|
+
`Embedding name: ${title};`,
|
|
2741
|
+
`Text of doc: ${item.text || ''};`,
|
|
2742
|
+
`Extra fields: ${JSON.stringify(extra)};`,
|
|
2743
|
+
].join('\n');
|
|
2744
|
+
})
|
|
2745
|
+
.join('\n-------------------------------------------------\n\n');
|
|
2746
|
+
const dynamicContextText = String(params?.contextText || params?.dynamicContextText || '').trim();
|
|
2747
|
+
const contextText = [
|
|
2748
|
+
dynamicContextText
|
|
2749
|
+
? [
|
|
2750
|
+
'Contexto dinamico',
|
|
2751
|
+
dynamicContextText,
|
|
2752
|
+
].join('\n')
|
|
2753
|
+
: '',
|
|
2754
|
+
docsContextText,
|
|
2755
|
+
].filter(Boolean).join('\n-------------------------------------------------\n\n');
|
|
2756
|
+
const ragSystemInstruction = [
|
|
2757
|
+
prompt,
|
|
2758
|
+
'Use TODOS os materiais de referência entre <contexto_rag> e </contexto_rag> quando forem relevantes para a pergunta.',
|
|
2759
|
+
'Analise cada documento individualmente, não omita documentos pertinentes e destaque os mais recentes quando houver metadados de ano/número.',
|
|
2760
|
+
'Se o contexto recuperado não contiver suporte suficiente, diga isso com clareza.',
|
|
2761
|
+
].join('\n');
|
|
2762
|
+
const openAIHistory = this.toOpenAIHistory(history);
|
|
2763
|
+
const lastHistoryTurn = openAIHistory[openAIHistory.length - 1];
|
|
2764
|
+
const shouldAppendCurrentUserTurn = !(lastHistoryTurn?.role === 'user' &&
|
|
2765
|
+
String(lastHistoryTurn.content || '').trim() === String(text || '').trim());
|
|
2766
|
+
const messages = [
|
|
2767
|
+
{
|
|
2768
|
+
role: 'system',
|
|
2769
|
+
content: `${ragSystemInstruction}\n\n<contexto_rag>\n${contextText || 'Sem documentos recuperados.'}\n</contexto_rag>`,
|
|
2770
|
+
},
|
|
2771
|
+
...openAIHistory,
|
|
2772
|
+
...(shouldAppendCurrentUserTurn ? [{ role: 'user', content: text }] : []),
|
|
2773
|
+
];
|
|
2774
|
+
const allowHttpBatchTool = params?.allowHttpBatchTool === true || params?.enableHttpBatchTool === true;
|
|
2775
|
+
const tools = [
|
|
2776
|
+
...(allowHttpBatchTool ? [this.buildHttpBatchTool()] : []),
|
|
2777
|
+
...(Array.isArray(params?.tools) ? params.tools : []),
|
|
2778
|
+
];
|
|
2779
|
+
const trace = [];
|
|
2780
|
+
await this.memoryService.addTurn({
|
|
2781
|
+
agentId,
|
|
2782
|
+
conversationId,
|
|
2783
|
+
role: 'user',
|
|
2784
|
+
content: text,
|
|
2785
|
+
metadata: { source: 'canvas-flow' },
|
|
2786
|
+
});
|
|
2787
|
+
for (let step = 0; step < 4; step += 1) {
|
|
2788
|
+
const completion = await this.getOpenAIClientForProvider(params?.llmProvider).chat.completions.create({
|
|
2789
|
+
model,
|
|
2790
|
+
messages,
|
|
2791
|
+
tools: tools,
|
|
2792
|
+
tool_choice: params?.tool_choice || 'auto',
|
|
2793
|
+
temperature: Number(params?.temperature ?? 0.2),
|
|
2794
|
+
});
|
|
2795
|
+
const message = completion.choices?.[0]?.message;
|
|
2796
|
+
messages.push(message);
|
|
2797
|
+
if (!message?.tool_calls?.length) {
|
|
2798
|
+
const answer = this.extractTextFromCompletion(completion);
|
|
2799
|
+
await this.memoryService.addTurn({
|
|
2800
|
+
agentId,
|
|
2801
|
+
conversationId,
|
|
2802
|
+
role: 'assistant',
|
|
2803
|
+
content: answer,
|
|
2804
|
+
metadata: {
|
|
2805
|
+
source: 'canvas-flow',
|
|
2806
|
+
docs: (ragResults.results || []).slice(0, 5),
|
|
2807
|
+
trace,
|
|
2808
|
+
},
|
|
2809
|
+
});
|
|
2810
|
+
return {
|
|
2811
|
+
text: answer,
|
|
2812
|
+
conversationId,
|
|
2813
|
+
docs: ragResults.results || [],
|
|
2814
|
+
searchDebug: ragResults.searchDebug,
|
|
2815
|
+
trace,
|
|
2816
|
+
model,
|
|
2817
|
+
};
|
|
2818
|
+
}
|
|
2819
|
+
for (const call of message.tool_calls) {
|
|
2820
|
+
const args = JSON.parse(call.function?.arguments || '{}');
|
|
2821
|
+
let result;
|
|
2822
|
+
if (call.function?.name === 'httpBatch') {
|
|
2823
|
+
result = await this.httpBatchService.execute(args?.requests || []);
|
|
2824
|
+
}
|
|
2825
|
+
else {
|
|
2826
|
+
result = { ok: true, received_args: args };
|
|
2827
|
+
}
|
|
2828
|
+
trace.push({ tool: call.function?.name, args, result });
|
|
2829
|
+
messages.push({
|
|
2830
|
+
role: 'tool',
|
|
2831
|
+
tool_call_id: call.id,
|
|
2832
|
+
content: JSON.stringify(result),
|
|
2833
|
+
});
|
|
2834
|
+
}
|
|
2835
|
+
}
|
|
2836
|
+
const fallback = 'Não consegui concluir a resposta dentro do limite de chamadas de ferramenta.';
|
|
2837
|
+
await this.memoryService.addTurn({
|
|
2838
|
+
agentId,
|
|
2839
|
+
conversationId,
|
|
2840
|
+
role: 'assistant',
|
|
2841
|
+
content: fallback,
|
|
2842
|
+
metadata: { source: 'canvas-flow', trace },
|
|
2843
|
+
});
|
|
2844
|
+
return {
|
|
2845
|
+
text: fallback,
|
|
2846
|
+
conversationId,
|
|
2847
|
+
docs: ragResults.results || [],
|
|
2848
|
+
searchDebug: ragResults.searchDebug,
|
|
2849
|
+
trace,
|
|
2850
|
+
model,
|
|
2851
|
+
};
|
|
2852
|
+
}
|
|
2853
|
+
};
|
|
2854
|
+
// Publish the class, then replace every binding with its decorated version:
// the class is marked injectable, constructor parameter 4 is optional, and the
// constructor parameter types are recorded as design-time metadata.
exports.RagService = RagService;
const ragServiceConstructorTypes = [
    config_1.ConfigService,
    memory_service_1.MemoryService,
    http_batch_service_1.HttpBatchService,
    provider_config_service_1.ProviderConfigService,
    documents_service_1.DocumentsService,
];
const decoratedRagService = __decorate([
    (0, common_1.Injectable)(),
    __param(4, (0, common_1.Optional)()),
    __metadata("design:paramtypes", ragServiceConstructorTypes)
], RagService);
RagService_1 = decoratedRagService;
RagService = decoratedRagService;
exports.RagService = decoratedRagService;
|
|
2864
|
+
//# sourceMappingURL=rag-service.js.map
|