@tstdl/base 0.91.51 → 0.92.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ai/data-extracting.d.ts +35 -0
- package/ai/data-extracting.js +195 -41
- package/document-management/api/document-management.api.d.ts +377 -0
- package/document-management/api/document-management.api.js +15 -1
- package/document-management/index.d.ts +3 -0
- package/document-management/index.js +3 -0
- package/file/mime-type.d.ts +1 -1
- package/file/mime-type.js +8 -6
- package/file/mime-types.js +5 -32
- package/orm/schemas/numeric-date.d.ts +1 -1
- package/orm/schemas/timestamp.d.ts +1 -1
- package/package.json +2 -1
- package/schema/converters/index.d.ts +1 -0
- package/schema/converters/index.js +1 -0
- package/schema/converters/openapi-converter.d.ts +3 -0
- package/schema/converters/openapi-converter.js +113 -0
- package/schema/decorators/description.d.ts +3 -0
- package/schema/decorators/description.js +10 -0
- package/schema/decorators/property.d.ts +3 -1
- package/schema/decorators/property.js +13 -5
- package/schema/decorators/types.d.ts +5 -5
- package/schema/schema.d.ts +7 -0
- package/schema/schema.js +6 -0
- package/schema/schemas/any.d.ts +4 -3
- package/schema/schemas/any.js +4 -4
- package/schema/schemas/array.d.ts +10 -5
- package/schema/schemas/array.js +7 -3
- package/schema/schemas/bigint.d.ts +6 -6
- package/schema/schemas/bigint.js +30 -13
- package/schema/schemas/boolean.d.ts +1 -1
- package/schema/schemas/boolean.js +2 -2
- package/schema/schemas/date.d.ts +1 -1
- package/schema/schemas/date.js +2 -2
- package/schema/schemas/defaulted.d.ts +5 -4
- package/schema/schemas/defaulted.js +6 -6
- package/schema/schemas/deferred.d.ts +1 -1
- package/schema/schemas/deferred.js +2 -2
- package/schema/schemas/enumeration.d.ts +5 -4
- package/schema/schemas/enumeration.js +4 -2
- package/schema/schemas/function.d.ts +3 -3
- package/schema/schemas/function.js +5 -4
- package/schema/schemas/instance.d.ts +5 -4
- package/schema/schemas/instance.js +6 -6
- package/schema/schemas/literal.d.ts +5 -4
- package/schema/schemas/literal.js +6 -6
- package/schema/schemas/never.d.ts +3 -2
- package/schema/schemas/never.js +2 -2
- package/schema/schemas/nullable.d.ts +1 -1
- package/schema/schemas/nullable.js +1 -1
- package/schema/schemas/number.d.ts +4 -2
- package/schema/schemas/number.js +6 -3
- package/schema/schemas/object.d.ts +2 -2
- package/schema/schemas/object.js +12 -7
- package/schema/schemas/one-or-many.d.ts +5 -4
- package/schema/schemas/one-or-many.js +6 -6
- package/schema/schemas/optional.d.ts +1 -1
- package/schema/schemas/optional.js +1 -1
- package/schema/schemas/readable-stream.d.ts +1 -1
- package/schema/schemas/readable-stream.js +2 -2
- package/schema/schemas/regexp.d.ts +1 -1
- package/schema/schemas/regexp.js +2 -2
- package/schema/schemas/simple.d.ts +3 -3
- package/schema/schemas/simple.js +1 -1
- package/schema/schemas/string.d.ts +1 -1
- package/schema/schemas/string.js +2 -2
- package/schema/schemas/symbol.d.ts +6 -6
- package/schema/schemas/symbol.js +8 -13
- package/schema/schemas/uint8-array.d.ts +1 -1
- package/schema/schemas/uint8-array.js +2 -2
- package/schema/schemas/union.js +3 -3
- package/schema/schemas/unknown.d.ts +4 -3
- package/schema/schemas/unknown.js +4 -4
- package/search-index/memory/memory-search-index.js +1 -1
- package/types.d.ts +1 -1
- package/utils/helpers.d.ts +0 -40
- package/utils/helpers.js +0 -29
- package/utils/index.d.ts +1 -0
- package/utils/index.js +1 -0
- package/utils/string/index.d.ts +1 -0
- package/utils/string/index.js +1 -0
- package/utils/string/normalize.d.ts +50 -0
- package/utils/string/normalize.js +39 -0
- package/utils/try-ignore.d.ts +2 -0
- package/utils/try-ignore.js +12 -0
package/ai/data-extracting.d.ts
CHANGED
|
@@ -1 +1,36 @@
|
|
|
1
1
|
import '../polyfills.js';
|
|
2
|
+
import { GoogleGenerativeAI } from '@google/generative-ai';
|
|
3
|
+
import { type FileMetadataResponse, GoogleAIFileManager } from '@google/generative-ai/server';
|
|
4
|
+
import { Resolvable, type resolveArgumentType } from '../injector/interfaces.js';
|
|
5
|
+
import { SchemaTestable } from '../schema/index.js';
|
|
6
|
+
import { Enumeration as EnumerationType, EnumerationValue } from '../types.js';
|
|
7
|
+
import { LiteralUnion } from 'type-fest';
|
|
8
|
+
export type FileInput = {
|
|
9
|
+
path: string;
|
|
10
|
+
mimeType: string;
|
|
11
|
+
} | Blob;
|
|
12
|
+
export type GenerativeAIModel = LiteralUnion<'gemini-2.0-flash-exp' | 'gemini-exp-1206' | 'gemini-2.0-flash-thinking-exp-1219', string>;
|
|
13
|
+
export type AiServiceOptions = {
|
|
14
|
+
apiKey: string;
|
|
15
|
+
model?: GenerativeAIModel;
|
|
16
|
+
};
|
|
17
|
+
export type AiServiceArgument = AiServiceOptions;
|
|
18
|
+
export declare class AiService implements Resolvable<AiServiceArgument> {
|
|
19
|
+
#private;
|
|
20
|
+
readonly genAI: GoogleGenerativeAI;
|
|
21
|
+
readonly fileManager: GoogleAIFileManager;
|
|
22
|
+
readonly model: import("@google/generative-ai").GenerativeModel;
|
|
23
|
+
readonly [resolveArgumentType]: AiServiceArgument;
|
|
24
|
+
getFile(fileInput: FileInput): Promise<FileMetadataResponse>;
|
|
25
|
+
getFiles(files: FileInput[]): Promise<FileMetadataResponse[]>;
|
|
26
|
+
classify<T extends EnumerationType>(fileInput: FileInput, types: T): Promise<{
|
|
27
|
+
reasoning: string;
|
|
28
|
+
types: {
|
|
29
|
+
type: EnumerationValue<T>;
|
|
30
|
+
confidence: 'high' | 'medium' | 'low';
|
|
31
|
+
}[] | null;
|
|
32
|
+
}>;
|
|
33
|
+
extractData<T>(fileInput: FileInput, schema: SchemaTestable<T>): Promise<T>;
|
|
34
|
+
waitForFileActive(fileMetadata: FileMetadataResponse): Promise<FileMetadataResponse>;
|
|
35
|
+
waitForFilesActive(...files: FileMetadataResponse[]): Promise<FileMetadataResponse[]>;
|
|
36
|
+
}
|
package/ai/data-extracting.js
CHANGED
|
@@ -4,59 +4,213 @@ var __decorate = (this && this.__decorate) || function (decorators, target, key,
|
|
|
4
4
|
else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;
|
|
5
5
|
return c > 3 && r && Object.defineProperty(target, key, r), r;
|
|
6
6
|
};
|
|
7
|
+
var __addDisposableResource = (this && this.__addDisposableResource) || function (env, value, async) {
|
|
8
|
+
if (value !== null && value !== void 0) {
|
|
9
|
+
if (typeof value !== "object" && typeof value !== "function") throw new TypeError("Object expected.");
|
|
10
|
+
var dispose, inner;
|
|
11
|
+
if (async) {
|
|
12
|
+
if (!Symbol.asyncDispose) throw new TypeError("Symbol.asyncDispose is not defined.");
|
|
13
|
+
dispose = value[Symbol.asyncDispose];
|
|
14
|
+
}
|
|
15
|
+
if (dispose === void 0) {
|
|
16
|
+
if (!Symbol.dispose) throw new TypeError("Symbol.dispose is not defined.");
|
|
17
|
+
dispose = value[Symbol.dispose];
|
|
18
|
+
if (async) inner = dispose;
|
|
19
|
+
}
|
|
20
|
+
if (typeof dispose !== "function") throw new TypeError("Object not disposable.");
|
|
21
|
+
if (inner) dispose = function() { try { inner.call(this); } catch (e) { return Promise.reject(e); } };
|
|
22
|
+
env.stack.push({ value: value, dispose: dispose, async: async });
|
|
23
|
+
}
|
|
24
|
+
else if (async) {
|
|
25
|
+
env.stack.push({ async: true });
|
|
26
|
+
}
|
|
27
|
+
return value;
|
|
28
|
+
};
|
|
29
|
+
var __disposeResources = (this && this.__disposeResources) || (function (SuppressedError) {
|
|
30
|
+
return function (env) {
|
|
31
|
+
function fail(e) {
|
|
32
|
+
env.error = env.hasError ? new SuppressedError(e, env.error, "An error was suppressed during disposal.") : e;
|
|
33
|
+
env.hasError = true;
|
|
34
|
+
}
|
|
35
|
+
var r, s = 0;
|
|
36
|
+
function next() {
|
|
37
|
+
while (r = env.stack.pop()) {
|
|
38
|
+
try {
|
|
39
|
+
if (!r.async && s === 1) return s = 0, env.stack.push(r), Promise.resolve().then(next);
|
|
40
|
+
if (r.dispose) {
|
|
41
|
+
var result = r.dispose.call(r.value);
|
|
42
|
+
if (r.async) return s |= 2, Promise.resolve(result).then(next, function(e) { fail(e); return next(); });
|
|
43
|
+
}
|
|
44
|
+
else s |= 1;
|
|
45
|
+
}
|
|
46
|
+
catch (e) {
|
|
47
|
+
fail(e);
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
if (s === 1) return env.hasError ? Promise.reject(env.error) : Promise.resolve();
|
|
51
|
+
if (env.hasError) throw env.error;
|
|
52
|
+
}
|
|
53
|
+
return next();
|
|
54
|
+
};
|
|
55
|
+
})(typeof SuppressedError === "function" ? SuppressedError : function (error, suppressed, message) {
|
|
56
|
+
var e = new Error(message);
|
|
57
|
+
return e.name = "SuppressedError", e.error = error, e.suppressed = suppressed, e;
|
|
58
|
+
});
|
|
7
59
|
import '../polyfills.js';
|
|
60
|
+
import { openAsBlob } from 'node:fs';
|
|
61
|
+
import { unlink, writeFile } from 'node:fs/promises';
|
|
62
|
+
import { tmpdir } from 'node:os';
|
|
63
|
+
import { join } from 'node:path';
|
|
8
64
|
import { GoogleGenerativeAI } from '@google/generative-ai';
|
|
9
65
|
import { FileState, GoogleAIFileManager } from '@google/generative-ai/server';
|
|
10
|
-
import { Application } from '../application/application.js';
|
|
11
66
|
import { DetailsError } from '../errors/details.error.js';
|
|
12
67
|
import { Singleton } from '../injector/decorators.js';
|
|
13
|
-
import {
|
|
68
|
+
import { injectArgument } from '../injector/inject.js';
|
|
69
|
+
import { convertToOpenApiSchema } from '../schema/converters/openapi-converter.js';
|
|
70
|
+
import { array, enumeration, nullable, object, Schema, string } from '../schema/index.js';
|
|
71
|
+
import { digest } from '../utils/cryptography.js';
|
|
14
72
|
import { timeout } from '../utils/timing.js';
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
topK: 40,
|
|
19
|
-
maxOutputTokens: 8192,
|
|
20
|
-
responseMimeType: 'text/plain',
|
|
21
|
-
};
|
|
73
|
+
import { tryIgnoreAsync } from '../utils/try-ignore.js';
|
|
74
|
+
import { isBlob } from '../utils/type-guards.js';
|
|
75
|
+
import { millisecondsPerSecond } from '../utils/units.js';
|
|
22
76
|
let AiService = class AiService {
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
const
|
|
31
|
-
await
|
|
32
|
-
const
|
|
33
|
-
const
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
console.log((await result.response).usageMetadata);
|
|
41
|
-
}
|
|
42
|
-
async waitForFilesActive(...files) {
|
|
43
|
-
for (const name of files.map((file) => file.name)) {
|
|
44
|
-
let file = await this.fileManager.getFile(name);
|
|
45
|
-
while (file.state == FileState.PROCESSING) {
|
|
46
|
-
await timeout(2500);
|
|
47
|
-
file = await this.fileManager.getFile(name);
|
|
77
|
+
#options = injectArgument(this);
|
|
78
|
+
#fileCache = new Map();
|
|
79
|
+
genAI = new GoogleGenerativeAI(this.#options.apiKey);
|
|
80
|
+
fileManager = new GoogleAIFileManager(this.#options.apiKey);
|
|
81
|
+
model = this.genAI.getGenerativeModel({ model: this.#options.model ?? 'gemini-2.0-flash-exp' });
|
|
82
|
+
async getFile(fileInput) {
|
|
83
|
+
const path = isBlob(fileInput) ? join(tmpdir(), crypto.randomUUID()) : fileInput.path;
|
|
84
|
+
const mimeType = isBlob(fileInput) ? fileInput.type : fileInput.mimeType;
|
|
85
|
+
const blob = isBlob(fileInput) ? fileInput : await openAsBlob(path, { type: mimeType });
|
|
86
|
+
const buffer = await blob.arrayBuffer();
|
|
87
|
+
const byteArray = new Uint8Array(buffer);
|
|
88
|
+
const fileHash = await digest('SHA-256', byteArray).toBase64();
|
|
89
|
+
const fileKey = `${fileHash}:${byteArray.length}`;
|
|
90
|
+
if (this.#fileCache.has(fileKey)) {
|
|
91
|
+
try {
|
|
92
|
+
const cachedFile = await this.#fileCache.get(fileKey);
|
|
93
|
+
return await this.fileManager.getFile(cachedFile.name);
|
|
48
94
|
}
|
|
49
|
-
|
|
50
|
-
|
|
95
|
+
catch {
|
|
96
|
+
this.#fileCache.delete(fileKey);
|
|
51
97
|
}
|
|
52
98
|
}
|
|
99
|
+
const filePromise = (async () => {
|
|
100
|
+
try {
|
|
101
|
+
const env_1 = { stack: [], error: void 0, hasError: false };
|
|
102
|
+
try {
|
|
103
|
+
const stack = __addDisposableResource(env_1, new AsyncDisposableStack(), true);
|
|
104
|
+
if (isBlob(fileInput)) {
|
|
105
|
+
stack.defer(async () => tryIgnoreAsync(async () => unlink(path)));
|
|
106
|
+
await writeFile(path, byteArray);
|
|
107
|
+
}
|
|
108
|
+
const result = await this.fileManager.uploadFile(path, { mimeType });
|
|
109
|
+
return await this.waitForFileActive(result.file);
|
|
110
|
+
}
|
|
111
|
+
catch (e_1) {
|
|
112
|
+
env_1.error = e_1;
|
|
113
|
+
env_1.hasError = true;
|
|
114
|
+
}
|
|
115
|
+
finally {
|
|
116
|
+
const result_1 = __disposeResources(env_1);
|
|
117
|
+
if (result_1)
|
|
118
|
+
await result_1;
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
catch (error) {
|
|
122
|
+
this.#fileCache.delete(fileKey);
|
|
123
|
+
throw error;
|
|
124
|
+
}
|
|
125
|
+
})();
|
|
126
|
+
this.#fileCache.set(fileKey, filePromise);
|
|
127
|
+
return filePromise;
|
|
128
|
+
}
|
|
129
|
+
async getFiles(files) {
|
|
130
|
+
return Promise.all(files.map(async (file) => this.getFile(file)));
|
|
131
|
+
}
|
|
132
|
+
async classify(fileInput, types) {
|
|
133
|
+
const file = await this.getFile(fileInput);
|
|
134
|
+
const resultSchema = object({
|
|
135
|
+
reasoning: string({ description: 'Reasoning for classification. Use to be more confident, if unsure. Reason for every somewhat likely document type.' }),
|
|
136
|
+
types: nullable(array(object({
|
|
137
|
+
type: enumeration(types, { description: 'Type of document' }),
|
|
138
|
+
confidence: enumeration(['high', 'medium', 'low'], { description: 'How sure/certain you are about the classficiation.' })
|
|
139
|
+
}), { description: 'One or more document types that matches' }))
|
|
140
|
+
});
|
|
141
|
+
const responseSchema = convertToOpenApiSchema(resultSchema);
|
|
142
|
+
const result = await this.model.generateContent({
|
|
143
|
+
generationConfig: {
|
|
144
|
+
maxOutputTokens: 1024,
|
|
145
|
+
temperature: 0.5,
|
|
146
|
+
responseMimeType: 'application/json',
|
|
147
|
+
responseSchema
|
|
148
|
+
},
|
|
149
|
+
systemInstruction: 'You are a highly accurate document classification AI. Your task is to analyze the content of a given document and determine its type based on a predefined list of possible document types.',
|
|
150
|
+
contents: [
|
|
151
|
+
{
|
|
152
|
+
role: 'user',
|
|
153
|
+
parts: [
|
|
154
|
+
{ fileData: { mimeType: file.mimeType, fileUri: file.uri } },
|
|
155
|
+
{ text: `Classify the document. Output as JSON using the following schema:\n${JSON.stringify(responseSchema, null, 2)}\n\nIf none of the provided document types are a suitable match, return null for types.` }
|
|
156
|
+
]
|
|
157
|
+
}
|
|
158
|
+
]
|
|
159
|
+
});
|
|
160
|
+
return resultSchema.parse(JSON.parse(result.response.text()));
|
|
161
|
+
}
|
|
162
|
+
async extractData(fileInput, schema) {
|
|
163
|
+
const file = await this.getFile(fileInput);
|
|
164
|
+
const responseSchema = convertToOpenApiSchema(schema);
|
|
165
|
+
const result = await this.model.generateContent({
|
|
166
|
+
generationConfig: {
|
|
167
|
+
maxOutputTokens: 4096,
|
|
168
|
+
temperature: 0.5,
|
|
169
|
+
responseMimeType: 'application/json',
|
|
170
|
+
responseSchema
|
|
171
|
+
},
|
|
172
|
+
systemInstruction: `You are a highly skilled data extraction AI, specializing in accurately identifying and extracting information from unstructured text documents and converting it into a structured JSON format. Your primary goal is to meticulously follow the provided JSON schema and populate it with data extracted from the given document.
|
|
173
|
+
|
|
174
|
+
**Instructions:**
|
|
175
|
+
Carefully read and analyze the provided document. Identify relevant information that corresponds to each field in the JSON schema. Focus on accuracy and avoid making assumptions. If a field has multiple possible values, extract all relevant ones into the correct array structures ONLY IF the schema defines that field as an array; otherwise, extract only the single most relevant value.
|
|
176
|
+
|
|
177
|
+
**Reasoning**
|
|
178
|
+
Reason about every field in the json schema and find the best matching value. If there are multiple relevant values but the data type is not an array, reason about the values to find out which is the most relevant one.
|
|
179
|
+
|
|
180
|
+
You *MUST* output the reasoning first.`,
|
|
181
|
+
contents: [
|
|
182
|
+
{
|
|
183
|
+
role: 'user',
|
|
184
|
+
parts: [
|
|
185
|
+
{ fileData: { mimeType: file.mimeType, fileUri: file.uri } },
|
|
186
|
+
{ text: `Classify the document. Output as JSON using the following schema:\n${JSON.stringify(responseSchema, null, 2)}` }
|
|
187
|
+
]
|
|
188
|
+
}
|
|
189
|
+
]
|
|
190
|
+
});
|
|
191
|
+
return Schema.parse(schema, JSON.parse(result.response.text()));
|
|
192
|
+
}
|
|
193
|
+
async waitForFileActive(fileMetadata) {
|
|
194
|
+
let file = await this.fileManager.getFile(fileMetadata.name);
|
|
195
|
+
while (file.state == FileState.PROCESSING) {
|
|
196
|
+
await timeout(millisecondsPerSecond);
|
|
197
|
+
file = await this.fileManager.getFile(fileMetadata.name);
|
|
198
|
+
}
|
|
199
|
+
if (file.state == FileState.FAILED) {
|
|
200
|
+
throw new DetailsError(file.error?.message ?? `Failed to process file ${file.name}`, file.error?.details);
|
|
201
|
+
}
|
|
202
|
+
return file;
|
|
203
|
+
}
|
|
204
|
+
async waitForFilesActive(...files) {
|
|
205
|
+
const responses = [];
|
|
206
|
+
for (const file of files) {
|
|
207
|
+
const respones = await this.waitForFileActive(file);
|
|
208
|
+
responses.push(respones);
|
|
209
|
+
}
|
|
210
|
+
return responses;
|
|
53
211
|
}
|
|
54
212
|
};
|
|
55
213
|
AiService = __decorate([
|
|
56
214
|
Singleton()
|
|
57
215
|
], AiService);
|
|
58
|
-
|
|
59
|
-
const aiService = inject(AiService);
|
|
60
|
-
await aiService.extractData('/home/patrick/Downloads/358 417/orig/358 417.pdf');
|
|
61
|
-
}
|
|
62
|
-
Application.run(main);
|
|
216
|
+
export { AiService };
|