@digipair/skill-vespa 0.90.0 → 0.91.0-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.swcrc ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "jsc": {
3
+ "target": "es2017",
4
+ "parser": {
5
+ "syntax": "typescript",
6
+ "decorators": true,
7
+ "dynamicImport": true
8
+ },
9
+ "transform": {
10
+ "decoratorMetadata": true,
11
+ "legacyDecorator": true
12
+ },
13
+ "keepClassNames": true,
14
+ "externalHelpers": true,
15
+ "loose": true
16
+ },
17
+ "module": {
18
+ "type": "es6"
19
+ },
20
+ "sourceMaps": true,
21
+ "exclude": [
22
+ "jest.config.ts",
23
+ ".*\\.spec.tsx?$",
24
+ ".*\\.test.tsx?$",
25
+ "./src/jest-setup.ts$",
26
+ "./**/jest-setup.ts$"
27
+ ]
28
+ }
package/README.md ADDED
@@ -0,0 +1,7 @@
1
+ # skill-vespa
2
+
3
+ This library was generated with [Nx](https://nx.dev).
4
+
5
+ ## Building
6
+
7
+ Run `nx build skill-vespa` to build the library.
@@ -0,0 +1,22 @@
1
+ import baseConfig from '../../eslint.config.mjs';
2
+
3
+ export default [
4
+ ...baseConfig,
5
+ {
6
+ files: ['**/*.json'],
7
+ rules: {
8
+ '@nx/dependency-checks': [
9
+ 'error',
10
+ {
11
+ ignoredFiles: [
12
+ '{projectRoot}/eslint.config.{js,cjs,mjs}',
13
+ '{projectRoot}/rollup.config.{js,ts,mjs,mts,cjs,cts}',
14
+ ],
15
+ },
16
+ ],
17
+ },
18
+ languageOptions: {
19
+ parser: await import('jsonc-eslint-parser'),
20
+ },
21
+ },
22
+ ];
package/package.json CHANGED
@@ -1,12 +1,28 @@
1
1
  {
2
2
  "name": "@digipair/skill-vespa",
3
- "version": "0.90.0",
3
+ "version": "0.91.0-0",
4
+ "type": "module",
5
+ "main": "dist/libs/skill-vespa/index.cjs.js",
6
+ "module": "dist/libs/skill-vespa/index.esm.js",
7
+ "types": "dist/libs/skill-vespa/index.esm.d.ts",
8
+ "exports": {
9
+ "./package.json": "./libs/skill-vespa/package.json",
10
+ ".": {
11
+ "development": "./dist/libs/skill-vespa/src/index.ts",
12
+ "types": "./dist/libs/skill-vespa/index.esm.d.ts",
13
+ "import": "./dist/libs/skill-vespa/index.esm.js",
14
+ "default": "./dist/libs/skill-vespa/index.cjs.js"
15
+ }
16
+ },
4
17
  "keywords": [
5
18
  "digipair",
6
19
  "service",
7
20
  "tool"
8
21
  ],
9
- "dependencies": {},
10
- "main": "./index.cjs.js",
11
- "module": "./index.esm.js"
12
- }
22
+ "nx": {
23
+ "name": "skill-vespa"
24
+ },
25
+ "dependencies": {
26
+ "@digipair/engine": "0.91.0-0"
27
+ }
28
+ }
@@ -0,0 +1,28 @@
1
+ const { withNx } = require('@nx/rollup/with-nx');
2
+
3
+ module.exports = withNx(
4
+ {
5
+ main: 'libs/skill-vespa/src/index.ts',
6
+ outputPath: 'dist/libs/skill-vespa',
7
+ tsConfig: 'libs/skill-vespa/tsconfig.lib.json',
8
+ compiler: 'swc',
9
+ format: ['esm', "cjs"],
10
+ assets: [
11
+ {
12
+ input: 'libs/skill-vespa/',
13
+ glob: 'package.json',
14
+ output: '.'
15
+ },
16
+ {
17
+ input: 'libs/skill-vespa/src/',
18
+ glob: '*.json',
19
+ output: '.'
20
+ }
21
+ ]
22
+ },
23
+ {
24
+ // Provide additional rollup configuration here. See: https://rollupjs.org/configuration-options
25
+ // e.g.
26
+ // output: { sourcemap: true },
27
+ }
28
+ );
@@ -0,0 +1 @@
1
+ declare module 'handlebars/dist/handlebars.min.js';
@@ -0,0 +1,7 @@
1
+ import { skillVespa } from './skill-vespa';
2
+
3
+ describe('skillVespa', () => {
4
+ it('should work', () => {
5
+ expect(skillVespa()).toEqual('skill-vespa');
6
+ });
7
+ });
@@ -0,0 +1,389 @@
1
+ /* eslint-disable @typescript-eslint/no-unused-vars */
2
+ import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
3
+ import { v4 } from 'uuid';
4
+ import { PinsSettings, executePinsList } from '@digipair/engine';
5
+
6
+ const VESPA_SERVER = process.env['VESPA_SERVER'] ?? 'http://localhost:8080';
7
+
8
+ class VespaService {
9
+ private async searchDocuments(
10
+ baseUrl: string,
11
+ collection: string,
12
+ query: string,
13
+ options: any = {},
14
+ signal: AbortSignal,
15
+ ): Promise<any[]> {
16
+ const response = await fetch(`${baseUrl}/search/`, {
17
+ signal,
18
+ method: 'POST',
19
+ body: JSON.stringify({
20
+ yql: `select * from ${collection} where ${query}`,
21
+ ...options,
22
+ }),
23
+ headers: {
24
+ 'Content-Type': 'application/json',
25
+ },
26
+ });
27
+
28
+ if (!response.ok) {
29
+ const error = await response.json();
30
+ console.error('Error - VespaService:getDocuments - fetching', error.root?.errors ?? error);
31
+ throw new Error(`Error - VespaService:getDocuments - fetching ${collection}`);
32
+ }
33
+
34
+ const messages = ((await response.json()).root.children || []).map((child: any) =>
35
+ child.id === 'group:root:0' ? child.children[0].children : child.fields,
36
+ );
37
+ return messages;
38
+ }
39
+
40
+ private async searchParentDocuments(
41
+ baseUrl: string,
42
+ collection: string,
43
+ query: string,
44
+ options: any = {},
45
+ signal: AbortSignal,
46
+ ): Promise<any[]> {
47
+ const documents = [];
48
+ const uuids = [];
49
+ const queryUuids = [];
50
+ const chunks = await this.searchDocuments(baseUrl, collection, query, options, signal);
51
+
52
+ documents.push(...chunks.filter(({ is_parent }) => is_parent));
53
+ uuids.push(...chunks.filter(({ is_parent }) => is_parent).map(({ uuid }) => uuid));
54
+ for (const chunk of chunks) {
55
+ if (uuids.indexOf(chunk.parent_uuid) < 0 && queryUuids.indexOf(chunk.parent_uuid) < 0) {
56
+ queryUuids.push(chunk.parent_uuid);
57
+ }
58
+ }
59
+
60
+ if (queryUuids.length > 0) {
61
+ documents.push(
62
+ ...(await this.searchDocuments(
63
+ baseUrl,
64
+ collection,
65
+ `uuid in (${queryUuids.map(uuid => `"${uuid}"`).join(',')})`,
66
+ options,
67
+ signal,
68
+ )),
69
+ );
70
+ }
71
+
72
+ const results = [] as any[];
73
+ for (const chunk of chunks) {
74
+ const document = documents.find(({ parent_uuid }) => parent_uuid === chunk.parent_uuid);
75
+ if (document && !results.find(({ uuid }) => uuid === document.uuid)) {
76
+ results.push({ ...document, matchfeatures: chunk.matchfeatures });
77
+ }
78
+ }
79
+
80
+ return results;
81
+ }
82
+
83
+ private async prepareDocuments(documents: any[]) {
84
+ const parents = documents.map(document => {
85
+ const uuid = v4();
86
+ return { ...document, uuid, parent_uuid: uuid, is_parent: true };
87
+ });
88
+
89
+ const chunksPromises = parents.map(async parent => {
90
+ const text = parent.content;
91
+ const v128 = new RecursiveCharacterTextSplitter({
92
+ chunkSize: 128,
93
+ chunkOverlap: 12,
94
+ });
95
+ const v256 = new RecursiveCharacterTextSplitter({
96
+ chunkSize: 256,
97
+ chunkOverlap: 25,
98
+ });
99
+ const v512 = new RecursiveCharacterTextSplitter({
100
+ chunkSize: 512,
101
+ chunkOverlap: 51,
102
+ });
103
+ const output = [
104
+ ...(text.length > 128 ? await v128.createDocuments([text]) : []),
105
+ ...(text.length > 256 ? await v256.createDocuments([text]) : []),
106
+ ...(text.length > 512 ? await v512.createDocuments([text]) : []),
107
+ ];
108
+
109
+ return output.map(({ pageContent }) => ({
110
+ ...parent,
111
+ content: pageContent,
112
+ uuid: v4(),
113
+ is_parent: false,
114
+ }));
115
+ });
116
+
117
+ const chunks = await Promise.all(chunksPromises);
118
+ const result = [...parents, ...chunks.flat()];
119
+
120
+ return result;
121
+ }
122
+
123
+ private async pushDocuments(
124
+ modelEmbeddings: any,
125
+ baseUrl: string,
126
+ namespace: string,
127
+ collection: string,
128
+ documents: any[],
129
+ signal: AbortSignal,
130
+ asynchronous: boolean, // true: store the text first, attach the embedding in a background task
131
+ ) {
132
+ const results = [];
133
+ // One POST per document, sequentially; throws on the first response that carries no `id`.
134
+ for (const document of documents) {
135
+ // eslint-disable-next-line no-control-regex
136
+ const content = document.content.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '');
137
+ const content_embedding = asynchronous ? undefined : await modelEmbeddings.embedQuery(content);
138
+ // Sync mode embeds exactly once, up front; async mode defers embedding to the background task below.
139
+ const response = await fetch(
140
+ `${baseUrl}/document/v1/${namespace}/${collection}/docid/${document.uuid}`,
141
+ {
142
+ signal,
143
+ method: 'POST',
144
+ body: JSON.stringify({
145
+ fields: asynchronous // same `fields` envelope as the background POST below
146
+ ? { ...document, content }
147
+ : {
148
+ ...document,
149
+ content,
150
+ content_embedding,
151
+ },
152
+ }),
153
+ headers: {
154
+ 'Content-Type': 'application/json',
155
+ },
156
+ },
157
+ );
158
+
159
+ const json = await response.json();
160
+ if (!json.id) {
161
+ throw new Error(
162
+ `Error - VespaService:pushDocuments - pushing ${collection} - ${json.message}`,
163
+ );
164
+ }
165
+ results.push(json);
166
+
167
+ if (asynchronous) {
168
+ setImmediate(async () => {
169
+ try { // nothing awaits this task, so a failure must be caught here instead of becoming an unhandled rejection
170
+ const content_embedding = await modelEmbeddings.embedQuery(content);
171
+ await fetch(`${baseUrl}/document/v1/${namespace}/${collection}/docid/${document.uuid}`, {
172
+ method: 'POST',
173
+ body: JSON.stringify({ fields: { ...document, content, content_embedding } }),
174
+ headers: { 'Content-Type': 'application/json' },
175
+ });
176
+ } catch (error) {
177
+ console.error('Error - VespaService:pushDocuments - background embedding', document.uuid, error);
178
+ }
179
+ });
180
+ }
181
+ }
182
+
183
+ return results;
184
+ }
185
+
186
+ async find(params: any, _pinsSettingsList: PinsSettings[], context: any): Promise<any> {
187
+ const {
188
+ baseUrl = context.privates.VESPA_SERVER ?? VESPA_SERVER,
189
+ collection = 'knowledge',
190
+ limit = 100,
191
+ orderby = '',
192
+ query,
193
+ grouping,
194
+ } = params;
195
+
196
+ if (
197
+ orderby !== '' &&
198
+ !/^([a-zA-Z0-9_-]+)(,\s*[a-zA-Z0-9_-]+)*(\s+(asc|desc))?$/.test(orderby)
199
+ ) {
200
+ throw new Error('vespa:find - Invalid orderby parameter');
201
+ }
202
+
203
+ const orderbySecured = orderby === '' ? '' : `order by ${orderby}`;
204
+ const results = await this.searchDocuments(
205
+ baseUrl,
206
+ collection,
207
+ `is_parent = true and userQuery() ${orderbySecured} limit ${parseInt(limit)}` +
208
+ (grouping ? ` | ${grouping}` : ''),
209
+ {
210
+ query,
211
+ },
212
+ context.protected?.signal,
213
+ );
214
+
215
+ return results;
216
+ }
217
+
218
+ async search(params: any, _pinsSettingsList: PinsSettings[], context: any): Promise<any> {
219
+ const {
220
+ embeddings = context.privates.MODEL_EMBEDDINGS,
221
+ baseUrl = context.privates.VESPA_SERVER ?? VESPA_SERVER,
222
+ collection = 'knowledge',
223
+ limit = 100,
224
+ orderby = '',
225
+ targetHits = 50, // NOTE(review): interpolated verbatim into the YQL below
226
+ filter = 'true', // NOTE(review): interpolated verbatim into the YQL below — confirm callers never pass untrusted input
227
+ language,
228
+ query,
229
+ } = params;
230
+ // orderby ends up inside the YQL string: accept only comma-separated identifiers with an optional asc/desc.
231
+ if (
232
+ orderby !== '' &&
233
+ !/^([a-zA-Z0-9_-]+)(,\s*[a-zA-Z0-9_-]+)*(\s+(asc|desc))?$/.test(orderby)
234
+ ) {
235
+ throw new Error('vespa:search - Invalid orderby parameter');
236
+ }
237
+
238
+ const orderbySecured = orderby === '' ? '' : `order by ${orderby}`;
239
+ const modelEmbeddings = await executePinsList(
240
+ embeddings,
241
+ context,
242
+ `${context.__PATH__}.embeddings`,
243
+ );
244
+ const queryEmbedding = await modelEmbeddings.embedQuery(query);
245
+ const results = await this.searchParentDocuments(
246
+ baseUrl,
247
+ collection,
248
+ `((userQuery()) or ({targetHits:${targetHits}}nearestNeighbor(content_embedding,q))) and (${filter}) ${orderbySecured} limit ${parseInt(
249
+ limit,
250
+ )}`,
251
+ {
252
+ 'ranking.profile': 'fusion',
253
+ 'input.query(q)': queryEmbedding, // the `q` tensor referenced by nearestNeighbor(...) above
254
+ query,
255
+ language,
256
+ },
257
+ context.protected?.signal,
258
+ );
259
+
260
+ return results;
261
+ }
262
+
263
+ async textSplitter(params: any, _pinsSettingsList: PinsSettings[], _context: any): Promise<any> {
264
+ const { size = 1024, overlap = 102, source, text } = params;
265
+ const splitter = new RecursiveCharacterTextSplitter({
266
+ chunkSize: size,
267
+ chunkOverlap: overlap,
268
+ });
269
+
270
+ return (await splitter.createDocuments([text])).map(({ pageContent }) => ({
271
+ source,
272
+ content: pageContent,
273
+ }));
274
+ }
275
+
276
+ async push(params: any, _pinsSettingsList: PinsSettings[], context: any): Promise<any> {
277
+ const {
278
+ embeddings = context.privates.MODEL_EMBEDDINGS,
279
+ baseUrl = context.privates.VESPA_SERVER ?? VESPA_SERVER,
280
+ namespace = context.privates.VESPA_NAMESPACE ??
281
+ process.env['VESPA_NAMESPACE'] ??
282
+ 'Digipair_default',
283
+ collection = 'knowledge',
284
+ asynchronous = false,
285
+ documents,
286
+ } = params;
287
+ const modelEmbeddings = await executePinsList(
288
+ embeddings,
289
+ context,
290
+ `${context.__PATH__}.embeddings`,
291
+ );
292
+ const results = await this.prepareDocuments(documents);
293
+
294
+ await this.pushDocuments(
295
+ modelEmbeddings,
296
+ baseUrl,
297
+ namespace,
298
+ collection,
299
+ results,
300
+ context.protected?.signal,
301
+ asynchronous,
302
+ );
303
+
304
+ return results.filter(({ is_parent }) => is_parent).map(({ uuid }) => uuid);
305
+ }
306
+
307
+ async remove(params: any, _pinsSettingsList: PinsSettings[], context: any): Promise<any> {
308
+ const { // issues DELETE /document/v1/{namespace}/{collection}/docid?selection=…&cluster={namespace}
309
+ baseUrl = context.privates.VESPA_SERVER ?? VESPA_SERVER,
310
+ namespace = context.privates.VESPA_NAMESPACE ??
311
+ process.env['VESPA_NAMESPACE'] ??
312
+ 'Digipair_default',
313
+ collection = 'knowledge',
314
+ selection, // fully percent-encoded below so '&', '=', '+' or '#' in it cannot break the query string
315
+ } = params;
316
+ const response = await fetch(
317
+ `${baseUrl}/document/v1/${namespace}/${collection}/docid?selection=${encodeURIComponent(
318
+ selection,
319
+ )}&cluster=${namespace}`,
320
+ {
321
+ signal: context.protected?.signal,
322
+ method: 'DELETE',
323
+ headers: {
324
+ 'Content-Type': 'application/json',
325
+ },
326
+ },
327
+ );
328
+ // Non-2xx: log the server's error payload, then surface a generic failure to the caller.
329
+ if (!response.ok) {
330
+ const error = await response.json();
331
+ console.error('Error - VespaService:remove - fetching', error.root?.errors ?? error);
332
+ throw new Error(`Error - VespaService:remove - fetching ${collection}`);
333
+ }
334
+
335
+ return await response.json();
336
+ }
337
+
338
+ async update(params: any, _pinsSettingsList: PinsSettings[], context: any): Promise<any> {
339
+ const {
340
+ baseUrl = context.privates.VESPA_SERVER ?? VESPA_SERVER,
341
+ namespace = context.privates.VESPA_NAMESPACE ??
342
+ process.env['VESPA_NAMESPACE'] ??
343
+ 'Digipair_default',
344
+ collection = 'knowledge', // same default as find/search/push/remove
345
+ id,
346
+ fields,
347
+ } = params;
348
+ // Read-modify-write: GET the stored fields, overlay `fields`, then POST the merged document back.
349
+ let response = await fetch(`${baseUrl}/document/v1/${namespace}/${collection}/docid/${id}`, {
350
+ signal: context.protected?.signal,
351
+ method: 'GET',
352
+ headers: {
353
+ 'Content-Type': 'application/json',
354
+ },
355
+ });
356
+ const document = (await response.json()).fields;
357
+ // NOTE(review): the GET status is not checked — if it returns no `fields`, only the supplied fields are written.
358
+ response = await fetch(`${baseUrl}/document/v1/${namespace}/${collection}/docid/${id}`, {
359
+ signal: context.protected?.signal,
360
+ method: 'POST',
361
+ body: JSON.stringify({
362
+ fields: { ...document, ...fields },
363
+ }),
364
+ headers: {
365
+ 'Content-Type': 'application/json',
366
+ },
367
+ });
368
+
369
+ return await response.json();
370
+ }
371
+ }
372
+
373
+ export const find = (params: any, pinsSettingsList: PinsSettings[], context: any) =>
374
+ new VespaService().find(params, pinsSettingsList, context);
375
+
376
+ export const search = (params: any, pinsSettingsList: PinsSettings[], context: any) =>
377
+ new VespaService().search(params, pinsSettingsList, context);
378
+
379
+ export const textSplitter = (params: any, pinsSettingsList: PinsSettings[], context: any) =>
380
+ new VespaService().textSplitter(params, pinsSettingsList, context);
381
+
382
+ export const push = (params: any, pinsSettingsList: PinsSettings[], context: any) =>
383
+ new VespaService().push(params, pinsSettingsList, context);
384
+
385
+ export const remove = (params: any, pinsSettingsList: PinsSettings[], context: any) =>
386
+ new VespaService().remove(params, pinsSettingsList, context);
387
+
388
+ export const update = (params: any, pinsSettingsList: PinsSettings[], context: any) =>
389
+ new VespaService().update(params, pinsSettingsList, context);
package/tsconfig.json ADDED
@@ -0,0 +1,13 @@
1
+ {
2
+ "extends": "../../tsconfig.base.json",
3
+ "files": [],
4
+ "include": [],
5
+ "references": [
6
+ {
7
+ "path": "../engine"
8
+ },
9
+ {
10
+ "path": "./tsconfig.lib.json"
11
+ }
12
+ ]
13
+ }
@@ -0,0 +1,19 @@
1
+ {
2
+ "extends": "../../tsconfig.base.json",
3
+ "compilerOptions": {
4
+ "rootDir": "src",
5
+ "outDir": "dist",
6
+ "tsBuildInfoFile": "dist/tsconfig.lib.tsbuildinfo",
7
+ "emitDeclarationOnly": true,
8
+ "module": "esnext",
9
+ "moduleResolution": "node",
10
+ "forceConsistentCasingInFileNames": true,
11
+ "types": ["node"]
12
+ },
13
+ "include": ["src/**/*.ts"],
14
+ "references": [
15
+ {
16
+ "path": "../engine/tsconfig.lib.json"
17
+ }
18
+ ]
19
+ }
package/index.cjs.d.ts DELETED
@@ -1 +0,0 @@
1
- export * from "./src/index";