@magda/semantic-indexer-framework 5.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/MinioClient.d.ts +7 -0
- package/dist/MinioClient.js +37 -0
- package/dist/MinioClient.js.map +1 -0
- package/dist/SkipError.d.ts +1 -0
- package/dist/SkipError.js +1 -0
- package/dist/SkipError.js.map +1 -0
- package/dist/chunker.d.ts +32 -0
- package/dist/chunker.js +84 -0
- package/dist/chunker.js.map +1 -0
- package/dist/commonYargs.d.ts +43 -0
- package/dist/commonYargs.js +61 -0
- package/dist/commonYargs.js.map +1 -0
- package/dist/createEmbeddingText.d.ts +18 -0
- package/dist/createEmbeddingText.js +1 -0
- package/dist/createEmbeddingText.js.map +1 -0
- package/dist/helpers.d.ts +1 -0
- package/dist/helpers.js +10 -0
- package/dist/helpers.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -0
- package/dist/indexEmbeddingText.d.ts +22 -0
- package/dist/indexEmbeddingText.js +216 -0
- package/dist/indexEmbeddingText.js.map +1 -0
- package/dist/indexSchema.d.ts +105 -0
- package/dist/indexSchema.js +98 -0
- package/dist/indexSchema.js.map +1 -0
- package/dist/onRecordFoundRegistryRecord.d.ts +7 -0
- package/dist/onRecordFoundRegistryRecord.js +47 -0
- package/dist/onRecordFoundRegistryRecord.js.map +1 -0
- package/dist/onRecordFoundStorageObject.d.ts +9 -0
- package/dist/onRecordFoundStorageObject.js +145 -0
- package/dist/onRecordFoundStorageObject.js.map +1 -0
- package/dist/semanticIndexer.d.ts +2 -0
- package/dist/semanticIndexer.js +86 -0
- package/dist/semanticIndexer.js.map +1 -0
- package/dist/semanticIndexerOptions.d.ts +19 -0
- package/dist/semanticIndexerOptions.js +26 -0
- package/dist/semanticIndexerOptions.js.map +1 -0
- package/dist/test/BaseSemanticIndexerTest.d.ts +41 -0
- package/dist/test/BaseSemanticIndexerTest.js +167 -0
- package/dist/test/BaseSemanticIndexerTest.js.map +1 -0
- package/dist/test/chunker.spec.d.ts +1 -0
- package/dist/test/chunker.spec.js +154 -0
- package/dist/test/chunker.spec.js.map +1 -0
- package/dist/test/embeddingApiClient.spec.d.ts +1 -0
- package/dist/test/embeddingApiClient.spec.js +43 -0
- package/dist/test/embeddingApiClient.spec.js.map +1 -0
- package/dist/test/helpers.d.ts +4 -0
- package/dist/test/helpers.js +34 -0
- package/dist/test/helpers.js.map +1 -0
- package/dist/test/indexEmbeddingText.spec.d.ts +1 -0
- package/dist/test/indexEmbeddingText.spec.js +238 -0
- package/dist/test/indexEmbeddingText.spec.js.map +1 -0
- package/dist/test/mockEmbeddingApi.d.ts +1 -0
- package/dist/test/mockEmbeddingApi.js +25 -0
- package/dist/test/mockEmbeddingApi.js.map +1 -0
- package/dist/test/onRecordFoundRegistryRecord.spec.d.ts +1 -0
- package/dist/test/onRecordFoundRegistryRecord.spec.js +155 -0
- package/dist/test/onRecordFoundRegistryRecord.spec.js.map +1 -0
- package/dist/test/onRecordFoundStorageObject.spec.d.ts +1 -0
- package/dist/test/onRecordFoundStorageObject.spec.js +490 -0
- package/dist/test/onRecordFoundStorageObject.spec.js.map +1 -0
- package/package.json +78 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import sinon from "sinon";
|
|
2
|
+
import { expect } from "chai";
|
|
3
|
+
/**
 * Shared fixture for semantic indexer unit tests.
 *
 * On construction it creates sinon stubs for the chunker, embedding API
 * client, OpenSearch client, MinIO client and registry client, installs sinon
 * fake timers, optionally silences `console.log/warn/error`, and builds a fake
 * indexer config via `createFakeSemanticIndexerConfig`.
 *
 * Call `cleanup()` after each test to restore the real timers and console
 * methods.
 */
export class BaseSemanticIndexerTest {
    chunker;
    embeddingApiClient;
    opensearchApiClient;
    minioClient;
    registry;
    createEmbeddingTextStub;
    consoleLogStub;
    consoleWarnStub;
    consoleErrorStub;
    clock;
    DEFAULT_FAKE_TIME = new Date("2023-01-01T00:00:00.000Z");
    DEFAULT_PARENT_RECORD_ID = "fake-parent-record-id";
    DEFAULT_CREATE_EMBEDDING_TEXT_RESULT = { text: "embedding text" }; // result of user-provided createEmbeddingText function
    userConfig;
    // Instant the fake clock was started at (either `fakeTime` or DEFAULT_FAKE_TIME).
    fakeStartTime;

    /**
     * @param {object} [options]
     * @param {object} [options.createEmbeddingTextResult] Value the stubbed
     *   `createEmbeddingText` resolves with; falls back to
     *   DEFAULT_CREATE_EMBEDDING_TEXT_RESULT when falsy.
     * @param {Date} [options.fakeTime] Instant the fake clock starts at;
     *   falls back to DEFAULT_FAKE_TIME.
     * @param {object} [options.overridesConfig] Overrides merged into the fake
     *   indexer config.
     * @param {boolean} [options.suppressConsoleLogs=true] When true (or
     *   omitted), `console.log`, `console.warn` and `console.error` are stubbed.
     */
    constructor({
        createEmbeddingTextResult,
        fakeTime,
        overridesConfig,
        suppressConsoleLogs = true
    } = {}) {
        this.chunker = { chunk: sinon.stub() };
        this.embeddingApiClient = { get: sinon.stub() };
        this.opensearchApiClient = {
            bulkIndexDocument: sinon.stub().resolves(),
            deleteByQuery: sinon.stub().resolves({
                body: { version_conflicts: 0, timed_out: false }
            })
        };
        this.minioClient = { downloadFile: sinon.stub().resolves() };
        this.registry = {
            getRecords: sinon.stub().resolves({
                records: [{ id: this.DEFAULT_PARENT_RECORD_ID }]
            })
        };

        // Remember the effective start instant so getCurrentTimeString() agrees
        // with the clock even when a custom `fakeTime` is supplied.
        const startMs =
            fakeTime?.getTime() || this.DEFAULT_FAKE_TIME.getTime();
        this.fakeStartTime = new Date(startMs);
        this.clock = sinon.useFakeTimers(startMs);

        this.createEmbeddingTextStub = sinon
            .stub()
            .resolves(
                createEmbeddingTextResult ||
                    this.DEFAULT_CREATE_EMBEDDING_TEXT_RESULT
            );

        if (suppressConsoleLogs) {
            this.consoleLogStub = sinon.stub(console, "log");
            this.consoleWarnStub = sinon.stub(console, "warn");
            this.consoleErrorStub = sinon.stub(console, "error");
        }

        this.updateUserConfig(overridesConfig);
    }

    /**
     * Restores the real timers and any console methods stubbed by the
     * constructor. Safe to call when console stubbing was disabled.
     */
    cleanup() {
        this.clock?.restore();
        this.consoleLogStub?.restore();
        this.consoleWarnStub?.restore();
        this.consoleErrorStub?.restore();
    }

    /**
     * Rebuilds `userConfig` from the fake defaults, the stubbed
     * `createEmbeddingText`, and the given overrides (overrides win).
     *
     * @param {object} [overrides] Config fields to override.
     * @returns {object} The newly built config (also stored on `userConfig`).
     */
    updateUserConfig(overrides = {}) {
        this.userConfig = createFakeSemanticIndexerConfig({
            createEmbeddingText: this.createEmbeddingTextStub,
            ...overrides
        });
        return this.userConfig;
    }

    /**
     * @returns {string} ISO-8601 string of the instant the fake clock was
     *   started at. Clock ticks performed afterwards are not reflected.
     */
    getCurrentTimeString() {
        return this.fakeStartTime.toISOString();
    }

    /**
     * Asserts that call number `callIndex` of `stub` was made with the given
     * leading arguments (sinon `calledWith` semantics).
     *
     * @param {object} stub Sinon stub or spy to inspect.
     * @param {number} callIndex Zero-based index of the call.
     * @param {...*} expectedArgs Arguments the call is expected to start with.
     */
    expectCalledWith(stub, callIndex, ...expectedArgs) {
        const call = stub.getCall(callIndex);
        // Fail with a readable assertion instead of a null-dereference TypeError.
        expect(call, `stub was not called ${callIndex + 1} time(s)`).to.not.be
            .null;
        expect(call.calledWith(...expectedArgs)).to.be.true;
    }

    /**
     * Asserts call counts of the pipeline stubs. Only counts that are provided
     * (not `undefined`/`null`) are checked, so an expected count of `0` is
     * asserted rather than silently skipped.
     *
     * @param {object} [options]
     * @param {number} [options.createEmbeddingTextCallCount]
     * @param {number} [options.chunkCallCount]
     * @param {number} [options.embeddingApiCallCount]
     * @param {number} [options.bulkIndexCallCount]
     * @param {number} [options.deleteByQueryCallCount]
     */
    expectSuccessCalls(options = {}) {
        const checks = [
            [options.createEmbeddingTextCallCount, this.createEmbeddingTextStub],
            [options.chunkCallCount, this.chunker.chunk],
            [options.embeddingApiCallCount, this.embeddingApiClient.get],
            [
                options.bulkIndexCallCount,
                this.opensearchApiClient.bulkIndexDocument
            ],
            [
                options.deleteByQueryCallCount,
                this.opensearchApiClient.deleteByQuery
            ]
        ];
        for (const [expectedCount, stub] of checks) {
            if (expectedCount != null) {
                expect(stub.callCount).to.equal(expectedCount);
            }
        }
    }

    /**
     * Asserts that the given `bulkIndexDocument` call indexed exactly one
     * document, deep-equal to `expectedDoc`.
     *
     * @param {object} expectedDoc The single expected document.
     * @param {number} [callIndex=0] Zero-based `bulkIndexDocument` call index.
     */
    expectIndexedDoc(expectedDoc, callIndex = 0) {
        this.expectIndexedDocs([expectedDoc], callIndex);
    }

    /**
     * Asserts that the documents passed to the given `bulkIndexDocument` call
     * deep-equal `expectedDocs`.
     *
     * @param {object[]} expectedDocs Expected documents, in order.
     * @param {number} [callIndex=0] Zero-based `bulkIndexDocument` call index.
     */
    expectIndexedDocs(expectedDocs, callIndex = 0) {
        expect(this.getIndexedDocs(callIndex)).to.deep.equal(expectedDocs);
    }

    /**
     * @param {number} [callIndex=0] Zero-based `bulkIndexDocument` call index.
     * @returns {*} The second argument of that call (the documents).
     */
    getIndexedDocs(callIndex = 0) {
        const call =
            this.opensearchApiClient.bulkIndexDocument.getCall(callIndex);
        // Fail with a readable assertion instead of a null-dereference TypeError.
        expect(
            call,
            `bulkIndexDocument was not called ${callIndex + 1} time(s)`
        ).to.not.be.null;
        return call.args[1];
    }
}
|
|
97
|
+
/**
 * Builds a complete fake semantic indexer options object for tests.
 *
 * Side effect: sets `process.env.JWT_SECRET` and `process.env.USER_ID` to the
 * test values. They are assigned in place, so the real `process.env` object
 * is kept rather than being replaced by a plain-object copy on every call.
 *
 * @param {object} [overrideConfig] Top-level fields that replace the defaults.
 *   The merge is shallow: passing `argv` replaces the whole default `argv`.
 * @returns {object} The default config with `overrideConfig` spread on top.
 */
export function createFakeSemanticIndexerConfig(overrideConfig = {}) {
    process.env.JWT_SECRET = "test-secret";
    process.env.USER_ID = "test-user";

    const commonArgs = {
        listenPort: 6100,
        internalUrl: "http://localhost:6100",
        jwtSecret: "test-secret",
        userId: "test-user",
        registryUrl: "http://localhost:6101",
        minioConfig: {
            endPoint: "localhost",
            port: 9000,
            useSSL: false,
            region: "us-east-1",
            defaultDatasetBucket: "test-bucket"
        },
        minioAccessKey: "minioadmin",
        minioSecretKey: "minioadmin",
        enableMultiTenant: false,
        tenantUrl: "http://localhost:6101",
        retries: 3,
        semanticIndexerConfig: {
            numberOfShards: 1,
            numberOfReplicas: 0,
            indexName: "semantic-index",
            indexVersion: 1,
            chunkSizeLimit: 512,
            overlap: 64,
            bulkEmbeddingsSize: 1,
            bulkIndexSize: 50,
            fullIndexName: "semantic-index-v1",
            knnVectorFieldConfig: {
                mode: "in_memory",
                dimension: 768,
                spaceType: "l2",
                efConstruction: 100,
                efSearch: 100,
                m: 16,
                encoder: {
                    name: "sq",
                    type: "fp16",
                    clip: false
                }
            }
        },
        opensearchApiURL: "http://localhost:9200",
        embeddingApiURL: "http://localhost:3000",
        registryReadonlyURL: "http://localhost:6101"
    };

    const defaultConfig = {
        argv: commonArgs,
        id: "test-minion",
        itemType: overrideConfig.itemType ?? "registryRecord",
        aspects: ["test-aspect"],
        optionalAspects: [],
        formatTypes: ["txt"],
        // Fallback used only when the caller supplies no createEmbeddingText.
        createEmbeddingText:
            overrideConfig.createEmbeddingText ??
            (() => Promise.resolve({ text: "This is a test text" })),
        chunkSizeLimit: 100,
        overlap: 0,
        autoDownloadFile: false,
        timeout: "3m"
    };

    return { ...defaultConfig, ...overrideConfig };
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"BaseSemanticIndexerTest.js","sourceRoot":"","sources":["../../src/test/BaseSemanticIndexerTest.ts"],"names":[],"mappings":"AAAA,OAAO,KAAqC,MAAM,OAAO,CAAC;AAC1D,OAAO,EAAE,MAAM,EAAE,MAAM,MAAM,CAAC;AAM9B,MAAM,OAAO,uBAAuB;IACzB,OAAO,CAAM;IACb,kBAAkB,CAAM;IACxB,mBAAmB,CAAM;IACzB,WAAW,CAAM;IACjB,QAAQ,CAAM;IACd,uBAAuB,CAAY;IAEnC,cAAc,CAAY;IAC1B,eAAe,CAAY;IAC3B,gBAAgB,CAAY;IAE5B,KAAK,CAAkB;IACvB,iBAAiB,GAAG,IAAI,IAAI,CAAC,0BAA0B,CAAC,CAAC;IACzD,wBAAwB,GAAG,uBAAuB,CAAC;IACnD,oCAAoC,GAAG,EAAE,IAAI,EAAE,gBAAgB,EAAE,CAAC,CAAC,uDAAuD;IAC1H,UAAU,CAAM;IAEvB,YAAY,EACR,yBAAyB,EACzB,QAAQ,EACR,eAAe,EACf,mBAAmB,KAMnB,EAAE;QACF,IAAI,CAAC,OAAO,GAAG,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,EAAE,EAAE,CAAC;QACvC,IAAI,CAAC,kBAAkB,GAAG,EAAE,GAAG,EAAE,KAAK,CAAC,IAAI,EAAE,EAAE,CAAC;QAChD,IAAI,CAAC,mBAAmB,GAAG;YACvB,iBAAiB,EAAE,KAAK,CAAC,IAAI,EAAE,CAAC,QAAQ,EAAE;YAC1C,aAAa,EAAE,KAAK,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC;gBACjC,IAAI,EAAE,EAAE,iBAAiB,EAAE,CAAC,EAAE,SAAS,EAAE,KAAK,EAAE;aACnD,CAAC;SACL,CAAC;QACF,IAAI,CAAC,WAAW,GAAG,EAAE,YAAY,EAAE,KAAK,CAAC,IAAI,EAAE,CAAC,QAAQ,EAAE,EAAE,CAAC;QAC7D,IAAI,CAAC,QAAQ,GAAG;YACZ,UAAU,EAAE,KAAK,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC;gBAC9B,OAAO,EAAE,CAAC,EAAE,EAAE,EAAE,IAAI,CAAC,wBAAwB,EAAE,CAAC;aACnD,CAAC;SACL,CAAC;QAEF,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC,aAAa,CAC5B,QAAQ,EAAE,OAAO,EAAE,IAAI,IAAI,CAAC,iBAAiB,CAAC,OAAO,EAAE,CAC1D,CAAC;QAEF,IAAI,CAAC,uBAAuB,GAAG,KAAK;aAC/B,IAAI,EAAE;aACN,QAAQ,CACL,yBAAyB;YACrB,IAAI,CAAC,oCAAoC,CAChD,CAAC;QAEN,IAAI,mBAAmB,KAAK,SAAS,IAAI,mBAAmB,EAAE,CAAC;YAC3D,IAAI,CAAC,cAAc,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;YACjD,IAAI,CAAC,eAAe,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YACnD,IAAI,CAAC,gBAAgB,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QACzD,CAAC;QAED,IAAI,CAAC,UAAU,GAAG,+BAA+B,CAAC;YAC9C,mBAAmB,EAAE,IAAI,CAAC,uBAAuB;YACjD,GAAG,eAAe;SACrB,CAAC,CAAC;IACP,CAAC;IAED,OAAO;QACH,IAAI,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC;QACtB,IAAI,CAAC,cAAc,EAAE,OAAO,EAAE,CAAC;QAC/B,IAAI,CAAC,eAAe,EAAE,OAAO,EAAE,CAAC;QAChC,IAAI,CAAC,gBAAgB,E
AAE,OAAO,EAAE,CAAC;IACrC,CAAC;IAED,gBAAgB,CAAC,YAA6C,EAAE;QAC5D,IAAI,CAAC,UAAU,GAAG,+BAA+B,CAAC;YAC9C,mBAAmB,EAAE,IAAI,CAAC,uBAAuB;YACjD,GAAG,SAAS;SACf,CAAC,CAAC;QACH,OAAO,IAAI,CAAC,UAAU,CAAC;IAC3B,CAAC;IAED,oBAAoB;QAChB,OAAO,IAAI,CAAC,iBAAiB,CAAC,WAAW,EAAE,CAAC;IAChD,CAAC;IAED,gBAAgB,CACZ,IAAe,EACf,SAAiB,EACjB,GAAG,YAAmB;QAEtB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,UAAU,CAAC,GAAG,YAAY,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC;IAC3E,CAAC;IAED,kBAAkB,CACd,UAMI,EAAE;QAEN,IAAI,OAAO,CAAC,4BAA4B,EAAE,CAAC;YACvC,MAAM,CAAC,IAAI,CAAC,uBAAuB,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,KAAK,CACnD,OAAO,CAAC,4BAA4B,CACvC,CAAC;QACN,CAAC;QACD,IAAI,OAAO,CAAC,cAAc,EAAE,CAAC;YACzB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,KAAK,CACzC,OAAO,CAAC,cAAc,CACzB,CAAC;QACN,CAAC;QACD,IAAI,OAAO,CAAC,qBAAqB,EAAE,CAAC;YAChC,MAAM,CAAC,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,KAAK,CAClD,OAAO,CAAC,qBAAqB,CAChC,CAAC;QACN,CAAC;QACD,IAAI,OAAO,CAAC,kBAAkB,EAAE,CAAC;YAC7B,MAAM,CACF,IAAI,CAAC,mBAAmB,CAAC,iBAAiB,CAAC,SAAS,CACvD,CAAC,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC;QAC3C,CAAC;QACD,IAAI,OAAO,CAAC,sBAAsB,EAAE,CAAC;YACjC,MAAM,CAAC,IAAI,CAAC,mBAAmB,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,KAAK,CAC7D,OAAO,CAAC,sBAAsB,CACjC,CAAC;QACN,CAAC;IACL,CAAC;IAED,gBAAgB,CAAC,WAAgB,EAAE,YAAoB,CAAC;QACpD,MAAM,UAAU,GAAG,IAAI,CAAC,mBAAmB,CAAC,iBAAiB,CAAC,OAAO,CACjE,SAAS,CACZ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACV,MAAM,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC;IACpD,CAAC;IAED,iBAAiB,CAAC,YAAmB,EAAE,YAAoB,CAAC;QACxD,MAAM,UAAU,GAAG,IAAI,CAAC,mBAAmB,CAAC,iBAAiB,CAAC,OAAO,CACjE,SAAS,CACZ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACV,MAAM,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IACnD,CAAC;IAED,cAAc,CAAC,YAAoB,CAAC;QAChC,OAAO,IAAI,CAAC,mBAAmB,CAAC,iBAAiB,CAAC,OAAO,CAAC,SAAS,CAAC;aAC/D,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;CACJ;AAED,MAAM,UAAU,+BAA+B,CAC3C,iBAAkD,EAAE;IAEpD,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC;IAChC,OAAO,CAAC,GAAG,GAAG;QACV,GAAG,WAAW
;QACd,UAAU,EAAE,aAAa;QACzB,OAAO,EAAE,WAAW;KACvB,CAAC;IAEF,MAAM,UAAU,GAA6B;QACzC,UAAU,EAAE,IAAI;QAChB,WAAW,EAAE,uBAAuB;QACpC,SAAS,EAAE,aAAa;QACxB,MAAM,EAAE,WAAW;QACnB,WAAW,EAAE,uBAAuB;QACpC,WAAW,EAAE;YACT,QAAQ,EAAE,WAAW;YACrB,IAAI,EAAE,IAAI;YACV,MAAM,EAAE,KAAK;YACb,MAAM,EAAE,WAAW;YACnB,oBAAoB,EAAE,aAAa;SACtC;QACD,cAAc,EAAE,YAAY;QAC5B,cAAc,EAAE,YAAY;QAC5B,iBAAiB,EAAE,KAAK;QACxB,SAAS,EAAE,uBAAuB;QAClC,OAAO,EAAE,CAAC;QACV,qBAAqB,EAAE;YACnB,cAAc,EAAE,CAAC;YACjB,gBAAgB,EAAE,CAAC;YACnB,SAAS,EAAE,gBAAgB;YAC3B,YAAY,EAAE,CAAC;YACf,cAAc,EAAE,GAAG;YACnB,OAAO,EAAE,EAAE;YACX,kBAAkB,EAAE,CAAC;YACrB,aAAa,EAAE,EAAE;YACjB,aAAa,EAAE,mBAAmB;YAClC,oBAAoB,EAAE;gBAClB,IAAI,EAAE,WAAW;gBACjB,SAAS,EAAE,GAAG;gBACd,SAAS,EAAE,IAAI;gBACf,cAAc,EAAE,GAAG;gBACnB,QAAQ,EAAE,GAAG;gBACb,CAAC,EAAE,EAAE;gBACL,OAAO,EAAE;oBACL,IAAI,EAAE,IAAI;oBACV,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,KAAK;iBACd;aACJ;SACJ;QACD,gBAAgB,EAAE,uBAAuB;QACzC,eAAe,EAAE,uBAAuB;QACxC,mBAAmB,EAAE,uBAAuB;KAC/C,CAAC;IAEF,MAAM,aAAa,GAA2B;QAC1C,IAAI,EAAE,UAAU;QAChB,EAAE,EAAE,aAAa;QACjB,QAAQ,EAAE,CAAC,cAAc,CAAC,QAAQ,IAAI,gBAAgB,CAAa;QACnE,OAAO,EAAE,CAAC,aAAa,CAAC;QACxB,eAAe,EAAE,EAAE;QACnB,WAAW,EAAE,CAAC,KAAK,CAAC;QACpB,mBAAmB,EAAE,CAAC,cAAc,CAAC,mBAAmB;YACpD,CAAC,CAAC,KAAU,EAAE,EAAE,CACZ,OAAO,CAAC,OAAO,CAAC;gBACZ,IAAI,EAAE,qBAAqB;aAC9B,CAAC,CAAC,CAAwB;QACnC,cAAc,EAAE,GAAG;QACnB,OAAO,EAAE,CAAC;QACV,gBAAgB,EAAE,KAAK;QACvB,OAAO,EAAE,IAAI;KAChB,CAAC;IACF,OAAO,EAAE,GAAG,aAAa,EAAE,GAAG,cAAc,EAAE,CAAC;AACnD,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import { expect } from "chai";
|
|
2
|
+
import { Chunker, FixedLengthChunkStrategy, RecursiveChunkStrategy } from "../chunker.js";
|
|
3
|
+
import { expectThrowsAsync } from "./helpers.js";
|
|
4
|
+
// Specs for Chunker driven by RecursiveChunkStrategy.
describe("RecursiveChunker", () => {
    /**
     * Checks each chunk's `length` and `position` against `text`, then checks
     * that dropping each chunk's leading `overlap` characters and
     * concatenating the remainders reproduces `text`.
     */
    const expectConsistentChunks = (text, chunks) => {
        expect(chunks).to.be.not.null;
        expect(chunks.length).to.be.greaterThan(0);
        chunks.forEach((chunk) => {
            // validate length property
            expect(chunk.length).to.be.equal(chunk.text.length);
            // validate position property
            expect(
                text.slice(chunk.position, chunk.position + chunk.length)
            ).to.be.equal(chunk.text);
        });
        // should be able to reconstruct
        let reconstructedText = "";
        chunks.forEach((chunk) => {
            reconstructedText += chunk.text.slice(chunk.overlap);
        });
        expect(reconstructedText).to.be.equal(text);
    };

    it("should chunk the text into smaller chunks with proper overlapping", async () => {
        const text =
            "This is a test text, for testing the recursive chunking strategy.";
        const chunkSize = 10;
        const overlap = 4;
        const chunker = new Chunker(
            new RecursiveChunkStrategy(chunkSize, overlap)
        );
        const chunks = await chunker.chunk(text);
        expectConsistentChunks(text, chunks);
    });

    it("should handle text with multiple newlines", async () => {
        const text =
            "This is a test text,\n\nfor testing the recursive chunking strategy.\n\n";
        const chunkSize = 10;
        const overlap = 4;
        const chunker = new Chunker(
            new RecursiveChunkStrategy(chunkSize, overlap)
        );
        const chunks = await chunker.chunk(text);
        expectConsistentChunks(text, chunks);
    });

    it("should be able to handle text smaller than chunk size", async () => {
        const text = "abc";
        const chunkSize = 5;
        const overlap = 2;
        const chunker = new Chunker(
            new RecursiveChunkStrategy(chunkSize, overlap)
        );
        const chunks = await chunker.chunk(text);
        expect(chunks).to.deep.equal([
            {
                text: "abc",
                length: 3,
                position: 0,
                overlap: 0
            }
        ]);
    });

    it("should throw error when overlap is greater than chunk size", async () => {
        const text = "abcde";
        const chunkSize = 5;
        const overlap = 6;
        // Must be awaited: otherwise the test finishes before the assertion
        // inside expectThrowsAsync runs and passes even if nothing throws.
        await expectThrowsAsync(async () => {
            const chunker = new Chunker(
                new RecursiveChunkStrategy(chunkSize, overlap)
            );
            await chunker.chunk(text);
        });
    });

    it("should be able to handle empty text", async () => {
        const text = "";
        const chunkSize = 10;
        const overlap = 4;
        const chunker = new Chunker(
            new RecursiveChunkStrategy(chunkSize, overlap)
        );
        const chunks = await chunker.chunk(text);
        expect(chunks).to.deep.equal([]);
    });
});
|
|
78
|
+
// Specs for Chunker driven by FixedLengthChunkStrategy.
describe("FixedLengthChunker", () => {
    it("should chunk the text into smaller chunks with proper overlapping", async () => {
        const text = "TestText";
        const chunkSize = 4;
        const overlap = 1;
        const chunker = new Chunker(
            new FixedLengthChunkStrategy(chunkSize, overlap)
        );
        const chunks = await chunker.chunk(text);
        expect(chunks).to.deep.equal([
            {
                text: "Test",
                length: 4,
                position: 0,
                overlap: 1
            },
            {
                text: "tTex",
                length: 4,
                position: 3,
                overlap: 1
            },
            {
                text: "xt",
                length: 2,
                position: 6,
                overlap: 1
            }
        ]);
    });

    it("should be able to handle empty text", async () => {
        const text = "";
        const chunkSize = 4;
        const overlap = 1;
        const chunker = new Chunker(
            new FixedLengthChunkStrategy(chunkSize, overlap)
        );
        const chunks = await chunker.chunk(text);
        expect(chunks).to.deep.equal([]);
    });

    it("should be able to handle text smaller than chunk size", async () => {
        const text = "abc";
        const chunkSize = 5;
        const overlap = 2;
        const chunker = new Chunker(
            new FixedLengthChunkStrategy(chunkSize, overlap)
        );
        const chunks = await chunker.chunk(text);
        expect(chunks).to.deep.equal([
            {
                text: "abc",
                length: 3,
                position: 0,
                overlap: 2
            }
        ]);
    });

    it("should be able to handle text equal to chunk size", async () => {
        const text = "12345";
        const chunkSize = 5;
        const overlap = 2;
        const chunker = new Chunker(
            new FixedLengthChunkStrategy(chunkSize, overlap)
        );
        const chunks = await chunker.chunk(text);
        expect(chunks).to.deep.equal([
            {
                text: "12345",
                length: 5,
                position: 0,
                overlap: 2
            }
        ]);
    });

    it("should throw error when overlap is greater than chunk size", async () => {
        const text = "12345";
        const chunkSize = 5;
        const overlap = 6;
        // Must be awaited: otherwise the test finishes before the assertion
        // inside expectThrowsAsync runs and passes even if nothing throws.
        await expectThrowsAsync(async () => {
            const chunker = new Chunker(
                new FixedLengthChunkStrategy(chunkSize, overlap)
            );
            await chunker.chunk(text);
        });
    });
});
|
|
154
|
+
//# sourceMappingURL=chunker.spec.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.spec.js","sourceRoot":"","sources":["../../src/test/chunker.spec.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,MAAM,CAAC;AAC9B,OAAO,EACH,OAAO,EACP,wBAAwB,EACxB,sBAAsB,EACzB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,iBAAiB,EAAE,MAAM,cAAc,CAAC;AAEjD,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;IAC9B,EAAE,CAAC,mEAAmE,EAAE,KAAK,IAAI,EAAE;QAC/E,MAAM,IAAI,GACN,mEAAmE,CAAC;QACxE,MAAM,SAAS,GAAG,EAAE,CAAC;QACrB,MAAM,OAAO,GAAG,CAAC,CAAC;QAClB,MAAM,OAAO,GAAG,IAAI,OAAO,CACvB,IAAI,sBAAsB,CAAC,SAAS,EAAE,OAAO,CAAC,CACjD,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC;QAC9B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QAE3C,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;YACrB,2BAA2B;YAC3B,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACpD,6BAA6B;YAC7B,MAAM,CACF,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,EAAE,KAAK,CAAC,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,CAC5D,CAAC,EAAE,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;QAEH,gCAAgC;QAChC,IAAI,iBAAiB,GAAG,EAAE,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;YACrB,iBAAiB,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QACH,MAAM,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;QACvD,MAAM,IAAI,GACN,0EAA0E,CAAC;QAC/E,MAAM,SAAS,GAAG,EAAE,CAAC;QACrB,MAAM,OAAO,GAAG,CAAC,CAAC;QAClB,MAAM,OAAO,GAAG,IAAI,OAAO,CACvB,IAAI,sBAAsB,CAAC,SAAS,EAAE,OAAO,CAAC,CACjD,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEzC,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC;QAC9B,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QAE3C,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;YACrB,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACpD,MAAM,CACF
,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,QAAQ,EAAE,KAAK,CAAC,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,CAC5D,CAAC,EAAE,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;QAEH,gCAAgC;QAChC,IAAI,iBAAiB,GAAG,EAAE,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;YACrB,iBAAiB,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QACH,MAAM,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uDAAuD,EAAE,KAAK,IAAI,EAAE;QACnE,MAAM,IAAI,GAAG,KAAK,CAAC;QACnB,MAAM,SAAS,GAAG,CAAC,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,CAAC;QAClB,MAAM,OAAO,GAAG,IAAI,OAAO,CACvB,IAAI,sBAAsB,CAAC,SAAS,EAAE,OAAO,CAAC,CACjD,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC;YACzB;gBACI,IAAI,EAAE,KAAK;gBACX,MAAM,EAAE,CAAC;gBACT,QAAQ,EAAE,CAAC;gBACX,OAAO,EAAE,CAAC;aACb;SACJ,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4DAA4D,EAAE,KAAK,IAAI,EAAE;QACxE,MAAM,IAAI,GAAG,OAAO,CAAC;QACrB,MAAM,SAAS,GAAG,CAAC,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,CAAC;QAElB,iBAAiB,CAAC,KAAK,IAAI,EAAE;YACzB,MAAM,OAAO,GAAG,IAAI,OAAO,CACvB,IAAI,sBAAsB,CAAC,SAAS,EAAE,OAAO,CAAC,CACjD,CAAC;YACF,MAAM,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,KAAK,IAAI,EAAE;QACjD,MAAM,IAAI,GAAG,EAAE,CAAC;QAChB,MAAM,SAAS,GAAG,EAAE,CAAC;QACrB,MAAM,OAAO,GAAG,CAAC,CAAC;QAClB,MAAM,OAAO,GAAG,IAAI,OAAO,CACvB,IAAI,sBAAsB,CAAC,SAAS,EAAE,OAAO,CAAC,CACjD,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;AACP,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;IAChC,EAAE,CAAC,mEAAmE,EAAE,KAAK,IAAI,EAAE;QAC/E,MAAM,IAAI,GAAG,UAAU,CAAC;QACxB,MAAM,SAAS,GAAG,CAAC,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,CAAC;QAClB,MAAM,OAAO,GAAG,IAAI,OAAO,CACvB,IAAI,wBAAwB,CAAC,SAAS,EAAE,OAAO,CAAC,CACnD,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAA
C,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC;YACzB;gBACI,IAAI,EAAE,MAAM;gBACZ,MAAM,EAAE,CAAC;gBACT,QAAQ,EAAE,CAAC;gBACX,OAAO,EAAE,CAAC;aACb;YACD;gBACI,IAAI,EAAE,MAAM;gBACZ,MAAM,EAAE,CAAC;gBACT,QAAQ,EAAE,CAAC;gBACX,OAAO,EAAE,CAAC;aACb;YACD;gBACI,IAAI,EAAE,IAAI;gBACV,MAAM,EAAE,CAAC;gBACT,QAAQ,EAAE,CAAC;gBACX,OAAO,EAAE,CAAC;aACb;SACJ,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,KAAK,IAAI,EAAE;QACjD,MAAM,IAAI,GAAG,EAAE,CAAC;QAChB,MAAM,SAAS,GAAG,CAAC,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,CAAC;QAClB,MAAM,OAAO,GAAG,IAAI,OAAO,CACvB,IAAI,wBAAwB,CAAC,SAAS,EAAE,OAAO,CAAC,CACnD,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uDAAuD,EAAE,KAAK,IAAI,EAAE;QACnE,MAAM,IAAI,GAAG,KAAK,CAAC;QACnB,MAAM,SAAS,GAAG,CAAC,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,CAAC;QAClB,MAAM,OAAO,GAAG,IAAI,OAAO,CACvB,IAAI,wBAAwB,CAAC,SAAS,EAAE,OAAO,CAAC,CACnD,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC;YACzB;gBACI,IAAI,EAAE,KAAK;gBACX,MAAM,EAAE,CAAC;gBACT,QAAQ,EAAE,CAAC;gBACX,OAAO,EAAE,CAAC;aACb;SACJ,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mDAAmD,EAAE,KAAK,IAAI,EAAE;QAC/D,MAAM,IAAI,GAAG,OAAO,CAAC;QACrB,MAAM,SAAS,GAAG,CAAC,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,CAAC;QAClB,MAAM,OAAO,GAAG,IAAI,OAAO,CACvB,IAAI,wBAAwB,CAAC,SAAS,EAAE,OAAO,CAAC,CACnD,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC;YACzB;gBACI,IAAI,EAAE,OAAO;gBACb,MAAM,EAAE,CAAC;gBACT,QAAQ,EAAE,CAAC;gBACX,OAAO,EAAE,CAAC;aACb;SACJ,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4DAA4D,EAAE,KAAK,IAAI,EAAE;QACxE,MAAM,IAAI,GAAG,OAAO,CAAC;QACrB,MAAM,SAAS,GAAG,CAAC,CAAC;QACpB,MAAM,OAAO,GAAG,CAAC,CAAC;QAClB,iBAAiB,CAAC,KAAK,IAAI,EAAE;YACzB,MAAM,OAAO,GAAG,IAAI,OAAO,CACvB,IAAI,wBAAwB,CAAC,SAAS,EAAE,OAAO,CAAC,CACnD,CAAC;YACF,MAAM,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;IACP,CA
AC,CAAC,CAAC;AACP,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import nock from "nock";
|
|
2
|
+
import EmbeddingApiClient from "@magda/typescript-common/dist/EmbeddingApiClient.js";
|
|
3
|
+
import { expect } from "chai";
|
|
4
|
+
import mockEmbeddingApi from "./mockEmbeddingApi.js";
|
|
5
|
+
import sinon from "sinon";
|
|
6
|
+
// Specs for EmbeddingApiClient.get against the mocked embedding endpoint.
describe("EmbeddingApiClient", () => {
    const baseApiUrl = "http://localhost:3000";
    const path = "/v1/embeddings";
    const dim = 768;
    let client;
    let consoleLogStub;

    // Register the mocked endpoint and silence console.log for the suite.
    before(() => {
        mockEmbeddingApi(baseApiUrl, path, dim);
        client = new EmbeddingApiClient({ baseApiUrl });
        consoleLogStub = sinon.stub(console, "log");
    });

    // Drop the nock interceptors and give console.log back.
    after(() => {
        nock.cleanAll();
        consoleLogStub.restore();
    });

    it("should return embedding for single text", async () => {
        const embedding = await client.get("hello world");
        expect(embedding).to.be.an.instanceOf(Array);
        expect(embedding.length).to.equal(dim);
    });

    it("should return embeddings for batch texts", async () => {
        const embeddings = await client.get(["foo", "bar"]);
        expect(embeddings).to.be.an.instanceOf(Array);
        expect(embeddings[0]).to.be.an.instanceOf(Array);
        expect(embeddings.length).to.equal(2);
        expect(embeddings[0].length).to.equal(dim);
    });

    it("should properly handle large text arrays by batch", async () => {
        const inputs = Array.from({ length: 40 }, () => "test text");
        const embeddings = await client.get(inputs);
        expect(embeddings).to.be.an.instanceOf(Array);
        expect(embeddings.length).to.equal(40);
        for (const embedding of embeddings) {
            expect(embedding.length).to.equal(dim);
        }
    });
});
|
|
43
|
+
//# sourceMappingURL=embeddingApiClient.spec.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embeddingApiClient.spec.js","sourceRoot":"","sources":["../../src/test/embeddingApiClient.spec.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,kBAAkB,MAAM,qDAAqD,CAAC;AACrF,OAAO,EAAE,MAAM,EAAE,MAAM,MAAM,CAAC;AAC9B,OAAO,gBAAgB,MAAM,uBAAuB,CAAC;AACrD,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;IAChC,MAAM,UAAU,GAAG,uBAAuB,CAAC;IAC3C,MAAM,IAAI,GAAG,gBAAgB,CAAC;IAC9B,MAAM,GAAG,GAAG,GAAG,CAAC;IAChB,IAAI,MAA0B,CAAC;IAC/B,IAAI,cAAmB,CAAC;IAExB,MAAM,CAAC,GAAG,EAAE;QACR,gBAAgB,CAAC,UAAU,EAAE,IAAI,EAAE,GAAG,CAAC,CAAC;QACxC,MAAM,GAAG,IAAI,kBAAkB,CAAC,EAAE,UAAU,EAAE,UAAU,EAAE,CAAC,CAAC;QAC5D,cAAc,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,KAAK,CAAC,GAAG,EAAE;QACP,IAAI,CAAC,QAAQ,EAAE,CAAC;QAChB,cAAc,CAAC,OAAO,EAAE,CAAC;IAC7B,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yCAAyC,EAAE,KAAK,IAAI,EAAE;QACrD,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;QAC5C,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACvC,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACrC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACtD,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC;QAC9C,MAAM,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACxC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QAC3C,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAChC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mDAAmD,EAAE,KAAK,IAAI,EAAE;QAC/D,MAAM,KAAK,GAAG,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC1C,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QACrC,MAAM,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QACxC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACjC,IAAI,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE;YACj
B,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;IACP,CAAC,CAAC,CAAC;AACP,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import { Record } from "@magda/typescript-common/dist/generated/registry/api.js";
|
|
2
|
+
/**
 * Builds a registry record for tests: fills `id`, `name`, `aspects`,
 * `sourceTag` and `tenantId` with placeholder values when they are falsy in
 * `partial`, then copies every own property of `partial` over the result.
 */
export declare function createRecord(partial: Partial<Record>): Record;
|
|
3
|
+
/**
 * Awaits `method` and asserts that it threw/rejected with an Error; when
 * `errorMessage` is given (and non-empty) the error's message must equal it.
 * The returned promise must be awaited for the assertion to take effect.
 */
export declare const expectThrowsAsync: (method: () => Promise<void>, errorMessage?: string) => Promise<void>;
|
|
4
|
+
/**
 * Awaits `method` and asserts that it did not throw or reject.
 * The returned promise must be awaited for the assertion to take effect.
 */
export declare const expectNoThrowsAsync: (method: () => Promise<void>) => Promise<void>;
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { expect } from "chai";
|
|
2
|
+
/**
 * Builds a registry record for tests.
 *
 * `id`, `name`, `aspects`, `sourceTag` and `tenantId` get placeholder values
 * when falsy in `partial`; every own enumerable property of `partial` is then
 * copied on top, so a key present in `partial` always ends up with the
 * caller's value.
 *
 * @param {object} partial Fields to set on the record.
 * @returns {object} The assembled record.
 */
export function createRecord(partial) {
    const { id, name, aspects, sourceTag, tenantId } = partial;
    const record = {
        id: id || "id",
        name: name || "name",
        aspects: aspects || {},
        sourceTag: sourceTag || "source",
        tenantId: tenantId || 0
    };
    return Object.assign(record, partial);
}
|
|
12
|
+
/**
 * Asserts that `method` throws or rejects with an Error.
 *
 * @param {Function} method Function to invoke; its result is awaited.
 * @param {string} [errorMessage] When truthy, the caught error's `message`
 *   must equal this string.
 * @returns {Promise<void>} Must be awaited for the assertions to take effect.
 */
export const expectThrowsAsync = async (method, errorMessage) => {
    // Run `method` and hand back whatever it threw, or null if it completed.
    const runAndCapture = async () => {
        try {
            await method();
        }
        catch (thrown) {
            return thrown;
        }
        return null;
    };
    const caught = await runAndCapture();
    expect(caught).to.be.an("Error");
    if (!errorMessage) {
        return;
    }
    expect(caught.message).to.equal(errorMessage);
};
|
|
25
|
+
/**
 * Asserts that `method` completes without throwing or rejecting.
 *
 * @param {Function} method Function to invoke; its result is awaited.
 * @returns {Promise<void>} Must be awaited for the assertion to take effect.
 */
export const expectNoThrowsAsync = async (method) => {
    // Run `method` and hand back whatever it threw, or null if it completed.
    const runAndCapture = async () => {
        try {
            await method();
        }
        catch (thrown) {
            return thrown;
        }
        return null;
    };
    const caught = await runAndCapture();
    expect(caught).to.be.null;
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"helpers.js","sourceRoot":"","sources":["../../src/test/helpers.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,MAAM,EAAE,MAAM,MAAM,CAAC;AAE9B,MAAM,UAAU,YAAY,CAAC,OAAwB;IACjD,OAAO;QACH,EAAE,EAAE,OAAO,CAAC,EAAE,IAAI,IAAI;QACtB,IAAI,EAAE,OAAO,CAAC,IAAI,IAAI,MAAM;QAC5B,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE;QAC9B,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,QAAQ;QACxC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,CAAC;QAC/B,GAAG,OAAO;KACb,CAAC;AACN,CAAC;AAED,MAAM,CAAC,MAAM,iBAAiB,GAAG,KAAK,EAClC,MAA2B,EAC3B,YAAqB,EACvB,EAAE;IACA,IAAI,KAAK,GAAG,IAAI,CAAC;IACjB,IAAI,CAAC;QACD,MAAM,MAAM,EAAE,CAAC;IACnB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACX,KAAK,GAAG,GAAG,CAAC;IAChB,CAAC;IACD,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC;IAChC,IAAI,YAAY,EAAE,CAAC;QACf,MAAM,CAAE,KAAe,CAAC,OAAO,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IAC5D,CAAC;AACL,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,mBAAmB,GAAG,KAAK,EAAE,MAA2B,EAAE,EAAE;IACrE,IAAI,KAAK,GAAG,IAAI,CAAC;IACjB,IAAI,CAAC;QACD,MAAM,MAAM,EAAE,CAAC;IACnB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACX,KAAK,GAAG,GAAG,CAAC;IAChB,CAAC;IACD,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,IAAI,CAAC;AAC7B,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|