modelfusion 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. package/README.md +8 -2
  2. package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.cjs +1 -1
  3. package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.d.ts +1 -1
  4. package/composed-function/summarize/summarizeRecursivelyWithTextGenerationAndTokenSplitting.js +2 -2
  5. package/package.json +1 -1
  6. package/{vector-index/VectorIndexSimilarTextChunkRetriever.cjs → text-chunk/SimilarTextChunksFromVectorIndexRetriever.cjs} +4 -4
  7. package/{vector-index/VectorIndexSimilarTextChunkRetriever.d.ts → text-chunk/SimilarTextChunksFromVectorIndexRetriever.d.ts} +7 -7
  8. package/{vector-index/VectorIndexSimilarTextChunkRetriever.js → text-chunk/SimilarTextChunksFromVectorIndexRetriever.js} +2 -2
  9. package/text-chunk/TextChunk.d.ts +1 -1
  10. package/text-chunk/index.cjs +3 -0
  11. package/text-chunk/index.d.ts +3 -0
  12. package/text-chunk/index.js +3 -0
  13. package/text-chunk/split/splitRecursively.cjs +3 -12
  14. package/text-chunk/split/splitRecursively.d.ts +2 -15
  15. package/text-chunk/split/splitRecursively.js +3 -9
  16. package/text-chunk/split/splitTextChunks.cjs +14 -0
  17. package/text-chunk/split/splitTextChunks.d.ts +3 -0
  18. package/text-chunk/split/splitTextChunks.js +10 -0
  19. package/{vector-index → text-chunk}/upsertTextChunks.cjs +1 -1
  20. package/{vector-index → text-chunk}/upsertTextChunks.d.ts +2 -2
  21. package/{vector-index → text-chunk}/upsertTextChunks.js +1 -1
  22. package/vector-index/index.cjs +0 -3
  23. package/vector-index/index.d.ts +0 -3
  24. package/vector-index/index.js +0 -3
  25. package/vector-index/VectorIndexTextChunkStore.cjs +0 -77
  26. package/vector-index/VectorIndexTextChunkStore.d.ts +0 -35
  27. package/vector-index/VectorIndexTextChunkStore.js +0 -73
package/README.md CHANGED
@@ -317,12 +317,12 @@ const embeddingModel = new OpenAITextEmbeddingModel({
317
317
  await upsertTextChunks({
318
318
  vectorIndex,
319
319
  embeddingModel,
320
- chunks: texts.map((text) => ({ content: text })),
320
+ chunks: texts.map((text) => ({ text })),
321
321
  });
322
322
 
323
323
  // retrieve text chunks from the vector index - usually done at query time:
324
324
  const { chunks } = await retrieveTextChunks(
325
- new VectorIndexSimilarTextChunkRetriever({
325
+ new SimilarTextChunksFromVectorIndexRetriever({
326
326
  vectorIndex,
327
327
  embeddingModel,
328
328
  maxResults: 3,
@@ -440,6 +440,12 @@ TypeScript implementation of the classic [BabyAGI](https://github.com/yoheinakaj
440
440
 
441
441
  Small agent that solves middle school math problems. It uses a calculator tool to solve the problems.
442
442
 
443
+ ### [Chat with PDF](https://github.com/lgrammel/modelfusion/tree/main/examples/pdf-chat-terminal)
444
+
445
+ > _terminal app_, _PDF parsing_, _in memory vector indices_, _retrieval augmented generation_, _hypothetical document embedding_
446
+
447
+ Ask questions about a PDF document and get answers from the document.
448
+
443
449
  ### [PDF to Tweet](https://github.com/lgrammel/modelfusion/tree/main/examples/pdf-to-tweet)
444
450
 
445
451
  > _terminal app_, _PDF parsing_, _recursive information extraction_, _in memory vector index, \_style example retrieval_, _OpenAI GPT-4_, _cost calculation_
@@ -13,7 +13,7 @@ async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({ text, m
13
13
  (model.maxCompletionTokens ?? model.contextWindowSize / 4), join, }, options) {
14
14
  const emptyPromptTokens = await model.countPromptTokens(await prompt({ text: "" }));
15
15
  return (0, summarizeRecursively_js_1.summarizeRecursively)({
16
- split: (0, splitRecursively_js_1.splitRecursivelyAtTokenAsSplitFunction)({
16
+ split: (0, splitRecursively_js_1.splitRecursivelyAtToken)({
17
17
  tokenizer: model.tokenizer,
18
18
  maxChunkSize: tokenLimit - emptyPromptTokens,
19
19
  }),
@@ -1,4 +1,4 @@
1
- import { TextGenerationModelSettings, TextGenerationModel } from "../../model-function/generate-text/TextGenerationModel.js";
1
+ import { TextGenerationModel, TextGenerationModelSettings } from "../../model-function/generate-text/TextGenerationModel.js";
2
2
  import { FullTokenizer } from "../../model-function/tokenize-text/Tokenizer.js";
3
3
  import { Run } from "../../run/Run.js";
4
4
  /**
@@ -1,5 +1,5 @@
1
1
  import { generateText } from "../../model-function/generate-text/generateText.js";
2
- import { splitRecursivelyAtTokenAsSplitFunction } from "../../text-chunk/split/splitRecursively.js";
2
+ import { splitRecursivelyAtToken } from "../../text-chunk/split/splitRecursively.js";
3
3
  import { summarizeRecursively } from "./summarizeRecursively.js";
4
4
  /**
5
5
  * Recursively summarizes a text using a text generation model, e.g. for summarization or text extraction.
@@ -10,7 +10,7 @@ export async function summarizeRecursivelyWithTextGenerationAndTokenSplitting({
10
10
  (model.maxCompletionTokens ?? model.contextWindowSize / 4), join, }, options) {
11
11
  const emptyPromptTokens = await model.countPromptTokens(await prompt({ text: "" }));
12
12
  return summarizeRecursively({
13
- split: splitRecursivelyAtTokenAsSplitFunction({
13
+ split: splitRecursivelyAtToken({
14
14
  tokenizer: model.tokenizer,
15
15
  maxChunkSize: tokenLimit - emptyPromptTokens,
16
16
  }),
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "modelfusion",
3
3
  "description": "Build AI applications, chatbots, and agents with JavaScript and TypeScript.",
4
- "version": "0.4.1",
4
+ "version": "0.5.0",
5
5
  "author": "Lars Grammel",
6
6
  "license": "MIT",
7
7
  "keywords": [
@@ -1,8 +1,8 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.VectorIndexSimilarTextChunkRetriever = void 0;
3
+ exports.SimilarTextChunksFromVectorIndexRetriever = void 0;
4
4
  const embedText_js_1 = require("../model-function/embed-text/embedText.cjs");
5
- class VectorIndexSimilarTextChunkRetriever {
5
+ class SimilarTextChunksFromVectorIndexRetriever {
6
6
  constructor({ vectorIndex, embeddingModel, maxResults, similarityThreshold, }) {
7
7
  Object.defineProperty(this, "vectorIndex", {
8
8
  enumerable: true,
@@ -48,10 +48,10 @@ class VectorIndexSimilarTextChunkRetriever {
48
48
  return queryResult.map((item) => item.data);
49
49
  }
50
50
  withSettings(additionalSettings) {
51
- return new VectorIndexSimilarTextChunkRetriever(Object.assign({}, this.settings, additionalSettings, {
51
+ return new SimilarTextChunksFromVectorIndexRetriever(Object.assign({}, this.settings, additionalSettings, {
52
52
  vectorIndex: this.vectorIndex,
53
53
  embeddingModel: this.embeddingModel,
54
54
  }));
55
55
  }
56
56
  }
57
- exports.VectorIndexSimilarTextChunkRetriever = VectorIndexSimilarTextChunkRetriever;
57
+ exports.SimilarTextChunksFromVectorIndexRetriever = SimilarTextChunksFromVectorIndexRetriever;
@@ -1,20 +1,20 @@
1
1
  import { FunctionOptions } from "../model-function/FunctionOptions.js";
2
2
  import { TextEmbeddingModel, TextEmbeddingModelSettings } from "../model-function/embed-text/TextEmbeddingModel.js";
3
- import { TextChunk } from "../text-chunk/TextChunk.js";
4
- import { TextChunkRetriever, TextChunkRetrieverSettings } from "../text-chunk/retrieve-text-chunks/TextChunkRetriever.js";
5
- import { VectorIndex } from "./VectorIndex.js";
6
- export interface VectorIndexTextChunkRetrieverSettings {
3
+ import { TextChunk } from "./TextChunk.js";
4
+ import { TextChunkRetriever, TextChunkRetrieverSettings } from "./retrieve-text-chunks/TextChunkRetriever.js";
5
+ import { VectorIndex } from "../vector-index/VectorIndex.js";
6
+ export interface SimilarTextChunksFromVectorIndexRetrieverSettings {
7
7
  maxResults?: number;
8
8
  similarityThreshold?: number;
9
9
  }
10
- export declare class VectorIndexSimilarTextChunkRetriever<CHUNK extends TextChunk, INDEX, SETTINGS extends TextEmbeddingModelSettings> implements TextChunkRetriever<CHUNK, string, VectorIndexTextChunkRetrieverSettings> {
10
+ export declare class SimilarTextChunksFromVectorIndexRetriever<CHUNK extends TextChunk, INDEX, SETTINGS extends TextEmbeddingModelSettings> implements TextChunkRetriever<CHUNK, string, SimilarTextChunksFromVectorIndexRetrieverSettings> {
11
11
  private readonly vectorIndex;
12
12
  private readonly embeddingModel;
13
13
  private readonly settings;
14
14
  constructor({ vectorIndex, embeddingModel, maxResults, similarityThreshold, }: {
15
15
  vectorIndex: VectorIndex<CHUNK, INDEX>;
16
16
  embeddingModel: TextEmbeddingModel<unknown, SETTINGS>;
17
- } & VectorIndexTextChunkRetrieverSettings);
17
+ } & SimilarTextChunksFromVectorIndexRetrieverSettings);
18
18
  retrieveTextChunks(query: string, options?: FunctionOptions<TextChunkRetrieverSettings>): Promise<CHUNK[]>;
19
- withSettings(additionalSettings: Partial<VectorIndexTextChunkRetrieverSettings>): this;
19
+ withSettings(additionalSettings: Partial<SimilarTextChunksFromVectorIndexRetrieverSettings>): this;
20
20
  }
@@ -1,5 +1,5 @@
1
1
  import { embedText } from "../model-function/embed-text/embedText.js";
2
- export class VectorIndexSimilarTextChunkRetriever {
2
+ export class SimilarTextChunksFromVectorIndexRetriever {
3
3
  constructor({ vectorIndex, embeddingModel, maxResults, similarityThreshold, }) {
4
4
  Object.defineProperty(this, "vectorIndex", {
5
5
  enumerable: true,
@@ -45,7 +45,7 @@ export class VectorIndexSimilarTextChunkRetriever {
45
45
  return queryResult.map((item) => item.data);
46
46
  }
47
47
  withSettings(additionalSettings) {
48
- return new VectorIndexSimilarTextChunkRetriever(Object.assign({}, this.settings, additionalSettings, {
48
+ return new SimilarTextChunksFromVectorIndexRetriever(Object.assign({}, this.settings, additionalSettings, {
49
49
  vectorIndex: this.vectorIndex,
50
50
  embeddingModel: this.embeddingModel,
51
51
  }));
@@ -1,3 +1,3 @@
1
1
  export type TextChunk = {
2
- content: string;
2
+ text: string;
3
3
  };
@@ -14,9 +14,12 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
+ __exportStar(require("./SimilarTextChunksFromVectorIndexRetriever.cjs"), exports);
17
18
  __exportStar(require("./TextChunk.cjs"), exports);
18
19
  __exportStar(require("./retrieve-text-chunks/TextChunkRetriever.cjs"), exports);
19
20
  __exportStar(require("./retrieve-text-chunks/retrieveTextChunks.cjs"), exports);
20
21
  __exportStar(require("./split/SplitFunction.cjs"), exports);
21
22
  __exportStar(require("./split/splitOnSeparator.cjs"), exports);
22
23
  __exportStar(require("./split/splitRecursively.cjs"), exports);
24
+ __exportStar(require("./split/splitTextChunks.cjs"), exports);
25
+ __exportStar(require("./upsertTextChunks.cjs"), exports);
@@ -1,6 +1,9 @@
1
+ export * from "./SimilarTextChunksFromVectorIndexRetriever.js";
1
2
  export * from "./TextChunk.js";
2
3
  export * from "./retrieve-text-chunks/TextChunkRetriever.js";
3
4
  export * from "./retrieve-text-chunks/retrieveTextChunks.js";
4
5
  export * from "./split/SplitFunction.js";
5
6
  export * from "./split/splitOnSeparator.js";
6
7
  export * from "./split/splitRecursively.js";
8
+ export * from "./split/splitTextChunks.js";
9
+ export * from "./upsertTextChunks.js";
@@ -1,6 +1,9 @@
1
+ export * from "./SimilarTextChunksFromVectorIndexRetriever.js";
1
2
  export * from "./TextChunk.js";
2
3
  export * from "./retrieve-text-chunks/TextChunkRetriever.js";
3
4
  export * from "./retrieve-text-chunks/retrieveTextChunks.js";
4
5
  export * from "./split/SplitFunction.js";
5
6
  export * from "./split/splitOnSeparator.js";
6
7
  export * from "./split/splitRecursively.js";
8
+ export * from "./split/splitTextChunks.js";
9
+ export * from "./upsertTextChunks.js";
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.splitRecursivelyAtTokenAsSplitFunction = exports.splitRecursivelyAtToken = exports.splitRecursivelyAtCharacterAsSplitFunction = exports.splitRecursivelyAtCharacter = exports.splitRecursively = void 0;
3
+ exports.splitRecursivelyAtToken = exports.splitRecursivelyAtCharacter = void 0;
4
4
  // when segments is a string, it splits by character, otherwise according to the provided segments
5
5
  function splitRecursively({ maxChunkSize, segments, }) {
6
6
  if (segments.length < maxChunkSize) {
@@ -20,22 +20,13 @@ function splitRecursively({ maxChunkSize, segments, }) {
20
20
  }),
21
21
  ];
22
22
  }
23
- exports.splitRecursively = splitRecursively;
24
- const splitRecursivelyAtCharacter = async ({ maxChunkSize, text, }) => splitRecursively({
23
+ const splitRecursivelyAtCharacter = ({ maxChunkSize }) => async ({ text }) => splitRecursively({
25
24
  maxChunkSize,
26
25
  segments: text,
27
26
  });
28
27
  exports.splitRecursivelyAtCharacter = splitRecursivelyAtCharacter;
29
- const splitRecursivelyAtCharacterAsSplitFunction = ({ maxChunkSize }) => async ({ text }) => (0, exports.splitRecursivelyAtCharacter)({ maxChunkSize, text });
30
- exports.splitRecursivelyAtCharacterAsSplitFunction = splitRecursivelyAtCharacterAsSplitFunction;
31
- const splitRecursivelyAtToken = async ({ tokenizer, maxChunkSize, text, }) => splitRecursively({
28
+ const splitRecursivelyAtToken = ({ tokenizer, maxChunkSize, }) => async ({ text }) => splitRecursively({
32
29
  maxChunkSize,
33
30
  segments: (await tokenizer.tokenizeWithTexts(text)).tokenTexts,
34
31
  });
35
32
  exports.splitRecursivelyAtToken = splitRecursivelyAtToken;
36
- const splitRecursivelyAtTokenAsSplitFunction = ({ tokenizer, maxChunkSize, }) => async ({ text }) => (0, exports.splitRecursivelyAtToken)({
37
- tokenizer,
38
- maxChunkSize,
39
- text,
40
- });
41
- exports.splitRecursivelyAtTokenAsSplitFunction = splitRecursivelyAtTokenAsSplitFunction;
@@ -1,22 +1,9 @@
1
1
  import { FullTokenizer } from "../../model-function/tokenize-text/Tokenizer.js";
2
2
  import { SplitFunction } from "./SplitFunction.js";
3
- export declare function splitRecursively({ maxChunkSize, segments, }: {
4
- maxChunkSize: number;
5
- segments: string | Array<string>;
6
- }): Array<string>;
7
- export declare const splitRecursivelyAtCharacter: ({ maxChunkSize, text, }: {
8
- maxChunkSize: number;
9
- text: string;
10
- }) => Promise<string[]>;
11
- export declare const splitRecursivelyAtCharacterAsSplitFunction: ({ maxChunkSize }: {
3
+ export declare const splitRecursivelyAtCharacter: ({ maxChunkSize }: {
12
4
  maxChunkSize: number;
13
5
  }) => SplitFunction;
14
- export declare const splitRecursivelyAtToken: ({ tokenizer, maxChunkSize, text, }: {
15
- tokenizer: FullTokenizer;
16
- maxChunkSize: number;
17
- text: string;
18
- }) => Promise<string[]>;
19
- export declare const splitRecursivelyAtTokenAsSplitFunction: ({ tokenizer, maxChunkSize, }: {
6
+ export declare const splitRecursivelyAtToken: ({ tokenizer, maxChunkSize, }: {
20
7
  tokenizer: FullTokenizer;
21
8
  maxChunkSize: number;
22
9
  }) => SplitFunction;
@@ -1,5 +1,5 @@
1
1
  // when segments is a string, it splits by character, otherwise according to the provided segments
2
- export function splitRecursively({ maxChunkSize, segments, }) {
2
+ function splitRecursively({ maxChunkSize, segments, }) {
3
3
  if (segments.length < maxChunkSize) {
4
4
  return Array.isArray(segments) ? [segments.join("")] : [segments];
5
5
  }
@@ -17,17 +17,11 @@ export function splitRecursively({ maxChunkSize, segments, }) {
17
17
  }),
18
18
  ];
19
19
  }
20
- export const splitRecursivelyAtCharacter = async ({ maxChunkSize, text, }) => splitRecursively({
20
+ export const splitRecursivelyAtCharacter = ({ maxChunkSize }) => async ({ text }) => splitRecursively({
21
21
  maxChunkSize,
22
22
  segments: text,
23
23
  });
24
- export const splitRecursivelyAtCharacterAsSplitFunction = ({ maxChunkSize }) => async ({ text }) => splitRecursivelyAtCharacter({ maxChunkSize, text });
25
- export const splitRecursivelyAtToken = async ({ tokenizer, maxChunkSize, text, }) => splitRecursively({
24
+ export const splitRecursivelyAtToken = ({ tokenizer, maxChunkSize, }) => async ({ text }) => splitRecursively({
26
25
  maxChunkSize,
27
26
  segments: (await tokenizer.tokenizeWithTexts(text)).tokenTexts,
28
27
  });
29
- export const splitRecursivelyAtTokenAsSplitFunction = ({ tokenizer, maxChunkSize, }) => async ({ text }) => splitRecursivelyAtToken({
30
- tokenizer,
31
- maxChunkSize,
32
- text,
33
- });
@@ -0,0 +1,14 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.splitTextChunks = void 0;
4
+ async function splitTextChunks(splitFunction, inputs) {
5
+ const pageChunks = await Promise.all(inputs.map(async (input) => {
6
+ const parts = await splitFunction(input);
7
+ return parts.map((text) => ({
8
+ ...input,
9
+ text,
10
+ }));
11
+ }));
12
+ return pageChunks.flat();
13
+ }
14
+ exports.splitTextChunks = splitTextChunks;
@@ -0,0 +1,3 @@
1
+ import { TextChunk } from "../TextChunk.js";
2
+ import { SplitFunction } from "./SplitFunction.js";
3
+ export declare function splitTextChunks<CHUNK extends TextChunk>(splitFunction: SplitFunction, inputs: CHUNK[]): Promise<CHUNK[]>;
@@ -0,0 +1,10 @@
1
+ export async function splitTextChunks(splitFunction, inputs) {
2
+ const pageChunks = await Promise.all(inputs.map(async (input) => {
3
+ const parts = await splitFunction(input);
4
+ return parts.map((text) => ({
5
+ ...input,
6
+ text,
7
+ }));
8
+ }));
9
+ return pageChunks.flat();
10
+ }
@@ -5,7 +5,7 @@ const nanoid_1 = require("nanoid");
5
5
  const embedText_js_1 = require("../model-function/embed-text/embedText.cjs");
6
6
  async function upsertTextChunks({ vectorIndex, embeddingModel, generateId = nanoid_1.nanoid, chunks, ids, }, options) {
7
7
  // many embedding models support bulk embedding, so we first embed all texts:
8
- const { embeddings } = await (0, embedText_js_1.embedTexts)(embeddingModel, chunks.map((chunk) => chunk.content), options);
8
+ const { embeddings } = await (0, embedText_js_1.embedTexts)(embeddingModel, chunks.map((chunk) => chunk.text), options);
9
9
  await vectorIndex.upsertMany(chunks.map((chunk, i) => ({
10
10
  id: ids?.[i] ?? generateId(),
11
11
  vector: embeddings[i],
@@ -1,7 +1,7 @@
1
1
  import { FunctionOptions } from "../model-function/FunctionOptions.js";
2
2
  import { TextEmbeddingModel, TextEmbeddingModelSettings } from "../model-function/embed-text/TextEmbeddingModel.js";
3
- import { TextChunk } from "../text-chunk/TextChunk.js";
4
- import { VectorIndex } from "./VectorIndex.js";
3
+ import { TextChunk } from "./TextChunk.js";
4
+ import { VectorIndex } from "../vector-index/VectorIndex.js";
5
5
  export declare function upsertTextChunks<CHUNK extends TextChunk, SETTINGS extends TextEmbeddingModelSettings>({ vectorIndex, embeddingModel, generateId, chunks, ids, }: {
6
6
  vectorIndex: VectorIndex<CHUNK, unknown>;
7
7
  embeddingModel: TextEmbeddingModel<unknown, SETTINGS>;
@@ -2,7 +2,7 @@ import { nanoid as createId } from "nanoid";
2
2
  import { embedTexts } from "../model-function/embed-text/embedText.js";
3
3
  export async function upsertTextChunks({ vectorIndex, embeddingModel, generateId = createId, chunks, ids, }, options) {
4
4
  // many embedding models support bulk embedding, so we first embed all texts:
5
- const { embeddings } = await embedTexts(embeddingModel, chunks.map((chunk) => chunk.content), options);
5
+ const { embeddings } = await embedTexts(embeddingModel, chunks.map((chunk) => chunk.text), options);
6
6
  await vectorIndex.upsertMany(chunks.map((chunk, i) => ({
7
7
  id: ids?.[i] ?? generateId(),
8
8
  vector: embeddings[i],
@@ -15,8 +15,5 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
17
  __exportStar(require("./VectorIndex.cjs"), exports);
18
- __exportStar(require("./VectorIndexSimilarTextChunkRetriever.cjs"), exports);
19
- __exportStar(require("./VectorIndexTextChunkStore.cjs"), exports);
20
18
  __exportStar(require("./memory/MemoryVectorIndex.cjs"), exports);
21
19
  __exportStar(require("./pinecone/PineconeVectorIndex.cjs"), exports);
22
- __exportStar(require("./upsertTextChunks.cjs"), exports);
@@ -1,6 +1,3 @@
1
1
  export * from "./VectorIndex.js";
2
- export * from "./VectorIndexSimilarTextChunkRetriever.js";
3
- export * from "./VectorIndexTextChunkStore.js";
4
2
  export * from "./memory/MemoryVectorIndex.js";
5
3
  export * from "./pinecone/PineconeVectorIndex.js";
6
- export * from "./upsertTextChunks.js";
@@ -1,6 +1,3 @@
1
1
  export * from "./VectorIndex.js";
2
- export * from "./VectorIndexSimilarTextChunkRetriever.js";
3
- export * from "./VectorIndexTextChunkStore.js";
4
2
  export * from "./memory/MemoryVectorIndex.js";
5
3
  export * from "./pinecone/PineconeVectorIndex.js";
6
- export * from "./upsertTextChunks.js";
@@ -1,77 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.VectorIndexTextChunkStore = void 0;
4
- const nanoid_1 = require("nanoid");
5
- const embedText_js_1 = require("../model-function/embed-text/embedText.cjs");
6
- class VectorIndexTextChunkStore {
7
- constructor({ index, generateId = nanoid_1.nanoid, embeddingModel, queryFunctionId, upsertFunctionId, }) {
8
- Object.defineProperty(this, "_index", {
9
- enumerable: true,
10
- configurable: true,
11
- writable: true,
12
- value: void 0
13
- });
14
- Object.defineProperty(this, "generateId", {
15
- enumerable: true,
16
- configurable: true,
17
- writable: true,
18
- value: void 0
19
- });
20
- Object.defineProperty(this, "embeddingModel", {
21
- enumerable: true,
22
- configurable: true,
23
- writable: true,
24
- value: void 0
25
- });
26
- Object.defineProperty(this, "queryFunctionId", {
27
- enumerable: true,
28
- configurable: true,
29
- writable: true,
30
- value: void 0
31
- });
32
- Object.defineProperty(this, "upsertFunctionId", {
33
- enumerable: true,
34
- configurable: true,
35
- writable: true,
36
- value: void 0
37
- });
38
- this._index = index;
39
- this.generateId = generateId;
40
- this.embeddingModel = embeddingModel;
41
- this.queryFunctionId = queryFunctionId;
42
- this.upsertFunctionId = upsertFunctionId;
43
- }
44
- async upsertChunk({ id = this.generateId(), chunk, }, options) {
45
- this.upsertManyChunks({
46
- ids: [id],
47
- chunks: [chunk],
48
- }, options);
49
- }
50
- async upsertManyChunks({ ids, chunks, }, options) {
51
- const { embeddings } = await (0, embedText_js_1.embedTexts)(this.embeddingModel, chunks.map((chunk) => chunk.content), {
52
- functionId: this.upsertFunctionId,
53
- run: options?.run,
54
- });
55
- this._index.upsertMany(embeddings.map((embedding, i) => ({
56
- id: ids?.[i] ?? this.generateId(),
57
- vector: embedding,
58
- data: chunks[i],
59
- })));
60
- }
61
- async retrieveSimilarTextChunks(queryText, options) {
62
- const { embedding } = await (0, embedText_js_1.embedText)(this.embeddingModel, queryText, {
63
- functionId: this.queryFunctionId,
64
- run: options?.run,
65
- });
66
- const queryResult = await this._index.queryByVector({
67
- queryVector: embedding,
68
- maxResults: 1,
69
- similarityThreshold: undefined,
70
- });
71
- return queryResult.map((item) => item.data);
72
- }
73
- get index() {
74
- return this._index.asIndex();
75
- }
76
- }
77
- exports.VectorIndexTextChunkStore = VectorIndexTextChunkStore;
@@ -1,35 +0,0 @@
1
- import { TextEmbeddingModel, TextEmbeddingModelSettings } from "../model-function/embed-text/TextEmbeddingModel.js";
2
- import { Run } from "../run/Run.js";
3
- import { TextChunk } from "../text-chunk/TextChunk.js";
4
- import { TextChunkRetrieverSettings } from "../text-chunk/retrieve-text-chunks/TextChunkRetriever.js";
5
- import { VectorIndex } from "./VectorIndex.js";
6
- import { FunctionOptions } from "../model-function/FunctionOptions.js";
7
- export declare class VectorIndexTextChunkStore<CHUNK extends TextChunk, INDEX, MODEL extends TextEmbeddingModel<unknown, TextEmbeddingModelSettings>> {
8
- private readonly _index;
9
- private readonly generateId;
10
- private readonly embeddingModel;
11
- private readonly queryFunctionId?;
12
- private readonly upsertFunctionId?;
13
- constructor({ index, generateId, embeddingModel, queryFunctionId, upsertFunctionId, }: {
14
- index: VectorIndex<CHUNK, INDEX>;
15
- generateId?: () => string;
16
- embeddingModel: MODEL;
17
- queryFunctionId?: string;
18
- upsertFunctionId?: string;
19
- });
20
- upsertChunk({ id, chunk, }: {
21
- id?: string;
22
- keyText: string;
23
- chunk: CHUNK;
24
- }, options?: {
25
- run?: Run;
26
- }): Promise<void>;
27
- upsertManyChunks({ ids, chunks, }: {
28
- ids?: Array<string | undefined>;
29
- chunks: CHUNK[];
30
- }, options?: {
31
- run?: Run;
32
- }): Promise<void>;
33
- retrieveSimilarTextChunks(queryText: string, options?: FunctionOptions<TextChunkRetrieverSettings> | undefined): Promise<CHUNK[]>;
34
- get index(): INDEX;
35
- }
@@ -1,73 +0,0 @@
1
- import { nanoid as createId } from "nanoid";
2
- import { embedText, embedTexts, } from "../model-function/embed-text/embedText.js";
3
- export class VectorIndexTextChunkStore {
4
- constructor({ index, generateId = createId, embeddingModel, queryFunctionId, upsertFunctionId, }) {
5
- Object.defineProperty(this, "_index", {
6
- enumerable: true,
7
- configurable: true,
8
- writable: true,
9
- value: void 0
10
- });
11
- Object.defineProperty(this, "generateId", {
12
- enumerable: true,
13
- configurable: true,
14
- writable: true,
15
- value: void 0
16
- });
17
- Object.defineProperty(this, "embeddingModel", {
18
- enumerable: true,
19
- configurable: true,
20
- writable: true,
21
- value: void 0
22
- });
23
- Object.defineProperty(this, "queryFunctionId", {
24
- enumerable: true,
25
- configurable: true,
26
- writable: true,
27
- value: void 0
28
- });
29
- Object.defineProperty(this, "upsertFunctionId", {
30
- enumerable: true,
31
- configurable: true,
32
- writable: true,
33
- value: void 0
34
- });
35
- this._index = index;
36
- this.generateId = generateId;
37
- this.embeddingModel = embeddingModel;
38
- this.queryFunctionId = queryFunctionId;
39
- this.upsertFunctionId = upsertFunctionId;
40
- }
41
- async upsertChunk({ id = this.generateId(), chunk, }, options) {
42
- this.upsertManyChunks({
43
- ids: [id],
44
- chunks: [chunk],
45
- }, options);
46
- }
47
- async upsertManyChunks({ ids, chunks, }, options) {
48
- const { embeddings } = await embedTexts(this.embeddingModel, chunks.map((chunk) => chunk.content), {
49
- functionId: this.upsertFunctionId,
50
- run: options?.run,
51
- });
52
- this._index.upsertMany(embeddings.map((embedding, i) => ({
53
- id: ids?.[i] ?? this.generateId(),
54
- vector: embedding,
55
- data: chunks[i],
56
- })));
57
- }
58
- async retrieveSimilarTextChunks(queryText, options) {
59
- const { embedding } = await embedText(this.embeddingModel, queryText, {
60
- functionId: this.queryFunctionId,
61
- run: options?.run,
62
- });
63
- const queryResult = await this._index.queryByVector({
64
- queryVector: embedding,
65
- maxResults: 1,
66
- similarityThreshold: undefined,
67
- });
68
- return queryResult.map((item) => item.data);
69
- }
70
- get index() {
71
- return this._index.asIndex();
72
- }
73
- }