@anvia/transformers 0.1.4 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -19,7 +19,8 @@ pnpm --filter @anvia/transformers build
|
|
|
19
19
|
## Usage
|
|
20
20
|
|
|
21
21
|
```ts
|
|
22
|
-
import { embedDocuments
|
|
22
|
+
import { embedDocuments } from "@anvia/core/embeddings";
|
|
23
|
+
import { InMemoryVectorStore } from "@anvia/core/vector-store";
|
|
23
24
|
import { createTransformersEmbeddingModel } from "@anvia/transformers";
|
|
24
25
|
|
|
25
26
|
const embeddingModel = await createTransformersEmbeddingModel();
|
package/dist/index.d.ts
CHANGED
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["import type { Embedding, EmbeddingModel } from \"@anvia/core\";\nimport { pipeline as transformersPipeline } from \"@huggingface/transformers\";\n\nexport const DEFAULT_TRANSFORMERS_EMBEDDING_MODEL = \"Xenova/all-MiniLM-L6-v2\";\n\nexport type TransformersPooling = \"mean\" | \"cls\";\n\nexport type TransformersFeatureExtractionPipeline = (\n texts: string[],\n options: { pooling: TransformersPooling; normalize: boolean },\n) => Promise<{ tolist(): unknown }>;\n\nexport type TransformersEmbeddingModelOptions = {\n model?: string | undefined;\n pooling?: TransformersPooling | undefined;\n normalize?: boolean | undefined;\n maxBatchSize?: number | undefined;\n};\n\nexport class TransformersEmbeddingModel implements EmbeddingModel {\n readonly model: string;\n readonly maxBatchSize: number;\n\n private readonly pooling: TransformersPooling;\n private readonly normalize: boolean;\n\n constructor(\n private readonly extractor: TransformersFeatureExtractionPipeline,\n options: TransformersEmbeddingModelOptions = {},\n ) {\n this.model = options.model ?? DEFAULT_TRANSFORMERS_EMBEDDING_MODEL;\n this.pooling = options.pooling ?? \"mean\";\n this.normalize = options.normalize ?? true;\n this.maxBatchSize = Math.max(1, Math.trunc(options.maxBatchSize ?? 16));\n }\n\n static async create(\n options: TransformersEmbeddingModelOptions = {},\n ): Promise<TransformersEmbeddingModel> {\n const model = options.model ?? 
DEFAULT_TRANSFORMERS_EMBEDDING_MODEL;\n const extractor = (await transformersPipeline(\n \"feature-extraction\",\n model,\n )) as TransformersFeatureExtractionPipeline;\n\n return new TransformersEmbeddingModel(extractor, { ...options, model });\n }\n\n async embedTexts(texts: string[]): Promise<Embedding[]> {\n if (texts.length === 0) {\n return [];\n }\n\n const output = await this.extractor(texts, {\n pooling: this.pooling,\n normalize: this.normalize,\n });\n const vectors = parseVectors(output.tolist(), texts.length);\n\n return texts.map((document, index) => ({\n document,\n vector: vectors[index] as number[],\n }));\n }\n}\n\nexport function createTransformersEmbeddingModel(\n options: TransformersEmbeddingModelOptions = {},\n): Promise<TransformersEmbeddingModel> {\n return TransformersEmbeddingModel.create(options);\n}\n\nfunction parseVectors(value: unknown, expectedLength: number): number[][] {\n if (!Array.isArray(value) || value.length !== expectedLength) {\n throw new Error(\n `Transformers embedding model returned ${Array.isArray(value) ? 
value.length : 0} embeddings for ${expectedLength} texts`,\n );\n }\n\n return value.map((vector, index) => {\n if (!Array.isArray(vector) || !vector.every((item) => typeof item === \"number\")) {\n throw new Error(`Transformers embedding model returned an invalid vector at index ${index}`);\n }\n return vector;\n });\n}\n"],"mappings":";AACA,SAAS,YAAY,4BAA4B;AAE1C,IAAM,uCAAuC;AAgB7C,IAAM,6BAAN,MAAM,4BAAqD;AAAA,EAOhE,YACmB,WACjB,UAA6C,CAAC,GAC9C;AAFiB;AAGjB,SAAK,QAAQ,QAAQ,SAAS;AAC9B,SAAK,UAAU,QAAQ,WAAW;AAClC,SAAK,YAAY,QAAQ,aAAa;AACtC,SAAK,eAAe,KAAK,IAAI,GAAG,KAAK,MAAM,QAAQ,gBAAgB,EAAE,CAAC;AAAA,EACxE;AAAA,EAPmB;AAAA,EAPV;AAAA,EACA;AAAA,EAEQ;AAAA,EACA;AAAA,EAYjB,aAAa,OACX,UAA6C,CAAC,GACT;AACrC,UAAM,QAAQ,QAAQ,SAAS;AAC/B,UAAM,YAAa,MAAM;AAAA,MACvB;AAAA,MACA;AAAA,IACF;AAEA,WAAO,IAAI,4BAA2B,WAAW,EAAE,GAAG,SAAS,MAAM,CAAC;AAAA,EACxE;AAAA,EAEA,MAAM,WAAW,OAAuC;AACtD,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,SAAS,MAAM,KAAK,UAAU,OAAO;AAAA,MACzC,SAAS,KAAK;AAAA,MACd,WAAW,KAAK;AAAA,IAClB,CAAC;AACD,UAAM,UAAU,aAAa,OAAO,OAAO,GAAG,MAAM,MAAM;AAE1D,WAAO,MAAM,IAAI,CAAC,UAAU,WAAW;AAAA,MACrC;AAAA,MACA,QAAQ,QAAQ,KAAK;AAAA,IACvB,EAAE;AAAA,EACJ;AACF;AAEO,SAAS,iCACd,UAA6C,CAAC,GACT;AACrC,SAAO,2BAA2B,OAAO,OAAO;AAClD;AAEA,SAAS,aAAa,OAAgB,gBAAoC;AACxE,MAAI,CAAC,MAAM,QAAQ,KAAK,KAAK,MAAM,WAAW,gBAAgB;AAC5D,UAAM,IAAI;AAAA,MACR,yCAAyC,MAAM,QAAQ,KAAK,IAAI,MAAM,SAAS,CAAC,mBAAmB,cAAc;AAAA,IACnH;AAAA,EACF;AAEA,SAAO,MAAM,IAAI,CAAC,QAAQ,UAAU;AAClC,QAAI,CAAC,MAAM,QAAQ,MAAM,KAAK,CAAC,OAAO,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AAC/E,YAAM,IAAI,MAAM,oEAAoE,KAAK,EAAE;AAAA,IAC7F;AACA,WAAO;AAAA,EACT,CAAC;AACH;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["import type { Embedding, EmbeddingModel } from \"@anvia/core/embeddings\";\nimport { pipeline as transformersPipeline } from \"@huggingface/transformers\";\n\nexport const DEFAULT_TRANSFORMERS_EMBEDDING_MODEL = \"Xenova/all-MiniLM-L6-v2\";\n\nexport type TransformersPooling = \"mean\" | \"cls\";\n\nexport type TransformersFeatureExtractionPipeline = (\n texts: string[],\n options: { pooling: TransformersPooling; normalize: boolean },\n) => Promise<{ tolist(): unknown }>;\n\nexport type TransformersEmbeddingModelOptions = {\n model?: string | undefined;\n pooling?: TransformersPooling | undefined;\n normalize?: boolean | undefined;\n maxBatchSize?: number | undefined;\n};\n\nexport class TransformersEmbeddingModel implements EmbeddingModel {\n readonly model: string;\n readonly maxBatchSize: number;\n\n private readonly pooling: TransformersPooling;\n private readonly normalize: boolean;\n\n constructor(\n private readonly extractor: TransformersFeatureExtractionPipeline,\n options: TransformersEmbeddingModelOptions = {},\n ) {\n this.model = options.model ?? DEFAULT_TRANSFORMERS_EMBEDDING_MODEL;\n this.pooling = options.pooling ?? \"mean\";\n this.normalize = options.normalize ?? true;\n this.maxBatchSize = Math.max(1, Math.trunc(options.maxBatchSize ?? 16));\n }\n\n static async create(\n options: TransformersEmbeddingModelOptions = {},\n ): Promise<TransformersEmbeddingModel> {\n const model = options.model ?? 
DEFAULT_TRANSFORMERS_EMBEDDING_MODEL;\n const extractor = (await transformersPipeline(\n \"feature-extraction\",\n model,\n )) as TransformersFeatureExtractionPipeline;\n\n return new TransformersEmbeddingModel(extractor, { ...options, model });\n }\n\n async embedTexts(texts: string[]): Promise<Embedding[]> {\n if (texts.length === 0) {\n return [];\n }\n\n const output = await this.extractor(texts, {\n pooling: this.pooling,\n normalize: this.normalize,\n });\n const vectors = parseVectors(output.tolist(), texts.length);\n\n return texts.map((document, index) => ({\n document,\n vector: vectors[index] as number[],\n }));\n }\n}\n\nexport function createTransformersEmbeddingModel(\n options: TransformersEmbeddingModelOptions = {},\n): Promise<TransformersEmbeddingModel> {\n return TransformersEmbeddingModel.create(options);\n}\n\nfunction parseVectors(value: unknown, expectedLength: number): number[][] {\n if (!Array.isArray(value) || value.length !== expectedLength) {\n throw new Error(\n `Transformers embedding model returned ${Array.isArray(value) ? 
value.length : 0} embeddings for ${expectedLength} texts`,\n );\n }\n\n return value.map((vector, index) => {\n if (!Array.isArray(vector) || !vector.every((item) => typeof item === \"number\")) {\n throw new Error(`Transformers embedding model returned an invalid vector at index ${index}`);\n }\n return vector;\n });\n}\n"],"mappings":";AACA,SAAS,YAAY,4BAA4B;AAE1C,IAAM,uCAAuC;AAgB7C,IAAM,6BAAN,MAAM,4BAAqD;AAAA,EAOhE,YACmB,WACjB,UAA6C,CAAC,GAC9C;AAFiB;AAGjB,SAAK,QAAQ,QAAQ,SAAS;AAC9B,SAAK,UAAU,QAAQ,WAAW;AAClC,SAAK,YAAY,QAAQ,aAAa;AACtC,SAAK,eAAe,KAAK,IAAI,GAAG,KAAK,MAAM,QAAQ,gBAAgB,EAAE,CAAC;AAAA,EACxE;AAAA,EAPmB;AAAA,EAPV;AAAA,EACA;AAAA,EAEQ;AAAA,EACA;AAAA,EAYjB,aAAa,OACX,UAA6C,CAAC,GACT;AACrC,UAAM,QAAQ,QAAQ,SAAS;AAC/B,UAAM,YAAa,MAAM;AAAA,MACvB;AAAA,MACA;AAAA,IACF;AAEA,WAAO,IAAI,4BAA2B,WAAW,EAAE,GAAG,SAAS,MAAM,CAAC;AAAA,EACxE;AAAA,EAEA,MAAM,WAAW,OAAuC;AACtD,QAAI,MAAM,WAAW,GAAG;AACtB,aAAO,CAAC;AAAA,IACV;AAEA,UAAM,SAAS,MAAM,KAAK,UAAU,OAAO;AAAA,MACzC,SAAS,KAAK;AAAA,MACd,WAAW,KAAK;AAAA,IAClB,CAAC;AACD,UAAM,UAAU,aAAa,OAAO,OAAO,GAAG,MAAM,MAAM;AAE1D,WAAO,MAAM,IAAI,CAAC,UAAU,WAAW;AAAA,MACrC;AAAA,MACA,QAAQ,QAAQ,KAAK;AAAA,IACvB,EAAE;AAAA,EACJ;AACF;AAEO,SAAS,iCACd,UAA6C,CAAC,GACT;AACrC,SAAO,2BAA2B,OAAO,OAAO;AAClD;AAEA,SAAS,aAAa,OAAgB,gBAAoC;AACxE,MAAI,CAAC,MAAM,QAAQ,KAAK,KAAK,MAAM,WAAW,gBAAgB;AAC5D,UAAM,IAAI;AAAA,MACR,yCAAyC,MAAM,QAAQ,KAAK,IAAI,MAAM,SAAS,CAAC,mBAAmB,cAAc;AAAA,IACnH;AAAA,EACF;AAEA,SAAO,MAAM,IAAI,CAAC,QAAQ,UAAU;AAClC,QAAI,CAAC,MAAM,QAAQ,MAAM,KAAK,CAAC,OAAO,MAAM,CAAC,SAAS,OAAO,SAAS,QAAQ,GAAG;AAC/E,YAAM,IAAI,MAAM,oEAAoE,KAAK,EAAE;AAAA,IAC7F;AACA,WAAO;AAAA,EACT,CAAC;AACH;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@anvia/transformers",
|
|
3
|
-
"version": "0.1.4",
|
|
3
|
+
"version": "0.2.1",
|
|
4
4
|
"description": "Transformers.js embedding model adapter for Anvia.",
|
|
5
5
|
"author": "anvia",
|
|
6
6
|
"maintainer": "Indra Zulfi",
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
},
|
|
28
28
|
"dependencies": {
|
|
29
29
|
"@huggingface/transformers": "^4.2.0",
|
|
30
|
-
"@anvia/core": "0.
|
|
30
|
+
"@anvia/core": "0.4.1"
|
|
31
31
|
},
|
|
32
32
|
"devDependencies": {
|
|
33
33
|
"@types/node": "^24.9.1",
|