@grepr/cli 1.1.4 → 1.3.0-4e666db
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/dist/commands/base-command.d.ts +3 -3
- package/build/dist/commands/base-command.d.ts.map +1 -1
- package/build/dist/commands/base-command.js +1 -7
- package/build/dist/commands/base-command.js.map +1 -1
- package/build/dist/commands/config-command.d.ts +5 -9
- package/build/dist/commands/config-command.d.ts.map +1 -1
- package/build/dist/commands/config-command.js +32 -4
- package/build/dist/commands/config-command.js.map +1 -1
- package/build/dist/commands/crud-command.d.ts +11 -18
- package/build/dist/commands/crud-command.d.ts.map +1 -1
- package/build/dist/commands/crud-command.js +27 -11
- package/build/dist/commands/crud-command.js.map +1 -1
- package/build/dist/commands/dataset-command.d.ts +8 -13
- package/build/dist/commands/dataset-command.d.ts.map +1 -1
- package/build/dist/commands/dataset-command.js.map +1 -1
- package/build/dist/commands/docs-command.d.ts +87 -0
- package/build/dist/commands/docs-command.d.ts.map +1 -0
- package/build/dist/commands/docs-command.js +164 -0
- package/build/dist/commands/docs-command.js.map +1 -0
- package/build/dist/commands/docs-get-command.d.ts +53 -0
- package/build/dist/commands/docs-get-command.d.ts.map +1 -0
- package/build/dist/commands/docs-get-command.js +75 -0
- package/build/dist/commands/docs-get-command.js.map +1 -0
- package/build/dist/commands/grok-command.d.ts +71 -0
- package/build/dist/commands/grok-command.d.ts.map +1 -0
- package/build/dist/commands/grok-command.js +258 -0
- package/build/dist/commands/grok-command.js.map +1 -0
- package/build/dist/commands/integration-command.d.ts +5 -7
- package/build/dist/commands/integration-command.d.ts.map +1 -1
- package/build/dist/commands/integration-command.js +4 -4
- package/build/dist/commands/integration-command.js.map +1 -1
- package/build/dist/commands/job-command.d.ts +7 -43
- package/build/dist/commands/job-command.d.ts.map +1 -1
- package/build/dist/commands/job-command.js +5 -3
- package/build/dist/commands/job-command.js.map +1 -1
- package/build/dist/commands/job-to-test-command.d.ts +74 -0
- package/build/dist/commands/job-to-test-command.d.ts.map +1 -0
- package/build/dist/commands/job-to-test-command.js +159 -0
- package/build/dist/commands/job-to-test-command.js.map +1 -0
- package/build/dist/commands/list-command.d.ts +5 -7
- package/build/dist/commands/list-command.d.ts.map +1 -1
- package/build/dist/commands/list-command.js +20 -4
- package/build/dist/commands/list-command.js.map +1 -1
- package/build/dist/commands/query-command.d.ts +3 -8
- package/build/dist/commands/query-command.d.ts.map +1 -1
- package/build/dist/commands/query-command.js +34 -14
- package/build/dist/commands/query-command.js.map +1 -1
- package/build/dist/docs-index/catalog.json +1 -0
- package/build/dist/docs-index/index.json +1 -0
- package/build/dist/grepr.js +43 -7
- package/build/dist/grepr.js.map +1 -1
- package/build/dist/lib/api-client-factory.d.ts +2 -2
- package/build/dist/lib/api-client-factory.d.ts.map +1 -1
- package/build/dist/lib/api-client-factory.js +1 -1
- package/build/dist/lib/api-client-factory.js.map +1 -1
- package/build/dist/lib/auth.d.ts +17 -9
- package/build/dist/lib/auth.d.ts.map +1 -1
- package/build/dist/lib/auth.js +53 -14
- package/build/dist/lib/auth.js.map +1 -1
- package/build/dist/lib/command-registry.d.ts +3 -2
- package/build/dist/lib/command-registry.d.ts.map +1 -1
- package/build/dist/lib/command-registry.js.map +1 -1
- package/build/dist/lib/config.d.ts +15 -0
- package/build/dist/lib/config.d.ts.map +1 -1
- package/build/dist/lib/config.js +45 -11
- package/build/dist/lib/config.js.map +1 -1
- package/build/dist/lib/docs-search.d.ts +154 -0
- package/build/dist/lib/docs-search.d.ts.map +1 -0
- package/build/dist/lib/docs-search.js +208 -0
- package/build/dist/lib/docs-search.js.map +1 -0
- package/build/dist/lib/grepr-api-client.d.ts +33 -193
- package/build/dist/lib/grepr-api-client.d.ts.map +1 -1
- package/build/dist/lib/grepr-api-client.js +58 -38
- package/build/dist/lib/grepr-api-client.js.map +1 -1
- package/build/dist/lib/job-graph-transformer.d.ts +89 -0
- package/build/dist/lib/job-graph-transformer.d.ts.map +1 -0
- package/build/dist/lib/job-graph-transformer.js +497 -0
- package/build/dist/lib/job-graph-transformer.js.map +1 -0
- package/build/dist/lib/job-graph-utils.d.ts +50 -0
- package/build/dist/lib/job-graph-utils.d.ts.map +1 -0
- package/build/dist/lib/job-graph-utils.js +84 -0
- package/build/dist/lib/job-graph-utils.js.map +1 -0
- package/build/dist/lib/json-formatter.d.ts +1 -0
- package/build/dist/lib/json-formatter.d.ts.map +1 -1
- package/build/dist/lib/json-formatter.js +28 -16
- package/build/dist/lib/json-formatter.js.map +1 -1
- package/build/dist/lib/parser.d.ts.map +1 -1
- package/build/dist/lib/parser.js +3 -6
- package/build/dist/lib/parser.js.map +1 -1
- package/build/dist/lib/streaming-job-executor.d.ts +3 -3
- package/build/dist/lib/streaming-job-executor.d.ts.map +1 -1
- package/build/dist/lib/streaming-job-executor.js +54 -21
- package/build/dist/lib/streaming-job-executor.js.map +1 -1
- package/build/dist/lib/time-utils.js +1 -1
- package/build/dist/lib/time-utils.js.map +1 -1
- package/build/dist/lib/transformers-embeddings.d.ts +76 -0
- package/build/dist/lib/transformers-embeddings.d.ts.map +1 -0
- package/build/dist/lib/transformers-embeddings.js +109 -0
- package/build/dist/lib/transformers-embeddings.js.map +1 -0
- package/build/dist/openapi/openApiTypes.d.ts +6926 -3458
- package/build/dist/openapi/openApiTypes.d.ts.map +1 -1
- package/build/dist/openapi/openApiTypes.js +372 -85
- package/build/dist/openapi/openApiTypes.js.map +1 -1
- package/build/dist/types.d.ts +48 -78
- package/build/dist/types.d.ts.map +1 -1
- package/build/dist/types.js +73 -0
- package/build/dist/types.js.map +1 -1
- package/package.json +41 -11
|
@@ -73,7 +73,7 @@ export function parseSinceOption(since) {
|
|
|
73
73
|
const sinceTime = new Date(now.getTime() - milliseconds);
|
|
74
74
|
return sinceTime.toISOString();
|
|
75
75
|
}
|
|
76
|
-
catch
|
|
76
|
+
catch {
|
|
77
77
|
throw new Error(`Invalid duration format: ${since}. Use ISO 8601 duration format like PT5H, P1D, PT30M, P1Y6M, P2W`);
|
|
78
78
|
}
|
|
79
79
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"time-utils.js","sourceRoot":"","sources":["../../../src/main/typescript/lib/time-utils.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,QAAQ,MAAM,0BAA0B,CAAC;AAEhD,uDAAuD;AACvD,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;AAEvB;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,2BAA2B,CAAC,QAAgB;IAC1D,0CAA0C;IAC1C,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,2EAA2E,CAAC,CAAC;IACnI,CAAC;IAED,IAAI,CAAC;QACH,2CAA2C;QAC3C,MAAM,MAAM,GAAG,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAExC,0BAA0B;QAC1B,MAAM,iBAAiB,GAAG,MAAM,CAAC,cAAc,EAAE,CAAC;QAElD,4CAA4C;QAC5C,IAAI,iBAAiB,KAAK,CAAC,EAAE,CAAC;YAC5B,kEAAkE;YAClE,IAAI,QAAQ,CAAC,KAAK,CAAC,2BAA2B,CAAC,EAAE,CAAC;gBAChD,MAAM,IAAI,KAAK,CAAC,uCAAuC,QAAQ,EAAE,CAAC,CAAC;YACrE,CAAC;YACD,0EAA0E;YAC1E,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,2EAA2E,CAAC,CAAC;QACnI,CAAC;QAED,IAAI,CAAC,QAAQ,CAAC,iBAAiB,CAAC,IAAI,iBAAiB,GAAG,CAAC,EAAE,CAAC;YAC1D,MAAM,IAAI,KAAK,CAAC,qBAAqB,QAAQ,EAAE,CAAC,CAAC;QACnD,CAAC;QAED,OAAO,iBAAiB,CAAC;IAC3B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,gDAAgD;QAChD,IAAI,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,oCAAoC,CAAC,EAAE,CAAC;YAC3F,MAAM,KAAK,CAAC;QACd,CAAC;QACD,oCAAoC;QACpC,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,2EAA2E,CAAC,CAAC;IACnI,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAAa;IAC5C,8CAA8C;IAC9C,IAAI,KAAK,CAAC,KAAK,CAAC,sCAAsC,CAAC,EAAE,CAAC;QACxD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,qDAAqD;IACrD,IAAI,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,MAAM,YAAY,GAAG,2BAA2B,CAAC,KAAK,CAAC,CAAC;YACxD,MAAM,SAAS,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,YAAY,CAAC,CAAC;YACzD,OAAO,SAAS,CAAC,WAAW,EAAE,CAAC;QACjC,CAAC;QAAC,
|
|
1
|
+
{"version":3,"file":"time-utils.js","sourceRoot":"","sources":["../../../src/main/typescript/lib/time-utils.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,QAAQ,MAAM,0BAA0B,CAAC;AAEhD,uDAAuD;AACvD,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;AAEvB;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,2BAA2B,CAAC,QAAgB;IAC1D,0CAA0C;IAC1C,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,2EAA2E,CAAC,CAAC;IACnI,CAAC;IAED,IAAI,CAAC;QACH,2CAA2C;QAC3C,MAAM,MAAM,GAAG,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAExC,0BAA0B;QAC1B,MAAM,iBAAiB,GAAG,MAAM,CAAC,cAAc,EAAE,CAAC;QAElD,4CAA4C;QAC5C,IAAI,iBAAiB,KAAK,CAAC,EAAE,CAAC;YAC5B,kEAAkE;YAClE,IAAI,QAAQ,CAAC,KAAK,CAAC,2BAA2B,CAAC,EAAE,CAAC;gBAChD,MAAM,IAAI,KAAK,CAAC,uCAAuC,QAAQ,EAAE,CAAC,CAAC;YACrE,CAAC;YACD,0EAA0E;YAC1E,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,2EAA2E,CAAC,CAAC;QACnI,CAAC;QAED,IAAI,CAAC,QAAQ,CAAC,iBAAiB,CAAC,IAAI,iBAAiB,GAAG,CAAC,EAAE,CAAC;YAC1D,MAAM,IAAI,KAAK,CAAC,qBAAqB,QAAQ,EAAE,CAAC,CAAC;QACnD,CAAC;QAED,OAAO,iBAAiB,CAAC;IAC3B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,gDAAgD;QAChD,IAAI,KAAK,YAAY,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,oCAAoC,CAAC,EAAE,CAAC;YAC3F,MAAM,KAAK,CAAC;QACd,CAAC;QACD,oCAAoC;QACpC,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,2EAA2E,CAAC,CAAC;IACnI,CAAC;AACH,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAAa;IAC5C,8CAA8C;IAC9C,IAAI,KAAK,CAAC,KAAK,CAAC,sCAAsC,CAAC,EAAE,CAAC;QACxD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,qDAAqD;IACrD,IAAI,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,MAAM,YAAY,GAAG,2BAA2B,CAAC,KAAK,CAAC,CAAC;YACxD,MAAM,SAAS,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,YAAY,CAAC,CAAC;YACzD,OAAO,SAAS,CAAC,WAAW,EAAE,CAAC;QACjC,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CAAC,4BAA4B,KAAK,kEAAkE,CAAC,CAAC;QACvH,CAAC;IACH,CAAC;IAED,iEAAiE;IACjE,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACjC,IAAI,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC;QAClB,MAAM,IAAI,KAAK,CAAC,wBAAwB,KAAK,iEAAiE,CAAC,CAAC;IAClH,CAAC;IAED,OAAO,IAAI,IAAI,CAAC,MAAM,CAAC,C
AAC,WAAW,EAAE,CAAC;AACxC,CAAC"}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { EmbeddingsModel, EmbeddingsResponse } from 'vectra';
|
|
2
|
+
/**
|
|
3
|
+
* Embeddings provider for Vectra using Transformers.js.
|
|
4
|
+
*
|
|
5
|
+
* This class implements Vectra's EmbeddingsModel interface using Transformers.js,
|
|
6
|
+
* a pure JavaScript implementation of transformer models. This approach provides:
|
|
7
|
+
* - No external services to run (no Docker, no server startup)
|
|
8
|
+
* - Automatic model caching (~90MB downloaded once, reused thereafter)
|
|
9
|
+
* - Cross-platform compatibility (works on any Node.js environment)
|
|
10
|
+
*
|
|
11
|
+
* The model generates dense vector embeddings for text, enabling semantic similarity
|
|
12
|
+
* search. The embeddings are generated using mean pooling and normalization, which
|
|
13
|
+
* is standard for sentence similarity tasks.
|
|
14
|
+
*
|
|
15
|
+
* Design decision: We chose Transformers.js over alternatives like Ollama because:
|
|
16
|
+
* 1. No runtime server dependency (Ollama requires Docker container)
|
|
17
|
+
* 2. Faster startup time (no container initialization)
|
|
18
|
+
* 3. Smaller distribution size (model is downloaded and cached by Transformers.js on first use, not bundled)
|
|
19
|
+
* 4. Better integration with Node.js ecosystem
|
|
20
|
+
*/
|
|
21
|
+
export declare class TransformersEmbeddings implements EmbeddingsModel {
|
|
22
|
+
private pipeline;
|
|
23
|
+
/**
|
|
24
|
+
* Maximum number of tokens the model can process per input.
|
|
25
|
+
* This is exposed publicly so Vectra can use it for chunking decisions.
|
|
26
|
+
*/
|
|
27
|
+
readonly maxTokens: number;
|
|
28
|
+
private readonly modelName;
|
|
29
|
+
/**
|
|
30
|
+
* Creates a new TransformersEmbeddings instance.
|
|
31
|
+
*
|
|
32
|
+
* @param modelName - HuggingFace model identifier (e.g., 'Xenova/all-MiniLM-L6-v2')
|
|
33
|
+
* Default model produces 384-dimensional embeddings and is ~90MB.
|
|
34
|
+
* See https://huggingface.co/models?library=transformers.js&pipeline_tag=feature-extraction
|
|
35
|
+
* for other compatible models.
|
|
36
|
+
* @param maxTokens - Maximum tokens per input. Should match model's configuration.
|
|
37
|
+
* Used by Vectra for automatic document chunking.
|
|
38
|
+
*/
|
|
39
|
+
constructor(modelName?: string, maxTokens?: number);
|
|
40
|
+
/**
|
|
41
|
+
* Initializes the embedding model pipeline.
|
|
42
|
+
*
|
|
43
|
+
* This method is lazy - it only loads the model when first called. The model
|
|
44
|
+
* is downloaded from HuggingFace on first run and cached locally by Transformers.js
|
|
45
|
+
* in ~/.cache/huggingface/ (or platform equivalent).
|
|
46
|
+
*
|
|
47
|
+
* Subsequent calls to this method are no-ops if the pipeline is already initialized.
|
|
48
|
+
*
|
|
49
|
+
* @throws Error if model download or initialization fails (e.g., network issues,
|
|
50
|
+
* unsupported model architecture)
|
|
51
|
+
*/
|
|
52
|
+
initialize(): Promise<void>;
|
|
53
|
+
/**
|
|
54
|
+
* Generates vector embeddings for one or more text inputs.
|
|
55
|
+
*
|
|
56
|
+
* This is the main method called by Vectra to generate embeddings for documents
|
|
57
|
+
* and queries. The implementation:
|
|
58
|
+
* 1. Initializes the pipeline if not already initialized (lazy loading)
|
|
59
|
+
* 2. Normalizes input to always be an array
|
|
60
|
+
* 3. Generates embeddings for each input sequentially
|
|
61
|
+
* 4. Returns embeddings as a 2D array of numbers
|
|
62
|
+
*
|
|
63
|
+
* Important: We use mean pooling and normalization for the embeddings. This is
|
|
64
|
+
* standard practice for sentence similarity tasks:
|
|
65
|
+
* - Mean pooling: Averages token embeddings to get a single vector per sentence
|
|
66
|
+
* - Normalization: Scales vectors to unit length, enabling cosine similarity
|
|
67
|
+
* via simple dot product
|
|
68
|
+
*
|
|
69
|
+
* @param inputs - Single text string or array of strings to embed
|
|
70
|
+
* @returns Promise resolving to EmbeddingsResponse with either:
|
|
71
|
+
* - success status and array of embedding vectors
|
|
72
|
+
* - error status and error message
|
|
73
|
+
*/
|
|
74
|
+
createEmbeddings(inputs: string | string[]): Promise<EmbeddingsResponse>;
|
|
75
|
+
}
|
|
76
|
+
//# sourceMappingURL=transformers-embeddings.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transformers-embeddings.d.ts","sourceRoot":"","sources":["../../../src/main/typescript/lib/transformers-embeddings.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,eAAe,EAAE,kBAAkB,EAAE,MAAM,QAAQ,CAAC;AAE7D;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,sBAAuB,YAAW,eAAe;IAC5D,OAAO,CAAC,QAAQ,CAA0C;IAE1D;;;OAGG;IACH,SAAgB,SAAS,EAAE,MAAM,CAAC;IAElC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IAEnC;;;;;;;;;OASG;gBACS,SAAS,SAA4B,EAAE,SAAS,SAAM;IAKlE;;;;;;;;;;;OAWG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAMjC;;;;;;;;;;;;;;;;;;;;OAoBG;IACG,gBAAgB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,OAAO,CAAC,kBAAkB,CAAC;CA8B/E"}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import { pipeline } from '@xenova/transformers';
|
|
2
|
+
/**
|
|
3
|
+
* Embeddings provider for Vectra using Transformers.js.
|
|
4
|
+
*
|
|
5
|
+
* This class implements Vectra's EmbeddingsModel interface using Transformers.js,
|
|
6
|
+
* a pure JavaScript implementation of transformer models. This approach provides:
|
|
7
|
+
* - No external services to run (no Docker, no server startup)
|
|
8
|
+
* - Automatic model caching (~90MB downloaded once, reused thereafter)
|
|
9
|
+
* - Cross-platform compatibility (works on any Node.js environment)
|
|
10
|
+
*
|
|
11
|
+
* The model generates dense vector embeddings for text, enabling semantic similarity
|
|
12
|
+
* search. The embeddings are generated using mean pooling and normalization, which
|
|
13
|
+
* is standard for sentence similarity tasks.
|
|
14
|
+
*
|
|
15
|
+
* Design decision: We chose Transformers.js over alternatives like Ollama because:
|
|
16
|
+
* 1. No runtime server dependency (Ollama requires Docker container)
|
|
17
|
+
* 2. Faster startup time (no container initialization)
|
|
18
|
+
* 3. Smaller distribution size (model is downloaded and cached by Transformers.js on first use, not bundled)
|
|
19
|
+
* 4. Better integration with Node.js ecosystem
|
|
20
|
+
*/
|
|
21
|
+
export class TransformersEmbeddings {
    /**
     * Loaded feature-extraction pipeline, or null until a load has completed.
     */
    pipeline = null;
    /**
     * Maximum number of tokens the model can process per input.
     * This is exposed publicly so Vectra can use it for chunking decisions.
     */
    maxTokens;
    /**
     * HuggingFace model identifier handed to the pipeline factory.
     */
    modelName;
    /**
     * Promise for a model load that is currently in flight, or null.
     * Concurrent initialize() callers await this same promise so the model is
     * only loaded once even when several embedding requests start together.
     */
    #loading = null;
    /**
     * Creates a new TransformersEmbeddings instance. The model itself is not
     * loaded here; loading is deferred to initialize().
     *
     * @param modelName - HuggingFace model identifier (e.g., 'Xenova/all-MiniLM-L6-v2').
     *                    See https://huggingface.co/models?library=transformers.js&pipeline_tag=feature-extraction
     *                    for other compatible models.
     * @param maxTokens - Maximum tokens per input. Should match the model's configuration.
     */
    constructor(modelName = 'Xenova/all-MiniLM-L6-v2', maxTokens = 512) {
        this.modelName = modelName;
        this.maxTokens = maxTokens;
    }
    /**
     * Initializes the embedding model pipeline.
     *
     * The pipeline is created on the first call and stored on the instance.
     * Calls made once the pipeline is stored return immediately, and calls made
     * while a load is still running wait for that load instead of starting
     * another one.
     *
     * If the load fails, the in-flight promise is discarded so that a later call
     * can try again.
     *
     * @throws Whatever the pipeline factory rejects with (e.g., network issues,
     *         unsupported model architecture)
     */
    async initialize() {
        if (this.pipeline) {
            return;
        }
        // Reuse the load another caller already started, otherwise start one.
        const loading = (this.#loading ??= pipeline('feature-extraction', this.modelName));
        try {
            this.pipeline = await loading;
        }
        finally {
            // Only clear the slot if it still holds the load we were waiting on.
            if (this.#loading === loading) {
                this.#loading = null;
            }
        }
    }
    /**
     * Generates vector embeddings for one or more text inputs.
     *
     * Steps:
     * 1. Initializes the pipeline if it is not loaded yet (lazy loading)
     * 2. Normalizes the input to an array
     * 3. Runs the pipeline on each input, one after another, with mean pooling
     *    and normalization enabled
     * 4. Converts each result's `data` into a plain array of numbers
     *
     * This method never rejects: any failure is reported through the returned
     * object instead.
     *
     * @param inputs - Single text string or array of strings to embed
     * @returns Promise resolving to either
     *          `{ status: 'success', output: number[][] }` or
     *          `{ status: 'error', message: string }`
     */
    async createEmbeddings(inputs) {
        try {
            if (!this.pipeline) {
                await this.initialize();
            }
            if (!this.pipeline) {
                throw new Error('Pipeline not initialized');
            }
            const inputArray = Array.isArray(inputs) ? inputs : [inputs];
            const embeddings = [];
            for (const text of inputArray) {
                const output = await this.pipeline(text, { pooling: 'mean', normalize: true });
                // output.data is array-like (typed array); copy it into a plain array.
                embeddings.push(Array.from(output.data));
            }
            return {
                status: 'success',
                output: embeddings
            };
        }
        catch (error) {
            return {
                status: 'error',
                message: error instanceof Error ? error.message : 'Unknown error generating embeddings'
            };
        }
    }
}
|
|
109
|
+
//# sourceMappingURL=transformers-embeddings.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"transformers-embeddings.js","sourceRoot":"","sources":["../../../src/main/typescript/lib/transformers-embeddings.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAA6B,MAAM,sBAAsB,CAAC;AAG3E;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,OAAO,sBAAsB;IACzB,QAAQ,GAAqC,IAAI,CAAC;IAE1D;;;OAGG;IACa,SAAS,CAAS;IAEjB,SAAS,CAAS;IAEnC;;;;;;;;;OASG;IACH,YAAY,SAAS,GAAG,yBAAyB,EAAE,SAAS,GAAG,GAAG;QAChE,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAC3B,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC7B,CAAC;IAED;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,UAAU;QACd,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,IAAI,CAAC,QAAQ,GAAG,MAAM,QAAQ,CAAC,oBAAoB,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QACvE,CAAC;IACH,CAAC;IAED;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,KAAK,CAAC,gBAAgB,CAAC,MAAyB;QAC9C,IAAI,CAAC;YACH,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACnB,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;YAC1B,CAAC;YAED,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACnB,MAAM,IAAI,KAAK,CAAC,0BAA0B,CAAC,CAAC;YAC9C,CAAC;YAED,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;YAC7D,MAAM,UAAU,GAAe,EAAE,CAAC;YAElC,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;gBAC9B,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;gBAC/E,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAoB,CAAC,CAAC;gBAC1D,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC7B,CAAC;YAED,OAAO;gBACL,MAAM,EAAE,SAAS;gBACjB,MAAM,EAAE,UAAU;aACnB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,MAAM,EAAE,OAAO;gBACf,OAAO,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,qCAAqC;aACxF,CAAC;QACJ,CAAC;IACH,CAAC;CACF"}
|