langchain 0.1.22 → 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/agent.cjs +4 -3
- package/dist/agents/agent.d.ts +4 -4
- package/dist/agents/agent.js +4 -3
- package/dist/agents/executor.cjs +17 -4
- package/dist/agents/executor.d.ts +10 -1
- package/dist/agents/executor.js +17 -4
- package/dist/chains/base.cjs +2 -1
- package/dist/chains/base.js +2 -1
- package/dist/embeddings/fake.cjs +0 -6
- package/dist/embeddings/fake.js +0 -6
- package/dist/load/import_map.cjs +3 -2
- package/dist/load/import_map.d.ts +1 -0
- package/dist/load/import_map.js +1 -0
- package/dist/retrievers/matryoshka_retriever.cjs +148 -0
- package/dist/retrievers/matryoshka_retriever.d.ts +93 -0
- package/dist/retrievers/matryoshka_retriever.js +144 -0
- package/package.json +17 -4
- package/retrievers/matryoshka_retriever.cjs +1 -0
- package/retrievers/matryoshka_retriever.d.cts +1 -0
- package/retrievers/matryoshka_retriever.d.ts +1 -0
- package/retrievers/matryoshka_retriever.js +1 -0
package/dist/agents/agent.cjs
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.Agent = exports.LLMSingleActionAgent = exports.RunnableAgent = exports.BaseMultiActionAgent = exports.BaseSingleActionAgent = exports.BaseAgent = void 0;
|
|
4
4
|
const serializable_1 = require("@langchain/core/load/serializable");
|
|
5
|
+
const runnables_1 = require("@langchain/core/runnables");
|
|
5
6
|
/**
|
|
6
7
|
* Error class for parse errors in LangChain. Contains information about
|
|
7
8
|
* the error message and the output that caused the error.
|
|
@@ -119,12 +120,12 @@ class RunnableAgent extends BaseMultiActionAgent {
|
|
|
119
120
|
this.runnable = fields.runnable;
|
|
120
121
|
this.stop = fields.stop;
|
|
121
122
|
}
|
|
122
|
-
async plan(steps, inputs, callbackManager) {
|
|
123
|
+
async plan(steps, inputs, callbackManager, config) {
|
|
123
124
|
const invokeInput = { ...inputs, steps };
|
|
124
|
-
const output = await this.runnable.invoke(invokeInput, {
|
|
125
|
+
const output = await this.runnable.invoke(invokeInput, (0, runnables_1.patchConfig)(config, {
|
|
125
126
|
callbacks: callbackManager,
|
|
126
127
|
runName: "RunnableAgent",
|
|
127
|
-
});
|
|
128
|
+
}));
|
|
128
129
|
if (isAgentAction(output)) {
|
|
129
130
|
return [output];
|
|
130
131
|
}
|
package/dist/agents/agent.d.ts
CHANGED
|
@@ -6,7 +6,7 @@ import { AgentAction, AgentFinish, AgentStep } from "@langchain/core/agents";
|
|
|
6
6
|
import { BaseMessage } from "@langchain/core/messages";
|
|
7
7
|
import { ChainValues } from "@langchain/core/utils/types";
|
|
8
8
|
import { Serializable } from "@langchain/core/load/serializable";
|
|
9
|
-
import { Runnable } from "@langchain/core/runnables";
|
|
9
|
+
import { Runnable, type RunnableConfig } from "@langchain/core/runnables";
|
|
10
10
|
import { LLMChain } from "../chains/llm_chain.js";
|
|
11
11
|
import { AgentActionOutputParser, AgentInput, RunnableAgentInput, SerializedAgent, StoppingMethod } from "./types.js";
|
|
12
12
|
/**
|
|
@@ -55,7 +55,7 @@ export declare abstract class BaseSingleActionAgent extends BaseAgent {
|
|
|
55
55
|
*
|
|
56
56
|
* @returns Action specifying what tool to use.
|
|
57
57
|
*/
|
|
58
|
-
abstract plan(steps: AgentStep[], inputs: ChainValues, callbackManager?: CallbackManager): Promise<AgentAction | AgentFinish>;
|
|
58
|
+
abstract plan(steps: AgentStep[], inputs: ChainValues, callbackManager?: CallbackManager, config?: RunnableConfig): Promise<AgentAction | AgentFinish>;
|
|
59
59
|
}
|
|
60
60
|
/**
|
|
61
61
|
* Abstract base class for multi-action agents in LangChain. Extends the
|
|
@@ -73,7 +73,7 @@ export declare abstract class BaseMultiActionAgent extends BaseAgent {
|
|
|
73
73
|
*
|
|
74
74
|
* @returns Actions specifying what tools to use.
|
|
75
75
|
*/
|
|
76
|
-
abstract plan(steps: AgentStep[], inputs: ChainValues, callbackManager?: CallbackManager): Promise<AgentAction[] | AgentFinish>;
|
|
76
|
+
abstract plan(steps: AgentStep[], inputs: ChainValues, callbackManager?: CallbackManager, config?: RunnableConfig): Promise<AgentAction[] | AgentFinish>;
|
|
77
77
|
}
|
|
78
78
|
/**
|
|
79
79
|
* Class representing a single action agent which accepts runnables.
|
|
@@ -89,7 +89,7 @@ export declare class RunnableAgent extends BaseMultiActionAgent {
|
|
|
89
89
|
stop?: string[];
|
|
90
90
|
get inputKeys(): string[];
|
|
91
91
|
constructor(fields: RunnableAgentInput);
|
|
92
|
-
plan(steps: AgentStep[], inputs: ChainValues, callbackManager?: CallbackManager): Promise<AgentAction[] | AgentFinish>;
|
|
92
|
+
plan(steps: AgentStep[], inputs: ChainValues, callbackManager?: CallbackManager, config?: RunnableConfig): Promise<AgentAction[] | AgentFinish>;
|
|
93
93
|
}
|
|
94
94
|
/**
|
|
95
95
|
* Interface for input data for creating a LLMSingleActionAgent.
|
package/dist/agents/agent.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { Serializable } from "@langchain/core/load/serializable";
|
|
2
|
+
import { patchConfig, } from "@langchain/core/runnables";
|
|
2
3
|
/**
|
|
3
4
|
* Error class for parse errors in LangChain. Contains information about
|
|
4
5
|
* the error message and the output that caused the error.
|
|
@@ -113,12 +114,12 @@ export class RunnableAgent extends BaseMultiActionAgent {
|
|
|
113
114
|
this.runnable = fields.runnable;
|
|
114
115
|
this.stop = fields.stop;
|
|
115
116
|
}
|
|
116
|
-
async plan(steps, inputs, callbackManager) {
|
|
117
|
+
async plan(steps, inputs, callbackManager, config) {
|
|
117
118
|
const invokeInput = { ...inputs, steps };
|
|
118
|
-
const output = await this.runnable.invoke(invokeInput, {
|
|
119
|
+
const output = await this.runnable.invoke(invokeInput, patchConfig(config, {
|
|
119
120
|
callbacks: callbackManager,
|
|
120
121
|
runName: "RunnableAgent",
|
|
121
|
-
});
|
|
122
|
+
}));
|
|
122
123
|
if (isAgentAction(output)) {
|
|
123
124
|
return [output];
|
|
124
125
|
}
|
package/dist/agents/executor.cjs
CHANGED
|
@@ -46,24 +46,34 @@ class AgentExecutorIterator extends serializable_1.Serializable {
|
|
|
46
46
|
writable: true,
|
|
47
47
|
value: void 0
|
|
48
48
|
});
|
|
49
|
+
Object.defineProperty(this, "config", {
|
|
50
|
+
enumerable: true,
|
|
51
|
+
configurable: true,
|
|
52
|
+
writable: true,
|
|
53
|
+
value: void 0
|
|
54
|
+
});
|
|
55
|
+
/** @deprecated Use "config" */
|
|
49
56
|
Object.defineProperty(this, "callbacks", {
|
|
50
57
|
enumerable: true,
|
|
51
58
|
configurable: true,
|
|
52
59
|
writable: true,
|
|
53
60
|
value: void 0
|
|
54
61
|
});
|
|
62
|
+
/** @deprecated Use "config" */
|
|
55
63
|
Object.defineProperty(this, "tags", {
|
|
56
64
|
enumerable: true,
|
|
57
65
|
configurable: true,
|
|
58
66
|
writable: true,
|
|
59
67
|
value: void 0
|
|
60
68
|
});
|
|
69
|
+
/** @deprecated Use "config" */
|
|
61
70
|
Object.defineProperty(this, "metadata", {
|
|
62
71
|
enumerable: true,
|
|
63
72
|
configurable: true,
|
|
64
73
|
writable: true,
|
|
65
74
|
value: void 0
|
|
66
75
|
});
|
|
76
|
+
/** @deprecated Use "config" */
|
|
67
77
|
Object.defineProperty(this, "runName", {
|
|
68
78
|
enumerable: true,
|
|
69
79
|
configurable: true,
|
|
@@ -101,6 +111,7 @@ class AgentExecutorIterator extends serializable_1.Serializable {
|
|
|
101
111
|
this.metadata = fields.metadata;
|
|
102
112
|
this.runName = fields.runName;
|
|
103
113
|
this.runManager = fields.runManager;
|
|
114
|
+
this.config = fields.config;
|
|
104
115
|
}
|
|
105
116
|
/**
|
|
106
117
|
* Reset the iterator to its initial state, clearing intermediate steps,
|
|
@@ -158,7 +169,7 @@ class AgentExecutorIterator extends serializable_1.Serializable {
|
|
|
158
169
|
* AgentExecutor's _takeNextStep method.
|
|
159
170
|
*/
|
|
160
171
|
async _executeNextStep(runManager) {
|
|
161
|
-
return this.agentExecutor._takeNextStep(this.nameToToolMap, this.inputs, this.intermediateSteps, runManager);
|
|
172
|
+
return this.agentExecutor._takeNextStep(this.nameToToolMap, this.inputs, this.intermediateSteps, runManager, this.config);
|
|
162
173
|
}
|
|
163
174
|
/**
|
|
164
175
|
* Process the output of the next step,
|
|
@@ -390,7 +401,7 @@ class AgentExecutor extends base_js_1.BaseChain {
|
|
|
390
401
|
while (this.shouldContinue(iterations)) {
|
|
391
402
|
let output;
|
|
392
403
|
try {
|
|
393
|
-
output = await this.agent.plan(steps, inputs, runManager?.getChild());
|
|
404
|
+
output = await this.agent.plan(steps, inputs, runManager?.getChild(), config);
|
|
394
405
|
}
|
|
395
406
|
catch (e) {
|
|
396
407
|
// eslint-disable-next-line no-instanceof/no-instanceof
|
|
@@ -483,10 +494,10 @@ class AgentExecutor extends base_js_1.BaseChain {
|
|
|
483
494
|
const finish = await this.agent.returnStoppedResponse(this.earlyStoppingMethod, steps, inputs);
|
|
484
495
|
return getOutput(finish);
|
|
485
496
|
}
|
|
486
|
-
async _takeNextStep(nameToolMap, inputs, intermediateSteps, runManager) {
|
|
497
|
+
async _takeNextStep(nameToolMap, inputs, intermediateSteps, runManager, config) {
|
|
487
498
|
let output;
|
|
488
499
|
try {
|
|
489
|
-
output = await this.agent.plan(intermediateSteps, inputs, runManager?.getChild());
|
|
500
|
+
output = await this.agent.plan(intermediateSteps, inputs, runManager?.getChild(), config);
|
|
490
501
|
}
|
|
491
502
|
catch (e) {
|
|
492
503
|
// eslint-disable-next-line no-instanceof/no-instanceof
|
|
@@ -614,6 +625,8 @@ class AgentExecutor extends base_js_1.BaseChain {
|
|
|
614
625
|
const agentExecutorIterator = new AgentExecutorIterator({
|
|
615
626
|
inputs,
|
|
616
627
|
agentExecutor: this,
|
|
628
|
+
config: options,
|
|
629
|
+
// TODO: Deprecate these other parameters
|
|
617
630
|
metadata: options?.metadata,
|
|
618
631
|
tags: options?.tags,
|
|
619
632
|
callbacks: options?.callbacks,
|
package/dist/agents/executor.d.ts
CHANGED
|
@@ -12,8 +12,12 @@ import { BaseChain, ChainInputs } from "../chains/base.js";
|
|
|
12
12
|
interface AgentExecutorIteratorInput {
|
|
13
13
|
agentExecutor: AgentExecutor;
|
|
14
14
|
inputs: Record<string, string>;
|
|
15
|
+
config?: RunnableConfig;
|
|
16
|
+
/** @deprecated Use "config" */
|
|
15
17
|
callbacks?: Callbacks;
|
|
18
|
+
/** @deprecated Use "config" */
|
|
16
19
|
tags?: string[];
|
|
20
|
+
/** @deprecated Use "config" */
|
|
17
21
|
metadata?: Record<string, unknown>;
|
|
18
22
|
runName?: string;
|
|
19
23
|
runManager?: CallbackManagerForChainRun;
|
|
@@ -22,9 +26,14 @@ export declare class AgentExecutorIterator extends Serializable implements Agent
|
|
|
22
26
|
lc_namespace: string[];
|
|
23
27
|
agentExecutor: AgentExecutor;
|
|
24
28
|
inputs: Record<string, string>;
|
|
29
|
+
config?: RunnableConfig;
|
|
30
|
+
/** @deprecated Use "config" */
|
|
25
31
|
callbacks?: Callbacks;
|
|
32
|
+
/** @deprecated Use "config" */
|
|
26
33
|
tags: string[] | undefined;
|
|
34
|
+
/** @deprecated Use "config" */
|
|
27
35
|
metadata: Record<string, unknown> | undefined;
|
|
36
|
+
/** @deprecated Use "config" */
|
|
28
37
|
runName: string | undefined;
|
|
29
38
|
private _finalOutputs;
|
|
30
39
|
get finalOutputs(): Record<string, unknown> | undefined;
|
|
@@ -141,7 +150,7 @@ export declare class AgentExecutor extends BaseChain<ChainValues, AgentExecutorO
|
|
|
141
150
|
private shouldContinue;
|
|
142
151
|
/** @ignore */
|
|
143
152
|
_call(inputs: ChainValues, runManager?: CallbackManagerForChainRun, config?: RunnableConfig): Promise<AgentExecutorOutput>;
|
|
144
|
-
_takeNextStep(nameToolMap: Record<string, ToolInterface>, inputs: ChainValues, intermediateSteps: AgentStep[], runManager?: CallbackManagerForChainRun): Promise<AgentFinish | AgentStep[]>;
|
|
153
|
+
_takeNextStep(nameToolMap: Record<string, ToolInterface>, inputs: ChainValues, intermediateSteps: AgentStep[], runManager?: CallbackManagerForChainRun, config?: RunnableConfig): Promise<AgentFinish | AgentStep[]>;
|
|
145
154
|
_return(output: AgentFinish, intermediateSteps: AgentStep[], runManager?: CallbackManagerForChainRun): Promise<AgentExecutorOutput>;
|
|
146
155
|
_getToolReturn(nextStepOutput: AgentStep): Promise<AgentFinish | null>;
|
|
147
156
|
_returnStoppedResponse(earlyStoppingMethod: StoppingMethod): AgentFinish;
|
package/dist/agents/executor.js
CHANGED
|
@@ -43,24 +43,34 @@ export class AgentExecutorIterator extends Serializable {
|
|
|
43
43
|
writable: true,
|
|
44
44
|
value: void 0
|
|
45
45
|
});
|
|
46
|
+
Object.defineProperty(this, "config", {
|
|
47
|
+
enumerable: true,
|
|
48
|
+
configurable: true,
|
|
49
|
+
writable: true,
|
|
50
|
+
value: void 0
|
|
51
|
+
});
|
|
52
|
+
/** @deprecated Use "config" */
|
|
46
53
|
Object.defineProperty(this, "callbacks", {
|
|
47
54
|
enumerable: true,
|
|
48
55
|
configurable: true,
|
|
49
56
|
writable: true,
|
|
50
57
|
value: void 0
|
|
51
58
|
});
|
|
59
|
+
/** @deprecated Use "config" */
|
|
52
60
|
Object.defineProperty(this, "tags", {
|
|
53
61
|
enumerable: true,
|
|
54
62
|
configurable: true,
|
|
55
63
|
writable: true,
|
|
56
64
|
value: void 0
|
|
57
65
|
});
|
|
66
|
+
/** @deprecated Use "config" */
|
|
58
67
|
Object.defineProperty(this, "metadata", {
|
|
59
68
|
enumerable: true,
|
|
60
69
|
configurable: true,
|
|
61
70
|
writable: true,
|
|
62
71
|
value: void 0
|
|
63
72
|
});
|
|
73
|
+
/** @deprecated Use "config" */
|
|
64
74
|
Object.defineProperty(this, "runName", {
|
|
65
75
|
enumerable: true,
|
|
66
76
|
configurable: true,
|
|
@@ -98,6 +108,7 @@ export class AgentExecutorIterator extends Serializable {
|
|
|
98
108
|
this.metadata = fields.metadata;
|
|
99
109
|
this.runName = fields.runName;
|
|
100
110
|
this.runManager = fields.runManager;
|
|
111
|
+
this.config = fields.config;
|
|
101
112
|
}
|
|
102
113
|
/**
|
|
103
114
|
* Reset the iterator to its initial state, clearing intermediate steps,
|
|
@@ -155,7 +166,7 @@ export class AgentExecutorIterator extends Serializable {
|
|
|
155
166
|
* AgentExecutor's _takeNextStep method.
|
|
156
167
|
*/
|
|
157
168
|
async _executeNextStep(runManager) {
|
|
158
|
-
return this.agentExecutor._takeNextStep(this.nameToToolMap, this.inputs, this.intermediateSteps, runManager);
|
|
169
|
+
return this.agentExecutor._takeNextStep(this.nameToToolMap, this.inputs, this.intermediateSteps, runManager, this.config);
|
|
159
170
|
}
|
|
160
171
|
/**
|
|
161
172
|
* Process the output of the next step,
|
|
@@ -385,7 +396,7 @@ export class AgentExecutor extends BaseChain {
|
|
|
385
396
|
while (this.shouldContinue(iterations)) {
|
|
386
397
|
let output;
|
|
387
398
|
try {
|
|
388
|
-
output = await this.agent.plan(steps, inputs, runManager?.getChild());
|
|
399
|
+
output = await this.agent.plan(steps, inputs, runManager?.getChild(), config);
|
|
389
400
|
}
|
|
390
401
|
catch (e) {
|
|
391
402
|
// eslint-disable-next-line no-instanceof/no-instanceof
|
|
@@ -478,10 +489,10 @@ export class AgentExecutor extends BaseChain {
|
|
|
478
489
|
const finish = await this.agent.returnStoppedResponse(this.earlyStoppingMethod, steps, inputs);
|
|
479
490
|
return getOutput(finish);
|
|
480
491
|
}
|
|
481
|
-
async _takeNextStep(nameToolMap, inputs, intermediateSteps, runManager) {
|
|
492
|
+
async _takeNextStep(nameToolMap, inputs, intermediateSteps, runManager, config) {
|
|
482
493
|
let output;
|
|
483
494
|
try {
|
|
484
|
-
output = await this.agent.plan(intermediateSteps, inputs, runManager?.getChild());
|
|
495
|
+
output = await this.agent.plan(intermediateSteps, inputs, runManager?.getChild(), config);
|
|
485
496
|
}
|
|
486
497
|
catch (e) {
|
|
487
498
|
// eslint-disable-next-line no-instanceof/no-instanceof
|
|
@@ -609,6 +620,8 @@ export class AgentExecutor extends BaseChain {
|
|
|
609
620
|
const agentExecutorIterator = new AgentExecutorIterator({
|
|
610
621
|
inputs,
|
|
611
622
|
agentExecutor: this,
|
|
623
|
+
config: options,
|
|
624
|
+
// TODO: Deprecate these other parameters
|
|
612
625
|
metadata: options?.metadata,
|
|
613
626
|
tags: options?.tags,
|
|
614
627
|
callbacks: options?.callbacks,
|
package/dist/chains/base.cjs
CHANGED
|
@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.BaseChain = void 0;
|
|
4
4
|
const outputs_1 = require("@langchain/core/outputs");
|
|
5
5
|
const manager_1 = require("@langchain/core/callbacks/manager");
|
|
6
|
+
const runnables_1 = require("@langchain/core/runnables");
|
|
6
7
|
const base_1 = require("@langchain/core/language_models/base");
|
|
7
8
|
/**
|
|
8
9
|
* Base interface that all chains must implement.
|
|
@@ -48,7 +49,7 @@ class BaseChain extends base_1.BaseLangChain {
|
|
|
48
49
|
* @returns Promise that resolves with the output of the chain run.
|
|
49
50
|
*/
|
|
50
51
|
async invoke(input, options) {
|
|
51
|
-
const config = (0, manager_1.parseCallbackConfigArg)(options);
|
|
52
|
+
const config = (0, runnables_1.ensureConfig)(options);
|
|
52
53
|
const fullValues = await this._formatValues(input);
|
|
53
54
|
const callbackManager_ = await manager_1.CallbackManager.configure(config?.callbacks, this.callbacks, config?.tags, this.tags, config?.metadata, this.metadata, { verbose: this.verbose });
|
|
54
55
|
const runManager = await callbackManager_?.handleChainStart(this.toJSON(), fullValues, undefined, undefined, undefined, undefined, config?.runName);
|
package/dist/chains/base.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { RUN_KEY } from "@langchain/core/outputs";
|
|
2
2
|
import { CallbackManager, parseCallbackConfigArg, } from "@langchain/core/callbacks/manager";
|
|
3
|
+
import { ensureConfig } from "@langchain/core/runnables";
|
|
3
4
|
import { BaseLangChain, } from "@langchain/core/language_models/base";
|
|
4
5
|
/**
|
|
5
6
|
* Base interface that all chains must implement.
|
|
@@ -45,7 +46,7 @@ export class BaseChain extends BaseLangChain {
|
|
|
45
46
|
* @returns Promise that resolves with the output of the chain run.
|
|
46
47
|
*/
|
|
47
48
|
async invoke(input, options) {
|
|
48
|
-
const config = parseCallbackConfigArg(options);
|
|
49
|
+
const config = ensureConfig(options);
|
|
49
50
|
const fullValues = await this._formatValues(input);
|
|
50
51
|
const callbackManager_ = await CallbackManager.configure(config?.callbacks, this.callbacks, config?.tags, this.tags, config?.metadata, this.metadata, { verbose: this.verbose });
|
|
51
52
|
const runManager = await callbackManager_?.handleChainStart(this.toJSON(), fullValues, undefined, undefined, undefined, undefined, config?.runName);
|
package/dist/embeddings/fake.cjs
CHANGED
|
@@ -14,10 +14,4 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
14
14
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
15
|
};
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
const entrypoint_deprecation_js_1 = require("../util/entrypoint_deprecation.cjs");
|
|
18
17
|
__exportStar(require("@langchain/core/utils/testing"), exports);
|
|
19
|
-
/* #__PURE__ */ (0, entrypoint_deprecation_js_1.logVersion010MigrationWarning)({
|
|
20
|
-
oldEntrypointName: "embeddings/fake",
|
|
21
|
-
newEntrypointName: "utils/testing",
|
|
22
|
-
newPackageName: "@langchain/core",
|
|
23
|
-
});
|
package/dist/embeddings/fake.js
CHANGED
|
@@ -1,7 +1 @@
|
|
|
1
|
-
import { logVersion010MigrationWarning } from "../util/entrypoint_deprecation.js";
|
|
2
1
|
export * from "@langchain/core/utils/testing";
|
|
3
|
-
/* #__PURE__ */ logVersion010MigrationWarning({
|
|
4
|
-
oldEntrypointName: "embeddings/fake",
|
|
5
|
-
newEntrypointName: "utils/testing",
|
|
6
|
-
newPackageName: "@langchain/core",
|
|
7
|
-
});
|
package/dist/load/import_map.cjs
CHANGED
|
@@ -24,8 +24,8 @@ var __importStar = (this && this.__importStar) || function (mod) {
|
|
|
24
24
|
return result;
|
|
25
25
|
};
|
|
26
26
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
27
|
-
exports.
|
|
28
|
-
exports.llms__fireworks = exports.chat_models__fireworks = exports.schema__output = exports.schema__output_parser = exports.schema__runnable = exports.prompts__base = exports.prompts__pipeline = exports.prompts__image = exports.prompts__chat = exports.schema = exports.schema__messages = exports.prompts__prompt = exports.embeddings__openai = exports.llms__openai = exports.chat_models__openai = exports.indexes = exports.runnables__remote = exports.smith = exports.evaluation = exports.experimental__prompts__custom_format = exports.experimental__masking = exports.experimental__chains__violation_of_expectations = exports.experimental__chat_models__bittensor = exports.experimental__plan_and_execute = exports.experimental__generative_agents = exports.experimental__babyagi = exports.experimental__openai_files = exports.experimental__openai_assistant = exports.experimental__autogpt = exports.util__time = exports.util__math = exports.util__document = void 0;
|
|
27
|
+
exports.storage__encoder_backed = exports.stores__message__in_memory = exports.stores__file__in_memory = exports.stores__doc__in_memory = exports.retrievers__matryoshka_retriever = exports.retrievers__vespa = exports.retrievers__score_threshold = exports.retrievers__hyde = exports.retrievers__document_compressors__embeddings_filter = exports.retrievers__document_compressors__chain_extract = exports.retrievers__time_weighted = exports.retrievers__parent_document = exports.retrievers__multi_vector = exports.retrievers__multi_query = exports.retrievers__document_compressors = exports.retrievers__contextual_compression = exports.retrievers__remote = exports.output_parsers = exports.schema__query_constructor = exports.schema__prompt_template = exports.chat_models__anthropic = exports.document_transformers__openai_functions = exports.document_loaders__web__sort_xyz_blockchain = exports.document_loaders__web__serpapi = exports.document_loaders__web__searchapi = exports.document_loaders__base = exports.text_splitter = exports.vectorstores__memory = exports.llms__fake = exports.embeddings__fake = exports.embeddings__cache_backed = exports.chains__retrieval = exports.chains__openai_functions = exports.chains__history_aware_retriever = exports.chains__combine_documents__reduce = exports.chains__combine_documents = exports.chains = exports.tools__retriever = exports.tools__render = exports.tools__chain = exports.agents__openai__output_parser = exports.agents__xml__output_parser = exports.agents__react__output_parser = exports.agents__format_scratchpad__log_to_message = exports.agents__format_scratchpad__xml = exports.agents__format_scratchpad__log = exports.agents__format_scratchpad__openai_tools = exports.agents__format_scratchpad = exports.agents__toolkits = exports.agents = void 0;
|
|
28
|
+
exports.llms__fireworks = exports.chat_models__fireworks = exports.schema__output = exports.schema__output_parser = exports.schema__runnable = exports.prompts__base = exports.prompts__pipeline = exports.prompts__image = exports.prompts__chat = exports.schema = exports.schema__messages = exports.prompts__prompt = exports.embeddings__openai = exports.llms__openai = exports.chat_models__openai = exports.indexes = exports.runnables__remote = exports.smith = exports.evaluation = exports.experimental__prompts__custom_format = exports.experimental__masking = exports.experimental__chains__violation_of_expectations = exports.experimental__chat_models__bittensor = exports.experimental__plan_and_execute = exports.experimental__generative_agents = exports.experimental__babyagi = exports.experimental__openai_files = exports.experimental__openai_assistant = exports.experimental__autogpt = exports.util__time = exports.util__math = exports.util__document = exports.storage__in_memory = void 0;
|
|
29
29
|
exports.agents = __importStar(require("../agents/index.cjs"));
|
|
30
30
|
exports.agents__toolkits = __importStar(require("../agents/toolkits/index.cjs"));
|
|
31
31
|
exports.agents__format_scratchpad = __importStar(require("../agents/format_scratchpad/openai_functions.cjs"));
|
|
@@ -71,6 +71,7 @@ exports.retrievers__document_compressors__embeddings_filter = __importStar(requi
|
|
|
71
71
|
exports.retrievers__hyde = __importStar(require("../retrievers/hyde.cjs"));
|
|
72
72
|
exports.retrievers__score_threshold = __importStar(require("../retrievers/score_threshold.cjs"));
|
|
73
73
|
exports.retrievers__vespa = __importStar(require("../retrievers/vespa.cjs"));
|
|
74
|
+
exports.retrievers__matryoshka_retriever = __importStar(require("../retrievers/matryoshka_retriever.cjs"));
|
|
74
75
|
exports.stores__doc__in_memory = __importStar(require("../stores/doc/in_memory.cjs"));
|
|
75
76
|
exports.stores__file__in_memory = __importStar(require("../stores/file/in_memory.cjs"));
|
|
76
77
|
exports.stores__message__in_memory = __importStar(require("../stores/message/in_memory.cjs"));
|
package/dist/load/import_map.d.ts
CHANGED
|
@@ -43,6 +43,7 @@ export * as retrievers__document_compressors__embeddings_filter from "../retriev
|
|
|
43
43
|
export * as retrievers__hyde from "../retrievers/hyde.js";
|
|
44
44
|
export * as retrievers__score_threshold from "../retrievers/score_threshold.js";
|
|
45
45
|
export * as retrievers__vespa from "../retrievers/vespa.js";
|
|
46
|
+
export * as retrievers__matryoshka_retriever from "../retrievers/matryoshka_retriever.js";
|
|
46
47
|
export * as stores__doc__in_memory from "../stores/doc/in_memory.js";
|
|
47
48
|
export * as stores__file__in_memory from "../stores/file/in_memory.js";
|
|
48
49
|
export * as stores__message__in_memory from "../stores/message/in_memory.js";
|
package/dist/load/import_map.js
CHANGED
|
@@ -44,6 +44,7 @@ export * as retrievers__document_compressors__embeddings_filter from "../retriev
|
|
|
44
44
|
export * as retrievers__hyde from "../retrievers/hyde.js";
|
|
45
45
|
export * as retrievers__score_threshold from "../retrievers/score_threshold.js";
|
|
46
46
|
export * as retrievers__vespa from "../retrievers/vespa.js";
|
|
47
|
+
export * as retrievers__matryoshka_retriever from "../retrievers/matryoshka_retriever.js";
|
|
47
48
|
export * as stores__doc__in_memory from "../stores/doc/in_memory.js";
|
|
48
49
|
export * as stores__file__in_memory from "../stores/file/in_memory.js";
|
|
49
50
|
export * as stores__message__in_memory from "../stores/message/in_memory.js";
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.MatryoshkaRetriever = void 0;
|
|
4
|
+
const math_1 = require("@langchain/core/utils/math");
|
|
5
|
+
const vectorstores_1 = require("@langchain/core/vectorstores");
|
|
6
|
+
/**
|
|
7
|
+
* A retriever that uses two sets of embeddings to perform adaptive retrieval. Based
|
|
8
|
+
* off of the "Matryoshka embeddings: faster OpenAI vector search using Adaptive Retrieval"
|
|
9
|
+
* blog post {@link https://supabase.com/blog/matryoshka-embeddings}.
|
|
10
|
+
*
|
|
11
|
+
*
|
|
12
|
+
* This class performs "Adaptive Retrieval" for searching text embeddings efficiently using the
|
|
13
|
+
* Matryoshka Representation Learning (MRL) technique. It retrieves documents similar to a query
|
|
14
|
+
* embedding in two steps:
|
|
15
|
+
*
|
|
16
|
+
* First-pass: Uses a lower dimensional sub-vector from the MRL embedding for an initial, fast,
|
|
17
|
+
* but less accurate search.
|
|
18
|
+
*
|
|
19
|
+
* Second-pass: Re-ranks the top results from the first pass using the full, high-dimensional
|
|
20
|
+
* embedding for higher accuracy.
|
|
21
|
+
*
|
|
22
|
+
*
|
|
23
|
+
* This code implements MRL embeddings for efficient vector search by combining faster,
|
|
24
|
+
* lower-dimensional initial search with accurate, high-dimensional re-ranking.
|
|
25
|
+
*/
|
|
26
|
+
class MatryoshkaRetriever extends vectorstores_1.VectorStoreRetriever {
|
|
27
|
+
constructor(fields) {
|
|
28
|
+
super(fields);
|
|
29
|
+
Object.defineProperty(this, "smallK", {
|
|
30
|
+
enumerable: true,
|
|
31
|
+
configurable: true,
|
|
32
|
+
writable: true,
|
|
33
|
+
value: 50
|
|
34
|
+
});
|
|
35
|
+
Object.defineProperty(this, "largeK", {
|
|
36
|
+
enumerable: true,
|
|
37
|
+
configurable: true,
|
|
38
|
+
writable: true,
|
|
39
|
+
value: 8
|
|
40
|
+
});
|
|
41
|
+
Object.defineProperty(this, "largeEmbeddingKey", {
|
|
42
|
+
enumerable: true,
|
|
43
|
+
configurable: true,
|
|
44
|
+
writable: true,
|
|
45
|
+
value: "lc_large_embedding"
|
|
46
|
+
});
|
|
47
|
+
Object.defineProperty(this, "largeEmbeddingModel", {
|
|
48
|
+
enumerable: true,
|
|
49
|
+
configurable: true,
|
|
50
|
+
writable: true,
|
|
51
|
+
value: void 0
|
|
52
|
+
});
|
|
53
|
+
Object.defineProperty(this, "searchType", {
|
|
54
|
+
enumerable: true,
|
|
55
|
+
configurable: true,
|
|
56
|
+
writable: true,
|
|
57
|
+
value: "cosine"
|
|
58
|
+
});
|
|
59
|
+
/**
|
|
60
|
+
* Override the default `addDocuments` method to embed the documents twice,
|
|
61
|
+
* once using the larger embeddings model, and then again using the default
|
|
62
|
+
* embedding model linked to the vector store.
|
|
63
|
+
*
|
|
64
|
+
* @param {DocumentInterface[]} documents - An array of documents to add to the vector store.
|
|
65
|
+
* @param {AddDocumentOptions} options - An optional object containing additional options for adding documents.
|
|
66
|
+
* @returns {Promise<string[] | void>} A promise that resolves to an array of the document IDs that were added to the vector store.
|
|
67
|
+
*/
|
|
68
|
+
Object.defineProperty(this, "addDocuments", {
|
|
69
|
+
enumerable: true,
|
|
70
|
+
configurable: true,
|
|
71
|
+
writable: true,
|
|
72
|
+
value: async (documents, options) => {
|
|
73
|
+
// Insure documents metadata does not contain the large embedding key
|
|
74
|
+
if (documents.some((doc) => this.largeEmbeddingKey in doc.metadata)) {
|
|
75
|
+
throw new Error(`All documents must not contain the large embedding key: ${this.largeEmbeddingKey} in their metadata.`);
|
|
76
|
+
}
|
|
77
|
+
const allDocPageContent = documents.map((doc) => doc.pageContent);
|
|
78
|
+
const allDocLargeEmbeddings = await this.largeEmbeddingModel.embedDocuments(allDocPageContent);
|
|
79
|
+
const newDocuments = documents.map((doc, idx) => ({
|
|
80
|
+
...doc,
|
|
81
|
+
metadata: {
|
|
82
|
+
...doc.metadata,
|
|
83
|
+
[this.largeEmbeddingKey]: JSON.stringify(allDocLargeEmbeddings[idx]),
|
|
84
|
+
},
|
|
85
|
+
}));
|
|
86
|
+
return this.vectorStore.addDocuments(newDocuments, options);
|
|
87
|
+
}
|
|
88
|
+
});
|
|
89
|
+
this.smallK = fields.smallK ?? this.smallK;
|
|
90
|
+
this.largeK = fields.largeK ?? this.largeK;
|
|
91
|
+
this.largeEmbeddingKey = fields.largeEmbeddingKey ?? this.largeEmbeddingKey;
|
|
92
|
+
this.largeEmbeddingModel = fields.largeEmbeddingModel;
|
|
93
|
+
this.searchType = fields.searchType ?? this.searchType;
|
|
94
|
+
}
|
|
95
|
+
/**
|
|
96
|
+
* Ranks documents based on their similarity to a query embedding using larger embeddings.
|
|
97
|
+
*
|
|
98
|
+
* This method takes a query embedding and a list of documents (smallResults) as input. Each document
|
|
99
|
+
* in the smallResults array has previously been associated with a large embedding stored in its metadata.
|
|
100
|
+
* Depending on the `searchType` (cosine, innerProduct, or euclidean), it calculates the similarity scores
|
|
101
|
+
* between the query embedding and each document's large embedding. It then ranks the documents based on
|
|
102
|
+
* these similarity scores, from the most similar to the least similar.
|
|
103
|
+
*
|
|
104
|
+
* The method returns a promise that resolves to an array of the top `largeK` documents, where `largeK`
|
|
105
|
+
* is a class property defining the number of documents to return. This subset of documents is determined
|
|
106
|
+
* by sorting the entire list of documents based on their similarity scores and then selecting the top
|
|
107
|
+
* `largeK` documents.
|
|
108
|
+
*
|
|
109
|
+
* @param {number[]} embeddedQuery The embedding of the query, represented as an array of numbers.
|
|
110
|
+
* @param {DocumentInterface[]} smallResults An array of documents, each with metadata that includes a large embedding for similarity comparison.
|
|
111
|
+
* @returns {Promise<DocumentInterface[]>} A promise that resolves to an array of the top `largeK` ranked documents based on their similarity to the query embedding.
|
|
112
|
+
*/
|
|
113
|
+
_rankByLargeEmbeddings(embeddedQuery, smallResults) {
|
|
114
|
+
const largeEmbeddings = smallResults.map((doc) => JSON.parse(doc.metadata[this.largeEmbeddingKey]));
|
|
115
|
+
let func;
|
|
116
|
+
switch (this.searchType) {
|
|
117
|
+
case "cosine":
|
|
118
|
+
func = () => (0, math_1.cosineSimilarity)([embeddedQuery], largeEmbeddings);
|
|
119
|
+
break;
|
|
120
|
+
case "innerProduct":
|
|
121
|
+
func = () => (0, math_1.innerProduct)([embeddedQuery], largeEmbeddings);
|
|
122
|
+
break;
|
|
123
|
+
case "euclidean":
|
|
124
|
+
func = () => (0, math_1.euclideanDistance)([embeddedQuery], largeEmbeddings);
|
|
125
|
+
break;
|
|
126
|
+
default:
|
|
127
|
+
throw new Error(`Unknown search type: ${this.searchType}`);
|
|
128
|
+
}
|
|
129
|
+
// Calculate the similarity scores between the query embedding and the large embeddings
|
|
130
|
+
const [similarityScores] = func();
|
|
131
|
+
// Create an array of indices from 0 to N-1, where N is the number of documents
|
|
132
|
+
let indices = Array.from({ length: smallResults.length }, (_, index) => index);
|
|
133
|
+
indices = indices
|
|
134
|
+
.map((v, i) => [similarityScores[i], v])
|
|
135
|
+
.sort(([a], [b]) => b - a)
|
|
136
|
+
.slice(0, this.largeK)
|
|
137
|
+
.map(([, i]) => i);
|
|
138
|
+
return indices.map((i) => smallResults[i]);
|
|
139
|
+
}
|
|
140
|
+
async _getRelevantDocuments(query) {
|
|
141
|
+
const [embeddedQuery, smallResults] = await Promise.all([
|
|
142
|
+
this.largeEmbeddingModel.embedQuery(query),
|
|
143
|
+
this.vectorStore.similaritySearch(query, this.smallK, this.filter),
|
|
144
|
+
]);
|
|
145
|
+
return this._rankByLargeEmbeddings(embeddedQuery, smallResults);
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
exports.MatryoshkaRetriever = MatryoshkaRetriever;
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { DocumentInterface } from "@langchain/core/documents";
|
|
2
|
+
import { Embeddings } from "@langchain/core/embeddings";
|
|
3
|
+
import { VectorStore, VectorStoreRetriever, VectorStoreRetrieverInput } from "@langchain/core/vectorstores";
|
|
4
|
+
/**
 * Free-form options bag forwarded to the vector store when documents are added.
 */
type AddDocumentOptions = {
    [optionName: string]: any;
};
|
|
8
|
+
/**
 * Options accepted by `MatryoshkaRetriever` in addition to the regular
 * vector store retriever input.
 */
export interface MatryoshkaRetrieverFields {
    /**
     * The number of candidate documents to fetch from the vector store in the
     * first-pass similarity search.
     * @default 50
     */
    smallK?: number;
    /**
     * The maximum number of documents to return after the first-pass
     * candidates have been re-ranked with the large embeddings.
     * @default 8
     */
    largeK?: number;
    /**
     * The metadata key under which each document's large embedding is stored
     * (as a JSON-encoded array).
     * @default "lc_large_embedding"
     */
    largeEmbeddingKey?: string;
    /**
     * The embedding model to use when generating the large
     * embeddings, both for documents being added and for queries.
     */
    largeEmbeddingModel: Embeddings;
    /**
     * The metric used to compare the query's large embedding with each
     * candidate's large embedding during re-ranking.
     * @default "cosine"
     */
    searchType?: "cosine" | "innerProduct" | "euclidean";
}
|
|
35
|
+
/**
 * A retriever that uses two sets of embeddings to perform adaptive retrieval. Based
 * off of the "Matryoshka embeddings: faster OpenAI vector search using Adaptive Retrieval"
 * blog post {@link https://supabase.com/blog/matryoshka-embeddings}.
 *
 * This class performs "Adaptive Retrieval" for searching text embeddings efficiently using the
 * Matryoshka Representation Learning (MRL) technique. It retrieves documents similar to a query
 * embedding in two steps:
 *
 * First-pass: Uses a lower dimensional sub-vector from the MRL embedding for an initial, fast,
 * but less accurate search.
 *
 * Second-pass: Re-ranks the top results from the first pass using the full, high-dimensional
 * embedding for higher accuracy.
 */
export declare class MatryoshkaRetriever<Store extends VectorStore = VectorStore> extends VectorStoreRetriever<Store> {
    /** Number of candidate documents fetched by the first-pass search. */
    smallK: number;
    /** Maximum number of documents returned after re-ranking. */
    largeK: number;
    /** Metadata key under which each document's large embedding is stored. */
    largeEmbeddingKey: string;
    /** Embedding model used to produce the large embeddings. */
    largeEmbeddingModel: Embeddings;
    /** Metric used to compare large embeddings during re-ranking. */
    searchType: "cosine" | "innerProduct" | "euclidean";
    constructor(fields: MatryoshkaRetrieverFields & VectorStoreRetrieverInput<Store>);
    /**
     * Re-ranks first-pass results using the large embedding stored in each
     * document's metadata under `largeEmbeddingKey`.
     *
     * The large embeddings are compared against the query embedding with the
     * metric selected by `searchType` (cosine, innerProduct, or euclidean), and
     * at most `largeK` documents are returned.
     *
     * This method is synchronous: it returns the documents directly, not a promise.
     *
     * @param {number[]} embeddedQuery The embedding of the query, represented as an array of numbers.
     * @param {DocumentInterface[]} smallResults An array of documents, each with metadata that includes a large embedding for similarity comparison.
     * @returns {DocumentInterface[]} At most `largeK` re-ranked documents.
     */
    private _rankByLargeEmbeddings;
    /**
     * Runs the first-pass similarity search against the vector store and
     * re-ranks its results with the large embedding of `query`.
     *
     * @param {string} query The text to search for.
     * @returns {Promise<DocumentInterface[]>} A promise that resolves to the re-ranked documents.
     */
    _getRelevantDocuments(query: string): Promise<DocumentInterface[]>;
    /**
     * Override the default `addDocuments` method to embed the documents twice,
     * once using the larger embeddings model, and then again using the default
     * embedding model linked to the vector store.
     *
     * @param {DocumentInterface[]} documents - An array of documents to add to the vector store.
     * @param {AddDocumentOptions} options - An optional object containing additional options for adding documents.
     * @returns {Promise<string[] | void>} A promise that resolves to whatever the vector store's `addDocuments` returns (the added document IDs, when the store reports them).
     */
    addDocuments: (documents: DocumentInterface[], options?: AddDocumentOptions) => Promise<string[] | void>;
}
|
|
93
|
+
export {};
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import { cosineSimilarity, euclideanDistance, innerProduct, } from "@langchain/core/utils/math";
|
|
2
|
+
import { VectorStoreRetriever, } from "@langchain/core/vectorstores";
|
|
3
|
+
/**
 * A retriever that uses two sets of embeddings to perform adaptive retrieval. Based
 * off of the "Matryoshka embeddings: faster OpenAI vector search using Adaptive Retrieval"
 * blog post {@link https://supabase.com/blog/matryoshka-embeddings}.
 *
 * This class performs "Adaptive Retrieval" for searching text embeddings efficiently using the
 * Matryoshka Representation Learning (MRL) technique. It retrieves documents similar to a query
 * embedding in two steps:
 *
 * First-pass: Uses a lower dimensional sub-vector from the MRL embedding for an initial, fast,
 * but less accurate search.
 *
 * Second-pass: Re-ranks the top results from the first pass using the full, high-dimensional
 * embedding for higher accuracy.
 */
export class MatryoshkaRetriever extends VectorStoreRetriever {
    /**
     * @param fields Vector store retriever input plus the Matryoshka-specific
     * options (`smallK`, `largeK`, `largeEmbeddingKey`, `largeEmbeddingModel`,
     * `searchType`). Omitted optional values fall back to the defaults below.
     */
    constructor(fields) {
        super(fields);
        // Number of candidates fetched by the first-pass search.
        Object.defineProperty(this, "smallK", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 50
        });
        // Maximum number of documents returned after re-ranking.
        Object.defineProperty(this, "largeK", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 8
        });
        // Metadata key that holds each document's JSON-encoded large embedding.
        Object.defineProperty(this, "largeEmbeddingKey", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: "lc_large_embedding"
        });
        // Model that produces the large embeddings; assigned from `fields` below.
        Object.defineProperty(this, "largeEmbeddingModel", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // Metric used to compare large embeddings during re-ranking.
        Object.defineProperty(this, "searchType", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: "cosine"
        });
        /**
         * Override the default `addDocuments` method to embed the documents twice,
         * once using the larger embeddings model, and then again using the default
         * embedding model linked to the vector store.
         *
         * @param {DocumentInterface[]} documents - An array of documents to add to the vector store.
         * @param {AddDocumentOptions} options - An optional object containing additional options for adding documents.
         * @returns {Promise<string[] | void>} A promise that resolves to whatever the vector store's `addDocuments` returns.
         * @throws {Error} If any document already has `largeEmbeddingKey` in its metadata.
         */
        Object.defineProperty(this, "addDocuments", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: async (documents, options) => {
                // Ensure documents metadata does not contain the large embedding key,
                // since it would be overwritten with the generated embedding below.
                if (documents.some((doc) => this.largeEmbeddingKey in doc.metadata)) {
                    throw new Error(`All documents must not contain the large embedding key: ${this.largeEmbeddingKey} in their metadata.`);
                }
                const allDocPageContent = documents.map((doc) => doc.pageContent);
                const allDocLargeEmbeddings = await this.largeEmbeddingModel.embedDocuments(allDocPageContent);
                // Copy each document and attach its large embedding as a JSON string.
                const newDocuments = documents.map((doc, idx) => ({
                    ...doc,
                    metadata: {
                        ...doc.metadata,
                        [this.largeEmbeddingKey]: JSON.stringify(allDocLargeEmbeddings[idx]),
                    },
                }));
                return this.vectorStore.addDocuments(newDocuments, options);
            }
        });
        this.smallK = fields.smallK ?? this.smallK;
        this.largeK = fields.largeK ?? this.largeK;
        this.largeEmbeddingKey = fields.largeEmbeddingKey ?? this.largeEmbeddingKey;
        this.largeEmbeddingModel = fields.largeEmbeddingModel;
        this.searchType = fields.searchType ?? this.searchType;
    }
    /**
     * Re-ranks first-pass results using the large embedding stored on each document.
     *
     * Every document in `smallResults` is expected to carry its large embedding as a
     * JSON-encoded number array under `metadata[this.largeEmbeddingKey]`. Those
     * embeddings are compared against `embeddedQuery` with the metric selected by
     * `this.searchType`; the documents are then ordered from most to least similar
     * and the first `this.largeK` of them are returned.
     *
     * Cosine similarity and inner product are similarity scores (larger means
     * closer), whereas euclidean distance is a distance (smaller means closer), so
     * the sort direction depends on the metric.
     *
     * This method is synchronous: it returns the documents directly, not a promise.
     *
     * @param {number[]} embeddedQuery The large-model embedding of the query.
     * @param {DocumentInterface[]} smallResults Candidate documents from the first-pass search.
     * @returns {DocumentInterface[]} At most `largeK` documents, most similar first.
     * @throws {Error} If `searchType` is not "cosine", "innerProduct" or "euclidean".
     */
    _rankByLargeEmbeddings(embeddedQuery, smallResults) {
        const largeEmbeddings = smallResults.map((doc) => JSON.parse(doc.metadata[this.largeEmbeddingKey]));
        let computeScores;
        // Similarities rank descending; distances must rank ascending.
        let lowerIsCloser = false;
        switch (this.searchType) {
            case "cosine":
                computeScores = () => cosineSimilarity([embeddedQuery], largeEmbeddings);
                break;
            case "innerProduct":
                computeScores = () => innerProduct([embeddedQuery], largeEmbeddings);
                break;
            case "euclidean":
                computeScores = () => euclideanDistance([embeddedQuery], largeEmbeddings);
                lowerIsCloser = true;
                break;
            default:
                throw new Error(`Unknown search type: ${this.searchType}`);
        }
        // A single query row is passed in, so only the first row of the result is used.
        const [scores] = computeScores();
        const byCloseness = lowerIsCloser
            ? ([a], [b]) => a - b
            : ([a], [b]) => b - a;
        // Pair each document with its score, order by closeness, keep the top `largeK`.
        return smallResults
            .map((doc, idx) => [scores[idx], doc])
            .sort(byCloseness)
            .slice(0, this.largeK)
            .map(([, doc]) => doc);
    }
    /**
     * Retrieves documents for `query` in two passes: a vector-store similarity
     * search limited to `smallK` candidates, followed by a re-rank of those
     * candidates against the query's large embedding.
     *
     * @param {string} query The text to search for.
     * @returns {Promise<DocumentInterface[]>} The re-ranked documents.
     */
    async _getRelevantDocuments(query) {
        // Both requests are issued before awaiting so they run concurrently.
        const [embeddedQuery, smallResults] = await Promise.all([
            this.largeEmbeddingModel.embedQuery(query),
            this.vectorStore.similaritySearch(query, this.smallK, this.filter),
        ]);
        return this._rankByLargeEmbeddings(embeddedQuery, smallResults);
    }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "langchain",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.24",
|
|
4
4
|
"description": "Typescript bindings for langchain",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
@@ -942,6 +942,10 @@
|
|
|
942
942
|
"retrievers/vespa.js",
|
|
943
943
|
"retrievers/vespa.d.ts",
|
|
944
944
|
"retrievers/vespa.d.cts",
|
|
945
|
+
"retrievers/matryoshka_retriever.cjs",
|
|
946
|
+
"retrievers/matryoshka_retriever.js",
|
|
947
|
+
"retrievers/matryoshka_retriever.d.ts",
|
|
948
|
+
"retrievers/matryoshka_retriever.d.cts",
|
|
945
949
|
"cache.cjs",
|
|
946
950
|
"cache.js",
|
|
947
951
|
"cache.d.ts",
|
|
@@ -1508,8 +1512,8 @@
|
|
|
1508
1512
|
},
|
|
1509
1513
|
"dependencies": {
|
|
1510
1514
|
"@anthropic-ai/sdk": "^0.9.1",
|
|
1511
|
-
"@langchain/community": "~0.0.
|
|
1512
|
-
"@langchain/core": "~0.1.
|
|
1515
|
+
"@langchain/community": "~0.0.33",
|
|
1516
|
+
"@langchain/core": "~0.1.36",
|
|
1513
1517
|
"@langchain/openai": "~0.0.14",
|
|
1514
1518
|
"binary-extensions": "^2.2.0",
|
|
1515
1519
|
"expr-eval": "^2.0.2",
|
|
@@ -1517,7 +1521,7 @@
|
|
|
1517
1521
|
"js-yaml": "^4.1.0",
|
|
1518
1522
|
"jsonpointer": "^5.0.1",
|
|
1519
1523
|
"langchainhub": "~0.0.8",
|
|
1520
|
-
"langsmith": "~0.1.
|
|
1524
|
+
"langsmith": "~0.1.7",
|
|
1521
1525
|
"ml-distance": "^4.0.0",
|
|
1522
1526
|
"openapi-types": "^12.1.3",
|
|
1523
1527
|
"p-retry": "4",
|
|
@@ -3641,6 +3645,15 @@
|
|
|
3641
3645
|
"import": "./retrievers/vespa.js",
|
|
3642
3646
|
"require": "./retrievers/vespa.cjs"
|
|
3643
3647
|
},
|
|
3648
|
+
"./retrievers/matryoshka_retriever": {
|
|
3649
|
+
"types": {
|
|
3650
|
+
"import": "./retrievers/matryoshka_retriever.d.ts",
|
|
3651
|
+
"require": "./retrievers/matryoshka_retriever.d.cts",
|
|
3652
|
+
"default": "./retrievers/matryoshka_retriever.d.ts"
|
|
3653
|
+
},
|
|
3654
|
+
"import": "./retrievers/matryoshka_retriever.js",
|
|
3655
|
+
"require": "./retrievers/matryoshka_retriever.cjs"
|
|
3656
|
+
},
|
|
3644
3657
|
"./cache": {
|
|
3645
3658
|
"types": {
|
|
3646
3659
|
"import": "./cache.d.ts",
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
module.exports = require('../dist/retrievers/matryoshka_retriever.cjs');
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../dist/retrievers/matryoshka_retriever.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../dist/retrievers/matryoshka_retriever.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../dist/retrievers/matryoshka_retriever.js'
|