@lmnr-ai/lmnr 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.eslintrc.json ADDED
@@ -0,0 +1,2 @@
1
+ {
2
+ }
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Typescript SDK for Laminar AI
1
+ # Typescript SDK for Laminar
2
2
 
3
3
  ## Quickstart
4
4
 
@@ -8,9 +8,111 @@ npm install @lmnr-ai/lmnr
8
8
 
9
9
  ## Features
10
10
 
11
- - Make Laminar endpoint calls from your JS code
12
- - Make Laminar endpoint calls that can run your own functions as tools from your NodeJS code
13
- - `LaminarRemoteDebugger` to execute your own functions while you test your flows in workshop
11
+ - Instrumentation of your JS/TS code
12
+ - Events and semantic events right from the code
13
+ - Make Laminar pipeline calls from your JS code
14
+
15
+ ## Prerequisites
16
+
17
+ - Laminar project created at https://lmnr.ai
18
+ - Export (or set using .env) a variable `LMNR_PROJECT_API_KEY` with the value from the project settings page
19
+
20
+ ## Code instrumentation
21
+
22
+ For manual instrumentation you will need to import the following:
23
+ - `trace` - this is a function to start a trace. It returns a `TraceContext`
24
+ - `TraceContext` - a pointer to the current trace that you can pass around functions as you want.
25
+ - `SpanContext` - a pointer to the current span that you can pass around functions as you want
26
+ - `ObservationContext` – parent class of `TraceContext` and `SpanContext`. Useful, if you don't want to import the separate context types in TS.
27
+
28
+ Both `TraceContext` and `SpanContext` expose the following interfaces:
29
+ - `span(name: string, props: CreateSpanProps)` - create a child span within the current context. Returns `SpanContext`
30
+ - `update(props)` - update the current trace or span and return it. Returns `TraceContext` or `SpanContext`. Useful when some metadata becomes known later during the program execution
31
+
32
+ In addition, `SpanContext` allows you to:
33
+ - `event(name: string, props: SpanEventProps)` - emit a custom event at any point
34
+ - `evaluateEvent(name: string, evaluator: string, data: Record<string, NodeInput>, props: SpanEvaluateEventProps)` - register a possible event for automatic checking by Laminar.
35
+ - `end(props: UpdateSpanProps)` – update the current span, and terminate it
36
+
37
+ ### Example
38
+
39
+ ```javascript
40
+ // `trace()` is the main entrypoint into the observation of your app
41
+ // `ObservationContext` is a parent class for `SpanContext` and `TraceContext`
42
+ import {
43
+ initialize as lmnrInitialize,
44
+ trace,
45
+ SpanContext,
46
+ TraceContext
47
+ } from '@lmnr-ai/lmnr';
48
+
49
+ import OpenAI from 'openai';
50
+
51
+ const openai = new OpenAI({apiKey: process.env.OPENAI_API_KEY});
52
+
53
+ const getRandomCountry = (s: SpanContext): string => {
54
+ // create the span without registering the input
55
+ const span = s.span('getRandomCountry');
56
+ const countries = ['United States', 'Canada', 'Australia', 'Germany', 'Japan'];
57
+ const country = countries[Math.floor(Math.random() * countries.length)];
58
+
59
+ // end the span and register the output
60
+ span.end({output: country});
61
+
62
+ return country;
63
+ }
64
+
65
+ const foo = (question: string, t: TraceContext) => {
66
+ // create the span and register the input
67
+ const span = t.span('foo', {input: {question}});
68
+
69
+ // pass the span context down the function call if you want to trace it
70
+ const country = getRandomCountry(span);
71
+ question += country;
72
+ const result = openai.chat.completions.create({
73
+ model: 'gpt-4o-mini',
74
+ messages: [
75
+ {role: 'system', content: 'You are a helpful assistant.'},
76
+ {role: 'user', content: question}
77
+ ],
78
+ }).then((response) => {
79
+ const output = response.choices[0].message.content;
80
+
81
+ // ask Laminar to check for a pre-defined event.
82
+ // In this example the event will be called correctness,
83
+ // and the value will be determined by calling the "myCorrectnessEvaluator" pipeline
84
+ //
85
+ // the last argument to the function represents the inputs to the evaluator pipeline
86
+ span.evaluateEvent(
87
+ 'correctness',
88
+ 'myCorrectnessEvaluator',
89
+ { llmOutput: output ?? '' }
90
+ );
91
+ // end the span and register the output
92
+ span.end({ output });
93
+ });
94
+ };
95
+
96
+ lmnrInitialize({
97
+ projectApiKey: process.env.LMNR_PROJECT_API_KEY,
98
+ // this is the env that will be passed to the Laminar evaluator and
99
+ // be used during the event evaluation
100
+ env: {
101
+ OPENAI_API_KEY: process.env.OPENAI_API_KEY
102
+ }
103
+ })
104
+ // Start the trace observation at the entry to your program
105
+ const t = trace();
106
+
107
+ // pass the trace context into the handler
108
+ foo("What is the capital of ", t);
109
+ ```
110
+
111
+ Here's the UI result you get by calling that function 4 times in parallel, awaiting an artificial 500ms delay in `getRandomCountry`.
112
+
113
+ ![](/images/exampleTrace.png)
114
+
115
+ Yellow vertical bars represent the times where correctness was registered, because clearly gpt-4o-mini knows the capitals of these countries.
14
116
 
15
117
  ## Making Laminar pipeline calls
16
118
 
@@ -28,16 +130,73 @@ const result = await l.run({
28
130
  pipeline: 'my_pipeline_name',
29
131
  inputs: {'input': [{'role': 'user', 'content': 'hello'}]},
30
132
  env: {'OPENAI_API_KEY': 'sk-some-key'}, // optional
31
- metadata: {'session_id': 'your_custom_session_id'}, // optional
133
+ metadata: {'metadata_key': 'metadata_value'}, // optional
32
134
  });
33
135
  ```
34
136
 
35
137
  Resulting in:
36
138
 
37
- ```typecript
139
+ ```
38
140
  > console.log(result)
39
141
  {
40
142
  outputs: { output: { value: { role: 'user', content: 'hello' } } },
41
143
  runId: '05383a95-d316-4391-a64b-06c54d12982a'
42
144
  }
43
145
  ```
146
+
147
+ ## Running offline evaluations on your data
148
+
149
+ You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
150
+
151
+ Evaluation takes in the following parameters:
152
+ - `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
153
+ - `data` – an array of `Datapoint` objects, where each `Datapoint` has two keys: `target` and `data`, each containing a key-value object.
154
+ - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
155
+ - `evaluators` – evaluation logic. List of functions that take output of executor as the first argument, `target` as the second argument and produce a numeric score. Each function can produce either a single number or `Record<string, number>` of scores.
156
+ - `config` – optional additional override parameters.
157
+
158
+ \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function that takes in `data` and returns only the needed value(s) from it.
159
+
160
+ ### Example
161
+
162
+ ```javascript
163
+ import { Evaluation } from '@lmnr-ai/lmnr';
164
+
165
+ import OpenAI from 'openai';
166
+
167
+ const openai = new OpenAI({apiKey: process.env.OPENAI_API_KEY});
168
+
169
+ const getCapital = async ({country} : {country: string}): Promise<string> => {
170
+ const response = await openai.chat.completions.create({
171
+ model: 'gpt-4o-mini',
172
+ messages: [
173
+ {
174
+ role: 'system',
175
+ content: 'You are a helpful assistant.'
176
+ }, {
177
+ role: 'user',
178
+ content: `What is the capital of ${country}?
179
+ Just name the city and nothing else`
180
+ }
181
+ ],
182
+ });
183
+ return response.choices[0].message.content ?? ''
184
+ }
185
+
186
+ const e = new Evaluation( 'my-evaluation', {
187
+ data: [
188
+ { data: { country: 'Canada' }, target: { capital: 'Ottawa' } },
189
+ { data: { country: 'Germany' }, target: { capital: 'Berlin' } },
190
+ { data: { country: 'Tanzania' }, target: { capital: 'Dodoma' } },
191
+ ],
192
+ executor: async (data) => await getCapital(data),
193
+ evaluators: [
194
+ async (output, target) => (await output) === target.capital ? 1 : 0
195
+ ],
196
+ config: {
197
+ projectApiKey: process.env.LMNR_PROJECT_API_KEY
198
+ }
199
+ })
200
+
201
+ e.run();
202
+ ```
package/dist/index.d.mts CHANGED
@@ -13,13 +13,249 @@ type PipelineRunResponse = {
13
13
  outputs: Record<string, Record<string, NodeInput>>;
14
14
  runId: string;
15
15
  };
16
+ type EvaluateEvent = {
17
+ name: string;
18
+ evaluator: string;
19
+ data: Record<string, NodeInput>;
20
+ timestamp?: Date;
21
+ env?: Record<string, NodeInput>;
22
+ };
23
+ type Event = {
24
+ id: string;
25
+ templateName: string;
26
+ timestamp: Date;
27
+ spanId: string;
28
+ value: number | string | null;
29
+ };
30
+ type SpanType = 'DEFAULT' | 'LLM';
31
+ type Span = {
32
+ version: string;
33
+ spanType: SpanType;
34
+ id: string;
35
+ parentSpanId: string | null;
36
+ traceId: string;
37
+ name: string;
38
+ startTime: Date;
39
+ endTime: Date | null;
40
+ attributes: Record<string, any>;
41
+ input: any | null;
42
+ output: any | null;
43
+ metadata: Record<string, any> | null;
44
+ evaluateEvents: EvaluateEvent[];
45
+ events: Event[];
46
+ };
47
+ type Trace = {
48
+ id: string;
49
+ version: string;
50
+ success: boolean;
51
+ startTime: Date | null;
52
+ endTime: Date | null;
53
+ userId: string | null;
54
+ sessionId: string | null;
55
+ release: string;
56
+ metadata: Record<string, any> | null;
57
+ };
58
+ type EvaluationDatapoint<D, T, O> = {
59
+ data: Record<string, any> & D;
60
+ target: Record<string, any> & T;
61
+ executorOutput: any & O;
62
+ scores: Record<string, number>;
63
+ };
64
+ type EvaluationStatus = 'Started' | 'Finished' | 'Error';
65
+
66
+ declare class Collector {
67
+ private readonly flushInterval;
68
+ private readonly client;
69
+ private readonly maxQueueSize;
70
+ private queue;
71
+ private flushTimeout;
72
+ constructor();
73
+ addTask(task: Span | Trace): void;
74
+ setEnv(env?: Record<string, string>): void;
75
+ getEnv(): Record<string, string>;
76
+ setProjectApiKey(projectApiKey?: string): void;
77
+ private flush;
78
+ }
79
+
80
+ interface CreateSpanProps {
81
+ input?: any | null;
82
+ metadata?: Record<string, any> | null;
83
+ attributes?: Record<string, any>;
84
+ spanType?: SpanType;
85
+ }
86
+ declare class ObservationContext {
87
+ protected observation: Span | Trace;
88
+ parent: ObservationContext | null;
89
+ children: Record<string, SpanContext>;
90
+ protected collector: Collector;
91
+ constructor(observation: Span | Trace, parent: ObservationContext | null);
92
+ id(): string;
93
+ span(name: string, { input, metadata, attributes, spanType, }?: CreateSpanProps): SpanContext;
94
+ }
95
+ interface UpdateSpanProps {
96
+ input?: any | null;
97
+ output?: any | null;
98
+ metadata?: Record<string, any> | null;
99
+ attributes?: Record<string, any>;
100
+ evaluateEvents?: EvaluateEvent[];
101
+ override?: boolean;
102
+ }
103
+ interface SpanEventProps {
104
+ value?: string | number | boolean;
105
+ timestamp?: Date;
106
+ }
107
+ interface SpanEvaluateEventProps {
108
+ timestamp?: Date;
109
+ }
110
+ declare class SpanContext extends ObservationContext {
111
+ private inerSpan;
112
+ constructor(span: Span, parent: ObservationContext);
113
+ end({ input, output, metadata, attributes, evaluateEvents, override }?: UpdateSpanProps): SpanContext;
114
+ update({ input, output, metadata, attributes, evaluateEvents, override }: UpdateSpanProps): SpanContext;
115
+ event(name: string, { value, timestamp, }?: SpanEventProps): SpanContext;
116
+ /**
117
+ * Evaluate an event with the given name using the specified evaluator and data.
118
+ *
119
+ * The evaluator refers to the name of the Laminar pipeline.
120
+ * The data is passed as input to the evaluator pipeline, meaning you must specify the data you want to evaluate. The prompt
121
+ * of the evaluator will be templated with the keys of the data object.
122
+ * Typically, you would pass the output of LLM generation, users' messages, and other relevant data to `data`.
123
+ *
124
+ * @param {string} name - Name of the event.
125
+ * @param {string} evaluator - Name of the evaluator pipeline.
126
+ * @param {Record<string, NodeInput>} data - Data to be used when evaluating the event.
127
+ * @returns {SpanContext} The updated span context.
128
+ */
129
+ evaluateEvent(name: string, evaluator: string, data: Record<string, NodeInput>, { timestamp }?: SpanEvaluateEventProps): SpanContext;
130
+ private getParent;
131
+ private innerUpdate;
132
+ }
133
+ interface UpdateTraceProps {
134
+ success?: boolean;
135
+ userId?: string | null;
136
+ sessionId?: string | null;
137
+ release?: string;
138
+ metadata?: Record<string, any> | null;
139
+ }
140
+ declare class TraceContext extends ObservationContext {
141
+ private trace;
142
+ constructor(trace: Trace, parent: ObservationContext | null);
143
+ update({ success, userId, sessionId, release, metadata }?: UpdateTraceProps): TraceContext;
144
+ }
145
+ interface TraceProps {
146
+ userId?: string | null;
147
+ sessionId?: string | null;
148
+ release?: string;
149
+ metadata?: Record<string, any> | null;
150
+ }
151
+ declare const trace: ({ userId, sessionId, release, metadata, }?: TraceProps) => TraceContext;
152
+ /**
153
+ * Initializes the SDK with the provided project API key and environment variables.
154
+ *
155
+ * @param options - The options for initialization.
156
+ * @param options.projectApiKey - The project API key. Needed to authenticate with the Laminar API.
157
+ * @param options.env - The environment variables as a key-value pair. Passed to Laminar to be used in the evaluation.
158
+ */
159
+ declare const initialize: ({ projectApiKey, env }: {
160
+ projectApiKey?: string;
161
+ env?: Record<string, string>;
162
+ }) => void;
16
163
 
17
164
  declare class Laminar {
18
- private readonly projectApiKey;
19
- private readonly url;
20
- private readonly response;
21
- constructor(projectApiKey: string);
165
+ private readonly baseUrl;
166
+ private projectApiKey;
167
+ private env;
168
+ constructor(projectApiKey: string, env?: Record<string, string>);
169
+ setEnv(env?: Record<string, string>): void;
170
+ setProjectApiKey(projectApiKey?: string): void;
171
+ getEnv(): Record<string, string>;
22
172
  run({ pipeline, inputs, env, metadata, }: PipelineRunRequest): Promise<PipelineRunResponse>;
173
+ batchPostTraces(data: (Span | Trace)[]): Promise<void>;
174
+ createEvaluation(name: string): Promise<any>;
175
+ postEvaluationResults<D, T, O>(evaluationName: string, data: EvaluationDatapoint<D, T, O>[]): Promise<void>;
176
+ updateEvaluationStatus(evaluationName: string, status: EvaluationStatus): Promise<void>;
177
+ private getHeaders;
178
+ }
179
+
180
+ /**
181
+ * Configuration for the Evaluator
182
+ */
183
+ interface EvaluatorConfig {
184
+ batchSize?: number;
185
+ projectApiKey?: string;
186
+ }
187
+ declare abstract class Dataset<D, T> {
188
+ slice(start: number, end: number): Datapoint<D, T>[];
189
+ abstract size(): number;
190
+ abstract get(index: number): Datapoint<D, T>;
191
+ }
192
+ /**
193
+ * Datapoint is a single data point in the evaluation. `D` is the type of the input data, `T` is the type of the target data.
194
+ */
195
+ type Datapoint<D, T> = {
196
+ /**
197
+ * input to the executor function. Must be a record with string keys and any values.
198
+ */
199
+ data: Record<string, any> & D;
200
+ /**
201
+ * input to the evaluator function (alongside the executor output).
202
+ * Must be a record with string keys and any values.
203
+ */
204
+ target: Record<string, any> & T;
205
+ };
206
+ type EvaluatorFunctionReturn = number | Record<string, number>;
207
+ /**
208
+ * EvaluatorFunction is a function that takes the output of the executor and the target data, and returns a score.
209
+ * The score can be a single number or a record of string keys and number values. The latter is useful for evaluating
210
+ * multiple criteria in one go instead of running multiple evaluators.
211
+ */
212
+ type EvaluatorFunction<O, T> = (output: O | Promise<O>, target: T, ...args: any[]) => EvaluatorFunctionReturn | Promise<EvaluatorFunctionReturn>;
213
+ interface EvaluatorConstructorProps<D, T, O> {
214
+ /**
215
+ * List of data points to evaluate. `data` is the input to the executor function, `target` is the input to the evaluator function.
216
+ */
217
+ data: (Datapoint<D, T>[]) | Dataset<D, T>;
218
+ /**
219
+ * The executor function. Takes the data point + any additional arguments and returns the output to evaluate.
220
+ */
221
+ executor: (data: D, ...args: any[]) => O;
222
+ /**
223
+ * List of evaluator functions. Each evaluator function takes the output of the executor _and_ the target data, and returns
224
+ * a score. The score can be a single number or a record of string keys and number values.
225
+ * If the score is a single number, it will be named after the evaluator function. If the function is anonymous, it will be named
226
+ * `evaluator_${index}`, where index is the index of the evaluator function in the list starting from 1.
227
+ */
228
+ evaluators: EvaluatorFunction<O, T>[];
229
+ /**
230
+ * Optional override configurations for the evaluator.
231
+ */
232
+ config?: EvaluatorConfig;
233
+ }
234
+ declare class Evaluation<D, T, O> {
235
+ private name;
236
+ private data;
237
+ private executor;
238
+ private evaluators;
239
+ private evaluatorNames;
240
+ private laminarClient;
241
+ private batchSize;
242
+ /**
243
+ * Create a new evaluation and prepare data.
244
+ * @param name Name of the evaluation.
245
+ * @param props.data List of data points to evaluate. `data` is the input to the executor function, `target` is the input to the evaluator function.
246
+ * @param props.executor The executor function. Takes the data point + any additional arguments and returns the output to evaluate.
247
+ * @param props.evaluators List of evaluator functions. Each evaluator function takes the output of the executor and the target data, and returns.
248
+ */
249
+ constructor(name: string, { data, executor, evaluators, config }: EvaluatorConstructorProps<D, T, O>);
250
+ /**
251
+ * Runs the evaluation.
252
+ *
253
+ * Creates a new evaluation if no evaluation with such name exists, or adds data to an existing one otherwise.
254
+ * Evaluates data points in batches of `batchSize`. The executor function is called on each data point
255
+ * to get the output, and the output is then evaluated by each evaluator function.
256
+ */
257
+ run(): Promise<void>;
258
+ private evaluateBatch;
23
259
  }
24
260
 
25
- export { type ChatMessage, Laminar, type NodeInput, type PipelineRunRequest, type PipelineRunResponse };
261
+ export { type ChatMessage, type Datapoint, Dataset, type EvaluateEvent, Evaluation, type Event, Laminar, type NodeInput, ObservationContext, type PipelineRunRequest, type PipelineRunResponse, type Span, SpanContext, type Trace, TraceContext, initialize, trace };