@lmnr-ai/lmnr 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.eslintrc.json ADDED
@@ -0,0 +1,2 @@
1
+ {
2
+ }
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Typescript SDK for Laminar AI
1
+ # Typescript SDK for Laminar
2
2
 
3
3
  ## Quickstart
4
4
 
@@ -8,9 +8,111 @@ npm install @lmnr-ai/lmnr
8
8
 
9
9
  ## Features
10
10
 
11
- - Make Laminar endpoint calls from your JS code
12
- - Make Laminar endpoint calls that can run your own functions as tools from your NodeJS code
13
- - `LaminarRemoteDebugger` to execute your own functions while you test your flows in workshop
11
+ - Instrumentation of your JS/TS code
12
+ - Events and semantic events right from the code
13
+ - Make Laminar pipeline calls from your JS code
14
+
15
+ ## Prerequisites
16
+
17
+ - Laminar project created at https://lmnr.ai
18
+ - Export (or set using .env) a variable `LMNR_PROJECT_API_KEY` with the value from the project settings page
19
+
20
+ ## Code instrumentation
21
+
22
+ For manual instrumentation you will need to import the following:
23
+ - `trace` - this is a function to start a trace. It returns a `TraceContext`
24
+ - `TraceContext` - a pointer to the current trace that you can pass around functions as you want.
25
+ - `SpanContext` - a pointer to the current span that you can pass around functions as you want
26
+ - `ObservationContext` – parent class of `TraceContext` and `SpanContext`. Useful, if you don't want to import the separate context types in TS.
27
+
28
+ Both `TraceContext` and `SpanContext` expose the following interfaces:
29
+ - `span(name: string, props: CreateSpanProps)` - create a child span within the current context. Returns `SpanContext`
30
+ - `update(props)` - update the current trace or span and return it. Returns `TraceContext` or `SpanContext`. Useful when some metadata becomes known later during the program execution
31
+
32
+ In addition, `SpanContext` allows you to:
33
+ - `event(name: string, props: SpanEventProps)` - emit a custom event at any point
34
+ - `evaluateEvent(name: string, evaluator: string, data: Record<string, NodeInput>, props: SpanEvaluateEventProps)` - register a possible event for automatic checking by Laminar.
35
+ - `end(props: UpdateSpanProps)` – update the current span, and terminate it
36
+
37
+ ### Example
38
+
39
+ ```javascript
40
+ // `trace()` is the main entrypoint into the observation of your app
41
+ // `ObservationContext` is a parent class for `SpanContext` and `TraceContext`
42
+ import {
43
+ initialize as lmnrInitialize,
44
+ trace,
45
+ SpanContext,
46
+ TraceContext
47
+ } from '@lmnr-ai/lmnr';
48
+
49
+ import OpenAI from 'openai';
50
+
51
+ const openai = new OpenAI({apiKey: process.env.OPENAI_API_KEY});
52
+
53
+ const getRandomCountry = (s: SpanContext): string => {
54
+ // create the span without registering the input
55
+ const span = s.span('getRandomCountry');
56
+ const countries = ['United States', 'Canada', 'Australia', 'Germany', 'Japan'];
57
+ const country = countries[Math.floor(Math.random() * countries.length)];
58
+
59
+ // end the span and register the output
60
+ span.end({output: country});
61
+
62
+ return country;
63
+ }
64
+
65
+ const foo = (question: string, t: TraceContext) => {
66
+ // create the span and register the input
67
+ const span = t.span('foo', {input: {question}});
68
+
69
+ // pass the span context down the function call if you want to trace it
70
+ const country = getRandomCountry(span);
71
+ question += country;
72
+ const result = openai.chat.completions.create({
73
+ model: 'gpt-4o-mini',
74
+ messages: [
75
+ {role: 'system', content: 'You are a helpful assistant.'},
76
+ {role: 'user', content: question}
77
+ ],
78
+ }).then((response) => {
79
+ const output = response.choices[0].message.content;
80
+
81
+ // ask Laminar to check for a pre-defined event.
82
+ // In this example the event will be called correctness,
83
+ // and the value will be determined by calling the "myCorrectnessEvaluator" pipeline
84
+ //
85
+ // the last argument to the function represents the inputs to the evaluator pipeline
86
+ span.evaluateEvent(
87
+ 'correctness',
88
+ 'myCorrectnessEvaluator',
89
+ { llmOutput: output ?? '' }
90
+ );
91
+ // end the span and register the output
92
+ span.end({ output });
93
+ });
94
+ };
95
+
96
+ lmnrInitialize({
97
+ projectApiKey: process.env.LMNR_PROJECT_API_KEY,
98
+ // this is the env that will be passed to the Laminar evaluator and
99
+ // be used during the event evaluation
100
+ env: {
101
+ OPENAI_API_KEY: process.env.OPENAI_API_KEY
102
+ }
103
+ })
104
+ // Start the trace observation at the entry to your program
105
+ const t = trace();
106
+
107
+ // pass the trace context into the handler
108
+ foo("What is the capital of ", t);
109
+ ```
110
+
111
+ Here's the UI result you get by calling that function 4 times in parallel, awaiting an artificial 500ms delay in `getRandomCountry`.
112
+
113
+ ![](/images/exampleTrace.png)
114
+
115
+ Yellow vertical bars represent the times where correctness was registered, because clearly gpt-4o-mini knows the capitals of these countries.
14
116
 
15
117
  ## Making Laminar pipeline calls
16
118
 
@@ -28,16 +130,73 @@ const result = await l.run({
28
130
  pipeline: 'my_pipeline_name',
29
131
  inputs: {'input': [{'role': 'user', 'content': 'hello'}]},
30
132
  env: {'OPENAI_API_KEY': 'sk-some-key'}, // optional
31
- metadata: {'session_id': 'your_custom_session_id'}, // optional
133
+ metadata: {'metadata_key': 'metadata_value'}, // optional
32
134
  });
33
135
  ```
34
136
 
35
137
  Resulting in:
36
138
 
37
- ```typecript
139
+ ```
38
140
  > console.log(result)
39
141
  {
40
142
  outputs: { output: { value: { role: 'user', content: 'hello' } } },
41
143
  runId: '05383a95-d316-4391-a64b-06c54d12982a'
42
144
  }
43
145
  ```
146
+
147
+ ## Running offline evaluations on your data
148
+
149
+ You can evaluate your code with your own data and send it to Laminar using the `Evaluation` class.
150
+
151
+ Evaluation takes in the following parameters:
152
+ - `name` – the name of your evaluation. If no such evaluation exists in the project, it will be created. Otherwise, data will be pushed to the existing evaluation
153
+ - `data` – an array of `Datapoint` objects, where each `Datapoint` has two keys: `target` and `data`, each containing a key-value object.
154
+ - `executor` – the logic you want to evaluate. This function must take `data` as the first argument, and produce any output. *
155
+ - `evaluators` – evaluation logic. List of functions that take output of executor as the first argument, `target` as the second argument and produce a numeric score. Each function can produce either a single number or `Record<string, number>` of scores.
156
+ - `config` – optional additional override parameters.
157
+
158
+ \* If you already have the outputs of executors you want to evaluate, you can specify the executor as an identity function that takes in `data` and returns only the needed value(s) from it.
159
+
160
+ ### Example
161
+
162
+ ```javascript
163
+ import { Evaluation } from '@lmnr-ai/lmnr';
164
+
165
+ import OpenAI from 'openai';
166
+
167
+ const openai = new OpenAI({apiKey: process.env.OPENAI_API_KEY});
168
+
169
+ const getCapital = async ({country} : {country: string}): Promise<string> => {
170
+ const response = await openai.chat.completions.create({
171
+ model: 'gpt-4o-mini',
172
+ messages: [
173
+ {
174
+ role: 'system',
175
+ content: 'You are a helpful assistant.'
176
+ }, {
177
+ role: 'user',
178
+ content: `What is the capital of ${country}?
179
+ Just name the city and nothing else`
180
+ }
181
+ ],
182
+ });
183
+ return response.choices[0].message.content ?? ''
184
+ }
185
+
186
+ const e = new Evaluation( 'my-evaluation', {
187
+ data: [
188
+ { data: { country: 'Canada' }, target: { capital: 'Ottawa' } },
189
+ { data: { country: 'Germany' }, target: { capital: 'Berlin' } },
190
+ { data: { country: 'Tanzania' }, target: { capital: 'Dodoma' } },
191
+ ],
192
+ executor: async (data) => await getCapital(data),
193
+ evaluators: [
194
+ async (output, target) => (await output) === target.capital ? 1 : 0
195
+ ],
196
+ config: {
197
+ projectApiKey: process.env.LMNR_PROJECT_API_KEY
198
+ }
199
+ })
200
+
201
+ e.run();
202
+ ```
package/dist/index.d.mts CHANGED
@@ -13,13 +13,249 @@ type PipelineRunResponse = {
13
13
  outputs: Record<string, Record<string, NodeInput>>;
14
14
  runId: string;
15
15
  };
16
+ type EvaluateEvent = {
17
+ name: string;
18
+ evaluator: string;
19
+ data: Record<string, NodeInput>;
20
+ timestamp?: Date;
21
+ env?: Record<string, NodeInput>;
22
+ };
23
+ type Event = {
24
+ id: string;
25
+ templateName: string;
26
+ timestamp: Date;
27
+ spanId: string;
28
+ value: number | string | null;
29
+ };
30
+ type SpanType = 'DEFAULT' | 'LLM';
31
+ type Span = {
32
+ version: string;
33
+ spanType: SpanType;
34
+ id: string;
35
+ parentSpanId: string | null;
36
+ traceId: string;
37
+ name: string;
38
+ startTime: Date;
39
+ endTime: Date | null;
40
+ attributes: Record<string, any>;
41
+ input: any | null;
42
+ output: any | null;
43
+ metadata: Record<string, any> | null;
44
+ evaluateEvents: EvaluateEvent[];
45
+ events: Event[];
46
+ };
47
+ type Trace = {
48
+ id: string;
49
+ version: string;
50
+ success: boolean;
51
+ startTime: Date | null;
52
+ endTime: Date | null;
53
+ userId: string | null;
54
+ sessionId: string | null;
55
+ release: string;
56
+ metadata: Record<string, any> | null;
57
+ };
58
+ type EvaluationDatapoint<D, T, O> = {
59
+ data: Record<string, any> & D;
60
+ target: Record<string, any> & T;
61
+ executorOutput: any & O;
62
+ scores: Record<string, number>;
63
+ };
64
+ type EvaluationStatus = 'Started' | 'Finished' | 'Error';
65
+
66
+ declare class Collector {
67
+ private readonly flushInterval;
68
+ private readonly client;
69
+ private readonly maxQueueSize;
70
+ private queue;
71
+ private flushTimeout;
72
+ constructor();
73
+ addTask(task: Span | Trace): void;
74
+ setEnv(env?: Record<string, string>): void;
75
+ getEnv(): Record<string, string>;
76
+ setProjectApiKey(projectApiKey?: string): void;
77
+ private flush;
78
+ }
79
+
80
+ interface CreateSpanProps {
81
+ input?: any | null;
82
+ metadata?: Record<string, any> | null;
83
+ attributes?: Record<string, any>;
84
+ spanType?: SpanType;
85
+ }
86
+ declare class ObservationContext {
87
+ protected observation: Span | Trace;
88
+ parent: ObservationContext | null;
89
+ children: Record<string, SpanContext>;
90
+ protected collector: Collector;
91
+ constructor(observation: Span | Trace, parent: ObservationContext | null);
92
+ id(): string;
93
+ span(name: string, { input, metadata, attributes, spanType, }?: CreateSpanProps): SpanContext;
94
+ }
95
+ interface UpdateSpanProps {
96
+ input?: any | null;
97
+ output?: any | null;
98
+ metadata?: Record<string, any> | null;
99
+ attributes?: Record<string, any>;
100
+ evaluateEvents?: EvaluateEvent[];
101
+ override?: boolean;
102
+ }
103
+ interface SpanEventProps {
104
+ value?: string | number | boolean;
105
+ timestamp?: Date;
106
+ }
107
+ interface SpanEvaluateEventProps {
108
+ timestamp?: Date;
109
+ }
110
+ declare class SpanContext extends ObservationContext {
111
+ private inerSpan;
112
+ constructor(span: Span, parent: ObservationContext);
113
+ end({ input, output, metadata, attributes, evaluateEvents, override }?: UpdateSpanProps): SpanContext;
114
+ update({ input, output, metadata, attributes, evaluateEvents, override }: UpdateSpanProps): SpanContext;
115
+ event(name: string, { value, timestamp, }?: SpanEventProps): SpanContext;
116
+ /**
117
+ * Evaluate an event with the given name using the specified evaluator and data.
118
+ *
119
+ * The evaluator refers to the name of the Laminar pipeline.
120
+ * The data is passed as input to the evaluator pipeline, meaning you must specify the data you want to evaluate. The prompt
121
+ * of the evaluator will be templated with the keys of the data object.
122
+ * Typically, you would pass the output of LLM generation, users' messages, and other relevant data to `data`.
123
+ *
124
+ * @param {string} name - Name of the event.
125
+ * @param {string} evaluator - Name of the evaluator pipeline.
126
+ * @param {Record<string, NodeInput>} data - Data to be used when evaluating the event.
127
+ * @returns {SpanContext} The updated span context.
128
+ */
129
+ evaluateEvent(name: string, evaluator: string, data: Record<string, NodeInput>, { timestamp }?: SpanEvaluateEventProps): SpanContext;
130
+ private getParent;
131
+ private innerUpdate;
132
+ }
133
+ interface UpdateTraceProps {
134
+ success?: boolean;
135
+ userId?: string | null;
136
+ sessionId?: string | null;
137
+ release?: string;
138
+ metadata?: Record<string, any> | null;
139
+ }
140
+ declare class TraceContext extends ObservationContext {
141
+ private trace;
142
+ constructor(trace: Trace, parent: ObservationContext | null);
143
+ update({ success, userId, sessionId, release, metadata }?: UpdateTraceProps): TraceContext;
144
+ }
145
+ interface TraceProps {
146
+ userId?: string | null;
147
+ sessionId?: string | null;
148
+ release?: string;
149
+ metadata?: Record<string, any> | null;
150
+ }
151
+ declare const trace: ({ userId, sessionId, release, metadata, }?: TraceProps) => TraceContext;
152
+ /**
153
+ * Initializes the SDK with the provided project API key and environment variables.
154
+ *
155
+ * @param options - The options for initialization.
156
+ * @param options.projectApiKey - The project API key. Needed to authenticate with the Laminar API.
157
+ * @param options.env - The environment variables as a key-value pair. Passed to Laminar to be used in the evaluation.
158
+ */
159
+ declare const initialize: ({ projectApiKey, env }: {
160
+ projectApiKey?: string;
161
+ env?: Record<string, string>;
162
+ }) => void;
16
163
 
17
164
  declare class Laminar {
18
- private readonly projectApiKey;
19
- private readonly url;
20
- private readonly response;
21
- constructor(projectApiKey: string);
165
+ private readonly baseUrl;
166
+ private projectApiKey;
167
+ private env;
168
+ constructor(projectApiKey: string, env?: Record<string, string>);
169
+ setEnv(env?: Record<string, string>): void;
170
+ setProjectApiKey(projectApiKey?: string): void;
171
+ getEnv(): Record<string, string>;
22
172
  run({ pipeline, inputs, env, metadata, }: PipelineRunRequest): Promise<PipelineRunResponse>;
173
+ batchPostTraces(data: (Span | Trace)[]): Promise<void>;
174
+ createEvaluation(name: string): Promise<any>;
175
+ postEvaluationResults<D, T, O>(evaluationName: string, data: EvaluationDatapoint<D, T, O>[]): Promise<void>;
176
+ updateEvaluationStatus(evaluationName: string, status: EvaluationStatus): Promise<void>;
177
+ private getHeaders;
178
+ }
179
+
180
+ /**
181
+ * Configuration for the Evaluator
182
+ */
183
+ interface EvaluatorConfig {
184
+ batchSize?: number;
185
+ projectApiKey?: string;
186
+ }
187
+ declare abstract class Dataset<D, T> {
188
+ slice(start: number, end: number): Datapoint<D, T>[];
189
+ abstract size(): number;
190
+ abstract get(index: number): Datapoint<D, T>;
191
+ }
192
+ /**
193
+ * Datapoint is a single data point in the evaluation. `D` is the type of the input data, `T` is the type of the target data.
194
+ */
195
+ type Datapoint<D, T> = {
196
+ /**
197
+ * input to the executor function. Must be a record with string keys and any values.
198
+ */
199
+ data: Record<string, any> & D;
200
+ /**
201
+ * input to the evaluator function (alongside the executor output).
202
+ * Must be a record with string keys and any values.
203
+ */
204
+ target: Record<string, any> & T;
205
+ };
206
+ type EvaluatorFunctionReturn = number | Record<string, number>;
207
+ /**
208
+ * EvaluatorFunction is a function that takes the output of the executor and the target data, and returns a score.
209
+ * The score can be a single number or a record of string keys and number values. The latter is useful for evaluating
210
+ * multiple criteria in one go instead of running multiple evaluators.
211
+ */
212
+ type EvaluatorFunction<O, T> = (output: O | Promise<O>, target: T, ...args: any[]) => EvaluatorFunctionReturn | Promise<EvaluatorFunctionReturn>;
213
+ interface EvaluatorConstructorProps<D, T, O> {
214
+ /**
215
+ * List of data points to evaluate. `data` is the input to the executor function, `target` is the input to the evaluator function.
216
+ */
217
+ data: (Datapoint<D, T>[]) | Dataset<D, T>;
218
+ /**
219
+ * The executor function. Takes the data point + any additional arguments and returns the output to evaluate.
220
+ */
221
+ executor: (data: D, ...args: any[]) => O;
222
+ /**
223
+ * List of evaluator functions. Each evaluator function takes the output of the executor _and_ the target data, and returns
224
+ * a score. The score can be a single number or a record of string keys and number values.
225
+ * If the score is a single number, it will be named after the evaluator function. If the function is anonymous, it will be named
226
+ * `evaluator_${index}`, where index is the index of the evaluator function in the list starting from 1.
227
+ */
228
+ evaluators: EvaluatorFunction<O, T>[];
229
+ /**
230
+ * Optional override configurations for the evaluator.
231
+ */
232
+ config?: EvaluatorConfig;
233
+ }
234
+ declare class Evaluation<D, T, O> {
235
+ private name;
236
+ private data;
237
+ private executor;
238
+ private evaluators;
239
+ private evaluatorNames;
240
+ private laminarClient;
241
+ private batchSize;
242
+ /**
243
+ * Create a new evaluation and prepare data.
244
+ * @param name Name of the evaluation.
245
+ * @param props.data List of data points to evaluate. `data` is the input to the executor function, `target` is the input to the evaluator function.
246
+ * @param props.executor The executor function. Takes the data point + any additional arguments and returns the output to evaluate.
247
+ * @param props.evaluators List of evaluator functions. Each evaluator function takes the output of the executor and the target data, and returns.
248
+ */
249
+ constructor(name: string, { data, executor, evaluators, config }: EvaluatorConstructorProps<D, T, O>);
250
+ /**
251
+ * Runs the evaluation.
252
+ *
253
+ * Creates a new evaluation if no evaluation with such name exists, or adds data to an existing one otherwise.
254
+ * Evaluates data points in batches of `batchSize`. The executor function is called on each data point
255
+ * to get the output, and the output is then evaluated by each evaluator function.
256
+ */
257
+ run(): Promise<void>;
258
+ private evaluateBatch;
23
259
  }
24
260
 
25
- export { type ChatMessage, Laminar, type NodeInput, type PipelineRunRequest, type PipelineRunResponse };
261
+ export { type ChatMessage, type Datapoint, Dataset, type EvaluateEvent, Evaluation, type Event, Laminar, type NodeInput, ObservationContext, type PipelineRunRequest, type PipelineRunResponse, type Span, SpanContext, type Trace, TraceContext, initialize, trace };