llama-stack-client 0.1.0 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core.d.ts.map +1 -1
- package/core.js +12 -6
- package/core.js.map +1 -1
- package/core.mjs +12 -6
- package/core.mjs.map +1 -1
- package/index.d.mts +16 -6
- package/index.d.ts +16 -6
- package/index.d.ts.map +1 -1
- package/index.js +13 -1
- package/index.js.map +1 -1
- package/index.mjs +14 -2
- package/index.mjs.map +1 -1
- package/internal/decoders/line.d.ts +2 -2
- package/internal/decoders/line.d.ts.map +1 -1
- package/internal/decoders/line.js +69 -34
- package/internal/decoders/line.js.map +1 -1
- package/internal/decoders/line.mjs +69 -34
- package/internal/decoders/line.mjs.map +1 -1
- package/package.json +1 -1
- package/resources/agents/agents.d.ts +9 -0
- package/resources/agents/agents.d.ts.map +1 -1
- package/resources/agents/agents.js.map +1 -1
- package/resources/agents/agents.mjs.map +1 -1
- package/resources/agents/session.d.ts +3 -0
- package/resources/agents/session.d.ts.map +1 -1
- package/resources/agents/turn.d.ts +91 -5
- package/resources/agents/turn.d.ts.map +1 -1
- package/resources/batch-inference.d.ts +14 -0
- package/resources/batch-inference.d.ts.map +1 -1
- package/resources/benchmarks.d.ts +32 -0
- package/resources/benchmarks.d.ts.map +1 -0
- package/resources/benchmarks.js +22 -0
- package/resources/benchmarks.js.map +1 -0
- package/resources/benchmarks.mjs +18 -0
- package/resources/benchmarks.mjs.map +1 -0
- package/resources/datasets.d.ts +19 -15
- package/resources/datasets.d.ts.map +1 -1
- package/resources/eval/eval.d.ts +22 -18
- package/resources/eval/eval.d.ts.map +1 -1
- package/resources/eval/eval.js +6 -0
- package/resources/eval/eval.js.map +1 -1
- package/resources/eval/eval.mjs +6 -0
- package/resources/eval/eval.mjs.map +1 -1
- package/resources/eval/index.d.ts +1 -1
- package/resources/eval/index.d.ts.map +1 -1
- package/resources/eval/index.js.map +1 -1
- package/resources/eval/index.mjs.map +1 -1
- package/resources/eval/jobs.d.ts +3 -3
- package/resources/eval/jobs.d.ts.map +1 -1
- package/resources/eval/jobs.js +6 -6
- package/resources/eval/jobs.js.map +1 -1
- package/resources/eval/jobs.mjs +6 -6
- package/resources/eval/jobs.mjs.map +1 -1
- package/resources/eval-tasks.d.ts +6 -17
- package/resources/eval-tasks.d.ts.map +1 -1
- package/resources/eval-tasks.js.map +1 -1
- package/resources/eval-tasks.mjs.map +1 -1
- package/resources/index.d.ts +3 -2
- package/resources/index.d.ts.map +1 -1
- package/resources/index.js +3 -1
- package/resources/index.js.map +1 -1
- package/resources/index.mjs +2 -1
- package/resources/index.mjs.map +1 -1
- package/resources/inference.d.ts +58 -2
- package/resources/inference.d.ts.map +1 -1
- package/resources/models.d.ts +1 -1
- package/resources/models.d.ts.map +1 -1
- package/resources/post-training/job.d.ts +8 -1
- package/resources/post-training/job.d.ts.map +1 -1
- package/resources/post-training/job.js.map +1 -1
- package/resources/post-training/job.mjs.map +1 -1
- package/resources/providers.d.ts +1 -1
- package/resources/providers.d.ts.map +1 -1
- package/resources/routes.d.ts +1 -1
- package/resources/routes.d.ts.map +1 -1
- package/resources/scoring-functions.d.ts +1 -1
- package/resources/scoring-functions.d.ts.map +1 -1
- package/resources/shared.d.ts +152 -11
- package/resources/shared.d.ts.map +1 -1
- package/resources/shields.d.ts +4 -1
- package/resources/shields.d.ts.map +1 -1
- package/resources/synthetic-data-generation.d.ts +7 -0
- package/resources/synthetic-data-generation.d.ts.map +1 -1
- package/resources/telemetry.d.ts +4 -15
- package/resources/telemetry.d.ts.map +1 -1
- package/resources/tool-runtime/rag-tool.d.ts +3 -0
- package/resources/tool-runtime/rag-tool.d.ts.map +1 -1
- package/resources/tool-runtime/tool-runtime.d.ts +9 -1
- package/resources/tool-runtime/tool-runtime.d.ts.map +1 -1
- package/resources/tool-runtime/tool-runtime.js.map +1 -1
- package/resources/tool-runtime/tool-runtime.mjs.map +1 -1
- package/resources/toolgroups.d.ts +13 -4
- package/resources/toolgroups.d.ts.map +1 -1
- package/resources/toolgroups.js.map +1 -1
- package/resources/toolgroups.mjs.map +1 -1
- package/resources/tools.d.ts +1 -1
- package/resources/tools.d.ts.map +1 -1
- package/resources/vector-dbs.d.ts +1 -11
- package/resources/vector-dbs.d.ts.map +1 -1
- package/resources/vector-io.d.ts +9 -0
- package/resources/vector-io.d.ts.map +1 -1
- package/src/core.ts +14 -6
- package/src/index.ts +48 -13
- package/src/internal/decoders/line.ts +69 -38
- package/src/resources/agents/agents.ts +9 -0
- package/src/resources/agents/session.ts +3 -0
- package/src/resources/agents/turn.ts +100 -6
- package/src/resources/batch-inference.ts +14 -0
- package/src/resources/benchmarks.ts +69 -0
- package/src/resources/datasets.ts +22 -22
- package/src/resources/eval/eval.ts +46 -26
- package/src/resources/eval/index.ts +3 -1
- package/src/resources/eval/jobs.ts +6 -6
- package/src/resources/eval-tasks.ts +11 -27
- package/src/resources/index.ts +11 -8
- package/src/resources/inference.ts +71 -2
- package/src/resources/models.ts +1 -1
- package/src/resources/post-training/job.ts +14 -2
- package/src/resources/providers.ts +1 -1
- package/src/resources/routes.ts +1 -1
- package/src/resources/scoring-functions.ts +1 -1
- package/src/resources/shared.ts +172 -14
- package/src/resources/shields.ts +4 -1
- package/src/resources/synthetic-data-generation.ts +7 -0
- package/src/resources/telemetry.ts +4 -22
- package/src/resources/tool-runtime/rag-tool.ts +3 -0
- package/src/resources/tool-runtime/tool-runtime.ts +10 -1
- package/src/resources/toolgroups.ts +15 -4
- package/src/resources/tools.ts +1 -1
- package/src/resources/vector-dbs.ts +1 -17
- package/src/resources/vector-io.ts +9 -0
- package/src/streaming.ts +5 -1
- package/src/version.ts +1 -1
- package/streaming.d.ts +3 -1
- package/streaming.d.ts.map +1 -1
- package/streaming.js +4 -1
- package/streaming.js.map +1 -1
- package/streaming.mjs +4 -1
- package/streaming.mjs.map +1 -1
- package/version.d.ts +1 -1
- package/version.js +1 -1
- package/version.mjs +1 -1
|
@@ -31,12 +31,23 @@ export interface BatchInferenceChatCompletionParams {
|
|
|
31
31
|
|
|
32
32
|
logprobs?: BatchInferenceChatCompletionParams.Logprobs;
|
|
33
33
|
|
|
34
|
+
/**
|
|
35
|
+
* Configuration for JSON schema-guided response generation.
|
|
36
|
+
*/
|
|
34
37
|
response_format?: Shared.ResponseFormat;
|
|
35
38
|
|
|
36
39
|
sampling_params?: Shared.SamplingParams;
|
|
37
40
|
|
|
41
|
+
/**
|
|
42
|
+
* Whether tool use is required or automatic. This is a hint to the model which may
|
|
43
|
+
* not be followed. It depends on the Instruction Following capabilities of the
|
|
44
|
+
* model.
|
|
45
|
+
*/
|
|
38
46
|
tool_choice?: 'auto' | 'required';
|
|
39
47
|
|
|
48
|
+
/**
|
|
49
|
+
* Prompt format for calling custom / zero shot tools.
|
|
50
|
+
*/
|
|
40
51
|
tool_prompt_format?: 'json' | 'function_tag' | 'python_list';
|
|
41
52
|
|
|
42
53
|
tools?: Array<BatchInferenceChatCompletionParams.Tool>;
|
|
@@ -66,6 +77,9 @@ export interface BatchInferenceCompletionParams {
|
|
|
66
77
|
|
|
67
78
|
logprobs?: BatchInferenceCompletionParams.Logprobs;
|
|
68
79
|
|
|
80
|
+
/**
|
|
81
|
+
* Configuration for JSON schema-guided response generation.
|
|
82
|
+
*/
|
|
69
83
|
response_format?: Shared.ResponseFormat;
|
|
70
84
|
|
|
71
85
|
sampling_params?: Shared.SamplingParams;
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
import { APIResource } from '../resource';
|
|
4
|
+
import * as Core from '../core';
|
|
5
|
+
|
|
6
|
+
export class Benchmarks extends APIResource {
|
|
7
|
+
retrieve(benchmarkId: string, options?: Core.RequestOptions): Core.APIPromise<Benchmark | null> {
|
|
8
|
+
return this._client.get(`/v1/eval/benchmarks/${benchmarkId}`, options);
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
list(options?: Core.RequestOptions): Core.APIPromise<BenchmarkListResponse> {
|
|
12
|
+
return (
|
|
13
|
+
this._client.get('/v1/eval/benchmarks', options) as Core.APIPromise<{ data: BenchmarkListResponse }>
|
|
14
|
+
)._thenUnwrap((obj) => obj.data);
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
register(body: BenchmarkRegisterParams, options?: Core.RequestOptions): Core.APIPromise<void> {
|
|
18
|
+
return this._client.post('/v1/eval/benchmarks', {
|
|
19
|
+
body,
|
|
20
|
+
...options,
|
|
21
|
+
headers: { Accept: '*/*', ...options?.headers },
|
|
22
|
+
});
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
export interface Benchmark {
|
|
27
|
+
dataset_id: string;
|
|
28
|
+
|
|
29
|
+
identifier: string;
|
|
30
|
+
|
|
31
|
+
metadata: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
32
|
+
|
|
33
|
+
provider_id: string;
|
|
34
|
+
|
|
35
|
+
provider_resource_id: string;
|
|
36
|
+
|
|
37
|
+
scoring_functions: Array<string>;
|
|
38
|
+
|
|
39
|
+
type: 'benchmark';
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export interface ListBenchmarksResponse {
|
|
43
|
+
data: BenchmarkListResponse;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
export type BenchmarkListResponse = Array<Benchmark>;
|
|
47
|
+
|
|
48
|
+
export interface BenchmarkRegisterParams {
|
|
49
|
+
benchmark_id: string;
|
|
50
|
+
|
|
51
|
+
dataset_id: string;
|
|
52
|
+
|
|
53
|
+
scoring_functions: Array<string>;
|
|
54
|
+
|
|
55
|
+
metadata?: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
56
|
+
|
|
57
|
+
provider_benchmark_id?: string;
|
|
58
|
+
|
|
59
|
+
provider_id?: string;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
export declare namespace Benchmarks {
|
|
63
|
+
export {
|
|
64
|
+
type Benchmark as Benchmark,
|
|
65
|
+
type ListBenchmarksResponse as ListBenchmarksResponse,
|
|
66
|
+
type BenchmarkListResponse as BenchmarkListResponse,
|
|
67
|
+
type BenchmarkRegisterParams as BenchmarkRegisterParams,
|
|
68
|
+
};
|
|
69
|
+
}
|
|
@@ -35,25 +35,7 @@ export class Datasets extends APIResource {
|
|
|
35
35
|
}
|
|
36
36
|
|
|
37
37
|
export interface ListDatasetsResponse {
|
|
38
|
-
data:
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
export namespace ListDatasetsResponse {
|
|
42
|
-
export interface Data {
|
|
43
|
-
dataset_schema: Record<string, Shared.ParamType>;
|
|
44
|
-
|
|
45
|
-
identifier: string;
|
|
46
|
-
|
|
47
|
-
metadata: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
48
|
-
|
|
49
|
-
provider_id: string;
|
|
50
|
-
|
|
51
|
-
provider_resource_id: string;
|
|
52
|
-
|
|
53
|
-
type: 'dataset';
|
|
54
|
-
|
|
55
|
-
url: Shared.URL;
|
|
56
|
-
}
|
|
38
|
+
data: DatasetListResponse;
|
|
57
39
|
}
|
|
58
40
|
|
|
59
41
|
export interface DatasetRetrieveResponse {
|
|
@@ -69,7 +51,13 @@ export interface DatasetRetrieveResponse {
|
|
|
69
51
|
|
|
70
52
|
type: 'dataset';
|
|
71
53
|
|
|
72
|
-
url:
|
|
54
|
+
url: DatasetRetrieveResponse.URL;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
export namespace DatasetRetrieveResponse {
|
|
58
|
+
export interface URL {
|
|
59
|
+
uri: string;
|
|
60
|
+
}
|
|
73
61
|
}
|
|
74
62
|
|
|
75
63
|
export type DatasetListResponse = Array<DatasetListResponse.DatasetListResponseItem>;
|
|
@@ -88,7 +76,13 @@ export namespace DatasetListResponse {
|
|
|
88
76
|
|
|
89
77
|
type: 'dataset';
|
|
90
78
|
|
|
91
|
-
url:
|
|
79
|
+
url: DatasetListResponseItem.URL;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
export namespace DatasetListResponseItem {
|
|
83
|
+
export interface URL {
|
|
84
|
+
uri: string;
|
|
85
|
+
}
|
|
92
86
|
}
|
|
93
87
|
}
|
|
94
88
|
|
|
@@ -97,7 +91,7 @@ export interface DatasetRegisterParams {
|
|
|
97
91
|
|
|
98
92
|
dataset_schema: Record<string, Shared.ParamType>;
|
|
99
93
|
|
|
100
|
-
url:
|
|
94
|
+
url: DatasetRegisterParams.URL;
|
|
101
95
|
|
|
102
96
|
metadata?: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
103
97
|
|
|
@@ -106,6 +100,12 @@ export interface DatasetRegisterParams {
|
|
|
106
100
|
provider_id?: string;
|
|
107
101
|
}
|
|
108
102
|
|
|
103
|
+
export namespace DatasetRegisterParams {
|
|
104
|
+
export interface URL {
|
|
105
|
+
uri: string;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
109
|
export declare namespace Datasets {
|
|
110
110
|
export {
|
|
111
111
|
type ListDatasetsResponse as ListDatasetsResponse,
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
import { APIResource } from '../../resource';
|
|
4
4
|
import * as Core from '../../core';
|
|
5
|
-
import * as EvalAPI from './eval';
|
|
6
5
|
import * as ScoringFunctionsAPI from '../scoring-functions';
|
|
7
6
|
import * as Shared from '../shared';
|
|
8
7
|
import * as JobsAPI from './jobs';
|
|
@@ -19,9 +18,35 @@ export class Eval extends APIResource {
|
|
|
19
18
|
return this._client.post(`/v1/eval/tasks/${taskId}/evaluations`, { body, ...options });
|
|
20
19
|
}
|
|
21
20
|
|
|
21
|
+
evaluateRowsAlpha(
|
|
22
|
+
benchmarkId: string,
|
|
23
|
+
body: EvalEvaluateRowsAlphaParams,
|
|
24
|
+
options?: Core.RequestOptions,
|
|
25
|
+
): Core.APIPromise<EvaluateResponse> {
|
|
26
|
+
return this._client.post(`/v1/eval/benchmarks/${benchmarkId}/evaluations`, { body, ...options });
|
|
27
|
+
}
|
|
28
|
+
|
|
22
29
|
runEval(taskId: string, body: EvalRunEvalParams, options?: Core.RequestOptions): Core.APIPromise<Job> {
|
|
23
30
|
return this._client.post(`/v1/eval/tasks/${taskId}/jobs`, { body, ...options });
|
|
24
31
|
}
|
|
32
|
+
|
|
33
|
+
runEvalAlpha(
|
|
34
|
+
benchmarkId: string,
|
|
35
|
+
body: EvalRunEvalAlphaParams,
|
|
36
|
+
options?: Core.RequestOptions,
|
|
37
|
+
): Core.APIPromise<Job> {
|
|
38
|
+
return this._client.post(`/v1/eval/benchmarks/${benchmarkId}/jobs`, { body, ...options });
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export interface BenchmarkConfig {
|
|
43
|
+
eval_candidate: EvalCandidate;
|
|
44
|
+
|
|
45
|
+
scoring_params: Record<string, ScoringFunctionsAPI.ScoringFnParams>;
|
|
46
|
+
|
|
47
|
+
type: 'benchmark';
|
|
48
|
+
|
|
49
|
+
num_examples?: number;
|
|
25
50
|
}
|
|
26
51
|
|
|
27
52
|
export type EvalCandidate = EvalCandidate.ModelCandidate | EvalCandidate.AgentCandidate;
|
|
@@ -34,6 +59,9 @@ export namespace EvalCandidate {
|
|
|
34
59
|
|
|
35
60
|
type: 'model';
|
|
36
61
|
|
|
62
|
+
/**
|
|
63
|
+
* A system message providing instructions or context to the model.
|
|
64
|
+
*/
|
|
37
65
|
system_message?: Shared.SystemMessage;
|
|
38
66
|
}
|
|
39
67
|
|
|
@@ -44,28 +72,6 @@ export namespace EvalCandidate {
|
|
|
44
72
|
}
|
|
45
73
|
}
|
|
46
74
|
|
|
47
|
-
export type EvalTaskConfig = EvalTaskConfig.BenchmarkEvalTaskConfig | EvalTaskConfig.AppEvalTaskConfig;
|
|
48
|
-
|
|
49
|
-
export namespace EvalTaskConfig {
|
|
50
|
-
export interface BenchmarkEvalTaskConfig {
|
|
51
|
-
eval_candidate: EvalAPI.EvalCandidate;
|
|
52
|
-
|
|
53
|
-
type: 'benchmark';
|
|
54
|
-
|
|
55
|
-
num_examples?: number;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
export interface AppEvalTaskConfig {
|
|
59
|
-
eval_candidate: EvalAPI.EvalCandidate;
|
|
60
|
-
|
|
61
|
-
scoring_params: Record<string, ScoringFunctionsAPI.ScoringFnParams>;
|
|
62
|
-
|
|
63
|
-
type: 'app';
|
|
64
|
-
|
|
65
|
-
num_examples?: number;
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
|
|
69
75
|
export interface EvaluateResponse {
|
|
70
76
|
generations: Array<Record<string, boolean | number | string | Array<unknown> | unknown | null>>;
|
|
71
77
|
|
|
@@ -81,23 +87,37 @@ export interface EvalEvaluateRowsParams {
|
|
|
81
87
|
|
|
82
88
|
scoring_functions: Array<string>;
|
|
83
89
|
|
|
84
|
-
task_config:
|
|
90
|
+
task_config: BenchmarkConfig;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
export interface EvalEvaluateRowsAlphaParams {
|
|
94
|
+
input_rows: Array<Record<string, boolean | number | string | Array<unknown> | unknown | null>>;
|
|
95
|
+
|
|
96
|
+
scoring_functions: Array<string>;
|
|
97
|
+
|
|
98
|
+
task_config: BenchmarkConfig;
|
|
85
99
|
}
|
|
86
100
|
|
|
87
101
|
export interface EvalRunEvalParams {
|
|
88
|
-
task_config:
|
|
102
|
+
task_config: BenchmarkConfig;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
export interface EvalRunEvalAlphaParams {
|
|
106
|
+
task_config: BenchmarkConfig;
|
|
89
107
|
}
|
|
90
108
|
|
|
91
109
|
Eval.Jobs = Jobs;
|
|
92
110
|
|
|
93
111
|
export declare namespace Eval {
|
|
94
112
|
export {
|
|
113
|
+
type BenchmarkConfig as BenchmarkConfig,
|
|
95
114
|
type EvalCandidate as EvalCandidate,
|
|
96
|
-
type EvalTaskConfig as EvalTaskConfig,
|
|
97
115
|
type EvaluateResponse as EvaluateResponse,
|
|
98
116
|
type Job as Job,
|
|
99
117
|
type EvalEvaluateRowsParams as EvalEvaluateRowsParams,
|
|
118
|
+
type EvalEvaluateRowsAlphaParams as EvalEvaluateRowsAlphaParams,
|
|
100
119
|
type EvalRunEvalParams as EvalRunEvalParams,
|
|
120
|
+
type EvalRunEvalAlphaParams as EvalRunEvalAlphaParams,
|
|
101
121
|
};
|
|
102
122
|
|
|
103
123
|
export { Jobs as Jobs, type JobStatusResponse as JobStatusResponse };
|
|
@@ -2,11 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
export {
|
|
4
4
|
Eval,
|
|
5
|
+
type BenchmarkConfig,
|
|
5
6
|
type EvalCandidate,
|
|
6
|
-
type EvalTaskConfig,
|
|
7
7
|
type EvaluateResponse,
|
|
8
8
|
type Job,
|
|
9
9
|
type EvalEvaluateRowsParams,
|
|
10
|
+
type EvalEvaluateRowsAlphaParams,
|
|
10
11
|
type EvalRunEvalParams,
|
|
12
|
+
type EvalRunEvalAlphaParams,
|
|
11
13
|
} from './eval';
|
|
12
14
|
export { Jobs, type JobStatusResponse } from './jobs';
|
|
@@ -6,26 +6,26 @@ import * as EvalAPI from './eval';
|
|
|
6
6
|
|
|
7
7
|
export class Jobs extends APIResource {
|
|
8
8
|
retrieve(
|
|
9
|
-
|
|
9
|
+
benchmarkId: string,
|
|
10
10
|
jobId: string,
|
|
11
11
|
options?: Core.RequestOptions,
|
|
12
12
|
): Core.APIPromise<EvalAPI.EvaluateResponse> {
|
|
13
|
-
return this._client.get(`/v1/eval/
|
|
13
|
+
return this._client.get(`/v1/eval/benchmarks/${benchmarkId}/jobs/${jobId}/result`, options);
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
-
cancel(
|
|
17
|
-
return this._client.delete(`/v1/eval/
|
|
16
|
+
cancel(benchmarkId: string, jobId: string, options?: Core.RequestOptions): Core.APIPromise<void> {
|
|
17
|
+
return this._client.delete(`/v1/eval/benchmarks/${benchmarkId}/jobs/${jobId}`, {
|
|
18
18
|
...options,
|
|
19
19
|
headers: { Accept: '*/*', ...options?.headers },
|
|
20
20
|
});
|
|
21
21
|
}
|
|
22
22
|
|
|
23
23
|
status(
|
|
24
|
-
|
|
24
|
+
benchmarkId: string,
|
|
25
25
|
jobId: string,
|
|
26
26
|
options?: Core.RequestOptions,
|
|
27
27
|
): Core.APIPromise<JobStatusResponse | null> {
|
|
28
|
-
return this._client.get(`/v1/eval/
|
|
28
|
+
return this._client.get(`/v1/eval/benchmarks/${benchmarkId}/jobs/${jobId}`, options);
|
|
29
29
|
}
|
|
30
30
|
}
|
|
31
31
|
|
|
@@ -2,15 +2,21 @@
|
|
|
2
2
|
|
|
3
3
|
import { APIResource } from '../resource';
|
|
4
4
|
import * as Core from '../core';
|
|
5
|
+
import * as BenchmarksAPI from './benchmarks';
|
|
5
6
|
|
|
6
7
|
export class EvalTasks extends APIResource {
|
|
7
|
-
retrieve(
|
|
8
|
+
retrieve(
|
|
9
|
+
evalTaskId: string,
|
|
10
|
+
options?: Core.RequestOptions,
|
|
11
|
+
): Core.APIPromise<BenchmarksAPI.Benchmark | null> {
|
|
8
12
|
return this._client.get(`/v1/eval-tasks/${evalTaskId}`, options);
|
|
9
13
|
}
|
|
10
14
|
|
|
11
|
-
list(options?: Core.RequestOptions): Core.APIPromise<
|
|
15
|
+
list(options?: Core.RequestOptions): Core.APIPromise<BenchmarksAPI.BenchmarkListResponse> {
|
|
12
16
|
return (
|
|
13
|
-
this._client.get('/v1/eval-tasks', options) as Core.APIPromise<{
|
|
17
|
+
this._client.get('/v1/eval-tasks', options) as Core.APIPromise<{
|
|
18
|
+
data: BenchmarksAPI.BenchmarkListResponse;
|
|
19
|
+
}>
|
|
14
20
|
)._thenUnwrap((obj) => obj.data);
|
|
15
21
|
}
|
|
16
22
|
|
|
@@ -23,27 +29,7 @@ export class EvalTasks extends APIResource {
|
|
|
23
29
|
}
|
|
24
30
|
}
|
|
25
31
|
|
|
26
|
-
export
|
|
27
|
-
dataset_id: string;
|
|
28
|
-
|
|
29
|
-
identifier: string;
|
|
30
|
-
|
|
31
|
-
metadata: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
32
|
-
|
|
33
|
-
provider_id: string;
|
|
34
|
-
|
|
35
|
-
provider_resource_id: string;
|
|
36
|
-
|
|
37
|
-
scoring_functions: Array<string>;
|
|
38
|
-
|
|
39
|
-
type: 'eval_task';
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
export interface ListEvalTasksResponse {
|
|
43
|
-
data: Array<EvalTask>;
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
export type EvalTaskListResponse = Array<EvalTask>;
|
|
32
|
+
export type EvalTaskListResponse = Array<BenchmarksAPI.Benchmark>;
|
|
47
33
|
|
|
48
34
|
export interface EvalTaskRegisterParams {
|
|
49
35
|
dataset_id: string;
|
|
@@ -54,15 +40,13 @@ export interface EvalTaskRegisterParams {
|
|
|
54
40
|
|
|
55
41
|
metadata?: Record<string, boolean | number | string | Array<unknown> | unknown | null>;
|
|
56
42
|
|
|
57
|
-
|
|
43
|
+
provider_benchmark_id?: string;
|
|
58
44
|
|
|
59
45
|
provider_id?: string;
|
|
60
46
|
}
|
|
61
47
|
|
|
62
48
|
export declare namespace EvalTasks {
|
|
63
49
|
export {
|
|
64
|
-
type EvalTask as EvalTask,
|
|
65
|
-
type ListEvalTasksResponse as ListEvalTasksResponse,
|
|
66
50
|
type EvalTaskListResponse as EvalTaskListResponse,
|
|
67
51
|
type EvalTaskRegisterParams as EvalTaskRegisterParams,
|
|
68
52
|
};
|
package/src/resources/index.ts
CHANGED
|
@@ -17,6 +17,13 @@ export {
|
|
|
17
17
|
type BatchInferenceChatCompletionParams,
|
|
18
18
|
type BatchInferenceCompletionParams,
|
|
19
19
|
} from './batch-inference';
|
|
20
|
+
export {
|
|
21
|
+
Benchmarks,
|
|
22
|
+
type Benchmark,
|
|
23
|
+
type ListBenchmarksResponse,
|
|
24
|
+
type BenchmarkListResponse,
|
|
25
|
+
type BenchmarkRegisterParams,
|
|
26
|
+
} from './benchmarks';
|
|
20
27
|
export {
|
|
21
28
|
Datasetio,
|
|
22
29
|
type PaginatedRowsResult,
|
|
@@ -32,20 +39,16 @@ export {
|
|
|
32
39
|
} from './datasets';
|
|
33
40
|
export {
|
|
34
41
|
Eval,
|
|
42
|
+
type BenchmarkConfig,
|
|
35
43
|
type EvalCandidate,
|
|
36
|
-
type EvalTaskConfig,
|
|
37
44
|
type EvaluateResponse,
|
|
38
45
|
type Job,
|
|
39
46
|
type EvalEvaluateRowsParams,
|
|
47
|
+
type EvalEvaluateRowsAlphaParams,
|
|
40
48
|
type EvalRunEvalParams,
|
|
49
|
+
type EvalRunEvalAlphaParams,
|
|
41
50
|
} from './eval/eval';
|
|
42
|
-
export {
|
|
43
|
-
EvalTasks,
|
|
44
|
-
type EvalTask,
|
|
45
|
-
type ListEvalTasksResponse,
|
|
46
|
-
type EvalTaskListResponse,
|
|
47
|
-
type EvalTaskRegisterParams,
|
|
48
|
-
} from './eval-tasks';
|
|
51
|
+
export { EvalTasks, type EvalTaskListResponse, type EvalTaskRegisterParams } from './eval-tasks';
|
|
49
52
|
export {
|
|
50
53
|
Inference,
|
|
51
54
|
type ChatCompletionResponseStreamChunk,
|
|
@@ -71,11 +71,16 @@ export class Inference extends APIResource {
|
|
|
71
71
|
}
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
+
/**
|
|
75
|
+
* A chunk of a streamed chat completion response.
|
|
76
|
+
*/
|
|
74
77
|
export interface ChatCompletionResponseStreamChunk {
|
|
75
78
|
/**
|
|
76
79
|
* The event containing the new content
|
|
77
80
|
*/
|
|
78
81
|
event: ChatCompletionResponseStreamChunk.Event;
|
|
82
|
+
|
|
83
|
+
metrics?: Array<ChatCompletionResponseStreamChunk.Metric>;
|
|
79
84
|
}
|
|
80
85
|
|
|
81
86
|
export namespace ChatCompletionResponseStreamChunk {
|
|
@@ -104,8 +109,29 @@ export namespace ChatCompletionResponseStreamChunk {
|
|
|
104
109
|
*/
|
|
105
110
|
stop_reason?: 'end_of_turn' | 'end_of_message' | 'out_of_tokens';
|
|
106
111
|
}
|
|
112
|
+
|
|
113
|
+
export interface Metric {
|
|
114
|
+
metric: string;
|
|
115
|
+
|
|
116
|
+
span_id: string;
|
|
117
|
+
|
|
118
|
+
timestamp: string;
|
|
119
|
+
|
|
120
|
+
trace_id: string;
|
|
121
|
+
|
|
122
|
+
type: 'metric';
|
|
123
|
+
|
|
124
|
+
unit: string;
|
|
125
|
+
|
|
126
|
+
value: number;
|
|
127
|
+
|
|
128
|
+
attributes?: Record<string, string | number | boolean | null>;
|
|
129
|
+
}
|
|
107
130
|
}
|
|
108
131
|
|
|
132
|
+
/**
|
|
133
|
+
* Response from a completion request.
|
|
134
|
+
*/
|
|
109
135
|
export interface CompletionResponse {
|
|
110
136
|
/**
|
|
111
137
|
* The generated completion text
|
|
@@ -123,6 +149,9 @@ export interface CompletionResponse {
|
|
|
123
149
|
logprobs?: Array<TokenLogProbs>;
|
|
124
150
|
}
|
|
125
151
|
|
|
152
|
+
/**
|
|
153
|
+
* Response containing generated embeddings.
|
|
154
|
+
*/
|
|
126
155
|
export interface EmbeddingsResponse {
|
|
127
156
|
/**
|
|
128
157
|
* List of embedding vectors, one per input content. Each embedding is a list of
|
|
@@ -132,6 +161,9 @@ export interface EmbeddingsResponse {
|
|
|
132
161
|
embeddings: Array<Array<number>>;
|
|
133
162
|
}
|
|
134
163
|
|
|
164
|
+
/**
|
|
165
|
+
* Log probabilities for generated tokens.
|
|
166
|
+
*/
|
|
135
167
|
export interface TokenLogProbs {
|
|
136
168
|
/**
|
|
137
169
|
* Dictionary mapping tokens to their log probabilities
|
|
@@ -182,17 +214,23 @@ export interface InferenceChatCompletionParamsBase {
|
|
|
182
214
|
|
|
183
215
|
/**
|
|
184
216
|
* (Optional) Whether tool use is required or automatic. Defaults to
|
|
185
|
-
* ToolChoice.auto.
|
|
217
|
+
* ToolChoice.auto. .. deprecated:: Use tool_config instead.
|
|
186
218
|
*/
|
|
187
219
|
tool_choice?: 'auto' | 'required';
|
|
188
220
|
|
|
221
|
+
/**
|
|
222
|
+
* (Optional) Configuration for tool use.
|
|
223
|
+
*/
|
|
224
|
+
tool_config?: InferenceChatCompletionParams.ToolConfig;
|
|
225
|
+
|
|
189
226
|
/**
|
|
190
227
|
* (Optional) Instructs the model how to format tool calls. By default, Llama Stack
|
|
191
228
|
* will attempt to use a format that is best adapted to the model. -
|
|
192
229
|
* `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. -
|
|
193
230
|
* `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
|
|
194
231
|
* <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls
|
|
195
|
-
* are output as Python syntax -- a list of function calls.
|
|
232
|
+
* are output as Python syntax -- a list of function calls. .. deprecated:: Use
|
|
233
|
+
* tool_config instead.
|
|
196
234
|
*/
|
|
197
235
|
tool_prompt_format?: 'json' | 'function_tag' | 'python_list';
|
|
198
236
|
|
|
@@ -214,6 +252,37 @@ export namespace InferenceChatCompletionParams {
|
|
|
214
252
|
top_k?: number;
|
|
215
253
|
}
|
|
216
254
|
|
|
255
|
+
/**
|
|
256
|
+
* (Optional) Configuration for tool use.
|
|
257
|
+
*/
|
|
258
|
+
export interface ToolConfig {
|
|
259
|
+
/**
|
|
260
|
+
* (Optional) Config for how to override the default system prompt. -
|
|
261
|
+
* `SystemMessageBehavior.append`: Appends the provided system message to the
|
|
262
|
+
* default system prompt. - `SystemMessageBehavior.replace`: Replaces the default
|
|
263
|
+
* system prompt with the provided system message. The system message can include
|
|
264
|
+
* the string '{{function_definitions}}' to indicate where the function definitions
|
|
265
|
+
* should be inserted.
|
|
266
|
+
*/
|
|
267
|
+
system_message_behavior: 'append' | 'replace';
|
|
268
|
+
|
|
269
|
+
/**
|
|
270
|
+
* (Optional) Whether tool use is required or automatic. Defaults to
|
|
271
|
+
* ToolChoice.auto.
|
|
272
|
+
*/
|
|
273
|
+
tool_choice?: 'auto' | 'required';
|
|
274
|
+
|
|
275
|
+
/**
|
|
276
|
+
* (Optional) Instructs the model how to format tool calls. By default, Llama Stack
|
|
277
|
+
* will attempt to use a format that is best adapted to the model. -
|
|
278
|
+
* `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. -
|
|
279
|
+
* `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
|
|
280
|
+
* <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls
|
|
281
|
+
* are output as Python syntax -- a list of function calls.
|
|
282
|
+
*/
|
|
283
|
+
tool_prompt_format?: 'json' | 'function_tag' | 'python_list';
|
|
284
|
+
}
|
|
285
|
+
|
|
217
286
|
export interface Tool {
|
|
218
287
|
tool_name: 'brave_search' | 'wolfram_alpha' | 'photogen' | 'code_interpreter' | (string & {});
|
|
219
288
|
|
package/src/resources/models.ts
CHANGED
|
@@ -2,11 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
import { APIResource } from '../../resource';
|
|
4
4
|
import * as Core from '../../core';
|
|
5
|
+
import { ListPostTrainingJobsResponse } from './post-training';
|
|
6
|
+
import * as PostTrainingAPI from './post-training';
|
|
5
7
|
|
|
6
8
|
export class Job extends APIResource {
|
|
7
|
-
list(
|
|
9
|
+
list(
|
|
10
|
+
options?: Core.RequestOptions,
|
|
11
|
+
): Core.APIPromise<Array<PostTrainingAPI.ListPostTrainingJobsResponse.Data>> {
|
|
8
12
|
return (
|
|
9
|
-
this._client.get('/v1/post-training/jobs', options) as Core.APIPromise<{
|
|
13
|
+
this._client.get('/v1/post-training/jobs', options) as Core.APIPromise<{
|
|
14
|
+
data: Array<PostTrainingAPI.ListPostTrainingJobsResponse.Data>;
|
|
15
|
+
}>
|
|
10
16
|
)._thenUnwrap((obj) => obj.data);
|
|
11
17
|
}
|
|
12
18
|
|
|
@@ -38,12 +44,18 @@ export namespace JobListResponse {
|
|
|
38
44
|
}
|
|
39
45
|
}
|
|
40
46
|
|
|
47
|
+
/**
|
|
48
|
+
* Artifacts of a finetuning job.
|
|
49
|
+
*/
|
|
41
50
|
export interface JobArtifactsResponse {
|
|
42
51
|
checkpoints: Array<unknown>;
|
|
43
52
|
|
|
44
53
|
job_uuid: string;
|
|
45
54
|
}
|
|
46
55
|
|
|
56
|
+
/**
|
|
57
|
+
* Status of a finetuning job.
|
|
58
|
+
*/
|
|
47
59
|
export interface JobStatusResponse {
|
|
48
60
|
checkpoints: Array<unknown>;
|
|
49
61
|
|
package/src/resources/routes.ts
CHANGED