anyscale 0.26.47__py3-none-any.whl → 0.26.48__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anyscale/__init__.py +0 -7
- anyscale/_private/anyscale_client/anyscale_client.py +1 -208
- anyscale/_private/anyscale_client/common.py +0 -55
- anyscale/_private/anyscale_client/fake_anyscale_client.py +19 -46
- anyscale/_private/docgen/__main__.py +24 -45
- anyscale/_private/docgen/generator.py +32 -16
- anyscale/_private/docgen/generator_legacy.py +58 -6
- anyscale/_private/docgen/models.md +3 -2
- anyscale/_private/workload/workload_config.py +16 -8
- anyscale/_private/workload/workload_sdk.py +22 -5
- anyscale/client/README.md +4 -1
- anyscale/client/openapi_client/__init__.py +2 -1
- anyscale/client/openapi_client/api/default_api.py +253 -4
- anyscale/client/openapi_client/models/__init__.py +2 -1
- anyscale/client/openapi_client/models/{alert_type.py → alert_issue_type.py} +8 -20
- anyscale/client/openapi_client/models/baseimagesenum.py +1 -2
- anyscale/client/openapi_client/models/cloud.py +31 -3
- anyscale/client/openapi_client/models/cloud_deployment.py +30 -3
- anyscale/client/openapi_client/models/cloud_with_cloud_resource.py +29 -1
- anyscale/client/openapi_client/models/cloud_with_cloud_resource_gcp.py +29 -1
- anyscale/client/openapi_client/models/dataset_metrics.py +6 -6
- anyscale/client/openapi_client/models/dataset_state.py +2 -1
- anyscale/client/openapi_client/models/describe_system_workload_response.py +32 -6
- anyscale/client/openapi_client/models/experimental_workspace.py +29 -1
- anyscale/client/openapi_client/models/experimental_workspaces_sort_field.py +2 -1
- anyscale/client/openapi_client/models/operator_metrics.py +8 -9
- anyscale/client/openapi_client/models/operator_status.py +102 -0
- anyscale/client/openapi_client/models/organization_usage_alert.py +20 -20
- anyscale/client/openapi_client/models/supportedbaseimagesenum.py +1 -2
- anyscale/cloud/models.py +330 -0
- anyscale/commands/cloud_commands.py +132 -43
- anyscale/commands/command_examples.py +54 -134
- anyscale/commands/compute_config_commands.py +7 -11
- anyscale/compute_config/__init__.py +2 -16
- anyscale/compute_config/_private/compute_config_sdk.py +27 -17
- anyscale/compute_config/commands.py +14 -44
- anyscale/compute_config/models.py +49 -26
- anyscale/controllers/cloud_controller.py +289 -171
- anyscale/controllers/cloud_file_storage_utils.py +204 -0
- anyscale/controllers/kubernetes_verifier.py +1567 -0
- anyscale/job/_private/job_sdk.py +17 -8
- anyscale/job/models.py +1 -1
- anyscale/scripts.py +0 -2
- anyscale/sdk/anyscale_client/models/baseimagesenum.py +1 -2
- anyscale/sdk/anyscale_client/models/cloud.py +31 -3
- anyscale/sdk/anyscale_client/models/supportedbaseimagesenum.py +1 -2
- anyscale/shared_anyscale_utils/utils/id_gen.py +1 -0
- anyscale/version.py +1 -1
- anyscale/workspace/models.py +14 -7
- {anyscale-0.26.47.dist-info → anyscale-0.26.48.dist-info}/METADATA +1 -1
- {anyscale-0.26.47.dist-info → anyscale-0.26.48.dist-info}/RECORD +56 -70
- anyscale/commands/llm/dataset_commands.py +0 -269
- anyscale/commands/llm/group.py +0 -15
- anyscale/commands/llm/models_commands.py +0 -123
- anyscale/controllers/llm/__init__.py +0 -0
- anyscale/controllers/llm/models_controller.py +0 -144
- anyscale/llm/__init__.py +0 -2
- anyscale/llm/dataset/__init__.py +0 -2
- anyscale/llm/dataset/_private/__init__.py +0 -0
- anyscale/llm/dataset/_private/docs.py +0 -63
- anyscale/llm/dataset/_private/models.py +0 -71
- anyscale/llm/dataset/_private/sdk.py +0 -147
- anyscale/llm/model/__init__.py +0 -2
- anyscale/llm/model/_private/models_sdk.py +0 -62
- anyscale/llm/model/commands.py +0 -93
- anyscale/llm/model/models.py +0 -171
- anyscale/llm/model/sdk.py +0 -62
- anyscale/llm/sdk.py +0 -27
- {anyscale-0.26.47.dist-info → anyscale-0.26.48.dist-info}/WHEEL +0 -0
- {anyscale-0.26.47.dist-info → anyscale-0.26.48.dist-info}/entry_points.txt +0 -0
- {anyscale-0.26.47.dist-info → anyscale-0.26.48.dist-info}/licenses/LICENSE +0 -0
- {anyscale-0.26.47.dist-info → anyscale-0.26.48.dist-info}/licenses/NOTICE +0 -0
- {anyscale-0.26.47.dist-info → anyscale-0.26.48.dist-info}/top_level.txt +0 -0
@@ -1,123 +0,0 @@
|
|
1
|
-
from typing import Optional
|
2
|
-
|
3
|
-
import click
|
4
|
-
|
5
|
-
from anyscale.commands import command_examples
|
6
|
-
from anyscale.commands.util import AnyscaleCommand
|
7
|
-
from anyscale.controllers.llm.models_controller import ModelsController
|
8
|
-
from anyscale.util import validate_non_negative_arg
|
9
|
-
|
10
|
-
|
11
|
-
@click.group("model", help="Finetuned models stored on your Anyscale cloud.")
|
12
|
-
def models_cli():
|
13
|
-
pass
|
14
|
-
|
15
|
-
|
16
|
-
@models_cli.command(
|
17
|
-
name="get",
|
18
|
-
short_help="Retrieves information for a model in your Anyscale cloud.",
|
19
|
-
cls=AnyscaleCommand,
|
20
|
-
is_alpha=True,
|
21
|
-
example=command_examples.LLM_MODELS_GET_EXAMPLE,
|
22
|
-
)
|
23
|
-
@click.option(
|
24
|
-
"--model-id",
|
25
|
-
required=False,
|
26
|
-
type=str,
|
27
|
-
default=None,
|
28
|
-
help="ID for the model of interest",
|
29
|
-
)
|
30
|
-
@click.option(
|
31
|
-
"--job-id",
|
32
|
-
required=False,
|
33
|
-
type=str,
|
34
|
-
default=None,
|
35
|
-
help="ID for the Anyscale job corresponding to the fine-tuning run",
|
36
|
-
)
|
37
|
-
def get_model(model_id: Optional[str], job_id: Optional[str]) -> None:
|
38
|
-
"""
|
39
|
-
Gets the model card for the given model ID or corresponding job ID.
|
40
|
-
|
41
|
-
Example usage:
|
42
|
-
|
43
|
-
anyscale llm model get --model-id my-model-id
|
44
|
-
|
45
|
-
anyscale llm model get --job-id job_123
|
46
|
-
"""
|
47
|
-
ModelsController().get_model(model_id=model_id, job_id=job_id)
|
48
|
-
|
49
|
-
|
50
|
-
@models_cli.command(
|
51
|
-
name="delete",
|
52
|
-
short_help="Delete a fine-tuned model in your Anyscale cloud.",
|
53
|
-
cls=AnyscaleCommand,
|
54
|
-
is_alpha=True,
|
55
|
-
example=command_examples.LLM_MODELS_DELETE_EXAMPLE,
|
56
|
-
)
|
57
|
-
@click.argument("model_id", required=True)
|
58
|
-
def delete_model(model_id: str) -> None:
|
59
|
-
"""
|
60
|
-
Deletes the model for the given model ID. Requires owner permission for the corresponding Anyscale project.
|
61
|
-
|
62
|
-
MODEL_ID = ID for the model of interest
|
63
|
-
|
64
|
-
Example usage:
|
65
|
-
|
66
|
-
anyscale llm model delete my-model-id
|
67
|
-
"""
|
68
|
-
ModelsController().delete_model(model_id)
|
69
|
-
|
70
|
-
|
71
|
-
@models_cli.command(
|
72
|
-
name="list",
|
73
|
-
short_help="Lists fine-tuned models available to the user.",
|
74
|
-
cls=AnyscaleCommand,
|
75
|
-
is_alpha=True,
|
76
|
-
example=command_examples.LLM_MODELS_LIST_EXAMPLE,
|
77
|
-
)
|
78
|
-
@click.option(
|
79
|
-
"--cloud-id",
|
80
|
-
required=False,
|
81
|
-
type=str,
|
82
|
-
help="Cloud ID to filter by. If not specified, all models from all visible clouds (filtered optionally by `project_id`) are listed.",
|
83
|
-
)
|
84
|
-
@click.option(
|
85
|
-
"--project-id",
|
86
|
-
required=False,
|
87
|
-
type=str,
|
88
|
-
help="Project ID to filter by. If not specified, all the models from all visible projects (filtered optionally by `cloud_id`) are listed.",
|
89
|
-
)
|
90
|
-
@click.option(
|
91
|
-
"--max-items",
|
92
|
-
required=False,
|
93
|
-
type=int,
|
94
|
-
default=20,
|
95
|
-
help="Maximum number of items to show in the list. By default, the 20 most recently created models are fetched.",
|
96
|
-
callback=validate_non_negative_arg,
|
97
|
-
)
|
98
|
-
def list_models(cloud_id: Optional[str], project_id: Optional[str], max_items: int):
|
99
|
-
"""
|
100
|
-
Lists fine-tuned models available to the user.
|
101
|
-
|
102
|
-
By default, all models in all visible clouds under all visible projects to the user are listed. This is optionally filtered by `project_id` and/or `cloud_id`.
|
103
|
-
|
104
|
-
Example usage:
|
105
|
-
|
106
|
-
anyscale llm model list
|
107
|
-
|
108
|
-
anyscale llm model list --max-items 50
|
109
|
-
|
110
|
-
anyscale llm model list --cloud-id cld_123
|
111
|
-
|
112
|
-
anyscale llm model list --project-id prj_123
|
113
|
-
|
114
|
-
anyscale llm model list --cloud-id cld_123 --project-id prj_123
|
115
|
-
|
116
|
-
|
117
|
-
NOTE:
|
118
|
-
If you are running this from within an Anyscale workspace, and neither `cloud_id` nor `project_id` are provided, the cloud and project of the workspace will be used.
|
119
|
-
"""
|
120
|
-
controller = ModelsController()
|
121
|
-
controller.list_models(
|
122
|
-
cloud_id=cloud_id, project_id=project_id, max_items=max_items
|
123
|
-
)
|
File without changes
|
@@ -1,144 +0,0 @@
|
|
1
|
-
import json
|
2
|
-
from typing import Any, Dict, Optional
|
3
|
-
|
4
|
-
from rich import print as rprint
|
5
|
-
|
6
|
-
from anyscale.api_utils.common_utils import (
|
7
|
-
get_current_workspace_id,
|
8
|
-
source_cloud_id_and_project_id,
|
9
|
-
)
|
10
|
-
from anyscale.cli_logger import BlockLogger
|
11
|
-
from anyscale.client.openapi_client import FinetunedmodelListResponse
|
12
|
-
from anyscale.client.openapi_client.models import FineTunedModel as APIFineTunedModel
|
13
|
-
from anyscale.controllers.base_controller import BaseController
|
14
|
-
from anyscale.llm.model.models import DeletedFineTunedModel, FineTunedModel
|
15
|
-
|
16
|
-
|
17
|
-
LIST_ENDPOINT_COUNT = 20
|
18
|
-
|
19
|
-
|
20
|
-
class ModelsController(BaseController):
|
21
|
-
def __init__(
|
22
|
-
self, log: Optional[BlockLogger] = None, initialize_auth_api_client: bool = True
|
23
|
-
):
|
24
|
-
if log is None:
|
25
|
-
log = BlockLogger()
|
26
|
-
|
27
|
-
super().__init__(initialize_auth_api_client=initialize_auth_api_client)
|
28
|
-
|
29
|
-
self.log = log
|
30
|
-
self.log.open_block("Output")
|
31
|
-
|
32
|
-
def _parse_response_model_get(self, model: APIFineTunedModel) -> FineTunedModel:
|
33
|
-
return FineTunedModel(
|
34
|
-
id=model.id,
|
35
|
-
base_model_id=model.base_model_id,
|
36
|
-
cloud_id=model.cloud_id,
|
37
|
-
# model.created_at is a datetime object, convert to unix timestamp
|
38
|
-
created_at=int(model.created_at.timestamp()),
|
39
|
-
# `creator` is a MiniUser object, just retrieve email
|
40
|
-
creator=model.creator.email if model.creator is not None else None,
|
41
|
-
ft_type=model.ft_type,
|
42
|
-
generation_config=model.generation_config,
|
43
|
-
job_id=model.job_id,
|
44
|
-
project_id=model.project_id,
|
45
|
-
storage_uri=model.storage_uri,
|
46
|
-
workspace_id=model.workspace_id,
|
47
|
-
)
|
48
|
-
|
49
|
-
def _truncate(self, val: str, limit=50):
|
50
|
-
return val[:limit] + "..." if len(val) > limit else val
|
51
|
-
|
52
|
-
def _format_as_dict(
|
53
|
-
self, model: FineTunedModel, truncate: bool = False
|
54
|
-
) -> Dict[str, Any]:
|
55
|
-
output_map = {
|
56
|
-
"id": model.id,
|
57
|
-
"base_model_id": model.base_model_id,
|
58
|
-
"storage_uri": self._truncate(model.storage_uri)
|
59
|
-
if truncate
|
60
|
-
else model.storage_uri,
|
61
|
-
"ft_type": str(model.ft_type),
|
62
|
-
"cloud_id": model.cloud_id,
|
63
|
-
"project_id": model.project_id if model.project_id else "N/A",
|
64
|
-
"created_at": model.created_at,
|
65
|
-
"creator": model.creator if model.creator else "N/A",
|
66
|
-
"job_id": model.job_id if model.job_id else "N/A",
|
67
|
-
"workspace_id": model.workspace_id if model.workspace_id else "N/A",
|
68
|
-
"generation_config": self._truncate(json.dumps(model.generation_config))
|
69
|
-
if truncate
|
70
|
-
else model.generation_config,
|
71
|
-
}
|
72
|
-
return output_map
|
73
|
-
|
74
|
-
def get_model(self, model_id: Optional[str], job_id: Optional[str]):
|
75
|
-
"""Retrieves model information given model id"""
|
76
|
-
if model_id:
|
77
|
-
model = self.api_client.get_model_api_v2_llm_models_model_id_get(
|
78
|
-
model_id
|
79
|
-
).result
|
80
|
-
elif job_id:
|
81
|
-
model = self.api_client.get_model_by_job_id_api_v2_llm_models_get_by_job_id_job_id_get(
|
82
|
-
job_id
|
83
|
-
).result
|
84
|
-
else:
|
85
|
-
raise ValueError("Atleast one of `model-id` or `job-id` should be provided")
|
86
|
-
|
87
|
-
model = self._parse_response_model_get(model)
|
88
|
-
formatted_model = self._format_as_dict(model)
|
89
|
-
rprint(formatted_model)
|
90
|
-
return
|
91
|
-
|
92
|
-
def delete_model(self, model_id: str):
|
93
|
-
deleted_model = self.api_client.delete_model_api_v2_llm_models_model_id_delete(
|
94
|
-
model_id
|
95
|
-
).result
|
96
|
-
deleted_model_dict = deleted_model.to_dict()
|
97
|
-
deleted_model_dict["deleted_at"] = int(
|
98
|
-
deleted_model_dict["deleted_at"].timestamp()
|
99
|
-
)
|
100
|
-
deleted_model = DeletedFineTunedModel.from_dict(deleted_model_dict)
|
101
|
-
rprint(deleted_model.to_dict())
|
102
|
-
return
|
103
|
-
|
104
|
-
def list_models(
|
105
|
-
self, *, cloud_id: Optional[str], project_id: Optional[str], max_items: int
|
106
|
-
):
|
107
|
-
"""Lists fine-tuned models optionally filtered by `cloud_id` and `project_id`"""
|
108
|
-
if get_current_workspace_id() is not None:
|
109
|
-
# Resolve `cloud_id` and `project_id`. If not provided and if this is being run in a workspace,
|
110
|
-
# we use the `cloud_id` and `project_id` of the workspace
|
111
|
-
cloud_id, project_id = source_cloud_id_and_project_id(
|
112
|
-
internal_api=self.api_client,
|
113
|
-
external_api=self.anyscale_api_client,
|
114
|
-
cloud_id=cloud_id,
|
115
|
-
project_id=project_id,
|
116
|
-
)
|
117
|
-
paging_token = None
|
118
|
-
results = []
|
119
|
-
while True:
|
120
|
-
count = min(LIST_ENDPOINT_COUNT, max_items)
|
121
|
-
resp: FinetunedmodelListResponse = self.api_client.list_models_api_v2_llm_models_get(
|
122
|
-
cloud_id=cloud_id,
|
123
|
-
project_id=project_id,
|
124
|
-
paging_token=paging_token,
|
125
|
-
count=count,
|
126
|
-
)
|
127
|
-
models = resp.results
|
128
|
-
results.extend(models)
|
129
|
-
if not len(models) or not resp.metadata.next_paging_token:
|
130
|
-
break
|
131
|
-
|
132
|
-
if max_items and len(results) >= max_items:
|
133
|
-
break
|
134
|
-
paging_token = resp.metadata.next_paging_token
|
135
|
-
|
136
|
-
results = results[:max_items] if max_items else results
|
137
|
-
parsed_results = [self._parse_response_model_get(result) for result in results]
|
138
|
-
# get formatted dict with truncated strings for a nicer print
|
139
|
-
models_as_dicts = [
|
140
|
-
self._format_as_dict(model, truncate=True) for model in parsed_results
|
141
|
-
]
|
142
|
-
print("MODELS:")
|
143
|
-
rprint(models_as_dicts)
|
144
|
-
return
|
anyscale/llm/__init__.py
DELETED
anyscale/llm/dataset/__init__.py
DELETED
File without changes
|
@@ -1,63 +0,0 @@
|
|
1
|
-
GET_PY_EXAMPLE = """
|
2
|
-
import anyscale
|
3
|
-
from anyscale.llm.dataset import Dataset
|
4
|
-
|
5
|
-
dataset: Dataset = anyscale.llm.dataset.get("my_first_dataset")
|
6
|
-
print(f"Dataset name: '{dataset.name}'") # Dataset name: 'my_first_dataset'
|
7
|
-
|
8
|
-
# Get the second latest version of the dataset
|
9
|
-
prev_dataset = anyscale.llm.dataset.get("my_first_dataset", version=-1)
|
10
|
-
"""
|
11
|
-
|
12
|
-
GET_PY_ARG_DOCSTRINGS = {
|
13
|
-
"name": "Name of the dataset",
|
14
|
-
"version": "Version of the dataset. If a negative integer is provided, the dataset returned is this many versions back of the latest version. Default: Latest version.",
|
15
|
-
"project": "Name of the Anyscale project that the dataset belongs to. If not provided, all projects will be searched.",
|
16
|
-
}
|
17
|
-
|
18
|
-
UPLOAD_PY_EXAMPLE = """
|
19
|
-
import anyscale
|
20
|
-
|
21
|
-
anyscale.llm.dataset.upload("path/to/my_first_dataset.jsonl", name="my_first_dataset")
|
22
|
-
anyscale.llm.dataset.upload("my_dataset.jsonl", "second_dataset")
|
23
|
-
anyscale.llm.dataset.upload("my_dataset2.jsonl", "second_dataset", description="added 3 lines")
|
24
|
-
"""
|
25
|
-
|
26
|
-
UPLOAD_PY_ARG_DOCSTRINGS = {
|
27
|
-
"dataset_file": "Path to the dataset file to upload.",
|
28
|
-
"name": "Name of a new dataset, or an existing dataset, to upload a new version of.",
|
29
|
-
"description": "Description of the dataset version.",
|
30
|
-
"cloud": "Name of the Anyscale cloud to upload a new dataset to. If not provided, the default cloud will be used.",
|
31
|
-
"project": "Name of the Anyscale project to upload a new dataset to. If not provided, the default project of the cloud will be used.",
|
32
|
-
}
|
33
|
-
|
34
|
-
DOWNLOAD_PY_EXAMPLE = """
|
35
|
-
import anyscale
|
36
|
-
|
37
|
-
dataset_contents: bytes = anyscale.llm.dataset.download("my_first_dataset.jsonl")
|
38
|
-
jsonl_obj = [json.loads(line) for line in dataset_contents.decode().splitlines()]
|
39
|
-
|
40
|
-
prev_dataset_contents = anyscale.llm.dataset.download("my_first_dataset.jsonl", version=-1)
|
41
|
-
"""
|
42
|
-
|
43
|
-
DOWNLOAD_PY_ARG_DOCSTRINGS = {
|
44
|
-
"name": "Name of the dataset to download.",
|
45
|
-
"version": "Version of the dataset to download. If a negative integer is provided, the dataset returned is this many versions back of the latest version. Default: Latest version.",
|
46
|
-
"project": "Name of the Anyscale project to download the dataset from. If not provided, all projects will be searched.",
|
47
|
-
}
|
48
|
-
|
49
|
-
LIST_PY_EXAMPLE = """
|
50
|
-
import anyscale
|
51
|
-
|
52
|
-
datasets = anyscale.llm.dataset.list(limit=10)
|
53
|
-
for d in datasets:
|
54
|
-
print(f"Dataset name: '{d.name}'") # Prints 10 dataset names
|
55
|
-
"""
|
56
|
-
|
57
|
-
LIST_PY_ARG_DOCSTRINGS = {
|
58
|
-
"limit": "Maximum number of datasets to return. Default: 1000.",
|
59
|
-
"after": "ID of the dataset to start the listing from. If provided, the list will start from the dataset after this ID.",
|
60
|
-
"name_contains": "Filter datasets by name. If provided, only datasets with name containing this string will be returned.",
|
61
|
-
"cloud": "Name of the Anyscale cloud to search in. If not provided, all clouds will be searched.",
|
62
|
-
"project": "Name of the Anyscale project to search in. If not provided, all projects will be searched.",
|
63
|
-
}
|
@@ -1,71 +0,0 @@
|
|
1
|
-
from dataclasses import dataclass, field
|
2
|
-
from datetime import datetime
|
3
|
-
from typing import Optional
|
4
|
-
|
5
|
-
from anyscale._private.models.model_base import ModelBase
|
6
|
-
from anyscale.client.openapi_client.models import Dataset as InternalDataset
|
7
|
-
from anyscale.commands import command_examples
|
8
|
-
from anyscale.llm.dataset._private import docs
|
9
|
-
|
10
|
-
|
11
|
-
@dataclass(frozen=True)
|
12
|
-
class Dataset(ModelBase):
|
13
|
-
"""
|
14
|
-
Metadata about a dataset, which is a file uploaded by a user to their Anyscale cloud.
|
15
|
-
"""
|
16
|
-
|
17
|
-
__ignore_validation__ = True
|
18
|
-
|
19
|
-
__doc_py_example__ = docs.GET_PY_EXAMPLE
|
20
|
-
__doc_cli_example__ = command_examples.LLM_DATASET_GET_EXAMPLE
|
21
|
-
|
22
|
-
id: str = field(metadata={"docstring": "The ID of the dataset."})
|
23
|
-
name: str = field(metadata={"docstring": "The name of the dataset."})
|
24
|
-
filename: str = field(
|
25
|
-
metadata={"docstring": "The file name of the uploaded dataset."}
|
26
|
-
)
|
27
|
-
storage_uri: str = field(
|
28
|
-
metadata={
|
29
|
-
"docstring": "The URI at which the dataset is stored (eg. `s3://bucket/path/to/test.jsonl`)."
|
30
|
-
}
|
31
|
-
)
|
32
|
-
version: int = field(metadata={"docstring": "The version of the dataset."})
|
33
|
-
num_versions: int = field(
|
34
|
-
metadata={"docstring": "Number of versions of the dataset."}
|
35
|
-
)
|
36
|
-
created_at: datetime = field(
|
37
|
-
metadata={"docstring": "The time at which the dataset was uploaded."}
|
38
|
-
)
|
39
|
-
creator_id: str = field(
|
40
|
-
metadata={"docstring": "The ID of the Anyscale user who uploaded the dataset."}
|
41
|
-
)
|
42
|
-
project_id: str = field(
|
43
|
-
metadata={
|
44
|
-
"docstring": "The ID of the Anyscale project that the dataset belongs to."
|
45
|
-
}
|
46
|
-
)
|
47
|
-
cloud_id: str = field(
|
48
|
-
metadata={
|
49
|
-
"docstring": "The ID of the Anyscale cloud that the dataset belongs to."
|
50
|
-
}
|
51
|
-
)
|
52
|
-
description: Optional[str] = field(
|
53
|
-
default=None,
|
54
|
-
metadata={"docstring": "The description of the current dataset version."},
|
55
|
-
)
|
56
|
-
|
57
|
-
@classmethod
|
58
|
-
def parse_from_internal_model(cls, internal_model: InternalDataset) -> "Dataset":
|
59
|
-
return Dataset(
|
60
|
-
id=internal_model.id,
|
61
|
-
name=internal_model.name,
|
62
|
-
filename=internal_model.filename,
|
63
|
-
storage_uri=internal_model.storage_uri,
|
64
|
-
version=internal_model.version,
|
65
|
-
num_versions=internal_model.num_versions,
|
66
|
-
created_at=internal_model.created_at,
|
67
|
-
creator_id=internal_model.creator_id,
|
68
|
-
project_id=internal_model.project_id,
|
69
|
-
cloud_id=internal_model.cloud_id,
|
70
|
-
description=internal_model.description,
|
71
|
-
)
|
@@ -1,147 +0,0 @@
|
|
1
|
-
from typing import Optional
|
2
|
-
|
3
|
-
from anyscale._private.models.model_base import ListResponse
|
4
|
-
from anyscale._private.sdk import sdk_command_v2
|
5
|
-
from anyscale._private.sdk.base_sdk import BaseSDK
|
6
|
-
from anyscale.llm.dataset._private import docs
|
7
|
-
from anyscale.llm.dataset._private.models import Dataset
|
8
|
-
|
9
|
-
|
10
|
-
@sdk_command_v2(
|
11
|
-
doc_py_example=docs.GET_PY_EXAMPLE, arg_docstrings=docs.GET_PY_ARG_DOCSTRINGS,
|
12
|
-
)
|
13
|
-
def get(
|
14
|
-
name: str, version: Optional[int] = None, project: Optional[str] = None
|
15
|
-
) -> Dataset:
|
16
|
-
"""Retrieves metadata about a dataset.
|
17
|
-
|
18
|
-
:param name: Name of the dataset.
|
19
|
-
:param version: Version of the dataset. If a negative integer is provided, the dataset returned is this many versions back of the latest version. Default: Latest version.
|
20
|
-
:param project: Name of the Anyscale project that the dataset belongs to. If not provided, all projects will be searched.
|
21
|
-
|
22
|
-
Example usage:
|
23
|
-
```python
|
24
|
-
dataset = anyscale.llm.dataset.get("my_first_dataset")
|
25
|
-
print(f"Dataset name: '{dataset.name}'") # Dataset name: 'my_first_dataset'
|
26
|
-
|
27
|
-
# Get the second latest version of the dataset
|
28
|
-
prev_dataset = anyscale.llm.dataset.get("my_first_dataset", version=-1)
|
29
|
-
```
|
30
|
-
|
31
|
-
Return:
|
32
|
-
Dataset: The `Dataset` object.
|
33
|
-
"""
|
34
|
-
_sdk = BaseSDK()
|
35
|
-
dataset = _sdk.client.get_dataset(name, version, project)
|
36
|
-
return dataset
|
37
|
-
|
38
|
-
|
39
|
-
@sdk_command_v2(
|
40
|
-
doc_py_example=docs.UPLOAD_PY_EXAMPLE, arg_docstrings=docs.UPLOAD_PY_ARG_DOCSTRINGS,
|
41
|
-
)
|
42
|
-
def upload(
|
43
|
-
dataset_file: str,
|
44
|
-
name: str,
|
45
|
-
*,
|
46
|
-
description: Optional[str] = None,
|
47
|
-
cloud: Optional[str] = None,
|
48
|
-
project: Optional[str] = None,
|
49
|
-
) -> Dataset:
|
50
|
-
"""Uploads a dataset, or a new version of a dataset, to your Anyscale cloud.
|
51
|
-
|
52
|
-
:param dataset_file: Path to the dataset file to upload.
|
53
|
-
:param name: Name of a new dataset, or an existing dataset, to upload a new version of.
|
54
|
-
:param description: Description of the dataset version.
|
55
|
-
:param cloud: Name of the Anyscale cloud to upload a new dataset to. If not provided, the default cloud will be used.
|
56
|
-
:param project: Name of the Anyscale project to upload a new dataset to. If not provided, the default project of the cloud will be used.
|
57
|
-
|
58
|
-
Example usage:
|
59
|
-
```python
|
60
|
-
anyscale.llm.dataset.upload("path/to/my_first_dataset.jsonl", name="my_first_dataset")
|
61
|
-
anyscale.llm.dataset.upload("my_dataset.jsonl", "second_dataset")
|
62
|
-
anyscale.llm.dataset.upload("my_dataset2.jsonl", "second_dataset", description="added 3 lines")
|
63
|
-
```
|
64
|
-
Return:
|
65
|
-
Dataset: The `Dataset` object representing the uploaded dataset.
|
66
|
-
|
67
|
-
NOTE:
|
68
|
-
If you are uploading a new version, have run this from within an Anyscale workspace,
|
69
|
-
and neither `cloud` nor `project` are provided, the cloud and project of the workspace will be used.
|
70
|
-
"""
|
71
|
-
_sdk = BaseSDK()
|
72
|
-
dataset = _sdk.client.upload_dataset(
|
73
|
-
dataset_file, name, description, cloud, project,
|
74
|
-
)
|
75
|
-
return dataset
|
76
|
-
|
77
|
-
|
78
|
-
@sdk_command_v2(
|
79
|
-
doc_py_example=docs.DOWNLOAD_PY_EXAMPLE,
|
80
|
-
arg_docstrings=docs.DOWNLOAD_PY_ARG_DOCSTRINGS,
|
81
|
-
)
|
82
|
-
def download(
|
83
|
-
name: str, version: Optional[int] = None, project: Optional[str] = None
|
84
|
-
) -> bytes:
|
85
|
-
"""Downloads a dataset from your Anyscale cloud.
|
86
|
-
|
87
|
-
:param name: Name of the dataset to download.
|
88
|
-
:param version: Version of the dataset to download. If a negative integer is provided, the dataset returned is this many versions back of the latest version. Default: Latest version.
|
89
|
-
:param project: Name of the Anyscale project to download the dataset from. If not provided, all projects will be searched.
|
90
|
-
|
91
|
-
Example usage:
|
92
|
-
```python
|
93
|
-
dataset_contents: bytes = anyscale.llm.dataset.download("my_first_dataset.jsonl")
|
94
|
-
jsonl_obj = [json.loads(line) for line in dataset_contents.decode().splitlines()]
|
95
|
-
|
96
|
-
prev_dataset_contents = anyscale.llm.dataset.download("my_first_dataset.jsonl", version=-1)
|
97
|
-
```
|
98
|
-
|
99
|
-
Returns:
|
100
|
-
bytes: The contents of the dataset file.
|
101
|
-
"""
|
102
|
-
_sdk = BaseSDK()
|
103
|
-
dataset_bytes = _sdk.client.download_dataset(name, version, project)
|
104
|
-
return dataset_bytes
|
105
|
-
|
106
|
-
|
107
|
-
@sdk_command_v2(
|
108
|
-
doc_py_example=docs.LIST_PY_EXAMPLE, arg_docstrings=docs.LIST_PY_ARG_DOCSTRINGS
|
109
|
-
)
|
110
|
-
def list( # noqa: A001
|
111
|
-
*,
|
112
|
-
# Pagination
|
113
|
-
limit: Optional[int] = None,
|
114
|
-
after: Optional[str] = None, # Unique ID to start listing after
|
115
|
-
# Filtering
|
116
|
-
name_contains: Optional[str] = None, # Substring in name, case insensitive
|
117
|
-
cloud: Optional[str] = None,
|
118
|
-
project: Optional[str] = None,
|
119
|
-
) -> ListResponse[Dataset]:
|
120
|
-
"""
|
121
|
-
Lists datasets.
|
122
|
-
|
123
|
-
:param limit: Maximum number of datasets to return. Default: 1000.
|
124
|
-
:param after: ID of the dataset to start the listing from. If provided, the list will start from the dataset after this ID.
|
125
|
-
:param name_contains: Filter datasets by name. If provided, only datasets with name containing this string will be returned.
|
126
|
-
:param cloud: Name of the Anyscale cloud to search in. If not provided, all clouds will be searched.
|
127
|
-
:param project: Name of the Anyscale project to search in. If not provided, all projects will be searched.
|
128
|
-
|
129
|
-
Example usage:
|
130
|
-
```
|
131
|
-
datasets = anyscale.llm.dataset.list(limit=10)
|
132
|
-
for d in datasets:
|
133
|
-
print(f"Dataset name: '{d.name}'") # Prints 10 dataset names
|
134
|
-
```
|
135
|
-
|
136
|
-
Returns:
|
137
|
-
ListResponse[Dataset]: List of `Dataset` objects.
|
138
|
-
"""
|
139
|
-
_sdk = BaseSDK()
|
140
|
-
list_response = _sdk.client.list_datasets(
|
141
|
-
limit=limit,
|
142
|
-
after=after,
|
143
|
-
name_contains=name_contains,
|
144
|
-
cloud=cloud,
|
145
|
-
project=project,
|
146
|
-
)
|
147
|
-
return list_response
|
anyscale/llm/model/__init__.py
DELETED
@@ -1,62 +0,0 @@
|
|
1
|
-
from typing import Optional
|
2
|
-
|
3
|
-
from anyscale._private.anyscale_client import AnyscaleClientInterface
|
4
|
-
from anyscale._private.sdk.base_sdk import BaseSDK
|
5
|
-
from anyscale._private.sdk.timer import Timer
|
6
|
-
from anyscale.cli_logger import BlockLogger
|
7
|
-
from anyscale.client.openapi_client.models import FineTunedModel as APIFineTunedModel
|
8
|
-
from anyscale.llm.model.models import DeletedFineTunedModel, FineTunedModel
|
9
|
-
|
10
|
-
|
11
|
-
class PrivateLLMModelsSDK(BaseSDK):
|
12
|
-
def __init__(
|
13
|
-
self,
|
14
|
-
*,
|
15
|
-
logger: Optional[BlockLogger] = None,
|
16
|
-
client: Optional[AnyscaleClientInterface] = None,
|
17
|
-
timer: Optional[Timer] = None,
|
18
|
-
):
|
19
|
-
super().__init__(logger=logger, client=client, timer=timer)
|
20
|
-
|
21
|
-
def _parse_response_model_get(self, model: APIFineTunedModel) -> FineTunedModel:
|
22
|
-
return FineTunedModel(
|
23
|
-
id=model.id,
|
24
|
-
base_model_id=model.base_model_id,
|
25
|
-
cloud_id=model.cloud_id,
|
26
|
-
# model.created_at is a datetime object, convert to unix timestamp
|
27
|
-
created_at=int(model.created_at.timestamp()),
|
28
|
-
# `creator` is a MiniUser object, just retrieve email
|
29
|
-
creator=model.creator.email if model.creator is not None else None,
|
30
|
-
ft_type=model.ft_type,
|
31
|
-
generation_config=model.generation_config,
|
32
|
-
job_id=model.job_id,
|
33
|
-
project_id=model.project_id,
|
34
|
-
storage_uri=model.storage_uri,
|
35
|
-
workspace_id=model.workspace_id,
|
36
|
-
)
|
37
|
-
|
38
|
-
def list(
|
39
|
-
self,
|
40
|
-
*,
|
41
|
-
cloud_id: Optional[str] = None,
|
42
|
-
project_id: Optional[str] = None,
|
43
|
-
max_items: int = 20,
|
44
|
-
):
|
45
|
-
finetuned_models = self.client.list_finetuned_models(
|
46
|
-
cloud_id, project_id, max_items
|
47
|
-
)
|
48
|
-
parsed_models = [
|
49
|
-
self._parse_response_model_get(model) for model in finetuned_models
|
50
|
-
]
|
51
|
-
return parsed_models
|
52
|
-
|
53
|
-
def get(
|
54
|
-
self, *, model_id: Optional[str] = None, job_id: Optional[str] = None
|
55
|
-
) -> FineTunedModel:
|
56
|
-
model = self.client.get_finetuned_model(model_id, job_id)
|
57
|
-
return self._parse_response_model_get(model)
|
58
|
-
|
59
|
-
def delete(self, model_id) -> DeletedFineTunedModel:
|
60
|
-
deleted_model = self.client.delete_finetuned_model(model_id).to_dict()
|
61
|
-
deleted_model["deleted_at"] = int(deleted_model["deleted_at"].timestamp())
|
62
|
-
return DeletedFineTunedModel(**deleted_model)
|