mlflow-tclake-plugin 0.0.1__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlflow_tclake_plugin/tclake_store.py +156 -209
- {mlflow_tclake_plugin-0.0.1.dist-info → mlflow_tclake_plugin-2.0.1.dist-info}/METADATA +4 -2
- mlflow_tclake_plugin-2.0.1.dist-info/RECORD +8 -0
- mlflow_tclake_plugin-2.0.1.dist-info/licenses/LICENSE.txt +21 -0
- mlflow_tclake_plugin-0.0.1.dist-info/RECORD +0 -7
- {mlflow_tclake_plugin-0.0.1.dist-info → mlflow_tclake_plugin-2.0.1.dist-info}/WHEEL +0 -0
- {mlflow_tclake_plugin-0.0.1.dist-info → mlflow_tclake_plugin-2.0.1.dist-info}/entry_points.txt +0 -0
- {mlflow_tclake_plugin-0.0.1.dist-info → mlflow_tclake_plugin-2.0.1.dist-info}/top_level.txt +0 -0
mlflow_tclake_plugin/tclake_store.py

@@ -3,7 +3,6 @@ import json
 import os
 import time
 import uuid
-import urllib.parse
 from datetime import datetime

 from cachetools import TTLCache
@@ -11,23 +10,16 @@ from tencentcloud.common import credential
 from tencentcloud.common.common_client import CommonClient
 from tencentcloud.common.profile.client_profile import ClientProfile

-from mlflow.entities.model_registry import (
-    RegisteredModel,
-    ModelVersion,
-    ModelVersionTag,
-)
+from mlflow.entities.model_registry import RegisteredModel, ModelVersion, ModelVersionTag
 from mlflow.exceptions import MlflowException
-from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
-from mlflow.store.model_registry import (
-    SEARCH_REGISTERED_MODEL_MAX_RESULTS_THRESHOLD,
-    SEARCH_MODEL_VERSION_MAX_RESULTS_THRESHOLD,
-)
+from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE
+from mlflow.store.model_registry import SEARCH_REGISTERED_MODEL_MAX_RESULTS_THRESHOLD, \
+    SEARCH_MODEL_VERSION_MAX_RESULTS_THRESHOLD
 from mlflow.store.model_registry.abstract_store import AbstractStore
 from mlflow.utils.annotations import experimental
 from mlflow.utils.search_utils import SearchModelUtils, SearchModelVersionUtils
 from mlflow.store.entities.paged_list import PagedList
-
-from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException
+

 def to_string(obj):
     if obj is None:
@@ -73,13 +65,9 @@ def _get_description(resp):
 TCLAKE_MLFLOW_TAG_PREFIX = "tclake.tag."
 TCLAKE_MLFLOW_RUN_ID_KEY = "tclake.mlflow.run_id"
 TCLAKE_MLFLOW_RUN_LINK_KEY = "tclake.mlflow.run_link"
-
-TCLAKE_WEDATA_WORKSPACE_ID_KEY = "tclake.wedata.workspace_id"
-TCLAKE_MLFLOW_DEPLOYMENT_STATUS_KEY = "tclake.mlflow.deployment_status"
-TCLAKE_MLFLOW_MODEL_ID_KEY = "tclake.mlflow.model_id"
+TCLAKE_UUID_KEY = "tccatalog.identifier"
 TCLAKE_MLFLOW_MODEL_SIGNATURE_KEY = "tclake.mlflow.model_signature"

-UN_DEPLOYMENT = "UnDeployment"

 def _set_kv_to_properties(key, value, properties=None):
     if properties is None:
@@ -98,6 +86,10 @@ def _get_kv_from_properties(properties, key):
     return None


+def _get_uuid_from_properties(properties):
+    return _get_kv_from_properties(properties, TCLAKE_UUID_KEY)
+
+
 def _set_run_id_to_properties(run_id, properties=None):
     return _set_kv_to_properties(TCLAKE_MLFLOW_RUN_ID_KEY, run_id, properties)

@@ -113,15 +105,10 @@ def _set_run_link_to_properties(run_link, properties):
 def _get_run_link_from_properties(properties):
     return _get_kv_from_properties(properties, TCLAKE_MLFLOW_RUN_LINK_KEY)

-def _get_model_id_from_properties(properties):
-    return _get_kv_from_properties(properties, TCLAKE_MLFLOW_MODEL_ID_KEY)
-

 def _add_tag_to_properties(tag, properties=None):
     if tag and tag.value is not None:
-        properties = _set_kv_to_properties(
-            TCLAKE_MLFLOW_TAG_PREFIX + tag.key, tag.value, properties
-        )
+        properties = _set_kv_to_properties(TCLAKE_MLFLOW_TAG_PREFIX + tag.key, tag.value, properties)
     return properties

@@ -131,47 +118,84 @@ def _add_tags_to_properties(tags, properties=None):
         properties = _add_tag_to_properties(tag, properties)
     return properties

-def _add_deployment_job_id_to_properties(deployment_job_id, properties=None):
-    properties = _set_kv_to_properties(TCLAKE_MLFLOW_DEPLOYMENT_JOB_ID_KEY, deployment_job_id, properties)
-    return properties
-
-def _add_workspace_id_to_properties(workspace_id, properties=None):
-    if workspace_id is not None:
-        properties = _set_kv_to_properties(TCLAKE_WEDATA_WORKSPACE_ID_KEY, workspace_id, properties)
-    return properties

-def _add_deployment_status_to_properties(status, properties=None):
-    if status is not None:
-        properties = _set_kv_to_properties(TCLAKE_MLFLOW_DEPLOYMENT_STATUS_KEY, status, properties)
-    return properties

-def
-
-
-
+def parse_model_signatures_from_dict(signature_dict):
+    signatures = []
+    inputs_str = signature_dict.get("inputs")
+    if inputs_str:
+        inputs = json.loads(inputs_str)
+        for input_item in inputs:
+            model_signature = {
+                "name": input_item.get("name", ""),
+                "type": "INPUT",
+                "inputFlag": "true"
+            }
+            type_val = input_item.get("type")
+            if type_val is not None:
+                if type_val == "tensor":
+                    tensor_spec = input_item.get("tensor-spec", {}).get("dtype", "")
+                    if tensor_spec:
+                        model_signature["type"] = str(tensor_spec)
+                else:
+                    model_signature["type"] = str(type_val)
+
+            signatures.append(model_signature)
+
+    outputs_str = signature_dict.get("outputs")
+    if outputs_str:
+        outputs = json.loads(outputs_str)
+        for output_item in outputs:
+            name_val = output_item.get("name", "")
+            if not name_val:
+                name_val = output_item.get("prediction_column_name", "")
+            model_signature = {
+                "name": name_val,
+                "type": "OUTPUT",
+                "inputFlag": "false"
+            }
+            type_val = output_item.get("type")
+            if type_val is not None:
+                if type_val == "tensor":
+                    tensor_spec = output_item.get("tensor-spec", {}).get("dtype", "")
+                    if tensor_spec:
+                        model_signature["type"] = str(tensor_spec)
+                else:
+                    model_signature["type"] = str(type_val)
+
+            signatures.append(model_signature)
+
+    log_msg(f"parse model version signatures: {signatures}")
+    return signatures


 def _add_model_signature_to_properties(source, properties):
     log_msg("model source is {}".format(source))
-
-
-
-
-
-
-
+    try:
+        from mlflow.models.model import get_model_info
+        model_info = get_model_info(source)
+        signature = model_info.signature
+        log_msg("model {} signature is {}".format(source, signature))
+        if signature:
+            sig_json = json.dumps(parse_model_signatures_from_dict(signature.to_dict()))
+            log_msg("model {} signature json is {}".format(source, sig_json))
+        else:
+            log_msg(f"Registered model signature is not found in source artifact location '{source}'")
+            sig_json = json.dumps([])
+    except Exception as e:
+        log_msg(f"Failed to get model signature from {source}: {e}")
+        sig_json = json.dumps([])
     properties = _set_kv_to_properties(TCLAKE_MLFLOW_MODEL_SIGNATURE_KEY, sig_json, properties)
     return properties

+
 def _get_tags_from_properties(properties):
     if properties is None:
         return None
     tags = []
     for p in properties:
         if p["Key"].startswith(TCLAKE_MLFLOW_TAG_PREFIX):
-            tags.append(
-                ModelVersionTag(p["Key"][len(TCLAKE_MLFLOW_TAG_PREFIX) :], p["Value"])
-            )
+            tags.append(ModelVersionTag(p["Key"][len(TCLAKE_MLFLOW_TAG_PREFIX):], p["Value"]))
     return tags

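For orientation, a worked example of what the added parse_model_signatures_from_dict helper returns. It assumes a dict shaped like MLflow's ModelSignature.to_dict() output, where "inputs" and "outputs" are JSON-encoded specs; the column names below are made up:

    # illustrative only; requires the plugin to be installed
    from mlflow_tclake_plugin.tclake_store import parse_model_signatures_from_dict

    sig = {
        "inputs": '[{"name": "sepal_length", "type": "double"}]',
        "outputs": '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]',
    }
    print(parse_model_signatures_from_dict(sig))
    # [{'name': 'sepal_length', 'type': 'double', 'inputFlag': 'true'},
    #  {'name': '', 'type': 'float64', 'inputFlag': 'false'}]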
@@ -195,20 +219,17 @@ def _make_model(resp):
         creation_timestamp=_get_create_time_from_audit(audit),
         last_updated_timestamp=_get_last_updated_time_from_audit(audit),
         description=_get_description(resp),
-        tags=_get_tags_from_properties(properties)
+        tags=_get_tags_from_properties(properties),
     )


 def _get_model_version_name(entity):
-    return "{}.{}.{}".format(
-        entity["CatalogName"], entity["SchemaName"], entity["ModelName"]
-    )
+    return "{}.{}.{}".format(entity["CatalogName"], entity["SchemaName"], entity["ModelName"])


 def _make_model_version(entity, name):
     properties = entity["Properties"]
     audit = entity["Audit"]
-    aliases = entity["Aliases"]
     return ModelVersion(
         name=name,
         version=_get_model_version(entity["Version"]),
@@ -219,8 +240,6 @@ def _make_model_version(entity, name):
         run_id=_get_run_id_from_properties(properties),
         tags=_get_tags_from_properties(properties),
         run_link=_get_run_link_from_properties(properties),
-        aliases=aliases,
-        model_id=_get_model_id_from_properties(properties),
         status="READY",
     )

@@ -248,9 +267,7 @@ def _parse_page_token(page_token):


 def _create_page_token(offset, search_id):
-    return base64.b64encode(
-        json.dumps({"offset": offset, "search_id": search_id}).encode("utf-8")
-    )
+    return base64.b64encode(json.dumps({"offset": offset, "search_id": search_id}).encode("utf-8"))


 @experimental
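The page tokens handed out by the search methods are just base64-encoded JSON, as _create_page_token above shows. A minimal sketch of the round trip; the decoder here is a hypothetical stand-in for _parse_page_token (whose body is not part of this diff) and simply mirrors the "offset" and "search_id" keys that _get_page_list reads back:

    import base64
    import json

    def create_page_token(offset, search_id):
        # same shape as _create_page_token: base64 over a small JSON payload
        return base64.b64encode(json.dumps({"offset": offset, "search_id": search_id}).encode("utf-8"))

    def parse_page_token(page_token):
        # hypothetical inverse, matching the keys the store reads back out
        return json.loads(base64.b64decode(page_token).decode("utf-8"))

    token = create_page_token(20, "model_1234")
    print(parse_page_token(token))  # {'offset': 20, 'search_id': 'model_1234'}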
@@ -261,50 +278,29 @@ class TCLakeStore(AbstractStore):

     def __init__(self, store_uri=None, tracking_uri=None):
         super().__init__(store_uri, tracking_uri)
-        log_msg(
-            "initializing tencent tclake client {} {}".format(store_uri, tracking_uri)
-        )
+        log_msg("initializing tencent tclake client {} {}".format(store_uri, tracking_uri))
         sid = os.getenv("TENCENTCLOUD_SECRET_ID", "")
         if len(sid) == 0:
             raise MlflowException("TENCENTCLOUD_SECRET_ID is not set")
         sk = os.getenv("TENCENTCLOUD_SECRET_KEY", "")
         if len(sk) == 0:
             raise MlflowException("TENCENTCLOUD_SECRET_KEY is not set")
-        sk = os.getenv("TENCENTCLOUD_SECRET_KEY", "")
-        if len(sk) == 0:
-            raise MlflowException("TENCENTCLOUD_SECRET_KEY is not set")
-
         token = os.getenv("TENCENTCLOUD_TOKEN", None)
-
-        self.workspace_id = os.getenv("WEDATA_WORKSPACE_ID", "")
-        if len(self.workspace_id) == 0:
-            raise MlflowException("WEDATA_WORKSPACE_ID is not set")
-        log_msg(str.format("wedata workspace id: {}", self.workspace_id))
-
         client_profile = get_tencent_cloud_client_profile()
         cred = credential.Credential(sid, sk, token)
         parts = store_uri.split(":")
         if len(parts) < 2:
             raise MlflowException("set store_uri tclake:{region}")
         region = parts[1]
-        self.client = CommonClient(
-            "tccatalog", "2024-10-24", cred, region, client_profile
-        )
+        self.client = CommonClient("tccatalog", "2024-10-24", cred, region, client_profile)
         self.headers = get_tencent_cloud_headers()
-        self.default_catalog_name = os.getenv(
-            "TENCENTCLOUD_DEFAULT_CATALOG_NAME", "default"
-        )
-        self.default_schema_name = os.getenv(
-            "TENCENTCLOUD_DEFAULT_SCHEMA_NAME", "default"
-        )
+        self.default_catalog_name = os.getenv("TENCENTCLOUD_DEFAULT_CATALOG_NAME", "default")
+        self.default_schema_name = os.getenv("TENCENTCLOUD_DEFAULT_SCHEMA_NAME", "default")
         cache_size = int(os.getenv("TCLAKE_CACHE_SIZE", "100"))
         cache_ttl = int(os.getenv("TCLAKE_CACHE_TTL_SECS", "300"))
         self.cache = TTLCache(maxsize=cache_size, ttl=cache_ttl)
-        log_msg(
-            "initialized tencent tclake client successfully {} {} {} {} {}".format(
-                region, client_profile, self.headers, cache_size, cache_ttl
-            )
-        )
+        log_msg("initialized tencent tclake client successfully {} {} {} {} {}".format(
+            region, client_profile, self.headers, cache_size, cache_ttl))

     def _split_model_name(self, name):
         parts = name.split(".")
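As a quick orientation for the constructor above: the wheel registers this store through entry_points.txt, so an MLflow client only needs the Tencent Cloud credentials that __init__ checks for and a registry URI of the form tclake:{region}. A minimal usage sketch; the region, run id, and model name are placeholders:

    import os
    import mlflow

    # required by TCLakeStore.__init__ (values are placeholders)
    os.environ["TENCENTCLOUD_SECRET_ID"] = "<secret-id>"
    os.environ["TENCENTCLOUD_SECRET_KEY"] = "<secret-key>"
    # optional: TENCENTCLOUD_TOKEN, TENCENTCLOUD_DEFAULT_CATALOG_NAME, TENCENTCLOUD_DEFAULT_SCHEMA_NAME

    mlflow.set_registry_uri("tclake:ap-guangzhou")  # store_uri must look like "tclake:{region}"

    # registered model names resolve as catalog.schema.model (see _split_model_name)
    mlflow.register_model("runs:/<run-id>/model", "my_catalog.my_schema.my_model")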
@@ -314,9 +310,7 @@ class TCLakeStore(AbstractStore):
             return [self.default_catalog_name, parts[0], parts[1]]
         if len(parts) == 3:
             return parts
-        raise MlflowException(
-            "invalid model name: {}, must be catalog.schema.model".format(name)
-        )
+        raise MlflowException("invalid model name: {}, must be catalog.schema.model".format(name))

     def _call(self, action, req):
         log_msg("req: {}\n{}".format(action, json.dumps(req, indent=2)))
@@ -342,7 +336,7 @@ class TCLakeStore(AbstractStore):
             "CatalogName": catalog_name,
             "SchemaName": schema_name,
             "ModelName": model_name,
-            "ModelVersions": self._get_model_version_numbers(name)
+            "ModelVersions": self._get_model_version_numbers(name)
         }
         resp = self._call("DescribeModelVersions", req_body)
         model_version = None
@@ -354,32 +348,21 @@ class TCLakeStore(AbstractStore):
             return None
         return _make_model_version(model_version, name)

-    def create_registered_model(self, name, tags=None, description=None
+    def create_registered_model(self, name, tags=None, description=None):
         log_msg("create_registered_model {} {} {}".format(name, tags, description))
         [catalog_name, schema_name, model_name] = self._split_model_name(name)
         properties = []
         _add_tags_to_properties(tags, properties)
-        _add_deployment_job_id_to_properties(deployment_job_id, properties)
-        _add_workspace_id_to_properties(self.workspace_id, properties)
         req_body = {
             "CatalogName": catalog_name,
             "SchemaName": schema_name,
             "ModelName": model_name,
             "Comment": description if description else "",
-            "Properties": properties
+            "Properties": properties
         }
-        try:
-            resp = self._call("CreateModel", req_body)
-        except Exception as e:
-            if isinstance(e, TencentCloudSDKException):
-                if e.code == "FailedOperation.MetalakeAlreadyExistsError":
-                    raise MlflowException(
-                        f"Registered Model (name={name}) already exists.",
-                        RESOURCE_ALREADY_EXISTS,
-                    )
-            raise
+        resp = self._call("CreateModel", req_body)
         return _make_model(resp["Model"])
-
+
     def update_registered_model(self, name, description):
         log_msg("update_register_model {} {}".format(name, description))
         [catalog_name, schema_name, model_name] = self._split_model_name(name)
@@ -387,7 +370,7 @@ class TCLakeStore(AbstractStore):
             "CatalogName": catalog_name,
             "SchemaName": schema_name,
             "ModelName": model_name,
-            "NewComment": description if description else ""
+            "NewComment": description if description else ""
         }
         resp = self._call("ModifyModelComment", req_body)
         return _make_model(resp["Model"])
@@ -399,7 +382,7 @@ class TCLakeStore(AbstractStore):
             "CatalogName": catalog_name,
             "SchemaName": schema_name,
             "ModelName": model_name,
-            "NewName": new_name
+            "NewName": new_name
         }
         resp = self._call("ModifyModelName", req_body)
         return _make_model(resp["Model"])
@@ -417,28 +400,30 @@ class TCLakeStore(AbstractStore):
             raise MlflowException("Failed to delete model {}".format(name))

     def _fetch_all_models(self):
-        req_body = {
+        req_body = {
+            "Offset": 0,
+            "Limit": 200,
+            "SnapshotBased": True,
+            "SnapshotId": ""
+        }
         resp = self._call("SearchModels", req_body)
-        total = resp[
-        model_list = resp[
+        total = resp['TotalCount']
+        model_list = resp['Models']
         while len(model_list) < total:
             time.sleep(0.05)
             req_body["Offset"] += req_body["Limit"]
             req_body["SnapshotId"] = resp["SnapshotId"]
             resp = self._call("SearchModels", req_body)
-            model_list.extend(resp[
-            if len(resp[
+            model_list.extend(resp['Models'])
+            if len(resp['Models']) < req_body["Limit"]:
                 break
         return [_make_model(model) for model in model_list]

     def search_registered_models(
-
+        self, filter_string=None, max_results=None, order_by=None, page_token=None
     ):
-        log_msg(
-            "search_registered_models {} {} {} {}".format(
-                filter_string, max_results, order_by, page_token
-            )
-        )
+        log_msg("search_registered_models {} {} {} {}".format(
+            filter_string, max_results, order_by, page_token))
         if not isinstance(max_results, int) or max_results < 1:
             raise MlflowException(
                 "Invalid value for max_results. It must be a positive integer,"
@@ -461,16 +446,8 @@ class TCLakeStore(AbstractStore):
         search_id = "model_" + str(uuid.uuid4())
         page_token = _create_page_token(0, search_id)
         self.cache[search_id] = sorted_rms
-        log_msg(
-            "search_registered_models add cache {} {} {} {} {} {}".format(
-                filter_string,
-                max_results,
-                order_by,
-                page_token,
-                search_id,
-                len(sorted_rms),
-            )
-        )
+        log_msg("search_registered_models add cache {} {} {} {} {} {}".format(
+            filter_string, max_results, order_by, page_token, search_id, len(sorted_rms)))
         return self._get_page_list(page_token, max_results)

     def get_registered_model(self, name):
@@ -495,7 +472,10 @@ class TCLakeStore(AbstractStore):
             "ModelName": model_name,
         }
         resp = self._call("DescribeModelVersions", req)
-        return [
+        return [
+            _make_model_version(mv, name)
+            for mv in resp["ModelVersions"]
+        ]

     def set_registered_model_tag(self, name, tag):
         log_msg("set_registered_model_tag {} {}".format(name, tag))
@@ -504,7 +484,7 @@ class TCLakeStore(AbstractStore):
             "CatalogName": catalog_name,
             "SchemaName": schema_name,
             "ModelName": model_name,
-            "Properties": _add_tag_to_properties(tag)
+            "Properties": _add_tag_to_properties(tag)
         }
         self._call("ModifyModelProperties", req)

@@ -515,37 +495,29 @@ class TCLakeStore(AbstractStore):
             "CatalogName": catalog_name,
             "SchemaName": schema_name,
             "ModelName": model_name,
-            "RemovedKeys": [_append_tag_key_prefix(key)]
+            "RemovedKeys": [_append_tag_key_prefix(key)]
         }
         self._call("ModifyModelProperties", req)

     def create_model_version(
-        self,
-        name,
-        source,
-        run_id=None,
-        tags=None,
-        run_link=None,
-        description=None,
-        local_model_path=None,
-        model_id=None,
+        self,
+        name,
+        source,
+        run_id=None,
+        tags=None,
+        run_link=None,
+        description=None,
+        local_model_path=None,
     ):
-        log_msg(
-            "create_model_version {} {} {} {} {} {} {}".format(
-                name, source, run_id, tags, run_link, description, local_model_path
-            )
-        )
+        log_msg("create_model_version {} {} {} {} {} {} {}".format(
+            name, source, run_id, tags, run_link, description, local_model_path))
         [catalog_name, schema_name, model_name] = self._split_model_name(name)
         version_alias = str(uuid.uuid4())
         properties = []
         _add_tags_to_properties(tags, properties)
         _set_run_id_to_properties(run_id, properties)
         _set_run_link_to_properties(run_link, properties)
-        _add_workspace_id_to_properties(self.workspace_id, properties)
-        _add_deployment_status_to_properties(UN_DEPLOYMENT, properties)
-        _add_model_id_to_properties(model_id, properties)
         _add_model_signature_to_properties(source, properties)
-
         req_body = {
             "CatalogName": catalog_name,
             "SchemaName": schema_name,
@@ -553,7 +525,7 @@ class TCLakeStore(AbstractStore):
             "Uri": source,
             "Comment": description if description else "",
             "Properties": properties,
-            "Aliases": [version_alias]
+            "Aliases": [version_alias]
         }
         self._call("CreateModelVersion", req_body)
         return self._get_model_version_by_alias(name, version_alias)
@@ -566,14 +538,12 @@ class TCLakeStore(AbstractStore):
             "SchemaName": schema_name,
             "ModelName": model_name,
             "ModelVersion": _set_model_version(version),
-            "NewComment": description if description else ""
+            "NewComment": description if description else ""
         }
         resp = self._call("ModifyModelVersionComment", req_body)
         return _make_model_version(resp["ModelVersion"], name)

-    def transition_model_version_stage(
-        self, name, version, stage, archive_existing_versions
-    ):
+    def transition_model_version_stage(self, name, version, stage, archive_existing_versions):
         raise NotImplementedError("Method not implemented")

     def delete_model_version(self, name, version):
@@ -587,9 +557,7 @@ class TCLakeStore(AbstractStore):
         }
         resp = self._call("DropModelVersion", req_body)
         if not resp["Dropped"]:
-            raise Exception(
-                "Failed to delete model version {} {}".format(name, version)
-            )
+            raise Exception("Failed to delete model version {} {}".format(name, version))

     def get_model_version(self, name, version):
         log_msg("get_model_version {} {}".format(name, version))
@@ -604,31 +572,31 @@ class TCLakeStore(AbstractStore):
         return _make_model_version(resp["ModelVersion"], name)

     def _fetch_all_model_versions(self):
-        req_body = {
+        req_body = {
+            "Offset": 0,
+            "Limit": 200,
+            "SnapshotBased": True,
+            "SnapshotId": ""
+        }
         resp = self._call("SearchModelVersions", req_body)
-        total = resp[
-        model_version_list = resp[
+        total = resp['TotalCount']
+        model_version_list = resp['ModelVersions']
         while len(model_version_list) < total:
             time.sleep(0.05)
             req_body["Offset"] += req_body["Limit"]
             req_body["SnapshotId"] = resp["SnapshotId"]
             resp = self._call("SearchModelVersions", req_body)
-            model_version_list.extend(resp[
-            if len(resp[
+            model_version_list.extend(resp['ModelVersions'])
+            if len(resp['ModelVersions']) < req_body["Limit"]:
                 break
-        return [
-            _make_model_version(model_version, _get_model_version_name(model_version))
-            for model_version in model_version_list
-        ]
+        return [_make_model_version(model_version, _get_model_version_name(model_version)) for model_version in
+                model_version_list]

     def search_model_versions(
-
+        self, filter_string=None, max_results=None, order_by=None, page_token=None
     ):
-        log_msg(
-            "search_model_versions {} {} {} {}".format(
-                filter_string, max_results, order_by, page_token
-            )
-        )
+        log_msg("search_model_versions {} {} {} {}".format(
+            filter_string, max_results, order_by, page_token))
         if not isinstance(max_results, int) or max_results < 1:
             raise MlflowException(
                 "Invalid value for max_results. It must be a positive integer,"
@@ -647,59 +615,39 @@ class TCLakeStore(AbstractStore):
         filtered_mvs = SearchModelVersionUtils.filter(model_versions, filter_string)
         sorted_mvs = SearchModelVersionUtils.sort(
             filtered_mvs,
-            order_by
-            or ["last_updated_timestamp DESC", "name ASC", "version_number DESC"],
+            order_by or ["last_updated_timestamp DESC", "name ASC", "version_number DESC"],
         )
         if len(sorted_mvs) == 0:
             return PagedList([], None)
         search_id = "model_version_" + str(uuid.uuid4())
         page_token = _create_page_token(0, search_id)
         self.cache[search_id] = sorted_mvs
-        log_msg(
-            "search_model_versions add cache {} {} {} {} {} {}".format(
-                filter_string,
-                max_results,
-                order_by,
-                page_token,
-                search_id,
-                len(sorted_mvs),
-            )
-        )
+        log_msg("search_model_versions add cache {} {} {} {} {} {}".format(
+            filter_string, max_results, order_by, page_token, search_id, len(sorted_mvs)))

         return self._get_page_list(page_token, max_results)

     def _get_page_list(self, page_token, max_results):
         token_info = _parse_page_token(page_token)
-        log_msg(
-            "_get_page_list token_info {} {} {}".format(
-                page_token, max_results, token_info
-            )
-        )
-        sorted_mvs = self.cache.get(token_info["search_id"])
+        log_msg("_get_page_list token_info {} {} {}".format(
+            page_token, max_results, token_info))
+        sorted_mvs = self.cache.get(token_info['search_id'])
         if sorted_mvs is None:
             raise MlflowException(
                 "Invalid page token: search id not found or expired",
                 INVALID_PARAMETER_VALUE,
             )
-        start_offset = token_info[
+        start_offset = token_info['offset']
         final_offset = start_offset + max_results

-        paginated_rms = sorted_mvs[start_offset
+        paginated_rms = sorted_mvs[start_offset: min(len(sorted_mvs), final_offset)]
         next_page_token = None
         if final_offset < len(sorted_mvs):
-            next_page_token = _create_page_token(final_offset, token_info[
+            next_page_token = _create_page_token(final_offset, token_info['search_id'])
         else:
-            self.cache.pop(token_info[
-            log_msg(
-                "pop cache {} {} {} {} {} {}".format(
-                    token_info["search_id"],
-                    start_offset,
-                    final_offset,
-                    len(sorted_mvs),
-                    page_token,
-                    next_page_token,
-                )
-            )
+            self.cache.pop(token_info['search_id'], None)
+            log_msg("pop cache {} {} {} {} {} {}".format(
+                token_info['search_id'], start_offset, final_offset, len(sorted_mvs), page_token, next_page_token))
         return PagedList(paginated_rms, next_page_token)

     def set_model_version_tag(self, name, version, tag):
@@ -710,7 +658,7 @@ class TCLakeStore(AbstractStore):
             "SchemaName": schema_name,
             "ModelName": model_name,
             "ModelVersion": _set_model_version(version),
-            "Properties": _add_tag_to_properties(tag)
+            "Properties": _add_tag_to_properties(tag)
         }
         self._call("ModifyModelVersionProperties", req)

@@ -722,7 +670,7 @@ class TCLakeStore(AbstractStore):
             "SchemaName": schema_name,
             "ModelName": model_name,
             "ModelVersion": _set_model_version(version),
-            "RemovedKeys": [_append_tag_key_prefix(key)]
+            "RemovedKeys": [_append_tag_key_prefix(key)]
         }
         self._call("ModifyModelVersionProperties", req)

@@ -739,5 +687,4 @@ class TCLakeStore(AbstractStore):
     def get_model_version_download_uri(self, name, version):
         log_msg("get_model_version_download_uri {} {}".format(name, version))
         model_version = self.get_model_version(name, version)
-        return model_version.source
-
+        return model_version.source
{mlflow_tclake_plugin-0.0.1.dist-info → mlflow_tclake_plugin-2.0.1.dist-info}/METADATA

@@ -1,8 +1,10 @@
 Metadata-Version: 2.4
 Name: mlflow-tclake-plugin
-Version: 0.0.1
+Version: 2.0.1
 Summary: Tclake plugin for MLflow
-
+License-File: LICENSE.txt
+Requires-Dist: mlflow>=2.7.2
 Requires-Dist: tencentcloud-sdk-python-common>=3.0.1478
+Dynamic: license-file
 Dynamic: requires-dist
 Dynamic: summary
mlflow_tclake_plugin-2.0.1.dist-info/RECORD

@@ -0,0 +1,8 @@
+mlflow_tclake_plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+mlflow_tclake_plugin/tclake_store.py,sha256=djAE2-C_2BKkguy8WN-uETIOcC9qLiiCSDu7teXFRQc,27054
+mlflow_tclake_plugin-2.0.1.dist-info/licenses/LICENSE.txt,sha256=X60Z7_gpe--AyXGUWeFWpOpFl5m-yeemfodWGJn1rUA,1067
+mlflow_tclake_plugin-2.0.1.dist-info/METADATA,sha256=rVelcV3Jsjdi0RVsbdGPkYGONGiONW63dL6iJb1rzFc,271
+mlflow_tclake_plugin-2.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mlflow_tclake_plugin-2.0.1.dist-info/entry_points.txt,sha256=xKXky9-NyJWEG1SgBknrqskN2rsZG3t4UXiSzkTcsuE,85
+mlflow_tclake_plugin-2.0.1.dist-info/top_level.txt,sha256=zrA5UNyfF3skRmxPNsvrJI3yf-um6CJX_xO5KvWc2o0,21
+mlflow_tclake_plugin-2.0.1.dist-info/RECORD,,

mlflow_tclake_plugin-2.0.1.dist-info/licenses/LICENSE.txt

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) [2025] [tencent]
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

mlflow_tclake_plugin-0.0.1.dist-info/RECORD

@@ -1,7 +0,0 @@
-mlflow_tclake_plugin/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-mlflow_tclake_plugin/tclake_store.py,sha256=Ys3giqvpdth7gwZ7O3TeaQH1OJH08XUpqjRkOVIM9JM,28301
-mlflow_tclake_plugin-0.0.1.dist-info/METADATA,sha256=My3rL7JEx5v_HsA42LimQvwHJU1ms0Qa2mvHlYGZ70s,223
-mlflow_tclake_plugin-0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mlflow_tclake_plugin-0.0.1.dist-info/entry_points.txt,sha256=xKXky9-NyJWEG1SgBknrqskN2rsZG3t4UXiSzkTcsuE,85
-mlflow_tclake_plugin-0.0.1.dist-info/top_level.txt,sha256=zrA5UNyfF3skRmxPNsvrJI3yf-um6CJX_xO5KvWc2o0,21
-mlflow_tclake_plugin-0.0.1.dist-info/RECORD,,
{mlflow_tclake_plugin-0.0.1.dist-info → mlflow_tclake_plugin-2.0.1.dist-info}/WHEEL
RENAMED
File without changes

{mlflow_tclake_plugin-0.0.1.dist-info → mlflow_tclake_plugin-2.0.1.dist-info}/entry_points.txt
RENAMED
File without changes

{mlflow_tclake_plugin-0.0.1.dist-info → mlflow_tclake_plugin-2.0.1.dist-info}/top_level.txt
RENAMED
File without changes