label-studio-sdk 1.0.13__py3-none-any.whl → 1.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of label-studio-sdk might be problematic. Click here for more details.

@@ -3,6 +3,7 @@ import io
3
3
  import logging
4
4
  import os
5
5
  import shutil
6
+ import base64
6
7
  from contextlib import contextmanager
7
8
  from tempfile import mkdtemp
8
9
  from urllib.parse import urlparse
@@ -201,7 +202,7 @@ def download_and_cache(
201
202
 
202
203
  # local storage: /data/local-files?d=dir/1.jpg => 1.jpg
203
204
  if is_local_storage_file:
204
- url_filename = os.path.basename(url.split('?d=')[1])
205
+ url_filename = os.path.basename(url.split("?d=")[1])
205
206
  # cloud storage: s3://bucket/1.jpg => 1.jpg
206
207
  elif is_cloud_storage_file:
207
208
  url_filename = os.path.basename(url)
@@ -213,7 +214,11 @@ def download_and_cache(
213
214
  filepath = os.path.join(cache_dir, url_hash + "__" + url_filename)
214
215
 
215
216
  if not os.path.exists(filepath):
216
- logger.info("Download {url} to {filepath}. download_resources: {download_resources}".format(url=url, filepath=filepath, download_resources=download_resources))
217
+ logger.info(
218
+ "Download {url} to {filepath}. download_resources: {download_resources}".format(
219
+ url=url, filepath=filepath, download_resources=download_resources
220
+ )
221
+ )
217
222
  if download_resources:
218
223
  headers = {
219
224
  # avoid requests.exceptions.HTTPError: 403 Client Error: Forbidden. Please comply with the User-Agent policy:
@@ -256,3 +261,123 @@ def get_all_files_from_dir(d):
256
261
  if os.path.isfile(filepath):
257
262
  out.append(filepath)
258
263
  return out
264
+
265
+
266
+ def get_base64_content(
267
+ url,
268
+ hostname=None,
269
+ access_token=None,
270
+ task_id=None,
271
+ ):
272
+ """This helper function is used to download a file and return its base64 representation without saving to filesystem.
273
+
274
+ :param url: File URL to download, it can be a uploaded file, local storage, cloud storage file or just http(s) url
275
+ :param hostname: Label Studio Hostname, it will be used for uploaded files, local storage files and cloud storage files
276
+ if not provided, it will be taken from LABEL_STUDIO_URL env variable
277
+ :param access_token: Label Studio access token, it will be used for uploaded files, local storage files and cloud storage files
278
+ if not provided, it will be taken from LABEL_STUDIO_API_KEY env variable
279
+ :param task_id: Label Studio Task ID, required for cloud storage files
280
+ because the URL will be rebuilt to `{hostname}/tasks/{task_id}/presign/?fileuri={url}`
281
+
282
+ :return: base64 encoded file content
283
+ """
284
+ # get environment variables
285
+ hostname = (
286
+ hostname
287
+ or os.getenv("LABEL_STUDIO_URL", "")
288
+ or os.getenv("LABEL_STUDIO_HOST", "")
289
+ )
290
+ access_token = (
291
+ access_token
292
+ or os.getenv("LABEL_STUDIO_API_KEY", "")
293
+ or os.getenv("LABEL_STUDIO_ACCESS_TOKEN", "")
294
+ )
295
+ if "localhost" in hostname:
296
+ logger.warning(
297
+ f"Using `localhost` ({hostname}) in LABEL_STUDIO_URL, "
298
+ f"`localhost` is not accessible inside of docker containers. "
299
+ f"You can check your IP with utilities like `ifconfig` and set it as LABEL_STUDIO_URL."
300
+ )
301
+ if hostname and not (
302
+ hostname.startswith("http://") or hostname.startswith("https://")
303
+ ):
304
+ raise ValueError(
305
+ f"Invalid hostname in LABEL_STUDIO_URL: {hostname}. "
306
+ "Please provide full URL starting with protocol (http:// or https://)."
307
+ )
308
+
309
+ # fix file upload url
310
+ if url.startswith("upload") or url.startswith("/upload"):
311
+ url = "/data" + ("" if url.startswith("/") else "/") + url
312
+
313
+ is_uploaded_file = url.startswith("/data/upload")
314
+ is_local_storage_file = url.startswith("/data/") and "?d=" in url
315
+ is_cloud_storage_file = (
316
+ url.startswith("s3:") or url.startswith("gs:") or url.startswith("azure-blob:")
317
+ )
318
+
319
+ # Local storage file: try to load locally
320
+ if is_local_storage_file:
321
+ filepath = url.split("?d=")[1]
322
+ filepath = safe_build_path(LOCAL_FILES_DOCUMENT_ROOT, filepath)
323
+ if os.path.exists(filepath):
324
+ logger.debug(
325
+ f"Local Storage file path exists locally, read content directly: {filepath}"
326
+ )
327
+ with open(filepath, "rb") as f:
328
+ return base64.b64encode(f.read()).decode("utf-8")
329
+
330
+ # Upload or Local Storage file
331
+ if is_uploaded_file or is_local_storage_file or is_cloud_storage_file:
332
+ # hostname check
333
+ if not hostname:
334
+ raise FileNotFoundError(
335
+ f"Can't resolve url, hostname not provided: {url}. "
336
+ "You can set LABEL_STUDIO_URL environment variable to use it as a hostname."
337
+ )
338
+ # uploaded and local storage file
339
+ elif is_uploaded_file or is_local_storage_file:
340
+ url = concat_urls(hostname, url)
341
+ logger.info("Resolving url using hostname [" + hostname + "]: " + url)
342
+ # s3, gs, azure-blob file
343
+ elif is_cloud_storage_file:
344
+ if task_id is None:
345
+ raise Exception(
346
+ "Label Studio Task ID is required for cloud storage files"
347
+ )
348
+ url = concat_urls(hostname, f"/tasks/{task_id}/presign/?fileuri={url}")
349
+ logger.info(
350
+ "Cloud storage file: Resolving url using hostname ["
351
+ + hostname
352
+ + "]: "
353
+ + url
354
+ )
355
+
356
+ # check access token
357
+ if not access_token:
358
+ raise FileNotFoundError(
359
+ "To access uploaded and local storage files you have to "
360
+ "set LABEL_STUDIO_API_KEY environment variable."
361
+ )
362
+
363
+ # Download the content but don't save to filesystem
364
+ headers = {
365
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"
366
+ }
367
+
368
+ # check if url matches hostname - then uses access token to this Label Studio instance
369
+ parsed_url = urlparse(url)
370
+ if access_token and hostname and parsed_url.netloc == urlparse(hostname).netloc:
371
+ headers["Authorization"] = "Token " + access_token
372
+ logger.debug("Authorization token is used for get_base64_content")
373
+
374
+ try:
375
+ r = requests.get(url, headers=headers, verify=VERIFY_SSL)
376
+ r.raise_for_status()
377
+ return base64.b64encode(r.content).decode("utf-8")
378
+ except requests.exceptions.SSLError as e:
379
+ logger.error(
380
+ f"SSL error during requests.get('{url}'): {e}\n"
381
+ f"Try to set VERIFY_SSL=False in environment variables to bypass SSL verification."
382
+ )
383
+ raise e
@@ -18,7 +18,7 @@ class BaseClientWrapper:
18
18
 
19
19
  # even in the async case, refreshing access token (when the existing one is expired) should be sync
20
20
  from ..tokens.client_ext import TokensClientExt
21
- self._tokens_client = TokensClientExt(base_url=base_url, api_key=api_key)
21
+ self._tokens_client = TokensClientExt(base_url=base_url, api_key=api_key, client_wrapper=self)
22
22
 
23
23
 
24
24
  def get_timeout(self) -> typing.Optional[float]:
@@ -8,9 +8,11 @@ import re
8
8
  import json
9
9
  import jsonschema
10
10
 
11
+ from functools import cached_property
11
12
  from typing import Dict, Optional, List, Tuple, Any, Callable, Union
12
13
  from pydantic import BaseModel
13
14
 
15
+
14
16
  # from typing import Dict, Optional, List, Tuple, Any
15
17
  from collections import defaultdict, OrderedDict
16
18
  from lxml import etree
@@ -517,6 +519,19 @@ class LabelInterface:
517
519
 
518
520
  return lst
519
521
 
522
+ @cached_property
523
+ def ner_tags(self):
524
+ return self.find_tags('controls', lambda t: t.tag.lower() in ('labels', 'hypertextlabels'))
525
+
526
+ @cached_property
527
+ def image_tags(self):
528
+ return self.find_tags('objects', lambda t: t.tag.lower() == 'image')
529
+
530
+ @cached_property
531
+ def pdf_tags(self):
532
+ return self.find_tags('objects', lambda t: t.tag.lower() == 'pdf')
533
+
534
+
520
535
  def load_task(self, task):
521
536
  """Loads a task and substitutes the value in each object tag
522
537
  with actual data from the task, returning a copy of the
@@ -189,12 +189,17 @@ class AudioTag(ObjectTag):
189
189
 
190
190
 
191
191
  class ImageTag(ObjectTag):
192
- """ """
192
+ """Image tag"""
193
193
  tag: str = "Image"
194
194
 
195
195
  def _generate_example(self, examples, only_urls=False):
196
196
  """ """
197
197
  return examples.get("Image")
198
+
199
+ @property
200
+ def is_image_list(self):
201
+ """Check if the tag is an image list, i.e. it has a valueList attribute that accepts list of images"""
202
+ return bool(self.attr.get("valueList")) if self.attr else False
198
203
 
199
204
 
200
205
  class TableTag(ObjectTag):
@@ -1,6 +1,7 @@
1
1
  import threading
2
2
  import typing
3
3
  from datetime import datetime, timezone
4
+ import ssl
4
5
 
5
6
  import httpx
6
7
  import jwt
@@ -12,9 +13,10 @@ from ..types.access_token_response import AccessTokenResponse
12
13
  class TokensClientExt:
13
14
  """Client for managing authentication tokens."""
14
15
 
15
- def __init__(self, base_url: str, api_key: str):
16
+ def __init__(self, base_url: str, api_key: str, client_wrapper=None):
16
17
  self._base_url = base_url
17
18
  self._api_key = api_key
19
+ self._client_wrapper = client_wrapper
18
20
  self._use_legacy_token = not self._is_valid_jwt_token(api_key, raise_if_expired=True)
19
21
 
20
22
  # cache state for access token when using jwt-based api_key
@@ -76,19 +78,63 @@ class TokensClientExt:
76
78
 
77
79
  return self._access_token
78
80
 
81
+ def _get_client_params(self, existing_client: httpx.AsyncClient) -> dict:
82
+ """Extract parameters from an existing client to create a new one.
83
+
84
+ Args:
85
+ existing_client: The existing client to extract parameters from.
86
+
87
+ Returns:
88
+ dict: Parameters for creating a new client.
89
+ """
90
+ return {
91
+ 'auth': existing_client.auth,
92
+ 'params': existing_client.params,
93
+ 'headers': existing_client.headers,
94
+ 'cookies': existing_client.cookies,
95
+ 'timeout': existing_client.timeout,
96
+ 'follow_redirects': existing_client.follow_redirects,
97
+ 'max_redirects': existing_client.max_redirects,
98
+ 'event_hooks': existing_client.event_hooks,
99
+ 'base_url': existing_client.base_url,
100
+ 'trust_env': existing_client.trust_env,
101
+ 'default_encoding': existing_client._default_encoding,
102
+ 'verify': existing_client._transport._pool._ssl_context.verify_mode != ssl.CERT_NONE,
103
+ 'http1': existing_client._transport._pool._http1,
104
+ 'http2': existing_client._transport._pool._http2,
105
+ 'limits': httpx.Limits(
106
+ max_connections=existing_client._transport._pool._max_connections,
107
+ max_keepalive_connections=existing_client._transport._pool._max_keepalive_connections,
108
+ keepalive_expiry=existing_client._transport._pool._keepalive_expiry
109
+ )
110
+ }
111
+
79
112
  def refresh(self) -> AccessTokenResponse:
80
113
  """Refresh the access token and return the token response."""
81
- # We don't do this often, just use a separate httpx client for simplicity here
82
- # (avoids complicated state management and sync vs async handling)
83
- with httpx.Client() as sync_client:
84
- response = sync_client.request(
114
+ existing_client = self._client_wrapper.httpx_client.httpx_client
115
+
116
+ # For sync client, use it directly
117
+ if isinstance(existing_client, httpx.Client):
118
+ print(f"\nverify:{existing_client._transport._pool._ssl_context.verify_mode != ssl.CERT_NONE}")
119
+ response = existing_client.request(
85
120
  method="POST",
86
121
  url=f"{self._base_url}/api/token/refresh/",
87
122
  json={"refresh": self._api_key},
88
123
  headers={"Content-Type": "application/json"},
89
124
  )
125
+ else:
126
+ # If an async client was used, get all parameters from the client to init a new sync client
127
+ client_params = self._get_client_params(existing_client)
128
+
129
+ with httpx.Client(**client_params) as sync_client:
130
+ response = sync_client.request(
131
+ method="POST",
132
+ url=f"{self._base_url}/api/token/refresh/",
133
+ json={"refresh": self._api_key},
134
+ headers={"Content-Type": "application/json"},
135
+ )
90
136
 
91
- if response.status_code == 200:
92
- return AccessTokenResponse.parse_obj(response.json())
93
- else:
94
- raise ApiError(status_code=response.status_code, body=response.json())
137
+ if response.status_code == 200:
138
+ return AccessTokenResponse.parse_obj(response.json())
139
+ else:
140
+ raise ApiError(status_code=response.status_code, body=response.json())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: label-studio-sdk
3
- Version: 1.0.13
3
+ Version: 1.0.15
4
4
  Summary:
5
5
  Requires-Python: >=3.9,<4
6
6
  Classifier: Intended Audience :: Developers
@@ -6,7 +6,7 @@ label_studio_sdk/_extensions/label_studio_tools/core/__init__.py,sha256=47DEQpj8
6
6
  label_studio_sdk/_extensions/label_studio_tools/core/label_config.py,sha256=P1S7dPjFkqF2zIQzk11iljhharrUc9qQRM_rUN38iJQ,6406
7
7
  label_studio_sdk/_extensions/label_studio_tools/core/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  label_studio_sdk/_extensions/label_studio_tools/core/utils/exceptions.py,sha256=JxaXUMghUp1YvL--s8KFC4mCHVbV39giE3kSBHCmuFU,66
9
- label_studio_sdk/_extensions/label_studio_tools/core/utils/io.py,sha256=FO0fBVvffuDjbQQcvqLsXmGUn1gCP1YmA-tmNsvX8oo,9650
9
+ label_studio_sdk/_extensions/label_studio_tools/core/utils/io.py,sha256=NJHLJA8Q93_MLmo_Yx9F4_Z-kkWCZmF_Ahi3MtDQCPo,14800
10
10
  label_studio_sdk/_extensions/label_studio_tools/core/utils/json_schema.py,sha256=_Lg3DxhRqGhzlk3egGUDufx-iaoEWec19upZKp-Cwic,3378
11
11
  label_studio_sdk/_extensions/label_studio_tools/core/utils/params.py,sha256=ZSUb-IXG5OcPQ7pJ8NDRLon-cMxnjVq6XtinxvTuJso,1244
12
12
  label_studio_sdk/_extensions/label_studio_tools/etl/__init__.py,sha256=SdN7JGLJ1araqbx-nL2fVdhm6E6CNyru-vWVs6sMswI,31
@@ -68,7 +68,7 @@ label_studio_sdk/converter/main.py,sha256=gfe5zPV2dnIk4ifG1AT95ExkzOSLzje0EOjnW0
68
68
  label_studio_sdk/converter/utils.py,sha256=VshPBwZLu2VPIGVsShKAkZwB_zKz0VvMkNRCwWeEqWg,18702
69
69
  label_studio_sdk/core/__init__.py,sha256=-t9txgeQZL_1FDw_08GEoj4ft1Cn9Dti6X0Drsadlr0,1519
70
70
  label_studio_sdk/core/api_error.py,sha256=RE8LELok2QCjABadECTvtDp7qejA1VmINCh6TbqPwSE,426
71
- label_studio_sdk/core/client_wrapper.py,sha256=fThAUV72_apQ4I2bS60iM84082lgXo4YMM6yvl07Kpg,2209
71
+ label_studio_sdk/core/client_wrapper.py,sha256=lAGxJnIC7HMfysZS9sLHX1lnc82jw_n3eo_pTeljlFc,2230
72
72
  label_studio_sdk/core/datetime_utils.py,sha256=nBys2IsYrhPdszxGKCNRPSOCwa-5DWOHG95FB8G9PKo,1047
73
73
  label_studio_sdk/core/file.py,sha256=d4NNbX8XvXP32z8KpK2Xovv33nFfruIrpz0QWxlgpZk,2663
74
74
  label_studio_sdk/core/http_client.py,sha256=siUQ6UV0ARZALlxubqWSSAAPC9B4VW8y6MGlHStfaeo,19552
@@ -157,9 +157,9 @@ label_studio_sdk/label_interface/base.py,sha256=NCgY7ntk0WSc9O9iXu3g37-CxbZgCx_W
157
157
  label_studio_sdk/label_interface/control_tags.py,sha256=qLe4gsRxvppuNtrxfmgZHFX1ahM-XhePlrchZfnJiL0,30141
158
158
  label_studio_sdk/label_interface/create.py,sha256=c3h5_FF4u5J62_mqq1oK2mjqXL-I1559C6MfoxkgO6s,6993
159
159
  label_studio_sdk/label_interface/data_examples.json,sha256=uCYvCtMIxPi1-jLlFhwJPh01tLyMIRwTjINeAeW-JzE,8195
160
- label_studio_sdk/label_interface/interface.py,sha256=U_2IkpQXxw8fBW_buwfWIOvgdk1z_xLHFdk8YPUC1eE,45519
160
+ label_studio_sdk/label_interface/interface.py,sha256=nEC_RQJ9VCCtYRKqJ7AYhggbLqvTKvJkPIRTQeIm8vc,45959
161
161
  label_studio_sdk/label_interface/label_tags.py,sha256=nWEo21Gd8IPzIO72UqraLrChIbvrSMCA_eEhzYGnGCc,2282
162
- label_studio_sdk/label_interface/object_tags.py,sha256=EGe3bYTZr92SezzWka8grYnvOQNtyfEYa5-yoM4a7Es,8705
162
+ label_studio_sdk/label_interface/object_tags.py,sha256=9k3DEYEh7aXSLh2JjH-SWNVupP1qgwHFte85Ix7-4dQ,8944
163
163
  label_studio_sdk/label_interface/objects.py,sha256=V1Spp0S9qE7iA-5kPCi0QyHrJ80Du9BUuYMsQUAQqc0,1535
164
164
  label_studio_sdk/label_interface/region.py,sha256=th39WeQk8ypi-4krEpsW0BZnoygu4XgvP4w7NkRQp2M,1755
165
165
  label_studio_sdk/ml/__init__.py,sha256=J4ncAcAOU_qriOx_Im9eFmXyupKM19SXMcpMcXSmw-I,455
@@ -215,7 +215,7 @@ label_studio_sdk/tasks/types/tasks_list_request_fields.py,sha256=5YXxQgyzoaL0QjS
215
215
  label_studio_sdk/tasks/types/tasks_list_response.py,sha256=j1pNluAWQOQ8-d9YXQyRQAefnrl8uLQEB7_L55Z8DME,1136
216
216
  label_studio_sdk/tokens/__init__.py,sha256=FTtvy8EDg9nNNg9WCatVgKTRYV8-_v1roeGPAKoa_pw,65
217
217
  label_studio_sdk/tokens/client.py,sha256=SvBcKXIsrTihMJC72Ifxv0U1N3gtLGz3JxqdXYA_hD4,19101
218
- label_studio_sdk/tokens/client_ext.py,sha256=chhzBuVYp0YeUrAnYVwDX5yJq5IJlomGhBQ1zvjZAkI,3976
218
+ label_studio_sdk/tokens/client_ext.py,sha256=Rma4ROcJPQOcBBGsA8Iteqk7WrbW7DTU381Ay5pJMiY,6134
219
219
  label_studio_sdk/types/__init__.py,sha256=fQykjzHpX04ftslk5I_hWSJQ_H9Kd8XJAmSv18EVOIc,8905
220
220
  label_studio_sdk/types/access_token_response.py,sha256=RV9FqkIiFR_9kmKueB-KiqjVyneiqUkMVueAlk5fUyc,624
221
221
  label_studio_sdk/types/annotation.py,sha256=AnHm2VjMasWZsaNXVSUzLYbpYrmM4NPZgWQh7WGa6ZQ,3157
@@ -364,7 +364,7 @@ label_studio_sdk/workspaces/members/client.py,sha256=IVM52Yq_9zMQ3TUHT0AkZ5BTQ9a
364
364
  label_studio_sdk/workspaces/members/types/__init__.py,sha256=ZIa_rd7d6K9ZITjTU6fptyGgvjNDySksJ7Rbn4wyhD4,252
365
365
  label_studio_sdk/workspaces/members/types/members_create_response.py,sha256=7Hp5FSWm4xR5ZOEmEIglq5HYtM9KWZZBDp87jw7jYFg,668
366
366
  label_studio_sdk/workspaces/members/types/members_list_response_item.py,sha256=DIc5DJoVahI9olBis_iFgOJrAf05m2fCE8g4R5ZeDko,712
367
- label_studio_sdk-1.0.13.dist-info/LICENSE,sha256=ymVrFcHiJGjHeY30NWZgdV-xzNEtfuC63oK9ZeMDjhs,11341
368
- label_studio_sdk-1.0.13.dist-info/METADATA,sha256=-1jBn5dwOCPtA4yi0RN5kq7HY373qPlf5dSWNP1r7YU,6033
369
- label_studio_sdk-1.0.13.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
370
- label_studio_sdk-1.0.13.dist-info/RECORD,,
367
+ label_studio_sdk-1.0.15.dist-info/LICENSE,sha256=ymVrFcHiJGjHeY30NWZgdV-xzNEtfuC63oK9ZeMDjhs,11341
368
+ label_studio_sdk-1.0.15.dist-info/METADATA,sha256=Ct3zUaMfhUAHG0IuTDHQ3jCjtD0HcPVxN0Ocj-JPajI,6033
369
+ label_studio_sdk-1.0.15.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
370
+ label_studio_sdk-1.0.15.dist-info/RECORD,,