kaggle 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kaggle/__init__.py +1 -1
- kaggle/api/kaggle_api_extended.py +206 -75
- kaggle/cli.py +36 -28
- kaggle/models/upload_file.py +4 -4
- {kaggle-1.8.2.dist-info → kaggle-1.8.4.dist-info}/METADATA +66 -75
- kaggle-1.8.4.dist-info/RECORD +15 -0
- {kaggle-1.8.2.dist-info → kaggle-1.8.4.dist-info}/WHEEL +1 -1
- kaggle/models/api_blob_type.py +0 -4
- kaggle/models/dataset_column.py +0 -228
- kaggle/models/dataset_new_request.py +0 -443
- kaggle/models/dataset_new_version_request.py +0 -319
- kaggle/models/dataset_update_settings_request.py +0 -344
- kaggle/models/kernel_push_request.py +0 -608
- kaggle/models/model_instance_new_version_request.py +0 -145
- kaggle/models/model_instance_update_request.py +0 -451
- kaggle/models/model_new_instance_request.py +0 -552
- kaggle/models/model_new_request.py +0 -329
- kaggle/models/model_update_request.py +0 -300
- kaggle/models/start_blob_upload_request.py +0 -240
- kaggle/models/start_blob_upload_response.py +0 -142
- kaggle-1.8.2.dist-info/RECORD +0 -148
- kagglesdk/LICENSE +0 -201
- kagglesdk/__init__.py +0 -6
- kagglesdk/admin/__init__.py +0 -0
- kagglesdk/admin/services/__init__.py +0 -0
- kagglesdk/admin/services/inbox_file_service.py +0 -22
- kagglesdk/admin/types/__init__.py +0 -0
- kagglesdk/admin/types/inbox_file_service.py +0 -74
- kagglesdk/benchmarks/__init__.py +0 -0
- kagglesdk/benchmarks/services/__init__.py +0 -0
- kagglesdk/benchmarks/services/benchmarks_api_service.py +0 -19
- kagglesdk/benchmarks/types/__init__.py +0 -0
- kagglesdk/benchmarks/types/benchmark_types.py +0 -307
- kagglesdk/benchmarks/types/benchmarks_api_service.py +0 -243
- kagglesdk/blobs/__init__.py +0 -0
- kagglesdk/blobs/services/__init__.py +0 -0
- kagglesdk/blobs/services/blob_api_service.py +0 -25
- kagglesdk/blobs/types/__init__.py +0 -0
- kagglesdk/blobs/types/blob_api_service.py +0 -177
- kagglesdk/common/__init__.py +0 -0
- kagglesdk/common/services/__init__.py +0 -0
- kagglesdk/common/services/operations_service.py +0 -46
- kagglesdk/common/types/__init__.py +0 -0
- kagglesdk/common/types/file_download.py +0 -102
- kagglesdk/common/types/http_redirect.py +0 -105
- kagglesdk/common/types/operations.py +0 -194
- kagglesdk/common/types/operations_service.py +0 -48
- kagglesdk/community/__init__.py +0 -0
- kagglesdk/community/types/__init__.py +0 -0
- kagglesdk/community/types/content_enums.py +0 -44
- kagglesdk/community/types/organization.py +0 -410
- kagglesdk/competitions/__init__.py +0 -0
- kagglesdk/competitions/services/__init__.py +0 -0
- kagglesdk/competitions/services/competition_api_service.py +0 -178
- kagglesdk/competitions/types/__init__.py +0 -0
- kagglesdk/competitions/types/competition.py +0 -14
- kagglesdk/competitions/types/competition_api_service.py +0 -2393
- kagglesdk/competitions/types/competition_enums.py +0 -53
- kagglesdk/competitions/types/search_competitions.py +0 -28
- kagglesdk/competitions/types/submission_status.py +0 -9
- kagglesdk/datasets/__init__.py +0 -0
- kagglesdk/datasets/databundles/__init__.py +0 -0
- kagglesdk/datasets/databundles/types/__init__.py +0 -0
- kagglesdk/datasets/databundles/types/databundle_api_types.py +0 -540
- kagglesdk/datasets/services/__init__.py +0 -0
- kagglesdk/datasets/services/dataset_api_service.py +0 -195
- kagglesdk/datasets/types/__init__.py +0 -0
- kagglesdk/datasets/types/dataset_api_service.py +0 -3047
- kagglesdk/datasets/types/dataset_enums.py +0 -103
- kagglesdk/datasets/types/dataset_service.py +0 -145
- kagglesdk/datasets/types/dataset_types.py +0 -646
- kagglesdk/datasets/types/search_datasets.py +0 -6
- kagglesdk/discussions/__init__.py +0 -0
- kagglesdk/discussions/types/__init__.py +0 -0
- kagglesdk/discussions/types/search_discussions.py +0 -43
- kagglesdk/discussions/types/writeup_enums.py +0 -11
- kagglesdk/education/__init__.py +0 -0
- kagglesdk/education/services/__init__.py +0 -0
- kagglesdk/education/services/education_api_service.py +0 -19
- kagglesdk/education/types/__init__.py +0 -0
- kagglesdk/education/types/education_api_service.py +0 -248
- kagglesdk/education/types/education_service.py +0 -139
- kagglesdk/kaggle_client.py +0 -101
- kagglesdk/kaggle_creds.py +0 -148
- kagglesdk/kaggle_env.py +0 -104
- kagglesdk/kaggle_http_client.py +0 -269
- kagglesdk/kaggle_oauth.py +0 -200
- kagglesdk/kaggle_object.py +0 -344
- kagglesdk/kernels/__init__.py +0 -0
- kagglesdk/kernels/services/__init__.py +0 -0
- kagglesdk/kernels/services/kernels_api_service.py +0 -146
- kagglesdk/kernels/types/__init__.py +0 -0
- kagglesdk/kernels/types/kernels_api_service.py +0 -2451
- kagglesdk/kernels/types/kernels_enums.py +0 -39
- kagglesdk/kernels/types/search_kernels.py +0 -6
- kagglesdk/licenses/__init__.py +0 -0
- kagglesdk/licenses/types/__init__.py +0 -0
- kagglesdk/licenses/types/licenses_types.py +0 -182
- kagglesdk/models/__init__.py +0 -0
- kagglesdk/models/services/__init__.py +0 -0
- kagglesdk/models/services/model_api_service.py +0 -280
- kagglesdk/models/services/model_service.py +0 -19
- kagglesdk/models/types/__init__.py +0 -0
- kagglesdk/models/types/model_api_service.py +0 -4069
- kagglesdk/models/types/model_enums.py +0 -68
- kagglesdk/models/types/model_service.py +0 -275
- kagglesdk/models/types/model_types.py +0 -1338
- kagglesdk/models/types/search_models.py +0 -8
- kagglesdk/search/__init__.py +0 -0
- kagglesdk/search/services/__init__.py +0 -0
- kagglesdk/search/services/search_api_service.py +0 -19
- kagglesdk/search/types/__init__.py +0 -0
- kagglesdk/search/types/search_api_service.py +0 -2435
- kagglesdk/search/types/search_content_shared.py +0 -50
- kagglesdk/search/types/search_enums.py +0 -45
- kagglesdk/search/types/search_service.py +0 -303
- kagglesdk/security/__init__.py +0 -0
- kagglesdk/security/services/__init__.py +0 -0
- kagglesdk/security/services/iam_service.py +0 -31
- kagglesdk/security/services/oauth_service.py +0 -58
- kagglesdk/security/types/__init__.py +0 -0
- kagglesdk/security/types/authentication.py +0 -171
- kagglesdk/security/types/iam_service.py +0 -496
- kagglesdk/security/types/oauth_service.py +0 -1181
- kagglesdk/security/types/roles.py +0 -8
- kagglesdk/security/types/security_types.py +0 -159
- kagglesdk/test/__init__.py +0 -0
- kagglesdk/test/test_client.py +0 -41
- kagglesdk/users/__init__.py +0 -0
- kagglesdk/users/services/__init__.py +0 -0
- kagglesdk/users/services/account_service.py +0 -31
- kagglesdk/users/services/group_api_service.py +0 -31
- kagglesdk/users/types/__init__.py +0 -0
- kagglesdk/users/types/account_service.py +0 -345
- kagglesdk/users/types/group_api_service.py +0 -315
- kagglesdk/users/types/group_types.py +0 -165
- kagglesdk/users/types/groups_enum.py +0 -8
- kagglesdk/users/types/progression_service.py +0 -9
- kagglesdk/users/types/search_users.py +0 -23
- kagglesdk/users/types/user_avatar.py +0 -226
- kagglesdk/users/types/users_enums.py +0 -22
- {kaggle-1.8.2.dist-info → kaggle-1.8.4.dist-info}/entry_points.txt +0 -0
- {kaggle-1.8.2.dist-info → kaggle-1.8.4.dist-info}/licenses/LICENSE.txt +0 -0
kaggle/__init__.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import absolute_import
|
|
|
3
3
|
import os
|
|
4
4
|
from kaggle.api.kaggle_api_extended import KaggleApi
|
|
5
5
|
|
|
6
|
-
__version__ = "1.8.2"
|
|
6
|
+
__version__ = "1.8.4"
|
|
7
7
|
|
|
8
8
|
enable_oauth = os.environ.get("KAGGLE_ENABLE_OAUTH") in ("1", "true", "yes")
|
|
9
9
|
api = KaggleApi(enable_oauth=enable_oauth)
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
from __future__ import print_function
|
|
19
19
|
|
|
20
20
|
import csv
|
|
21
|
-
from datetime import datetime
|
|
21
|
+
from datetime import datetime
|
|
22
22
|
from enum import Enum
|
|
23
23
|
import io
|
|
24
24
|
|
|
@@ -49,6 +49,7 @@ from slugify import slugify
|
|
|
49
49
|
from tqdm import tqdm
|
|
50
50
|
from urllib3.util.retry import Retry
|
|
51
51
|
from google.protobuf import field_mask_pb2
|
|
52
|
+
from packaging.version import parse
|
|
52
53
|
|
|
53
54
|
import kaggle
|
|
54
55
|
from kagglesdk import get_access_token_from_env, KaggleClient, KaggleCredentials, KaggleEnv, KaggleOAuth # type: ignore[attr-defined]
|
|
@@ -56,7 +57,6 @@ from kagglesdk.admin.types.inbox_file_service import CreateInboxFileRequest
|
|
|
56
57
|
from kagglesdk.blobs.types.blob_api_service import ApiStartBlobUploadRequest, ApiStartBlobUploadResponse, ApiBlobType
|
|
57
58
|
from kagglesdk.competitions.types.competition_api_service import (
|
|
58
59
|
ApiListCompetitionsRequest,
|
|
59
|
-
ApiCompetition,
|
|
60
60
|
ApiCreateCodeSubmissionRequest,
|
|
61
61
|
ApiCreateSubmissionResponse,
|
|
62
62
|
ApiStartSubmissionUploadRequest,
|
|
@@ -149,7 +149,6 @@ from kagglesdk.models.types.model_api_service import (
|
|
|
149
149
|
from kagglesdk.models.types.model_enums import ListModelsOrderBy, ModelInstanceType, ModelFramework
|
|
150
150
|
from kagglesdk.models.types.model_types import Owner
|
|
151
151
|
from kagglesdk.security.types.oauth_service import IntrospectTokenRequest
|
|
152
|
-
from ..models.dataset_column import DatasetColumn
|
|
153
152
|
from ..models.upload_file import UploadFile
|
|
154
153
|
import kagglesdk.kaggle_client
|
|
155
154
|
from enum import EnumMeta
|
|
@@ -673,14 +672,12 @@ class KaggleApi:
|
|
|
673
672
|
if self._authenticate_with_legacy_apikey():
|
|
674
673
|
return
|
|
675
674
|
if self.enable_oauth:
|
|
676
|
-
print("You must
|
|
675
|
+
print("You must authenticate before you can call the Kaggle API.")
|
|
677
676
|
print('Please run "kaggle auth login" to log in.')
|
|
678
677
|
else:
|
|
678
|
+
print("You must authenticate before you can call the Kaggle API.")
|
|
679
679
|
print(
|
|
680
|
-
"
|
|
681
|
-
" {}. Or use the environment method. See setup"
|
|
682
|
-
" instructions at"
|
|
683
|
-
" https://github.com/Kaggle/kaggle-api/".format(self.config_file, self.config_dir)
|
|
680
|
+
"Follow the instructions to authenticate at: https://github.com/Kaggle/kaggle-cli/blob/main/docs/README.md#authentication"
|
|
684
681
|
)
|
|
685
682
|
exit(1)
|
|
686
683
|
|
|
@@ -739,7 +736,6 @@ class KaggleApi:
|
|
|
739
736
|
self.CONFIG_NAME_AUTH_METHOD: AuthMethod.ACCESS_TOKEN,
|
|
740
737
|
}
|
|
741
738
|
self.logger.debug(f"Authenticated with access token in: {source}")
|
|
742
|
-
del os.environ["KAGGLE_API_TOKEN"]
|
|
743
739
|
return True
|
|
744
740
|
|
|
745
741
|
def _authenticate_with_oauth_creds(self) -> bool:
|
|
@@ -1080,11 +1076,12 @@ class KaggleApi:
|
|
|
1080
1076
|
username=self.config_values.get(self.CONFIG_NAME_USER),
|
|
1081
1077
|
password=self.config_values.get(self.CONFIG_NAME_KEY),
|
|
1082
1078
|
api_token=self.config_values.get(self.CONFIG_NAME_TOKEN),
|
|
1079
|
+
response_processor=self.get_response_processor(),
|
|
1083
1080
|
)
|
|
1084
1081
|
|
|
1085
1082
|
@staticmethod
|
|
1086
1083
|
def build_kaggle_client_with_params(
|
|
1087
|
-
args: List[str], username: str = None, password: str = None, api_token: str = None
|
|
1084
|
+
args: List[str], username: str = None, password: str = None, api_token: str = None, response_processor=None
|
|
1088
1085
|
) -> kagglesdk.kaggle_client.KaggleClient:
|
|
1089
1086
|
"""Builds a Kaggle client with the given parameters.
|
|
1090
1087
|
|
|
@@ -1093,6 +1090,7 @@ class KaggleApi:
|
|
|
1093
1090
|
username (str): The username to use for authentication.
|
|
1094
1091
|
password (str): The password to use for authentication.
|
|
1095
1092
|
api_token (str): The API token to use for authentication.
|
|
1093
|
+
response_processor: Callback used to process HTTP response.
|
|
1096
1094
|
|
|
1097
1095
|
Returns:
|
|
1098
1096
|
kagglesdk.kaggle_client.KaggleClient: A Kaggle client.
|
|
@@ -1109,6 +1107,7 @@ class KaggleApi:
|
|
|
1109
1107
|
username=username,
|
|
1110
1108
|
password=password,
|
|
1111
1109
|
api_token=api_token,
|
|
1110
|
+
response_processor=response_processor,
|
|
1112
1111
|
)
|
|
1113
1112
|
|
|
1114
1113
|
def camel_to_snake(self, name: str) -> str:
|
|
@@ -1346,6 +1345,12 @@ class KaggleApi:
|
|
|
1346
1345
|
return resp
|
|
1347
1346
|
|
|
1348
1347
|
submit_request = ApiCreateSubmissionRequest()
|
|
1348
|
+
|
|
1349
|
+
# Admin-only feature to submit for a given model (b/475908216)
|
|
1350
|
+
model_version_id = os.getenv("KAGGLE_COMPETITION_SUBMISSION_MODEL_VERSION_ID", None)
|
|
1351
|
+
if model_version_id:
|
|
1352
|
+
submit_request.benchmark_model_version_id = int(model_version_id)
|
|
1353
|
+
|
|
1349
1354
|
submit_request.competition_name = competition
|
|
1350
1355
|
submit_request.blob_file_tokens = response.token
|
|
1351
1356
|
if message:
|
|
@@ -1380,7 +1385,7 @@ class KaggleApi:
|
|
|
1380
1385
|
str:
|
|
1381
1386
|
"""
|
|
1382
1387
|
if kernel and not version or version and not kernel:
|
|
1383
|
-
raise ValueError("Code competition submissions require both the output file name and the version
|
|
1388
|
+
raise ValueError("Code competition submissions require both the output file name and the version number")
|
|
1384
1389
|
competition = competition or competition_opt
|
|
1385
1390
|
try:
|
|
1386
1391
|
if kernel:
|
|
@@ -1408,6 +1413,7 @@ class KaggleApi:
|
|
|
1408
1413
|
competition: str,
|
|
1409
1414
|
group: SubmissionGroup = SubmissionGroup.SUBMISSION_GROUP_ALL,
|
|
1410
1415
|
sort: SubmissionSortBy = SubmissionSortBy.SUBMISSION_SORT_BY_DATE,
|
|
1416
|
+
page_number: int = -1,
|
|
1411
1417
|
page_token: str = "",
|
|
1412
1418
|
page_size: int = 20,
|
|
1413
1419
|
) -> list[ApiSubmission | None] | None:
|
|
@@ -1417,6 +1423,7 @@ class KaggleApi:
|
|
|
1417
1423
|
competition (str): The name of the competition.
|
|
1418
1424
|
group (SubmissionGroup): The submission group.
|
|
1419
1425
|
sort (SubmissionSortBy): The sort-by option.
|
|
1426
|
+
page_number (int): The page number to show.
|
|
1420
1427
|
page_token (str): The pageToken for pagination.
|
|
1421
1428
|
page_size (int): The number of items per page.
|
|
1422
1429
|
|
|
@@ -1426,6 +1433,7 @@ class KaggleApi:
|
|
|
1426
1433
|
with self.build_kaggle_client() as kaggle:
|
|
1427
1434
|
request = ApiListSubmissionsRequest()
|
|
1428
1435
|
request.competition_name = competition
|
|
1436
|
+
request.page = page_number
|
|
1429
1437
|
request.page_token = page_token
|
|
1430
1438
|
request.page_size = page_size
|
|
1431
1439
|
request.group = group
|
|
@@ -2152,17 +2160,17 @@ class KaggleApi:
|
|
|
2152
2160
|
except zipfile.BadZipFile as e:
|
|
2153
2161
|
raise ValueError(
|
|
2154
2162
|
f"The file {outfile} is corrupted or not a valid zip file. "
|
|
2155
|
-
"Please report this issue at https://www.github.com/kaggle/kaggle-
|
|
2163
|
+
"Please report this issue at https://www.github.com/kaggle/kaggle-cli/issues"
|
|
2156
2164
|
)
|
|
2157
2165
|
except FileNotFoundError:
|
|
2158
2166
|
raise FileNotFoundError(
|
|
2159
2167
|
f"The file {outfile} was not found. "
|
|
2160
|
-
"Please report this issue at https://www.github.com/kaggle/kaggle-
|
|
2168
|
+
"Please report this issue at https://www.github.com/kaggle/kaggle-cli"
|
|
2161
2169
|
)
|
|
2162
2170
|
except Exception as e:
|
|
2163
2171
|
raise RuntimeError(
|
|
2164
2172
|
f"An unexpected error occurred: {e}. "
|
|
2165
|
-
"Please report this issue at https://www.github.com/kaggle/kaggle-
|
|
2173
|
+
"Please report this issue at https://www.github.com/kaggle/kaggle-cli"
|
|
2166
2174
|
)
|
|
2167
2175
|
|
|
2168
2176
|
try:
|
|
@@ -2627,8 +2635,10 @@ class KaggleApi:
|
|
|
2627
2635
|
else:
|
|
2628
2636
|
print("Dataset creation error: " + result.error)
|
|
2629
2637
|
|
|
2630
|
-
def download_file(
|
|
2631
|
-
|
|
2638
|
+
def download_file(
|
|
2639
|
+
self, response, outfile, http_client, quiet=True, resume=False, chunk_size=1048576, max_retries=5, timeout=300
|
|
2640
|
+
):
|
|
2641
|
+
"""Downloads a file to an output file, streaming in chunks with automatic retry on failure.
|
|
2632
2642
|
|
|
2633
2643
|
Args:
|
|
2634
2644
|
response: The response object to download.
|
|
@@ -2637,14 +2647,16 @@ class KaggleApi:
|
|
|
2637
2647
|
quiet: Suppress verbose output (default is True).
|
|
2638
2648
|
chunk_size: The size of the chunk to stream.
|
|
2639
2649
|
resume: Whether to resume an existing download.
|
|
2650
|
+
max_retries: Maximum number of retry attempts on network errors (default is 5).
|
|
2651
|
+
timeout: Timeout in seconds for each chunk read operation (default is 300).
|
|
2640
2652
|
"""
|
|
2641
2653
|
|
|
2642
2654
|
outpath = os.path.dirname(outfile)
|
|
2643
2655
|
if not os.path.exists(outpath):
|
|
2644
2656
|
os.makedirs(outpath)
|
|
2657
|
+
|
|
2658
|
+
# Get file metadata
|
|
2645
2659
|
size = int(response.headers["Content-Length"])
|
|
2646
|
-
size_read = 0
|
|
2647
|
-
open_mode = "wb"
|
|
2648
2660
|
last_modified = response.headers.get("Last-Modified")
|
|
2649
2661
|
if last_modified is None:
|
|
2650
2662
|
remote_date = datetime.now()
|
|
@@ -2652,57 +2664,135 @@ class KaggleApi:
|
|
|
2652
2664
|
remote_date = datetime.strptime(response.headers["Last-Modified"], "%a, %d %b %Y %H:%M:%S %Z")
|
|
2653
2665
|
remote_date_timestamp = time.mktime(remote_date.timetuple())
|
|
2654
2666
|
|
|
2655
|
-
if
|
|
2656
|
-
print("Downloading " + os.path.basename(outfile) + " to " + outpath)
|
|
2657
|
-
|
|
2658
|
-
file_exists = os.path.isfile(outfile)
|
|
2667
|
+
# Check if file is resumable
|
|
2659
2668
|
resumable = "Accept-Ranges" in response.headers and response.headers["Accept-Ranges"] == "bytes"
|
|
2660
2669
|
|
|
2661
|
-
|
|
2662
|
-
|
|
2663
|
-
|
|
2670
|
+
# Retry loop for handling network errors
|
|
2671
|
+
retry_count = 0
|
|
2672
|
+
download_url = response.url
|
|
2673
|
+
original_method = response.request.method if hasattr(response, "request") else "GET"
|
|
2664
2674
|
|
|
2665
|
-
|
|
2666
|
-
|
|
2667
|
-
|
|
2668
|
-
|
|
2669
|
-
size_read,
|
|
2670
|
-
size - size_read,
|
|
2671
|
-
)
|
|
2672
|
-
)
|
|
2675
|
+
# Preserve original request headers for authentication
|
|
2676
|
+
original_headers = {}
|
|
2677
|
+
if hasattr(response, "request") and hasattr(response.request, "headers"):
|
|
2678
|
+
original_headers = dict(response.request.headers)
|
|
2673
2679
|
|
|
2674
|
-
|
|
2675
|
-
|
|
2676
|
-
|
|
2677
|
-
|
|
2678
|
-
headers={"Range": "bytes=%d-" % (size_read,)},
|
|
2679
|
-
stream=True,
|
|
2680
|
-
)
|
|
2680
|
+
while retry_count <= max_retries:
|
|
2681
|
+
try:
|
|
2682
|
+
# Check file existence inside loop (may be created during retry)
|
|
2683
|
+
file_exists = os.path.isfile(outfile)
|
|
2681
2684
|
|
|
2682
|
-
|
|
2683
|
-
|
|
2684
|
-
|
|
2685
|
-
|
|
2686
|
-
|
|
2687
|
-
|
|
2688
|
-
if not
|
|
2689
|
-
|
|
2690
|
-
|
|
2691
|
-
|
|
2692
|
-
|
|
2693
|
-
|
|
2685
|
+
# Determine starting position
|
|
2686
|
+
if retry_count > 0 or (resume and resumable and file_exists):
|
|
2687
|
+
size_read = os.path.getsize(outfile) if file_exists else 0
|
|
2688
|
+
open_mode = "ab"
|
|
2689
|
+
|
|
2690
|
+
if size_read >= size:
|
|
2691
|
+
if not quiet:
|
|
2692
|
+
print("File already downloaded completely.")
|
|
2693
|
+
return
|
|
2694
|
+
|
|
2695
|
+
if not quiet:
|
|
2696
|
+
if retry_count > 0:
|
|
2697
|
+
print(
|
|
2698
|
+
f"Retry {retry_count}/{max_retries}: Resuming from {size_read} bytes ({size - size_read} bytes left)..."
|
|
2699
|
+
)
|
|
2700
|
+
else:
|
|
2701
|
+
print(f"Resuming from {size_read} bytes ({size - size_read} bytes left)...")
|
|
2702
|
+
|
|
2703
|
+
# Request with Range header for resume, preserving authentication
|
|
2704
|
+
retry_headers = original_headers.copy()
|
|
2705
|
+
retry_headers["Range"] = f"bytes={size_read}-"
|
|
2706
|
+
response = requests.request(
|
|
2707
|
+
original_method,
|
|
2708
|
+
download_url,
|
|
2709
|
+
headers=retry_headers,
|
|
2710
|
+
stream=True,
|
|
2711
|
+
timeout=timeout,
|
|
2712
|
+
)
|
|
2694
2713
|
else:
|
|
2695
|
-
|
|
2696
|
-
|
|
2697
|
-
|
|
2698
|
-
|
|
2699
|
-
os.
|
|
2700
|
-
|
|
2701
|
-
|
|
2702
|
-
|
|
2703
|
-
|
|
2714
|
+
size_read = 0
|
|
2715
|
+
open_mode = "wb"
|
|
2716
|
+
|
|
2717
|
+
if not quiet:
|
|
2718
|
+
print("Downloading " + os.path.basename(outfile) + " to " + outpath)
|
|
2719
|
+
|
|
2720
|
+
# Download with progress bar
|
|
2721
|
+
with tqdm(
|
|
2722
|
+
total=size, initial=size_read, unit="B", unit_scale=True, unit_divisor=1024, disable=quiet
|
|
2723
|
+
) as pbar:
|
|
2724
|
+
with open(outfile, open_mode) as out:
|
|
2725
|
+
# TODO: Delete this test after all API methods are converted.
|
|
2726
|
+
if type(response).__name__ == "HTTPResponse":
|
|
2727
|
+
while True:
|
|
2728
|
+
data = response.read(chunk_size)
|
|
2729
|
+
if not data:
|
|
2730
|
+
break
|
|
2731
|
+
out.write(data)
|
|
2732
|
+
out.flush() # Ensure data is written to disk
|
|
2733
|
+
os.utime(outfile, times=(remote_date_timestamp - 1, remote_date_timestamp - 1))
|
|
2734
|
+
size_read += len(data)
|
|
2735
|
+
pbar.update(len(data))
|
|
2736
|
+
else:
|
|
2737
|
+
for data in response.iter_content(chunk_size):
|
|
2738
|
+
if not data:
|
|
2739
|
+
break
|
|
2740
|
+
out.write(data)
|
|
2741
|
+
out.flush() # Ensure data is written to disk
|
|
2742
|
+
os.utime(outfile, times=(remote_date_timestamp - 1, remote_date_timestamp - 1))
|
|
2743
|
+
size_read += len(data)
|
|
2744
|
+
pbar.update(len(data))
|
|
2745
|
+
|
|
2746
|
+
# Download completed successfully
|
|
2747
|
+
if not quiet:
|
|
2748
|
+
print("\n", end="")
|
|
2749
|
+
|
|
2750
|
+
os.utime(outfile, times=(remote_date_timestamp, remote_date_timestamp))
|
|
2751
|
+
|
|
2752
|
+
# Verify file size
|
|
2753
|
+
final_size = os.path.getsize(outfile)
|
|
2754
|
+
if final_size != size:
|
|
2755
|
+
error_msg = f"Downloaded file size ({final_size}) does not match expected size ({size})"
|
|
2756
|
+
if not quiet:
|
|
2757
|
+
print(f"\n{error_msg}")
|
|
2758
|
+
raise ValueError(error_msg)
|
|
2704
2759
|
|
|
2705
|
-
|
|
2760
|
+
# Success - exit retry loop
|
|
2761
|
+
break
|
|
2762
|
+
|
|
2763
|
+
except (
|
|
2764
|
+
requests.exceptions.ConnectionError,
|
|
2765
|
+
requests.exceptions.Timeout,
|
|
2766
|
+
requests.exceptions.ChunkedEncodingError,
|
|
2767
|
+
urllib3_exceptions.ProtocolError,
|
|
2768
|
+
urllib3_exceptions.ReadTimeoutError,
|
|
2769
|
+
OSError,
|
|
2770
|
+
) as e:
|
|
2771
|
+
|
|
2772
|
+
retry_count += 1
|
|
2773
|
+
|
|
2774
|
+
if retry_count > max_retries:
|
|
2775
|
+
if not quiet:
|
|
2776
|
+
print(f"\nDownload failed after {max_retries} retries.")
|
|
2777
|
+
print(f"Error: {type(e).__name__}: {str(e)}")
|
|
2778
|
+
print(f"Partial file saved at: {outfile}")
|
|
2779
|
+
print(f"You can resume by running the same command again.")
|
|
2780
|
+
raise
|
|
2781
|
+
|
|
2782
|
+
# Calculate backoff time (exponential with jitter)
|
|
2783
|
+
backoff_time = min(2**retry_count + random(), 60) # Cap at 60 seconds
|
|
2784
|
+
|
|
2785
|
+
if not quiet:
|
|
2786
|
+
print(f"\nConnection error: {type(e).__name__}: {str(e)}")
|
|
2787
|
+
print(f"Retrying in {backoff_time:.1f} seconds... (attempt {retry_count}/{max_retries})")
|
|
2788
|
+
|
|
2789
|
+
time.sleep(backoff_time)
|
|
2790
|
+
|
|
2791
|
+
# Ensure file exists for resume
|
|
2792
|
+
if not os.path.isfile(outfile):
|
|
2793
|
+
open(outfile, "a").close()
|
|
2794
|
+
|
|
2795
|
+
continue
|
|
2706
2796
|
|
|
2707
2797
|
def kernels_list(
|
|
2708
2798
|
self,
|
|
@@ -2950,7 +3040,9 @@ class KaggleApi:
|
|
|
2950
3040
|
meta_file = self.kernels_initialize(folder)
|
|
2951
3041
|
print("Kernel metadata template written to: " + meta_file)
|
|
2952
3042
|
|
|
2953
|
-
def kernels_push(
|
|
3043
|
+
def kernels_push(
|
|
3044
|
+
self, folder: str, timeout: Optional[str] = None, acc: Optional[str] = None
|
|
3045
|
+
) -> ApiSaveKernelResponse:
|
|
2954
3046
|
"""Pushes a kernel to Kaggle.
|
|
2955
3047
|
|
|
2956
3048
|
This method reads the metadata file and kernel files from a notebook,
|
|
@@ -2959,6 +3051,8 @@ class KaggleApi:
|
|
|
2959
3051
|
Args:
|
|
2960
3052
|
folder (str): The path to the folder.
|
|
2961
3053
|
timeout (Optional[str]): The maximum run time in seconds.
|
|
3054
|
+
acc (Optional[str]): The type of accelerator to use for the kernel run. If set, this value overrides boolean
|
|
3055
|
+
settings for GPU/TPU found in the metadata file.
|
|
2962
3056
|
|
|
2963
3057
|
Returns:
|
|
2964
3058
|
ApiSaveKernelResponse: An ApiSaveKernelResponse object.
|
|
@@ -3033,7 +3127,7 @@ class KaggleApi:
|
|
|
3033
3127
|
|
|
3034
3128
|
model_sources = cast(List[str], self.get_or_default(meta_data, "model_sources", []))
|
|
3035
3129
|
for source in model_sources:
|
|
3036
|
-
self.
|
|
3130
|
+
self.validate_model_instance_version_string(source)
|
|
3037
3131
|
|
|
3038
3132
|
docker_pinning_type = self.get_or_default(meta_data, "docker_image_pinning_type", None)
|
|
3039
3133
|
if docker_pinning_type is not None and docker_pinning_type not in self.valid_push_pinning_types:
|
|
@@ -3077,19 +3171,22 @@ class KaggleApi:
|
|
|
3077
3171
|
request.docker_image = self.get_or_default(meta_data, "docker_image", None)
|
|
3078
3172
|
if timeout:
|
|
3079
3173
|
request.session_timeout_seconds = int(timeout)
|
|
3174
|
+
# The allowed names are in an enum that is not currently included in kagglesdk.
|
|
3175
|
+
request.machine_shape = acc if acc else self.get_or_default(meta_data, "machine_shape", None)
|
|
3080
3176
|
# Without the type hint, mypy thinks save_kernel() has type Any when checking warn_return_any.
|
|
3081
3177
|
response: ApiSaveKernelResponse = kaggle.kernels.kernels_api_client.save_kernel(request)
|
|
3082
3178
|
return response
|
|
3083
3179
|
|
|
3084
|
-
def kernels_push_cli(self, folder, timeout):
|
|
3180
|
+
def kernels_push_cli(self, folder, timeout, acc):
|
|
3085
3181
|
"""A client wrapper for kernels_push.
|
|
3086
3182
|
|
|
3087
3183
|
Args:
|
|
3088
3184
|
folder: The path to the folder.
|
|
3089
3185
|
timeout: The maximum run time in seconds.
|
|
3186
|
+
acc: The accelerator to use.
|
|
3090
3187
|
"""
|
|
3091
3188
|
folder = folder or os.getcwd()
|
|
3092
|
-
result = self.kernels_push(folder, timeout)
|
|
3189
|
+
result = self.kernels_push(folder, timeout, acc)
|
|
3093
3190
|
|
|
3094
3191
|
if result is None:
|
|
3095
3192
|
print("Kernel push error: see previous output")
|
|
@@ -3220,7 +3317,7 @@ class KaggleApi:
|
|
|
3220
3317
|
if file_name is None:
|
|
3221
3318
|
print(
|
|
3222
3319
|
"Unknown language %s + kernel type %s - please report this "
|
|
3223
|
-
"on the kaggle-
|
|
3320
|
+
"on the kaggle-cli github issues" % (language, kernel_type)
|
|
3224
3321
|
)
|
|
3225
3322
|
print("Saving as a python file, even though this may not be the " "correct language")
|
|
3226
3323
|
file_name = "script.py"
|
|
@@ -3268,12 +3365,15 @@ class KaggleApi:
|
|
|
3268
3365
|
else:
|
|
3269
3366
|
print("Source code downloaded to " + effective_path)
|
|
3270
3367
|
|
|
3271
|
-
def kernels_output(
|
|
3368
|
+
def kernels_output(
|
|
3369
|
+
self, kernel: str, path: str, file_pattern: str = None, force: bool = False, quiet: bool = True
|
|
3370
|
+
) -> Tuple[List[str], str]:
|
|
3272
3371
|
"""Retrieves the output for a specified kernel.
|
|
3273
3372
|
|
|
3274
3373
|
Args:
|
|
3275
3374
|
kernel (str): The kernel for which to retrieve the output.
|
|
3276
3375
|
path (str): The path to which to pull the files.
|
|
3376
|
+
file_pattern (str): Optional regex pattern to match against filenames. Only files matching the pattern will be downloaded.
|
|
3277
3377
|
force (bool): If True, force an overwrite if the output already exists (default is False).
|
|
3278
3378
|
quiet (bool): Suppress verbose output (default is True).
|
|
3279
3379
|
|
|
@@ -3302,6 +3402,14 @@ class KaggleApi:
|
|
|
3302
3402
|
if not os.path.isdir(target_dir):
|
|
3303
3403
|
raise ValueError("You must specify a directory for the kernels output")
|
|
3304
3404
|
|
|
3405
|
+
if file_pattern is not None:
|
|
3406
|
+
try:
|
|
3407
|
+
compiled_pattern = re.compile(file_pattern)
|
|
3408
|
+
except re.error as e:
|
|
3409
|
+
raise ValueError(f"Invalid regex pattern '{file_pattern}': {e}")
|
|
3410
|
+
else:
|
|
3411
|
+
compiled_pattern = None
|
|
3412
|
+
|
|
3305
3413
|
token = None
|
|
3306
3414
|
with self.build_kaggle_client() as kaggle:
|
|
3307
3415
|
request = ApiListKernelSessionOutputRequest()
|
|
@@ -3312,6 +3420,9 @@ class KaggleApi:
|
|
|
3312
3420
|
|
|
3313
3421
|
outfiles = []
|
|
3314
3422
|
for item in response.files:
|
|
3423
|
+
if compiled_pattern and not compiled_pattern.search(item.file_name):
|
|
3424
|
+
continue
|
|
3425
|
+
|
|
3315
3426
|
outfile = os.path.join(target_dir, item.file_name)
|
|
3316
3427
|
outfiles.append(outfile)
|
|
3317
3428
|
download_response = requests.get(item.url, stream=True)
|
|
@@ -3333,7 +3444,7 @@ class KaggleApi:
|
|
|
3333
3444
|
|
|
3334
3445
|
return outfiles, token # Breaking change, we need to get the token to the UI
|
|
3335
3446
|
|
|
3336
|
-
def kernels_output_cli(self, kernel, kernel_opt=None, path=None, force=False, quiet=False):
|
|
3447
|
+
def kernels_output_cli(self, kernel, kernel_opt=None, path=None, force=False, quiet=False, file_pattern=None):
|
|
3337
3448
|
"""A client wrapper for kernels_output.
|
|
3338
3449
|
|
|
3339
3450
|
This method is a client wrapper for the kernels_output function.
|
|
@@ -3345,9 +3456,10 @@ class KaggleApi:
|
|
|
3345
3456
|
path: The path to which to pull the files.
|
|
3346
3457
|
force: If True, force an overwrite if the output already exists (default is False).
|
|
3347
3458
|
quiet: Suppress verbose output (default is False).
|
|
3459
|
+
file_pattern: Regex pattern to match against filenames. Only files matching the pattern will be downloaded.
|
|
3348
3460
|
"""
|
|
3349
3461
|
kernel = kernel or kernel_opt
|
|
3350
|
-
(_, token) = self.kernels_output(kernel, path, force, quiet)
|
|
3462
|
+
(_, token) = self.kernels_output(kernel, path, file_pattern, force, quiet)
|
|
3351
3463
|
if token:
|
|
3352
3464
|
print(f"Next page token: {token}")
|
|
3353
3465
|
|
|
@@ -4285,7 +4397,7 @@ class KaggleApi:
|
|
|
4285
4397
|
t.extractall(effective_path)
|
|
4286
4398
|
except Exception as e:
|
|
4287
4399
|
raise ValueError(
|
|
4288
|
-
"Error extracting the tar.gz file, please report on " "www.github.com/kaggle/kaggle-
|
|
4400
|
+
"Error extracting the tar.gz file, please report on " "www.github.com/kaggle/kaggle-cli", e
|
|
4289
4401
|
)
|
|
4290
4402
|
|
|
4291
4403
|
try:
|
|
@@ -4809,11 +4921,12 @@ class KaggleApi:
|
|
|
4809
4921
|
column: A list of values in a column to be processed.
|
|
4810
4922
|
|
|
4811
4923
|
Returns:
|
|
4812
|
-
|
|
4924
|
+
An ApiDatasetColumn object.
|
|
4813
4925
|
"""
|
|
4814
|
-
processed_column =
|
|
4815
|
-
|
|
4816
|
-
)
|
|
4926
|
+
processed_column = ApiDatasetColumn()
|
|
4927
|
+
processed_column.name = self.get_or_fail(column, "name")
|
|
4928
|
+
processed_column.description = self.get_or_default(column, "description", "")
|
|
4929
|
+
|
|
4817
4930
|
if "type" in column:
|
|
4818
4931
|
original_type = column["type"].lower()
|
|
4819
4932
|
processed_column.original_type = original_type
|
|
@@ -5200,6 +5313,24 @@ class KaggleApi:
|
|
|
5200
5313
|
sys.stdout.write("Please respond with 'yes' or 'no'.\n")
|
|
5201
5314
|
return False
|
|
5202
5315
|
|
|
5316
|
+
def _check_response_version(self, response: Response):
    """Print a one-time warning when the server reports a newer `kaggle` version.

    This is the callback handed out by ``get_response_processor``. It reads
    the ``X-Kaggle-APIVersion`` response header and compares it with the
    installed ``kaggle.__version__``. The warning is printed at most once per
    instance; ``self.already_printed_version_warning`` records that it has
    been shown.

    Args:
        response (Response): The HTTP response whose headers are inspected.
    """
    if self.already_printed_version_warning:
        return

    latest_version_str = response.headers.get("X-Kaggle-APIVersion")
    if not latest_version_str:
        # Header absent or empty: nothing to compare against.
        return

    try:
        current_version = parse(kaggle.__version__)
        latest_version = parse(latest_version_str)
    except ValueError:
        # packaging signals an unparseable version with InvalidVersion, a
        # ValueError subclass. This check is advisory only, so a malformed
        # header must not make the surrounding API call fail.
        return

    if latest_version > current_version:
        # Every fragment that contains a placeholder needs its own `f`
        # prefix; implicit concatenation does not propagate it.
        print(
            "Warning: Looks like you're using an outdated `kaggle` "
            f"version (installed: {current_version}), please consider "
            f"upgrading to the latest version ({latest_version_str})"
        )
        self.already_printed_version_warning = True
|
|
5330
|
+
|
|
5331
|
+
def get_response_processor(self):
    """Return the callback used to process HTTP responses.

    Returns:
        This instance's ``_check_response_version`` method.
    """
    processor = self._check_response_version
    return processor
|
|
5333
|
+
|
|
5203
5334
|
|
|
5204
5335
|
class TqdmBufferedReader(io.BufferedReader):
|
|
5205
5336
|
|