kaggle 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. kaggle/__init__.py +1 -1
  2. kaggle/api/kaggle_api_extended.py +206 -75
  3. kaggle/cli.py +36 -28
  4. kaggle/models/upload_file.py +4 -4
  5. {kaggle-1.8.2.dist-info → kaggle-1.8.4.dist-info}/METADATA +66 -75
  6. kaggle-1.8.4.dist-info/RECORD +15 -0
  7. {kaggle-1.8.2.dist-info → kaggle-1.8.4.dist-info}/WHEEL +1 -1
  8. kaggle/models/api_blob_type.py +0 -4
  9. kaggle/models/dataset_column.py +0 -228
  10. kaggle/models/dataset_new_request.py +0 -443
  11. kaggle/models/dataset_new_version_request.py +0 -319
  12. kaggle/models/dataset_update_settings_request.py +0 -344
  13. kaggle/models/kernel_push_request.py +0 -608
  14. kaggle/models/model_instance_new_version_request.py +0 -145
  15. kaggle/models/model_instance_update_request.py +0 -451
  16. kaggle/models/model_new_instance_request.py +0 -552
  17. kaggle/models/model_new_request.py +0 -329
  18. kaggle/models/model_update_request.py +0 -300
  19. kaggle/models/start_blob_upload_request.py +0 -240
  20. kaggle/models/start_blob_upload_response.py +0 -142
  21. kaggle-1.8.2.dist-info/RECORD +0 -148
  22. kagglesdk/LICENSE +0 -201
  23. kagglesdk/__init__.py +0 -6
  24. kagglesdk/admin/__init__.py +0 -0
  25. kagglesdk/admin/services/__init__.py +0 -0
  26. kagglesdk/admin/services/inbox_file_service.py +0 -22
  27. kagglesdk/admin/types/__init__.py +0 -0
  28. kagglesdk/admin/types/inbox_file_service.py +0 -74
  29. kagglesdk/benchmarks/__init__.py +0 -0
  30. kagglesdk/benchmarks/services/__init__.py +0 -0
  31. kagglesdk/benchmarks/services/benchmarks_api_service.py +0 -19
  32. kagglesdk/benchmarks/types/__init__.py +0 -0
  33. kagglesdk/benchmarks/types/benchmark_types.py +0 -307
  34. kagglesdk/benchmarks/types/benchmarks_api_service.py +0 -243
  35. kagglesdk/blobs/__init__.py +0 -0
  36. kagglesdk/blobs/services/__init__.py +0 -0
  37. kagglesdk/blobs/services/blob_api_service.py +0 -25
  38. kagglesdk/blobs/types/__init__.py +0 -0
  39. kagglesdk/blobs/types/blob_api_service.py +0 -177
  40. kagglesdk/common/__init__.py +0 -0
  41. kagglesdk/common/services/__init__.py +0 -0
  42. kagglesdk/common/services/operations_service.py +0 -46
  43. kagglesdk/common/types/__init__.py +0 -0
  44. kagglesdk/common/types/file_download.py +0 -102
  45. kagglesdk/common/types/http_redirect.py +0 -105
  46. kagglesdk/common/types/operations.py +0 -194
  47. kagglesdk/common/types/operations_service.py +0 -48
  48. kagglesdk/community/__init__.py +0 -0
  49. kagglesdk/community/types/__init__.py +0 -0
  50. kagglesdk/community/types/content_enums.py +0 -44
  51. kagglesdk/community/types/organization.py +0 -410
  52. kagglesdk/competitions/__init__.py +0 -0
  53. kagglesdk/competitions/services/__init__.py +0 -0
  54. kagglesdk/competitions/services/competition_api_service.py +0 -178
  55. kagglesdk/competitions/types/__init__.py +0 -0
  56. kagglesdk/competitions/types/competition.py +0 -14
  57. kagglesdk/competitions/types/competition_api_service.py +0 -2393
  58. kagglesdk/competitions/types/competition_enums.py +0 -53
  59. kagglesdk/competitions/types/search_competitions.py +0 -28
  60. kagglesdk/competitions/types/submission_status.py +0 -9
  61. kagglesdk/datasets/__init__.py +0 -0
  62. kagglesdk/datasets/databundles/__init__.py +0 -0
  63. kagglesdk/datasets/databundles/types/__init__.py +0 -0
  64. kagglesdk/datasets/databundles/types/databundle_api_types.py +0 -540
  65. kagglesdk/datasets/services/__init__.py +0 -0
  66. kagglesdk/datasets/services/dataset_api_service.py +0 -195
  67. kagglesdk/datasets/types/__init__.py +0 -0
  68. kagglesdk/datasets/types/dataset_api_service.py +0 -3047
  69. kagglesdk/datasets/types/dataset_enums.py +0 -103
  70. kagglesdk/datasets/types/dataset_service.py +0 -145
  71. kagglesdk/datasets/types/dataset_types.py +0 -646
  72. kagglesdk/datasets/types/search_datasets.py +0 -6
  73. kagglesdk/discussions/__init__.py +0 -0
  74. kagglesdk/discussions/types/__init__.py +0 -0
  75. kagglesdk/discussions/types/search_discussions.py +0 -43
  76. kagglesdk/discussions/types/writeup_enums.py +0 -11
  77. kagglesdk/education/__init__.py +0 -0
  78. kagglesdk/education/services/__init__.py +0 -0
  79. kagglesdk/education/services/education_api_service.py +0 -19
  80. kagglesdk/education/types/__init__.py +0 -0
  81. kagglesdk/education/types/education_api_service.py +0 -248
  82. kagglesdk/education/types/education_service.py +0 -139
  83. kagglesdk/kaggle_client.py +0 -101
  84. kagglesdk/kaggle_creds.py +0 -148
  85. kagglesdk/kaggle_env.py +0 -104
  86. kagglesdk/kaggle_http_client.py +0 -269
  87. kagglesdk/kaggle_oauth.py +0 -200
  88. kagglesdk/kaggle_object.py +0 -344
  89. kagglesdk/kernels/__init__.py +0 -0
  90. kagglesdk/kernels/services/__init__.py +0 -0
  91. kagglesdk/kernels/services/kernels_api_service.py +0 -146
  92. kagglesdk/kernels/types/__init__.py +0 -0
  93. kagglesdk/kernels/types/kernels_api_service.py +0 -2451
  94. kagglesdk/kernels/types/kernels_enums.py +0 -39
  95. kagglesdk/kernels/types/search_kernels.py +0 -6
  96. kagglesdk/licenses/__init__.py +0 -0
  97. kagglesdk/licenses/types/__init__.py +0 -0
  98. kagglesdk/licenses/types/licenses_types.py +0 -182
  99. kagglesdk/models/__init__.py +0 -0
  100. kagglesdk/models/services/__init__.py +0 -0
  101. kagglesdk/models/services/model_api_service.py +0 -280
  102. kagglesdk/models/services/model_service.py +0 -19
  103. kagglesdk/models/types/__init__.py +0 -0
  104. kagglesdk/models/types/model_api_service.py +0 -4069
  105. kagglesdk/models/types/model_enums.py +0 -68
  106. kagglesdk/models/types/model_service.py +0 -275
  107. kagglesdk/models/types/model_types.py +0 -1338
  108. kagglesdk/models/types/search_models.py +0 -8
  109. kagglesdk/search/__init__.py +0 -0
  110. kagglesdk/search/services/__init__.py +0 -0
  111. kagglesdk/search/services/search_api_service.py +0 -19
  112. kagglesdk/search/types/__init__.py +0 -0
  113. kagglesdk/search/types/search_api_service.py +0 -2435
  114. kagglesdk/search/types/search_content_shared.py +0 -50
  115. kagglesdk/search/types/search_enums.py +0 -45
  116. kagglesdk/search/types/search_service.py +0 -303
  117. kagglesdk/security/__init__.py +0 -0
  118. kagglesdk/security/services/__init__.py +0 -0
  119. kagglesdk/security/services/iam_service.py +0 -31
  120. kagglesdk/security/services/oauth_service.py +0 -58
  121. kagglesdk/security/types/__init__.py +0 -0
  122. kagglesdk/security/types/authentication.py +0 -171
  123. kagglesdk/security/types/iam_service.py +0 -496
  124. kagglesdk/security/types/oauth_service.py +0 -1181
  125. kagglesdk/security/types/roles.py +0 -8
  126. kagglesdk/security/types/security_types.py +0 -159
  127. kagglesdk/test/__init__.py +0 -0
  128. kagglesdk/test/test_client.py +0 -41
  129. kagglesdk/users/__init__.py +0 -0
  130. kagglesdk/users/services/__init__.py +0 -0
  131. kagglesdk/users/services/account_service.py +0 -31
  132. kagglesdk/users/services/group_api_service.py +0 -31
  133. kagglesdk/users/types/__init__.py +0 -0
  134. kagglesdk/users/types/account_service.py +0 -345
  135. kagglesdk/users/types/group_api_service.py +0 -315
  136. kagglesdk/users/types/group_types.py +0 -165
  137. kagglesdk/users/types/groups_enum.py +0 -8
  138. kagglesdk/users/types/progression_service.py +0 -9
  139. kagglesdk/users/types/search_users.py +0 -23
  140. kagglesdk/users/types/user_avatar.py +0 -226
  141. kagglesdk/users/types/users_enums.py +0 -22
  142. {kaggle-1.8.2.dist-info → kaggle-1.8.4.dist-info}/entry_points.txt +0 -0
  143. {kaggle-1.8.2.dist-info → kaggle-1.8.4.dist-info}/licenses/LICENSE.txt +0 -0
kaggle/__init__.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import absolute_import
3
3
  import os
4
4
  from kaggle.api.kaggle_api_extended import KaggleApi
5
5
 
6
- __version__ = "1.8.2"
6
+ __version__ = "1.8.4"
7
7
 
8
8
  enable_oauth = os.environ.get("KAGGLE_ENABLE_OAUTH") in ("1", "true", "yes")
9
9
  api = KaggleApi(enable_oauth=enable_oauth)
kaggle/api/kaggle_api_extended.py CHANGED
@@ -18,7 +18,7 @@
18
18
  from __future__ import print_function
19
19
 
20
20
  import csv
21
- from datetime import datetime, timedelta
21
+ from datetime import datetime
22
22
  from enum import Enum
23
23
  import io
24
24
 
@@ -49,6 +49,7 @@ from slugify import slugify
49
49
  from tqdm import tqdm
50
50
  from urllib3.util.retry import Retry
51
51
  from google.protobuf import field_mask_pb2
52
+ from packaging.version import parse
52
53
 
53
54
  import kaggle
54
55
  from kagglesdk import get_access_token_from_env, KaggleClient, KaggleCredentials, KaggleEnv, KaggleOAuth # type: ignore[attr-defined]
@@ -56,7 +57,6 @@ from kagglesdk.admin.types.inbox_file_service import CreateInboxFileRequest
56
57
  from kagglesdk.blobs.types.blob_api_service import ApiStartBlobUploadRequest, ApiStartBlobUploadResponse, ApiBlobType
57
58
  from kagglesdk.competitions.types.competition_api_service import (
58
59
  ApiListCompetitionsRequest,
59
- ApiCompetition,
60
60
  ApiCreateCodeSubmissionRequest,
61
61
  ApiCreateSubmissionResponse,
62
62
  ApiStartSubmissionUploadRequest,
@@ -149,7 +149,6 @@ from kagglesdk.models.types.model_api_service import (
149
149
  from kagglesdk.models.types.model_enums import ListModelsOrderBy, ModelInstanceType, ModelFramework
150
150
  from kagglesdk.models.types.model_types import Owner
151
151
  from kagglesdk.security.types.oauth_service import IntrospectTokenRequest
152
- from ..models.dataset_column import DatasetColumn
153
152
  from ..models.upload_file import UploadFile
154
153
  import kagglesdk.kaggle_client
155
154
  from enum import EnumMeta
@@ -673,14 +672,12 @@ class KaggleApi:
673
672
  if self._authenticate_with_legacy_apikey():
674
673
  return
675
674
  if self.enable_oauth:
676
- print("You must log in to Kaggle to use the Kaggle API.")
675
+ print("You must authenticate before you can call the Kaggle API.")
677
676
  print('Please run "kaggle auth login" to log in.')
678
677
  else:
678
+ print("You must authenticate before you can call the Kaggle API.")
679
679
  print(
680
- "Could not find {}. Make sure it's located in"
681
- " {}. Or use the environment method. See setup"
682
- " instructions at"
683
- " https://github.com/Kaggle/kaggle-api/".format(self.config_file, self.config_dir)
680
+ "Follow the instructions to authenticate at: https://github.com/Kaggle/kaggle-cli/blob/main/docs/README.md#authentication"
684
681
  )
685
682
  exit(1)
686
683
 
@@ -739,7 +736,6 @@ class KaggleApi:
739
736
  self.CONFIG_NAME_AUTH_METHOD: AuthMethod.ACCESS_TOKEN,
740
737
  }
741
738
  self.logger.debug(f"Authenticated with access token in: {source}")
742
- del os.environ["KAGGLE_API_TOKEN"]
743
739
  return True
744
740
 
745
741
  def _authenticate_with_oauth_creds(self) -> bool:
@@ -1080,11 +1076,12 @@ class KaggleApi:
1080
1076
  username=self.config_values.get(self.CONFIG_NAME_USER),
1081
1077
  password=self.config_values.get(self.CONFIG_NAME_KEY),
1082
1078
  api_token=self.config_values.get(self.CONFIG_NAME_TOKEN),
1079
+ response_processor=self.get_response_processor(),
1083
1080
  )
1084
1081
 
1085
1082
  @staticmethod
1086
1083
  def build_kaggle_client_with_params(
1087
- args: List[str], username: str = None, password: str = None, api_token: str = None
1084
+ args: List[str], username: str = None, password: str = None, api_token: str = None, response_processor=None
1088
1085
  ) -> kagglesdk.kaggle_client.KaggleClient:
1089
1086
  """Builds a Kaggle client with the given parameters.
1090
1087
 
@@ -1093,6 +1090,7 @@ class KaggleApi:
1093
1090
  username (str): The username to use for authentication.
1094
1091
  password (str): The password to use for authentication.
1095
1092
  api_token (str): The API token to use for authentication.
1093
+ response_processor: Callback used to process HTTP response.
1096
1094
 
1097
1095
  Returns:
1098
1096
  kagglesdk.kaggle_client.KaggleClient: A Kaggle client.
@@ -1109,6 +1107,7 @@ class KaggleApi:
1109
1107
  username=username,
1110
1108
  password=password,
1111
1109
  api_token=api_token,
1110
+ response_processor=response_processor,
1112
1111
  )
1113
1112
 
1114
1113
  def camel_to_snake(self, name: str) -> str:
@@ -1346,6 +1345,12 @@ class KaggleApi:
1346
1345
  return resp
1347
1346
 
1348
1347
  submit_request = ApiCreateSubmissionRequest()
1348
+
1349
+ # Admin-only feature to submit for a given model (b/475908216)
1350
+ model_version_id = os.getenv("KAGGLE_COMPETITION_SUBMISSION_MODEL_VERSION_ID", None)
1351
+ if model_version_id:
1352
+ submit_request.benchmark_model_version_id = int(model_version_id)
1353
+
1349
1354
  submit_request.competition_name = competition
1350
1355
  submit_request.blob_file_tokens = response.token
1351
1356
  if message:
@@ -1380,7 +1385,7 @@ class KaggleApi:
1380
1385
  str:
1381
1386
  """
1382
1387
  if kernel and not version or version and not kernel:
1383
- raise ValueError("Code competition submissions require both the output file name and the version label")
1388
+ raise ValueError("Code competition submissions require both the output file name and the version number")
1384
1389
  competition = competition or competition_opt
1385
1390
  try:
1386
1391
  if kernel:
@@ -1408,6 +1413,7 @@ class KaggleApi:
1408
1413
  competition: str,
1409
1414
  group: SubmissionGroup = SubmissionGroup.SUBMISSION_GROUP_ALL,
1410
1415
  sort: SubmissionSortBy = SubmissionSortBy.SUBMISSION_SORT_BY_DATE,
1416
+ page_number: int = -1,
1411
1417
  page_token: str = "",
1412
1418
  page_size: int = 20,
1413
1419
  ) -> list[ApiSubmission | None] | None:
@@ -1417,6 +1423,7 @@ class KaggleApi:
1417
1423
  competition (str): The name of the competition.
1418
1424
  group (SubmissionGroup): The submission group.
1419
1425
  sort (SubmissionSortBy): The sort-by option.
1426
+ page_number (int): The page number to show.
1420
1427
  page_token (str): The pageToken for pagination.
1421
1428
  page_size (int): The number of items per page.
1422
1429
 
@@ -1426,6 +1433,7 @@ class KaggleApi:
1426
1433
  with self.build_kaggle_client() as kaggle:
1427
1434
  request = ApiListSubmissionsRequest()
1428
1435
  request.competition_name = competition
1436
+ request.page = page_number
1429
1437
  request.page_token = page_token
1430
1438
  request.page_size = page_size
1431
1439
  request.group = group
@@ -2152,17 +2160,17 @@ class KaggleApi:
2152
2160
  except zipfile.BadZipFile as e:
2153
2161
  raise ValueError(
2154
2162
  f"The file {outfile} is corrupted or not a valid zip file. "
2155
- "Please report this issue at https://www.github.com/kaggle/kaggle-api"
2163
+ "Please report this issue at https://www.github.com/kaggle/kaggle-cli/issues"
2156
2164
  )
2157
2165
  except FileNotFoundError:
2158
2166
  raise FileNotFoundError(
2159
2167
  f"The file {outfile} was not found. "
2160
- "Please report this issue at https://www.github.com/kaggle/kaggle-api"
2168
+ "Please report this issue at https://www.github.com/kaggle/kaggle-cli"
2161
2169
  )
2162
2170
  except Exception as e:
2163
2171
  raise RuntimeError(
2164
2172
  f"An unexpected error occurred: {e}. "
2165
- "Please report this issue at https://www.github.com/kaggle/kaggle-api"
2173
+ "Please report this issue at https://www.github.com/kaggle/kaggle-cli"
2166
2174
  )
2167
2175
 
2168
2176
  try:
@@ -2627,8 +2635,10 @@ class KaggleApi:
2627
2635
  else:
2628
2636
  print("Dataset creation error: " + result.error)
2629
2637
 
2630
- def download_file(self, response, outfile, http_client, quiet=True, resume=False, chunk_size=1048576):
2631
- """Downloads a file to an output file, streaming in chunks.
2638
+ def download_file(
2639
+ self, response, outfile, http_client, quiet=True, resume=False, chunk_size=1048576, max_retries=5, timeout=300
2640
+ ):
2641
+ """Downloads a file to an output file, streaming in chunks with automatic retry on failure.
2632
2642
 
2633
2643
  Args:
2634
2644
  response: The response object to download.
@@ -2637,14 +2647,16 @@ class KaggleApi:
2637
2647
  quiet: Suppress verbose output (default is True).
2638
2648
  chunk_size: The size of the chunk to stream.
2639
2649
  resume: Whether to resume an existing download.
2650
+ max_retries: Maximum number of retry attempts on network errors (default is 5).
2651
+ timeout: Timeout in seconds for each chunk read operation (default is 300).
2640
2652
  """
2641
2653
 
2642
2654
  outpath = os.path.dirname(outfile)
2643
2655
  if not os.path.exists(outpath):
2644
2656
  os.makedirs(outpath)
2657
+
2658
+ # Get file metadata
2645
2659
  size = int(response.headers["Content-Length"])
2646
- size_read = 0
2647
- open_mode = "wb"
2648
2660
  last_modified = response.headers.get("Last-Modified")
2649
2661
  if last_modified is None:
2650
2662
  remote_date = datetime.now()
@@ -2652,57 +2664,135 @@ class KaggleApi:
2652
2664
  remote_date = datetime.strptime(response.headers["Last-Modified"], "%a, %d %b %Y %H:%M:%S %Z")
2653
2665
  remote_date_timestamp = time.mktime(remote_date.timetuple())
2654
2666
 
2655
- if not quiet:
2656
- print("Downloading " + os.path.basename(outfile) + " to " + outpath)
2657
-
2658
- file_exists = os.path.isfile(outfile)
2667
+ # Check if file is resumable
2659
2668
  resumable = "Accept-Ranges" in response.headers and response.headers["Accept-Ranges"] == "bytes"
2660
2669
 
2661
- if resume and resumable and file_exists:
2662
- size_read = os.path.getsize(outfile)
2663
- open_mode = "ab"
2670
+ # Retry loop for handling network errors
2671
+ retry_count = 0
2672
+ download_url = response.url
2673
+ original_method = response.request.method if hasattr(response, "request") else "GET"
2664
2674
 
2665
- if not quiet:
2666
- print(
2667
- "... resuming from %d bytes (%d bytes left) ..."
2668
- % (
2669
- size_read,
2670
- size - size_read,
2671
- )
2672
- )
2675
+ # Preserve original request headers for authentication
2676
+ original_headers = {}
2677
+ if hasattr(response, "request") and hasattr(response.request, "headers"):
2678
+ original_headers = dict(response.request.headers)
2673
2679
 
2674
- request_history = response.history[0]
2675
- response = requests.request(
2676
- request_history.request.method,
2677
- response.url,
2678
- headers={"Range": "bytes=%d-" % (size_read,)},
2679
- stream=True,
2680
- )
2680
+ while retry_count <= max_retries:
2681
+ try:
2682
+ # Check file existence inside loop (may be created during retry)
2683
+ file_exists = os.path.isfile(outfile)
2681
2684
 
2682
- with tqdm(total=size, initial=size_read, unit="B", unit_scale=True, unit_divisor=1024, disable=quiet) as pbar:
2683
- with open(outfile, open_mode) as out:
2684
- # TODO: Delete this test after all API methods are converted.
2685
- if type(response).__name__ == "HTTPResponse":
2686
- while True:
2687
- data = response.read(chunk_size)
2688
- if not data:
2689
- break
2690
- out.write(data)
2691
- os.utime(outfile, times=(remote_date_timestamp - 1, remote_date_timestamp - 1))
2692
- size_read = min(size, size_read + chunk_size)
2693
- pbar.update(len(data))
2685
+ # Determine starting position
2686
+ if retry_count > 0 or (resume and resumable and file_exists):
2687
+ size_read = os.path.getsize(outfile) if file_exists else 0
2688
+ open_mode = "ab"
2689
+
2690
+ if size_read >= size:
2691
+ if not quiet:
2692
+ print("File already downloaded completely.")
2693
+ return
2694
+
2695
+ if not quiet:
2696
+ if retry_count > 0:
2697
+ print(
2698
+ f"Retry {retry_count}/{max_retries}: Resuming from {size_read} bytes ({size - size_read} bytes left)..."
2699
+ )
2700
+ else:
2701
+ print(f"Resuming from {size_read} bytes ({size - size_read} bytes left)...")
2702
+
2703
+ # Request with Range header for resume, preserving authentication
2704
+ retry_headers = original_headers.copy()
2705
+ retry_headers["Range"] = f"bytes={size_read}-"
2706
+ response = requests.request(
2707
+ original_method,
2708
+ download_url,
2709
+ headers=retry_headers,
2710
+ stream=True,
2711
+ timeout=timeout,
2712
+ )
2694
2713
  else:
2695
- for data in response.iter_content(chunk_size):
2696
- if not data:
2697
- break
2698
- out.write(data)
2699
- os.utime(outfile, times=(remote_date_timestamp - 1, remote_date_timestamp - 1))
2700
- size_read = min(size, size_read + chunk_size)
2701
- pbar.update(len(data))
2702
- if not quiet:
2703
- print("\n", end="")
2714
+ size_read = 0
2715
+ open_mode = "wb"
2716
+
2717
+ if not quiet:
2718
+ print("Downloading " + os.path.basename(outfile) + " to " + outpath)
2719
+
2720
+ # Download with progress bar
2721
+ with tqdm(
2722
+ total=size, initial=size_read, unit="B", unit_scale=True, unit_divisor=1024, disable=quiet
2723
+ ) as pbar:
2724
+ with open(outfile, open_mode) as out:
2725
+ # TODO: Delete this test after all API methods are converted.
2726
+ if type(response).__name__ == "HTTPResponse":
2727
+ while True:
2728
+ data = response.read(chunk_size)
2729
+ if not data:
2730
+ break
2731
+ out.write(data)
2732
+ out.flush() # Ensure data is written to disk
2733
+ os.utime(outfile, times=(remote_date_timestamp - 1, remote_date_timestamp - 1))
2734
+ size_read += len(data)
2735
+ pbar.update(len(data))
2736
+ else:
2737
+ for data in response.iter_content(chunk_size):
2738
+ if not data:
2739
+ break
2740
+ out.write(data)
2741
+ out.flush() # Ensure data is written to disk
2742
+ os.utime(outfile, times=(remote_date_timestamp - 1, remote_date_timestamp - 1))
2743
+ size_read += len(data)
2744
+ pbar.update(len(data))
2745
+
2746
+ # Download completed successfully
2747
+ if not quiet:
2748
+ print("\n", end="")
2749
+
2750
+ os.utime(outfile, times=(remote_date_timestamp, remote_date_timestamp))
2751
+
2752
+ # Verify file size
2753
+ final_size = os.path.getsize(outfile)
2754
+ if final_size != size:
2755
+ error_msg = f"Downloaded file size ({final_size}) does not match expected size ({size})"
2756
+ if not quiet:
2757
+ print(f"\n{error_msg}")
2758
+ raise ValueError(error_msg)
2704
2759
 
2705
- os.utime(outfile, times=(remote_date_timestamp, remote_date_timestamp))
2760
+ # Success - exit retry loop
2761
+ break
2762
+
2763
+ except (
2764
+ requests.exceptions.ConnectionError,
2765
+ requests.exceptions.Timeout,
2766
+ requests.exceptions.ChunkedEncodingError,
2767
+ urllib3_exceptions.ProtocolError,
2768
+ urllib3_exceptions.ReadTimeoutError,
2769
+ OSError,
2770
+ ) as e:
2771
+
2772
+ retry_count += 1
2773
+
2774
+ if retry_count > max_retries:
2775
+ if not quiet:
2776
+ print(f"\nDownload failed after {max_retries} retries.")
2777
+ print(f"Error: {type(e).__name__}: {str(e)}")
2778
+ print(f"Partial file saved at: {outfile}")
2779
+ print(f"You can resume by running the same command again.")
2780
+ raise
2781
+
2782
+ # Calculate backoff time (exponential with jitter)
2783
+ backoff_time = min(2**retry_count + random(), 60) # Cap at 60 seconds
2784
+
2785
+ if not quiet:
2786
+ print(f"\nConnection error: {type(e).__name__}: {str(e)}")
2787
+ print(f"Retrying in {backoff_time:.1f} seconds... (attempt {retry_count}/{max_retries})")
2788
+
2789
+ time.sleep(backoff_time)
2790
+
2791
+ # Ensure file exists for resume
2792
+ if not os.path.isfile(outfile):
2793
+ open(outfile, "a").close()
2794
+
2795
+ continue
2706
2796
 
2707
2797
  def kernels_list(
2708
2798
  self,
@@ -2950,7 +3040,9 @@ class KaggleApi:
2950
3040
  meta_file = self.kernels_initialize(folder)
2951
3041
  print("Kernel metadata template written to: " + meta_file)
2952
3042
 
2953
- def kernels_push(self, folder: str, timeout: Optional[str] = None) -> ApiSaveKernelResponse:
3043
+ def kernels_push(
3044
+ self, folder: str, timeout: Optional[str] = None, acc: Optional[str] = None
3045
+ ) -> ApiSaveKernelResponse:
2954
3046
  """Pushes a kernel to Kaggle.
2955
3047
 
2956
3048
  This method reads the metadata file and kernel files from a notebook,
@@ -2959,6 +3051,8 @@ class KaggleApi:
2959
3051
  Args:
2960
3052
  folder (str): The path to the folder.
2961
3053
  timeout (Optional[str]): The maximum run time in seconds.
3054
+ acc (Optional[str]): The type of accelerator to use for the kernel run. If set, this value overrides boolean
3055
+ settings for GPU/TPU found in the metadata file.
2962
3056
 
2963
3057
  Returns:
2964
3058
  ApiSaveKernelResponse: An ApiSaveKernelResponse object.
@@ -3033,7 +3127,7 @@ class KaggleApi:
3033
3127
 
3034
3128
  model_sources = cast(List[str], self.get_or_default(meta_data, "model_sources", []))
3035
3129
  for source in model_sources:
3036
- self.validate_model_string(source)
3130
+ self.validate_model_instance_version_string(source)
3037
3131
 
3038
3132
  docker_pinning_type = self.get_or_default(meta_data, "docker_image_pinning_type", None)
3039
3133
  if docker_pinning_type is not None and docker_pinning_type not in self.valid_push_pinning_types:
@@ -3077,19 +3171,22 @@ class KaggleApi:
3077
3171
  request.docker_image = self.get_or_default(meta_data, "docker_image", None)
3078
3172
  if timeout:
3079
3173
  request.session_timeout_seconds = int(timeout)
3174
+ # The allowed names are in an enum that is not currently included in kagglesdk.
3175
+ request.machine_shape = acc if acc else self.get_or_default(meta_data, "machine_shape", None)
3080
3176
  # Without the type hint, mypy thinks save_kernel() has type Any when checking warn_return_any.
3081
3177
  response: ApiSaveKernelResponse = kaggle.kernels.kernels_api_client.save_kernel(request)
3082
3178
  return response
3083
3179
 
3084
- def kernels_push_cli(self, folder, timeout):
3180
+ def kernels_push_cli(self, folder, timeout, acc):
3085
3181
  """A client wrapper for kernels_push.
3086
3182
 
3087
3183
  Args:
3088
3184
  folder: The path to the folder.
3089
3185
  timeout: The maximum run time in seconds.
3186
+ acc: The accelerator to use.
3090
3187
  """
3091
3188
  folder = folder or os.getcwd()
3092
- result = self.kernels_push(folder, timeout)
3189
+ result = self.kernels_push(folder, timeout, acc)
3093
3190
 
3094
3191
  if result is None:
3095
3192
  print("Kernel push error: see previous output")
@@ -3220,7 +3317,7 @@ class KaggleApi:
3220
3317
  if file_name is None:
3221
3318
  print(
3222
3319
  "Unknown language %s + kernel type %s - please report this "
3223
- "on the kaggle-api github issues" % (language, kernel_type)
3320
+ "on the kaggle-cli github issues" % (language, kernel_type)
3224
3321
  )
3225
3322
  print("Saving as a python file, even though this may not be the " "correct language")
3226
3323
  file_name = "script.py"
@@ -3268,12 +3365,15 @@ class KaggleApi:
3268
3365
  else:
3269
3366
  print("Source code downloaded to " + effective_path)
3270
3367
 
3271
- def kernels_output(self, kernel: str, path: str, force: bool = False, quiet: bool = True) -> Tuple[List[str], str]:
3368
+ def kernels_output(
3369
+ self, kernel: str, path: str, file_pattern: str = None, force: bool = False, quiet: bool = True
3370
+ ) -> Tuple[List[str], str]:
3272
3371
  """Retrieves the output for a specified kernel.
3273
3372
 
3274
3373
  Args:
3275
3374
  kernel (str): The kernel for which to retrieve the output.
3276
3375
  path (str): The path to which to pull the files.
3376
+ file_pattern (str): Optional regex pattern to match against filenames. Only files matching the pattern will be downloaded.
3277
3377
  force (bool): If True, force an overwrite if the output already exists (default is False).
3278
3378
  quiet (bool): Suppress verbose output (default is True).
3279
3379
 
@@ -3302,6 +3402,14 @@ class KaggleApi:
3302
3402
  if not os.path.isdir(target_dir):
3303
3403
  raise ValueError("You must specify a directory for the kernels output")
3304
3404
 
3405
+ if file_pattern is not None:
3406
+ try:
3407
+ compiled_pattern = re.compile(file_pattern)
3408
+ except re.error as e:
3409
+ raise ValueError(f"Invalid regex pattern '{file_pattern}': {e}")
3410
+ else:
3411
+ compiled_pattern = None
3412
+
3305
3413
  token = None
3306
3414
  with self.build_kaggle_client() as kaggle:
3307
3415
  request = ApiListKernelSessionOutputRequest()
@@ -3312,6 +3420,9 @@ class KaggleApi:
3312
3420
 
3313
3421
  outfiles = []
3314
3422
  for item in response.files:
3423
+ if compiled_pattern and not compiled_pattern.search(item.file_name):
3424
+ continue
3425
+
3315
3426
  outfile = os.path.join(target_dir, item.file_name)
3316
3427
  outfiles.append(outfile)
3317
3428
  download_response = requests.get(item.url, stream=True)
@@ -3333,7 +3444,7 @@ class KaggleApi:
3333
3444
 
3334
3445
  return outfiles, token # Breaking change, we need to get the token to the UI
3335
3446
 
3336
- def kernels_output_cli(self, kernel, kernel_opt=None, path=None, force=False, quiet=False):
3447
+ def kernels_output_cli(self, kernel, kernel_opt=None, path=None, force=False, quiet=False, file_pattern=None):
3337
3448
  """A client wrapper for kernels_output.
3338
3449
 
3339
3450
  This method is a client wrapper for the kernels_output function.
@@ -3345,9 +3456,10 @@ class KaggleApi:
3345
3456
  path: The path to which to pull the files.
3346
3457
  force: If True, force an overwrite if the output already exists (default is False).
3347
3458
  quiet: Suppress verbose output (default is False).
3459
+ file_pattern: Regex pattern to match against filenames. Only files matching the pattern will be downloaded.
3348
3460
  """
3349
3461
  kernel = kernel or kernel_opt
3350
- (_, token) = self.kernels_output(kernel, path, force, quiet)
3462
+ (_, token) = self.kernels_output(kernel, path, file_pattern, force, quiet)
3351
3463
  if token:
3352
3464
  print(f"Next page token: {token}")
3353
3465
 
@@ -4285,7 +4397,7 @@ class KaggleApi:
4285
4397
  t.extractall(effective_path)
4286
4398
  except Exception as e:
4287
4399
  raise ValueError(
4288
- "Error extracting the tar.gz file, please report on " "www.github.com/kaggle/kaggle-api", e
4400
+ "Error extracting the tar.gz file, please report on " "www.github.com/kaggle/kaggle-cli", e
4289
4401
  )
4290
4402
 
4291
4403
  try:
@@ -4809,11 +4921,12 @@ class KaggleApi:
4809
4921
  column: A list of values in a column to be processed.
4810
4922
 
4811
4923
  Returns:
4812
- A DatasetColumn object.
4924
+ An ApiDatasetColumn object.
4813
4925
  """
4814
- processed_column = DatasetColumn(
4815
- name=self.get_or_fail(column, "name"), description=self.get_or_default(column, "description", "")
4816
- )
4926
+ processed_column = ApiDatasetColumn()
4927
+ processed_column.name = self.get_or_fail(column, "name")
4928
+ processed_column.description = self.get_or_default(column, "description", "")
4929
+
4817
4930
  if "type" in column:
4818
4931
  original_type = column["type"].lower()
4819
4932
  processed_column.original_type = original_type
@@ -5200,6 +5313,24 @@ class KaggleApi:
5200
5313
  sys.stdout.write("Please respond with 'yes' or 'no'.\n")
5201
5314
  return False
5202
5315
 
5316
+ def _check_response_version(self, response: Response):
5317
+ if self.already_printed_version_warning:
5318
+ return
5319
+ latest_version_str = response.headers.get("X-Kaggle-APIVersion")
5320
+ if latest_version_str:
5321
+ current_version = parse(kaggle.__version__)
5322
+ latest_version = parse(latest_version_str)
5323
+ if latest_version > current_version:
5324
+ print(
5325
+ f"Warning: Looks like you're using an outdated `kaggle`` "
5326
+ "version (installed: {current_version}), please consider "
5327
+ "upgrading to the latest version ({latest_version_str})"
5328
+ )
5329
+ self.already_printed_version_warning = True
5330
+
5331
+ def get_response_processor(self):
5332
+ return self._check_response_version
5333
+
5203
5334
 
5204
5335
  class TqdmBufferedReader(io.BufferedReader):
5205
5336