kaggle 1.8.3__py3-none-any.whl → 1.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kaggle/__init__.py +1 -1
- kaggle/api/kaggle_api_extended.py +183 -74
- kaggle/cli.py +36 -28
- kaggle/models/upload_file.py +4 -4
- {kaggle-1.8.3.dist-info → kaggle-1.8.4.dist-info}/METADATA +65 -74
- kaggle-1.8.4.dist-info/RECORD +15 -0
- kaggle/models/api_blob_type.py +0 -4
- kaggle/models/dataset_column.py +0 -228
- kaggle/models/dataset_new_request.py +0 -443
- kaggle/models/dataset_new_version_request.py +0 -319
- kaggle/models/dataset_update_settings_request.py +0 -344
- kaggle/models/kernel_push_request.py +0 -608
- kaggle/models/model_instance_new_version_request.py +0 -145
- kaggle/models/model_instance_update_request.py +0 -451
- kaggle/models/model_new_instance_request.py +0 -552
- kaggle/models/model_new_request.py +0 -329
- kaggle/models/model_update_request.py +0 -300
- kaggle/models/start_blob_upload_request.py +0 -240
- kaggle/models/start_blob_upload_response.py +0 -142
- kaggle-1.8.3.dist-info/RECORD +0 -28
- {kaggle-1.8.3.dist-info → kaggle-1.8.4.dist-info}/WHEEL +0 -0
- {kaggle-1.8.3.dist-info → kaggle-1.8.4.dist-info}/entry_points.txt +0 -0
- {kaggle-1.8.3.dist-info → kaggle-1.8.4.dist-info}/licenses/LICENSE.txt +0 -0
kaggle/__init__.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import absolute_import
|
|
|
3
3
|
import os
|
|
4
4
|
from kaggle.api.kaggle_api_extended import KaggleApi
|
|
5
5
|
|
|
6
|
-
__version__ = "1.8.3"
|
|
6
|
+
__version__ = "1.8.4"
|
|
7
7
|
|
|
8
8
|
enable_oauth = os.environ.get("KAGGLE_ENABLE_OAUTH") in ("1", "true", "yes")
|
|
9
9
|
api = KaggleApi(enable_oauth=enable_oauth)
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
from __future__ import print_function
|
|
19
19
|
|
|
20
20
|
import csv
|
|
21
|
-
from datetime import datetime
|
|
21
|
+
from datetime import datetime
|
|
22
22
|
from enum import Enum
|
|
23
23
|
import io
|
|
24
24
|
|
|
@@ -57,7 +57,6 @@ from kagglesdk.admin.types.inbox_file_service import CreateInboxFileRequest
|
|
|
57
57
|
from kagglesdk.blobs.types.blob_api_service import ApiStartBlobUploadRequest, ApiStartBlobUploadResponse, ApiBlobType
|
|
58
58
|
from kagglesdk.competitions.types.competition_api_service import (
|
|
59
59
|
ApiListCompetitionsRequest,
|
|
60
|
-
ApiCompetition,
|
|
61
60
|
ApiCreateCodeSubmissionRequest,
|
|
62
61
|
ApiCreateSubmissionResponse,
|
|
63
62
|
ApiStartSubmissionUploadRequest,
|
|
@@ -150,7 +149,6 @@ from kagglesdk.models.types.model_api_service import (
|
|
|
150
149
|
from kagglesdk.models.types.model_enums import ListModelsOrderBy, ModelInstanceType, ModelFramework
|
|
151
150
|
from kagglesdk.models.types.model_types import Owner
|
|
152
151
|
from kagglesdk.security.types.oauth_service import IntrospectTokenRequest
|
|
153
|
-
from ..models.dataset_column import DatasetColumn
|
|
154
152
|
from ..models.upload_file import UploadFile
|
|
155
153
|
import kagglesdk.kaggle_client
|
|
156
154
|
from enum import EnumMeta
|
|
@@ -674,14 +672,12 @@ class KaggleApi:
|
|
|
674
672
|
if self._authenticate_with_legacy_apikey():
|
|
675
673
|
return
|
|
676
674
|
if self.enable_oauth:
|
|
677
|
-
print("You must
|
|
675
|
+
print("You must authenticate before you can call the Kaggle API.")
|
|
678
676
|
print('Please run "kaggle auth login" to log in.')
|
|
679
677
|
else:
|
|
678
|
+
print("You must authenticate before you can call the Kaggle API.")
|
|
680
679
|
print(
|
|
681
|
-
"
|
|
682
|
-
" {}. Or use the environment method. See setup"
|
|
683
|
-
" instructions at"
|
|
684
|
-
" https://github.com/Kaggle/kaggle-api/".format(self.config_file, self.config_dir)
|
|
680
|
+
"Follow the instructions to authenticate at: https://github.com/Kaggle/kaggle-cli/blob/main/docs/README.md#authentication"
|
|
685
681
|
)
|
|
686
682
|
exit(1)
|
|
687
683
|
|
|
@@ -740,7 +736,6 @@ class KaggleApi:
|
|
|
740
736
|
self.CONFIG_NAME_AUTH_METHOD: AuthMethod.ACCESS_TOKEN,
|
|
741
737
|
}
|
|
742
738
|
self.logger.debug(f"Authenticated with access token in: {source}")
|
|
743
|
-
os.environ.pop("KAGGLE_API_TOKEN", None)
|
|
744
739
|
return True
|
|
745
740
|
|
|
746
741
|
def _authenticate_with_oauth_creds(self) -> bool:
|
|
@@ -1350,6 +1345,12 @@ class KaggleApi:
|
|
|
1350
1345
|
return resp
|
|
1351
1346
|
|
|
1352
1347
|
submit_request = ApiCreateSubmissionRequest()
|
|
1348
|
+
|
|
1349
|
+
# Admin-only feature to submit for a given model (b/475908216)
|
|
1350
|
+
model_version_id = os.getenv("KAGGLE_COMPETITION_SUBMISSION_MODEL_VERSION_ID", None)
|
|
1351
|
+
if model_version_id:
|
|
1352
|
+
submit_request.benchmark_model_version_id = int(model_version_id)
|
|
1353
|
+
|
|
1353
1354
|
submit_request.competition_name = competition
|
|
1354
1355
|
submit_request.blob_file_tokens = response.token
|
|
1355
1356
|
if message:
|
|
@@ -1384,7 +1385,7 @@ class KaggleApi:
|
|
|
1384
1385
|
str:
|
|
1385
1386
|
"""
|
|
1386
1387
|
if kernel and not version or version and not kernel:
|
|
1387
|
-
raise ValueError("Code competition submissions require both the output file name and the version
|
|
1388
|
+
raise ValueError("Code competition submissions require both the output file name and the version number")
|
|
1388
1389
|
competition = competition or competition_opt
|
|
1389
1390
|
try:
|
|
1390
1391
|
if kernel:
|
|
@@ -1412,6 +1413,7 @@ class KaggleApi:
|
|
|
1412
1413
|
competition: str,
|
|
1413
1414
|
group: SubmissionGroup = SubmissionGroup.SUBMISSION_GROUP_ALL,
|
|
1414
1415
|
sort: SubmissionSortBy = SubmissionSortBy.SUBMISSION_SORT_BY_DATE,
|
|
1416
|
+
page_number: int = -1,
|
|
1415
1417
|
page_token: str = "",
|
|
1416
1418
|
page_size: int = 20,
|
|
1417
1419
|
) -> list[ApiSubmission | None] | None:
|
|
@@ -1421,6 +1423,7 @@ class KaggleApi:
|
|
|
1421
1423
|
competition (str): The name of the competition.
|
|
1422
1424
|
group (SubmissionGroup): The submission group.
|
|
1423
1425
|
sort (SubmissionSortBy): The sort-by option.
|
|
1426
|
+
page_number (int): The page number to show.
|
|
1424
1427
|
page_token (str): The pageToken for pagination.
|
|
1425
1428
|
page_size (int): The number of items per page.
|
|
1426
1429
|
|
|
@@ -1430,6 +1433,7 @@ class KaggleApi:
|
|
|
1430
1433
|
with self.build_kaggle_client() as kaggle:
|
|
1431
1434
|
request = ApiListSubmissionsRequest()
|
|
1432
1435
|
request.competition_name = competition
|
|
1436
|
+
request.page = page_number
|
|
1433
1437
|
request.page_token = page_token
|
|
1434
1438
|
request.page_size = page_size
|
|
1435
1439
|
request.group = group
|
|
@@ -2156,17 +2160,17 @@ class KaggleApi:
|
|
|
2156
2160
|
except zipfile.BadZipFile as e:
|
|
2157
2161
|
raise ValueError(
|
|
2158
2162
|
f"The file {outfile} is corrupted or not a valid zip file. "
|
|
2159
|
-
"Please report this issue at https://www.github.com/kaggle/kaggle-
|
|
2163
|
+
"Please report this issue at https://www.github.com/kaggle/kaggle-cli/issues"
|
|
2160
2164
|
)
|
|
2161
2165
|
except FileNotFoundError:
|
|
2162
2166
|
raise FileNotFoundError(
|
|
2163
2167
|
f"The file {outfile} was not found. "
|
|
2164
|
-
"Please report this issue at https://www.github.com/kaggle/kaggle-
|
|
2168
|
+
"Please report this issue at https://www.github.com/kaggle/kaggle-cli"
|
|
2165
2169
|
)
|
|
2166
2170
|
except Exception as e:
|
|
2167
2171
|
raise RuntimeError(
|
|
2168
2172
|
f"An unexpected error occurred: {e}. "
|
|
2169
|
-
"Please report this issue at https://www.github.com/kaggle/kaggle-
|
|
2173
|
+
"Please report this issue at https://www.github.com/kaggle/kaggle-cli"
|
|
2170
2174
|
)
|
|
2171
2175
|
|
|
2172
2176
|
try:
|
|
@@ -2631,8 +2635,10 @@ class KaggleApi:
|
|
|
2631
2635
|
else:
|
|
2632
2636
|
print("Dataset creation error: " + result.error)
|
|
2633
2637
|
|
|
2634
|
-
def download_file(
|
|
2635
|
-
|
|
2638
|
+
def download_file(
|
|
2639
|
+
self, response, outfile, http_client, quiet=True, resume=False, chunk_size=1048576, max_retries=5, timeout=300
|
|
2640
|
+
):
|
|
2641
|
+
"""Downloads a file to an output file, streaming in chunks with automatic retry on failure.
|
|
2636
2642
|
|
|
2637
2643
|
Args:
|
|
2638
2644
|
response: The response object to download.
|
|
@@ -2641,14 +2647,16 @@ class KaggleApi:
|
|
|
2641
2647
|
quiet: Suppress verbose output (default is True).
|
|
2642
2648
|
chunk_size: The size of the chunk to stream.
|
|
2643
2649
|
resume: Whether to resume an existing download.
|
|
2650
|
+
max_retries: Maximum number of retry attempts on network errors (default is 5).
|
|
2651
|
+
timeout: Timeout in seconds for each chunk read operation (default is 300).
|
|
2644
2652
|
"""
|
|
2645
2653
|
|
|
2646
2654
|
outpath = os.path.dirname(outfile)
|
|
2647
2655
|
if not os.path.exists(outpath):
|
|
2648
2656
|
os.makedirs(outpath)
|
|
2657
|
+
|
|
2658
|
+
# Get file metadata
|
|
2649
2659
|
size = int(response.headers["Content-Length"])
|
|
2650
|
-
size_read = 0
|
|
2651
|
-
open_mode = "wb"
|
|
2652
2660
|
last_modified = response.headers.get("Last-Modified")
|
|
2653
2661
|
if last_modified is None:
|
|
2654
2662
|
remote_date = datetime.now()
|
|
@@ -2656,57 +2664,135 @@ class KaggleApi:
|
|
|
2656
2664
|
remote_date = datetime.strptime(response.headers["Last-Modified"], "%a, %d %b %Y %H:%M:%S %Z")
|
|
2657
2665
|
remote_date_timestamp = time.mktime(remote_date.timetuple())
|
|
2658
2666
|
|
|
2659
|
-
if
|
|
2660
|
-
print("Downloading " + os.path.basename(outfile) + " to " + outpath)
|
|
2661
|
-
|
|
2662
|
-
file_exists = os.path.isfile(outfile)
|
|
2667
|
+
# Check if file is resumable
|
|
2663
2668
|
resumable = "Accept-Ranges" in response.headers and response.headers["Accept-Ranges"] == "bytes"
|
|
2664
2669
|
|
|
2665
|
-
|
|
2666
|
-
|
|
2667
|
-
|
|
2670
|
+
# Retry loop for handling network errors
|
|
2671
|
+
retry_count = 0
|
|
2672
|
+
download_url = response.url
|
|
2673
|
+
original_method = response.request.method if hasattr(response, "request") else "GET"
|
|
2668
2674
|
|
|
2669
|
-
|
|
2670
|
-
|
|
2671
|
-
|
|
2672
|
-
|
|
2673
|
-
size_read,
|
|
2674
|
-
size - size_read,
|
|
2675
|
-
)
|
|
2676
|
-
)
|
|
2675
|
+
# Preserve original request headers for authentication
|
|
2676
|
+
original_headers = {}
|
|
2677
|
+
if hasattr(response, "request") and hasattr(response.request, "headers"):
|
|
2678
|
+
original_headers = dict(response.request.headers)
|
|
2677
2679
|
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
|
|
2681
|
-
|
|
2682
|
-
|
|
2683
|
-
|
|
2684
|
-
|
|
2680
|
+
while retry_count <= max_retries:
|
|
2681
|
+
try:
|
|
2682
|
+
# Check file existence inside loop (may be created during retry)
|
|
2683
|
+
file_exists = os.path.isfile(outfile)
|
|
2684
|
+
|
|
2685
|
+
# Determine starting position
|
|
2686
|
+
if retry_count > 0 or (resume and resumable and file_exists):
|
|
2687
|
+
size_read = os.path.getsize(outfile) if file_exists else 0
|
|
2688
|
+
open_mode = "ab"
|
|
2689
|
+
|
|
2690
|
+
if size_read >= size:
|
|
2691
|
+
if not quiet:
|
|
2692
|
+
print("File already downloaded completely.")
|
|
2693
|
+
return
|
|
2685
2694
|
|
|
2686
|
-
|
|
2687
|
-
|
|
2688
|
-
|
|
2689
|
-
|
|
2690
|
-
|
|
2691
|
-
|
|
2692
|
-
|
|
2693
|
-
|
|
2694
|
-
|
|
2695
|
-
|
|
2696
|
-
|
|
2697
|
-
|
|
2695
|
+
if not quiet:
|
|
2696
|
+
if retry_count > 0:
|
|
2697
|
+
print(
|
|
2698
|
+
f"Retry {retry_count}/{max_retries}: Resuming from {size_read} bytes ({size - size_read} bytes left)..."
|
|
2699
|
+
)
|
|
2700
|
+
else:
|
|
2701
|
+
print(f"Resuming from {size_read} bytes ({size - size_read} bytes left)...")
|
|
2702
|
+
|
|
2703
|
+
# Request with Range header for resume, preserving authentication
|
|
2704
|
+
retry_headers = original_headers.copy()
|
|
2705
|
+
retry_headers["Range"] = f"bytes={size_read}-"
|
|
2706
|
+
response = requests.request(
|
|
2707
|
+
original_method,
|
|
2708
|
+
download_url,
|
|
2709
|
+
headers=retry_headers,
|
|
2710
|
+
stream=True,
|
|
2711
|
+
timeout=timeout,
|
|
2712
|
+
)
|
|
2698
2713
|
else:
|
|
2699
|
-
|
|
2700
|
-
|
|
2701
|
-
break
|
|
2702
|
-
out.write(data)
|
|
2703
|
-
os.utime(outfile, times=(remote_date_timestamp - 1, remote_date_timestamp - 1))
|
|
2704
|
-
size_read = min(size, size_read + chunk_size)
|
|
2705
|
-
pbar.update(len(data))
|
|
2706
|
-
if not quiet:
|
|
2707
|
-
print("\n", end="")
|
|
2714
|
+
size_read = 0
|
|
2715
|
+
open_mode = "wb"
|
|
2708
2716
|
|
|
2709
|
-
|
|
2717
|
+
if not quiet:
|
|
2718
|
+
print("Downloading " + os.path.basename(outfile) + " to " + outpath)
|
|
2719
|
+
|
|
2720
|
+
# Download with progress bar
|
|
2721
|
+
with tqdm(
|
|
2722
|
+
total=size, initial=size_read, unit="B", unit_scale=True, unit_divisor=1024, disable=quiet
|
|
2723
|
+
) as pbar:
|
|
2724
|
+
with open(outfile, open_mode) as out:
|
|
2725
|
+
# TODO: Delete this test after all API methods are converted.
|
|
2726
|
+
if type(response).__name__ == "HTTPResponse":
|
|
2727
|
+
while True:
|
|
2728
|
+
data = response.read(chunk_size)
|
|
2729
|
+
if not data:
|
|
2730
|
+
break
|
|
2731
|
+
out.write(data)
|
|
2732
|
+
out.flush() # Ensure data is written to disk
|
|
2733
|
+
os.utime(outfile, times=(remote_date_timestamp - 1, remote_date_timestamp - 1))
|
|
2734
|
+
size_read += len(data)
|
|
2735
|
+
pbar.update(len(data))
|
|
2736
|
+
else:
|
|
2737
|
+
for data in response.iter_content(chunk_size):
|
|
2738
|
+
if not data:
|
|
2739
|
+
break
|
|
2740
|
+
out.write(data)
|
|
2741
|
+
out.flush() # Ensure data is written to disk
|
|
2742
|
+
os.utime(outfile, times=(remote_date_timestamp - 1, remote_date_timestamp - 1))
|
|
2743
|
+
size_read += len(data)
|
|
2744
|
+
pbar.update(len(data))
|
|
2745
|
+
|
|
2746
|
+
# Download completed successfully
|
|
2747
|
+
if not quiet:
|
|
2748
|
+
print("\n", end="")
|
|
2749
|
+
|
|
2750
|
+
os.utime(outfile, times=(remote_date_timestamp, remote_date_timestamp))
|
|
2751
|
+
|
|
2752
|
+
# Verify file size
|
|
2753
|
+
final_size = os.path.getsize(outfile)
|
|
2754
|
+
if final_size != size:
|
|
2755
|
+
error_msg = f"Downloaded file size ({final_size}) does not match expected size ({size})"
|
|
2756
|
+
if not quiet:
|
|
2757
|
+
print(f"\n{error_msg}")
|
|
2758
|
+
raise ValueError(error_msg)
|
|
2759
|
+
|
|
2760
|
+
# Success - exit retry loop
|
|
2761
|
+
break
|
|
2762
|
+
|
|
2763
|
+
except (
|
|
2764
|
+
requests.exceptions.ConnectionError,
|
|
2765
|
+
requests.exceptions.Timeout,
|
|
2766
|
+
requests.exceptions.ChunkedEncodingError,
|
|
2767
|
+
urllib3_exceptions.ProtocolError,
|
|
2768
|
+
urllib3_exceptions.ReadTimeoutError,
|
|
2769
|
+
OSError,
|
|
2770
|
+
) as e:
|
|
2771
|
+
|
|
2772
|
+
retry_count += 1
|
|
2773
|
+
|
|
2774
|
+
if retry_count > max_retries:
|
|
2775
|
+
if not quiet:
|
|
2776
|
+
print(f"\nDownload failed after {max_retries} retries.")
|
|
2777
|
+
print(f"Error: {type(e).__name__}: {str(e)}")
|
|
2778
|
+
print(f"Partial file saved at: {outfile}")
|
|
2779
|
+
print(f"You can resume by running the same command again.")
|
|
2780
|
+
raise
|
|
2781
|
+
|
|
2782
|
+
# Calculate backoff time (exponential with jitter)
|
|
2783
|
+
backoff_time = min(2**retry_count + random(), 60) # Cap at 60 seconds
|
|
2784
|
+
|
|
2785
|
+
if not quiet:
|
|
2786
|
+
print(f"\nConnection error: {type(e).__name__}: {str(e)}")
|
|
2787
|
+
print(f"Retrying in {backoff_time:.1f} seconds... (attempt {retry_count}/{max_retries})")
|
|
2788
|
+
|
|
2789
|
+
time.sleep(backoff_time)
|
|
2790
|
+
|
|
2791
|
+
# Ensure file exists for resume
|
|
2792
|
+
if not os.path.isfile(outfile):
|
|
2793
|
+
open(outfile, "a").close()
|
|
2794
|
+
|
|
2795
|
+
continue
|
|
2710
2796
|
|
|
2711
2797
|
def kernels_list(
|
|
2712
2798
|
self,
|
|
@@ -2954,7 +3040,9 @@ class KaggleApi:
|
|
|
2954
3040
|
meta_file = self.kernels_initialize(folder)
|
|
2955
3041
|
print("Kernel metadata template written to: " + meta_file)
|
|
2956
3042
|
|
|
2957
|
-
def kernels_push(
|
|
3043
|
+
def kernels_push(
|
|
3044
|
+
self, folder: str, timeout: Optional[str] = None, acc: Optional[str] = None
|
|
3045
|
+
) -> ApiSaveKernelResponse:
|
|
2958
3046
|
"""Pushes a kernel to Kaggle.
|
|
2959
3047
|
|
|
2960
3048
|
This method reads the metadata file and kernel files from a notebook,
|
|
@@ -2963,6 +3051,8 @@ class KaggleApi:
|
|
|
2963
3051
|
Args:
|
|
2964
3052
|
folder (str): The path to the folder.
|
|
2965
3053
|
timeout (Optional[str]): The maximum run time in seconds.
|
|
3054
|
+
acc (Optional[str]): The type of accelerator to use for the kernel run. If set, this value overrides boolean
|
|
3055
|
+
settings for GPU/TPU found in the metadata file.
|
|
2966
3056
|
|
|
2967
3057
|
Returns:
|
|
2968
3058
|
ApiSaveKernelResponse: An ApiSaveKernelResponse object.
|
|
@@ -3037,7 +3127,7 @@ class KaggleApi:
|
|
|
3037
3127
|
|
|
3038
3128
|
model_sources = cast(List[str], self.get_or_default(meta_data, "model_sources", []))
|
|
3039
3129
|
for source in model_sources:
|
|
3040
|
-
self.
|
|
3130
|
+
self.validate_model_instance_version_string(source)
|
|
3041
3131
|
|
|
3042
3132
|
docker_pinning_type = self.get_or_default(meta_data, "docker_image_pinning_type", None)
|
|
3043
3133
|
if docker_pinning_type is not None and docker_pinning_type not in self.valid_push_pinning_types:
|
|
@@ -3081,19 +3171,22 @@ class KaggleApi:
|
|
|
3081
3171
|
request.docker_image = self.get_or_default(meta_data, "docker_image", None)
|
|
3082
3172
|
if timeout:
|
|
3083
3173
|
request.session_timeout_seconds = int(timeout)
|
|
3174
|
+
# The allowed names are in an enum that is not currently included in kagglesdk.
|
|
3175
|
+
request.machine_shape = acc if acc else self.get_or_default(meta_data, "machine_shape", None)
|
|
3084
3176
|
# Without the type hint, mypy thinks save_kernel() has type Any when checking warn_return_any.
|
|
3085
3177
|
response: ApiSaveKernelResponse = kaggle.kernels.kernels_api_client.save_kernel(request)
|
|
3086
3178
|
return response
|
|
3087
3179
|
|
|
3088
|
-
def kernels_push_cli(self, folder, timeout):
|
|
3180
|
+
def kernels_push_cli(self, folder, timeout, acc):
|
|
3089
3181
|
"""A client wrapper for kernels_push.
|
|
3090
3182
|
|
|
3091
3183
|
Args:
|
|
3092
3184
|
folder: The path to the folder.
|
|
3093
3185
|
timeout: The maximum run time in seconds.
|
|
3186
|
+
acc: The accelerator to use.
|
|
3094
3187
|
"""
|
|
3095
3188
|
folder = folder or os.getcwd()
|
|
3096
|
-
result = self.kernels_push(folder, timeout)
|
|
3189
|
+
result = self.kernels_push(folder, timeout, acc)
|
|
3097
3190
|
|
|
3098
3191
|
if result is None:
|
|
3099
3192
|
print("Kernel push error: see previous output")
|
|
@@ -3224,7 +3317,7 @@ class KaggleApi:
|
|
|
3224
3317
|
if file_name is None:
|
|
3225
3318
|
print(
|
|
3226
3319
|
"Unknown language %s + kernel type %s - please report this "
|
|
3227
|
-
"on the kaggle-
|
|
3320
|
+
"on the kaggle-cli github issues" % (language, kernel_type)
|
|
3228
3321
|
)
|
|
3229
3322
|
print("Saving as a python file, even though this may not be the " "correct language")
|
|
3230
3323
|
file_name = "script.py"
|
|
@@ -3272,12 +3365,15 @@ class KaggleApi:
|
|
|
3272
3365
|
else:
|
|
3273
3366
|
print("Source code downloaded to " + effective_path)
|
|
3274
3367
|
|
|
3275
|
-
def kernels_output(
|
|
3368
|
+
def kernels_output(
|
|
3369
|
+
self, kernel: str, path: str, file_pattern: str = None, force: bool = False, quiet: bool = True
|
|
3370
|
+
) -> Tuple[List[str], str]:
|
|
3276
3371
|
"""Retrieves the output for a specified kernel.
|
|
3277
3372
|
|
|
3278
3373
|
Args:
|
|
3279
3374
|
kernel (str): The kernel for which to retrieve the output.
|
|
3280
3375
|
path (str): The path to which to pull the files.
|
|
3376
|
+
file_pattern (str): Optional regex pattern to match against filenames. Only files matching the pattern will be downloaded.
|
|
3281
3377
|
force (bool): If True, force an overwrite if the output already exists (default is False).
|
|
3282
3378
|
quiet (bool): Suppress verbose output (default is True).
|
|
3283
3379
|
|
|
@@ -3306,6 +3402,14 @@ class KaggleApi:
|
|
|
3306
3402
|
if not os.path.isdir(target_dir):
|
|
3307
3403
|
raise ValueError("You must specify a directory for the kernels output")
|
|
3308
3404
|
|
|
3405
|
+
if file_pattern is not None:
|
|
3406
|
+
try:
|
|
3407
|
+
compiled_pattern = re.compile(file_pattern)
|
|
3408
|
+
except re.error as e:
|
|
3409
|
+
raise ValueError(f"Invalid regex pattern '{file_pattern}': {e}")
|
|
3410
|
+
else:
|
|
3411
|
+
compiled_pattern = None
|
|
3412
|
+
|
|
3309
3413
|
token = None
|
|
3310
3414
|
with self.build_kaggle_client() as kaggle:
|
|
3311
3415
|
request = ApiListKernelSessionOutputRequest()
|
|
@@ -3316,6 +3420,9 @@ class KaggleApi:
|
|
|
3316
3420
|
|
|
3317
3421
|
outfiles = []
|
|
3318
3422
|
for item in response.files:
|
|
3423
|
+
if compiled_pattern and not compiled_pattern.search(item.file_name):
|
|
3424
|
+
continue
|
|
3425
|
+
|
|
3319
3426
|
outfile = os.path.join(target_dir, item.file_name)
|
|
3320
3427
|
outfiles.append(outfile)
|
|
3321
3428
|
download_response = requests.get(item.url, stream=True)
|
|
@@ -3337,7 +3444,7 @@ class KaggleApi:
|
|
|
3337
3444
|
|
|
3338
3445
|
return outfiles, token # Breaking change, we need to get the token to the UI
|
|
3339
3446
|
|
|
3340
|
-
def kernels_output_cli(self, kernel, kernel_opt=None, path=None, force=False, quiet=False):
|
|
3447
|
+
def kernels_output_cli(self, kernel, kernel_opt=None, path=None, force=False, quiet=False, file_pattern=None):
|
|
3341
3448
|
"""A client wrapper for kernels_output.
|
|
3342
3449
|
|
|
3343
3450
|
This method is a client wrapper for the kernels_output function.
|
|
@@ -3349,9 +3456,10 @@ class KaggleApi:
|
|
|
3349
3456
|
path: The path to which to pull the files.
|
|
3350
3457
|
force: If True, force an overwrite if the output already exists (default is False).
|
|
3351
3458
|
quiet: Suppress verbose output (default is False).
|
|
3459
|
+
file_pattern: Regex pattern to match against filenames. Only files matching the pattern will be downloaded.
|
|
3352
3460
|
"""
|
|
3353
3461
|
kernel = kernel or kernel_opt
|
|
3354
|
-
(_, token) = self.kernels_output(kernel, path, force, quiet)
|
|
3462
|
+
(_, token) = self.kernels_output(kernel, path, file_pattern, force, quiet)
|
|
3355
3463
|
if token:
|
|
3356
3464
|
print(f"Next page token: {token}")
|
|
3357
3465
|
|
|
@@ -4289,7 +4397,7 @@ class KaggleApi:
|
|
|
4289
4397
|
t.extractall(effective_path)
|
|
4290
4398
|
except Exception as e:
|
|
4291
4399
|
raise ValueError(
|
|
4292
|
-
"Error extracting the tar.gz file, please report on " "www.github.com/kaggle/kaggle-
|
|
4400
|
+
"Error extracting the tar.gz file, please report on " "www.github.com/kaggle/kaggle-cli", e
|
|
4293
4401
|
)
|
|
4294
4402
|
|
|
4295
4403
|
try:
|
|
@@ -4813,11 +4921,12 @@ class KaggleApi:
|
|
|
4813
4921
|
column: A list of values in a column to be processed.
|
|
4814
4922
|
|
|
4815
4923
|
Returns:
|
|
4816
|
-
|
|
4924
|
+
An ApiDatasetColumn object.
|
|
4817
4925
|
"""
|
|
4818
|
-
processed_column =
|
|
4819
|
-
|
|
4820
|
-
)
|
|
4926
|
+
processed_column = ApiDatasetColumn()
|
|
4927
|
+
processed_column.name = self.get_or_fail(column, "name")
|
|
4928
|
+
processed_column.description = self.get_or_default(column, "description", "")
|
|
4929
|
+
|
|
4821
4930
|
if "type" in column:
|
|
4822
4931
|
original_type = column["type"].lower()
|
|
4823
4932
|
processed_column.original_type = original_type
|
kaggle/cli.py
CHANGED
|
@@ -35,8 +35,8 @@ def main() -> None:
|
|
|
35
35
|
"-v",
|
|
36
36
|
"--version",
|
|
37
37
|
action="version",
|
|
38
|
-
help="Print the Kaggle
|
|
39
|
-
version="Kaggle
|
|
38
|
+
help="Print the Kaggle CLI version",
|
|
39
|
+
version="Kaggle CLI " + kaggle.__version__,
|
|
40
40
|
)
|
|
41
41
|
parser.add_argument(
|
|
42
42
|
"-W",
|
|
@@ -550,6 +550,7 @@ def parse_kernels(subparsers) -> None:
|
|
|
550
550
|
parser_kernels_push_optional.add_argument(
|
|
551
551
|
"-t", "--timeout", type=int, dest="timeout", help=Help.param_kernel_timeout
|
|
552
552
|
)
|
|
553
|
+
parser_kernels_push_optional.add_argument("--accelerator", dest="acc", help=Help.param_kernel_acc)
|
|
553
554
|
parser_kernels_push._action_groups.append(parser_kernels_push_optional)
|
|
554
555
|
parser_kernels_push.set_defaults(func=api.kernels_push_cli)
|
|
555
556
|
|
|
@@ -592,6 +593,9 @@ def parse_kernels(subparsers) -> None:
|
|
|
592
593
|
parser_kernels_output_optional.add_argument(
|
|
593
594
|
"-q", "--quiet", dest="quiet", action="store_true", required=False, help=Help.param_quiet
|
|
594
595
|
)
|
|
596
|
+
parser_kernels_output_optional.add_argument(
|
|
597
|
+
"--file-pattern", dest="file_pattern", required=False, help=Help.param_kernel_output_file_pattern
|
|
598
|
+
)
|
|
595
599
|
parser_kernels_output._action_groups.append(parser_kernels_output_optional)
|
|
596
600
|
parser_kernels_output.set_defaults(func=api.kernels_output_cli)
|
|
597
601
|
|
|
@@ -1083,9 +1087,9 @@ class Help(object):
|
|
|
1083
1087
|
+ ", ".join(kernels_choices)
|
|
1084
1088
|
+ "}\nmodels {"
|
|
1085
1089
|
+ ", ".join(models_choices)
|
|
1086
|
-
+ "}\nmodels
|
|
1090
|
+
+ "}\nmodels variations {"
|
|
1087
1091
|
+ ", ".join(model_instances_choices)
|
|
1088
|
-
+ "}\nmodels
|
|
1092
|
+
+ "}\nmodels variations versions {"
|
|
1089
1093
|
+ ", ".join(model_instance_versions_choices)
|
|
1090
1094
|
+ "}\nconfig {"
|
|
1091
1095
|
+ ", ".join(config_choices)
|
|
@@ -1098,8 +1102,8 @@ class Help(object):
|
|
|
1098
1102
|
group_datasets = "Commands related to Kaggle datasets"
|
|
1099
1103
|
group_kernels = "Commands related to Kaggle kernels"
|
|
1100
1104
|
group_models = "Commands related to Kaggle models"
|
|
1101
|
-
group_model_instances = "Commands related to Kaggle model
|
|
1102
|
-
group_model_instance_versions = "Commands related to Kaggle model
|
|
1105
|
+
group_model_instances = "Commands related to Kaggle model variations"
|
|
1106
|
+
group_model_instance_versions = "Commands related to Kaggle model variations versions"
|
|
1103
1107
|
group_files = "Commands related files"
|
|
1104
1108
|
group_config = "Configuration settings"
|
|
1105
1109
|
group_auth = "Commands related to authentication"
|
|
@@ -1174,7 +1178,7 @@ class Help(object):
|
|
|
1174
1178
|
"File for upload (full path), or the name of the output file produced by a kernel (for code competitions)"
|
|
1175
1179
|
)
|
|
1176
1180
|
param_code_kernel = "Name of kernel (notebook) to submit to a code competition"
|
|
1177
|
-
param_code_version = 'Version of kernel to submit to a code competition, e.g. "
|
|
1181
|
+
param_code_version = 'Version of kernel to submit to a code competition, e.g. "3"'
|
|
1178
1182
|
param_csv = "Print results in CSV format (if not set print in table format)"
|
|
1179
1183
|
param_page = "Page number for results paging. Page size is 20 by default"
|
|
1180
1184
|
# NOTE: Default and max page size are set by the mid-tier code.
|
|
@@ -1226,7 +1230,7 @@ class Help(object):
|
|
|
1226
1230
|
param_dataset_upfile = (
|
|
1227
1231
|
"Folder for upload, containing data files and a "
|
|
1228
1232
|
"special datasets-metadata.json file "
|
|
1229
|
-
"(https://github.com/Kaggle/kaggle-
|
|
1233
|
+
"(https://github.com/Kaggle/kaggle-cli/blob/main/docs/datasets_metadata.md). "
|
|
1230
1234
|
"Defaults to current working directory"
|
|
1231
1235
|
)
|
|
1232
1236
|
param_dataset_sort_by = (
|
|
@@ -1258,7 +1262,7 @@ class Help(object):
|
|
|
1258
1262
|
param_kernel_upfile = (
|
|
1259
1263
|
"Folder for upload, containing data files and a "
|
|
1260
1264
|
"special kernel-metadata.json file "
|
|
1261
|
-
"(https://github.com/Kaggle/kaggle-
|
|
1265
|
+
"(https://github.com/Kaggle/kaggle-cli/blob/main/docs/kernels_metadata.md). "
|
|
1262
1266
|
"Defaults to current working directory"
|
|
1263
1267
|
)
|
|
1264
1268
|
param_kernel_parent = "Find children of the specified parent kernel"
|
|
@@ -1293,6 +1297,10 @@ class Help(object):
|
|
|
1293
1297
|
"is only applicable if a search term is specified."
|
|
1294
1298
|
)
|
|
1295
1299
|
param_kernel_pull_metadata = "Generate metadata when pulling kernel"
|
|
1300
|
+
param_kernel_output_file_pattern = (
|
|
1301
|
+
"Regex pattern to match against filenames. Only files matching the pattern will be downloaded."
|
|
1302
|
+
)
|
|
1303
|
+
param_kernel_acc = "Specify the type of accelerator to use for the kernel run"
|
|
1296
1304
|
|
|
1297
1305
|
# Models params
|
|
1298
1306
|
param_model = "Model URL suffix in format <owner>/<model-name>"
|
|
@@ -1303,47 +1311,47 @@ class Help(object):
|
|
|
1303
1311
|
param_model_owner = "Find public models owned by a specific user or organization"
|
|
1304
1312
|
param_model_downfile = (
|
|
1305
1313
|
"Folder containing the special model-metadata.json file "
|
|
1306
|
-
"(https://github.com/Kaggle/kaggle-
|
|
1314
|
+
"(https://github.com/Kaggle/kaggle-cli/blob/main/docs/models_metadata.md)."
|
|
1307
1315
|
)
|
|
1308
1316
|
param_model_upfile = (
|
|
1309
1317
|
"Folder containing the special model-metadata.json file "
|
|
1310
|
-
"(https://github.com/Kaggle/kaggle-
|
|
1318
|
+
"(https://github.com/Kaggle/kaggle-cli/blob/main/docs/models_metadata.md). "
|
|
1311
1319
|
"Defaults to current working directory"
|
|
1312
1320
|
)
|
|
1313
1321
|
|
|
1314
1322
|
# Model Instances params
|
|
1315
|
-
param_model_instance = "Model
|
|
1316
|
-
command_model_instances_get = "Get a model
|
|
1317
|
-
command_model_instances_init = "Initialize metadata file for model
|
|
1318
|
-
command_model_instances_files = "List files for the current version of a model
|
|
1319
|
-
command_model_instances_list = "List
|
|
1320
|
-
command_model_instances_new = "Create a new model
|
|
1323
|
+
param_model_instance = "Model variation URL suffix in format <owner>/<model-name>/<framework>/<instance-slug>"
|
|
1324
|
+
command_model_instances_get = "Get a model variation"
|
|
1325
|
+
command_model_instances_init = "Initialize metadata file for model variation creation"
|
|
1326
|
+
command_model_instances_files = "List files for the current version of a model variation"
|
|
1327
|
+
command_model_instances_list = "List variations of a model"
|
|
1328
|
+
command_model_instances_new = "Create a new model variation"
|
|
1321
1329
|
param_model_instance_downfile = (
|
|
1322
1330
|
"Folder for downloading the special model-instance-metadata.json file "
|
|
1323
|
-
"(https://github.com/Kaggle/kaggle-
|
|
1331
|
+
"(https://github.com/Kaggle/kaggle-cli/blob/main/docs/models_metadata.md#model-instance). "
|
|
1324
1332
|
)
|
|
1325
1333
|
param_model_instance_upfile = (
|
|
1326
1334
|
"Folder for upload, containing data files and a "
|
|
1327
1335
|
"special model-instance-metadata.json file "
|
|
1328
|
-
"(https://github.com/Kaggle/kaggle-
|
|
1336
|
+
"(https://github.com/Kaggle/kaggle-cli/blob/main/docs/models_metadata.md#model-instance). "
|
|
1329
1337
|
"Defaults to current working directory"
|
|
1330
1338
|
)
|
|
1331
|
-
command_model_instances_delete = "Delete a model
|
|
1332
|
-
command_model_instances_update = "Update a model
|
|
1339
|
+
command_model_instances_delete = "Delete a model variation"
|
|
1340
|
+
command_model_instances_update = "Update a model variation"
|
|
1333
1341
|
|
|
1334
1342
|
# Model Instance Versions params
|
|
1335
1343
|
param_model_instance_version = (
|
|
1336
|
-
"Model
|
|
1344
|
+
"Model variation version URL suffix in format <owner>/<model-name>/<framework>/<variation-slug>/<version-number>"
|
|
1337
1345
|
)
|
|
1338
1346
|
|
|
1339
1347
|
# Model Instance Versions params
|
|
1340
|
-
command_model_instance_versions_new = "Create a new model
|
|
1348
|
+
command_model_instance_versions_new = "Create a new model variation version"
|
|
1341
1349
|
param_model_instance_version_upfile = "Folder for upload. Defaults to current working directory"
|
|
1342
|
-
command_model_instance_versions_delete = "Delete a model
|
|
1343
|
-
command_model_instance_versions_download = "Download model
|
|
1344
|
-
command_model_instance_versions_files = "List model
|
|
1345
|
-
command_model_instance_versions_list = "List model
|
|
1346
|
-
param_model_instance_version_notes = "Version notes to record for the new model
|
|
1350
|
+
command_model_instance_versions_delete = "Delete a model variation version"
|
|
1351
|
+
command_model_instance_versions_download = "Download model variation version files"
|
|
1352
|
+
command_model_instance_versions_files = "List model variation version files"
|
|
1353
|
+
command_model_instance_versions_list = "List model variation versions"
|
|
1354
|
+
param_model_instance_version_notes = "Version notes to record for the new model variation version"
|
|
1347
1355
|
|
|
1348
1356
|
# Files params
|
|
1349
1357
|
param_files_upload_inbox_path = "Virtual path on the server where the uploaded files will be stored"
|