kash-shell 0.3.34__py3-none-any.whl → 0.3.35__py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- kash/config/env_settings.py +0 -3
- kash/config/logger.py +2 -0
- kash/config/logger_basic.py +10 -1
- kash/config/settings.py +0 -12
- kash/config/setup.py +15 -0
- kash/config/text_styles.py +1 -1
- kash/config/warm_slow_imports.py +60 -0
- kash/exec/action_decorators.py +2 -2
- kash/exec/action_exec.py +1 -1
- kash/exec/fetch_url_items.py +4 -2
- kash/mcp/mcp_cli.py +17 -5
- kash/mcp/mcp_server_routes.py +6 -4
- kash/model/actions_model.py +11 -3
- kash/model/items_model.py +16 -11
- kash/shell/shell_main.py +3 -14
- kash/utils/common/import_utils.py +136 -12
- kash/utils/common/s3_utils.py +89 -7
- kash/web_content/web_extract.py +0 -1
- kash/web_content/web_fetch.py +270 -98
- kash/workspaces/workspaces.py +2 -0
- {kash_shell-0.3.34.dist-info → kash_shell-0.3.35.dist-info}/METADATA +2 -1
- {kash_shell-0.3.34.dist-info → kash_shell-0.3.35.dist-info}/RECORD +25 -24
- {kash_shell-0.3.34.dist-info → kash_shell-0.3.35.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.34.dist-info → kash_shell-0.3.35.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.34.dist-info → kash_shell-0.3.35.dist-info}/licenses/LICENSE +0 -0
kash/utils/common/s3_utils.py
CHANGED
@@ -1,13 +1,19 @@
 from __future__ import annotations
 
+import os
 import shutil
 import subprocess
+from logging import getLogger
 from pathlib import Path
 
+from dotenv import find_dotenv, load_dotenv
 from sidematter_format.sidematter_format import Sidematter
+from strif import abbrev_str
 
 from kash.utils.common.url import Url, is_s3_url, parse_s3_url
 
+log = getLogger(__name__)
+
 
 def check_aws_cli() -> None:
     """
@@ -19,6 +25,54 @@ def check_aws_cli() -> None:
     )
 
 
+def run_aws_command(cmd: list[str]) -> subprocess.CompletedProcess[str]:
+    """
+    Run an AWS CLI command and capture output.
+    Raises a RuntimeError with stdout/stderr on failure.
+    """
+    result = subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+        env=os.environ,
+    )
+
+    if result.returncode != 0:
+        # Build a detailed error message
+        error_parts = [f"AWS command failed with exit code {result.returncode}"]
+        error_parts.append(f"Command: {' '.join(cmd)}")
+
+        if result.stdout:
+            error_parts.append(f"stdout: {result.stdout}")
+        if result.stderr:
+            error_parts.append(f"stderr: {result.stderr}")
+
+        raise RuntimeError("\n".join(error_parts))
+
+    return result
+
+
+def reload_aws_env_vars() -> None:
+    """
+    Fresh reload of AWS env vars from .env.local.
+    """
+
+    def aws_creds() -> set[tuple[str, str]]:
+        return {(k, abbrev_str(v, 5)) for k, v in os.environ.items() if k.startswith("AWS_")}
+
+    if len(aws_creds()) == 0:
+        dotenv_path = find_dotenv(".env.local", usecwd=True) or find_dotenv(".env", usecwd=True)
+        load_dotenv(dotenv_path, override=True)
+        if len(aws_creds()) > 0:
+            log.info(
+                "Loaded %s, found AWS credentials: %s",
+                dotenv_path,
+                aws_creds(),
+            )
+        else:
+            log.warning("No AWS credentials found in env or .env files")
+
+
 def get_s3_parent_folder(url: Url) -> Url | None:
     """
     Get the parent folder of an S3 URL, or None if not an S3 URL.
@@ -47,6 +101,7 @@ def s3_sync_to_folder(
     - For a single file: the file URL (and sidematter file/dir URLs if included).
     - For a directory: the destination parent prefix URL (non-recursive reporting).
     """
+    reload_aws_env_vars()
 
     src_path = Path(src_path)
     if not src_path.exists():
@@ -71,7 +126,7 @@ def s3_sync_to_folder(
         for p in sync_paths:
             if p.is_file():
                 # Use sync with include/exclude to leverage default short-circuiting
-
+                run_aws_command(
                     [
                         "aws",
                         "s3",
@@ -82,27 +137,54 @@ def s3_sync_to_folder(
                         "*",
                         "--include",
                         p.name,
-                    ]
-                    check=True,
+                    ]
                 )
                 targets.append(Url(dest_prefix + p.name))
             elif p.is_dir():
                 dest_dir = dest_prefix + p.name + "/"
-
+                run_aws_command(["aws", "s3", "sync", str(p), dest_dir])
                 targets.append(Url(dest_dir))
 
         return targets
     else:
         # Directory mode: sync whole directory.
-
+        run_aws_command(
             [
                 "aws",
                 "s3",
                 "sync",
                 str(src_path),
                 dest_prefix,
-            ]
-            check=True,
+            ]
         )
         targets.append(Url(dest_prefix))
         return targets
+
+
+def s3_download_file(s3_url: Url, target_path: str | Path) -> None:
+    """
+    Download a file from S3 to a local path using the AWS CLI.
+
+    Args:
+        s3_url: The S3 URL to download from (s3://bucket/path/to/file)
+        target_path: The local path to save the file to
+    """
+    reload_aws_env_vars()
+
+    if not is_s3_url(s3_url):
+        raise ValueError(f"Source must be an s3:// URL: {s3_url}")
+
+    check_aws_cli()
+
+    target_path = Path(target_path)
+
+    # Use aws s3 cp to download the file
+    run_aws_command(
+        [
+            "aws",
+            "s3",
+            "cp",
+            str(s3_url),
+            str(target_path),
+        ]
+    )
kash/web_content/web_extract.py
CHANGED
kash/web_content/web_fetch.py
CHANGED
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
 import logging
+import ssl
+from collections.abc import Iterable
 from dataclasses import dataclass
 from enum import Enum
 from functools import cache, cached_property
@@ -12,17 +14,145 @@ from cachetools import TTLCache
 from strif import atomic_output_file, copyfile_atomic
 
 from kash.config.env_settings import KashEnv
+from kash.utils.common.s3_utils import s3_download_file
 from kash.utils.common.url import Url
 from kash.utils.file_utils.file_formats import MimeType
 
+log = logging.getLogger(__name__)
+
+
+def _httpx_verify_context() -> ssl.SSLContext | bool:
+    """
+    Return an SSLContext that uses the system trust store via truststore, if available.
+    Falls back to certifi bundle; otherwise True to use httpx defaults.
+    """
+    try:
+        import truststore
+
+        return truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+    except Exception:
+        try:
+            import certifi
+
+            return ssl.create_default_context(cafile=certifi.where())
+        except Exception:
+            return True
+
+
+def _stream_to_file(
+    target_filename: str | Path,
+    response_iterator: Iterable[bytes],
+    total_size: int,
+    show_progress: bool,
+) -> None:
+    with atomic_output_file(target_filename, make_parents=True) as temp_filename:
+        with open(temp_filename, "wb") as f:
+            if not show_progress:
+                for chunk in response_iterator:
+                    if chunk:
+                        f.write(chunk)
+            else:
+                from tqdm import tqdm
+
+                with tqdm(
+                    total=total_size,
+                    unit="B",
+                    unit_scale=True,
+                    desc=f"Downloading {Path(str(target_filename)).name}",
+                ) as progress:
+                    for chunk in response_iterator:
+                        if chunk:
+                            f.write(chunk)
+                            progress.update(len(chunk))
+
+
+def _httpx_fetch(
+    url: Url,
+    *,
+    timeout: int,
+    auth: Any | None,
+    headers: dict[str, str] | None,
+    mode: ClientMode,
+    log_label: str,
+):
+    import httpx
+
+    req_headers = _get_req_headers(mode, headers)
+    parsed_url = urlparse(str(url))
+    with httpx.Client(
+        follow_redirects=True,
+        timeout=timeout,
+        auth=auth,
+        headers=req_headers,
+        verify=_httpx_verify_context(),
+    ) as client:
+        log.debug("fetch_url (%s): using headers: %s", log_label, client.headers)
+        if mode is ClientMode.BROWSER_HEADERS:
+            _prime_host(parsed_url.netloc, client, timeout)
+        response = client.get(url)
+        log.info(
+            "Fetched (%s): %s (%s bytes): %s",
+            log_label,
+            response.status_code,
+            len(response.content),
+            url,
+        )
+        response.raise_for_status()
+        return response
+
+
+def _httpx_download(
+    url: Url,
+    target_filename: str | Path,
+    *,
+    show_progress: bool,
+    timeout: int,
+    auth: Any | None,
+    headers: dict[str, str] | None,
+    mode: ClientMode,
+    log_label: str,
+) -> dict[str, str]:
+    import httpx
+
+    req_headers = _get_req_headers(mode, headers)
+    parsed_url = urlparse(str(url))
+    with httpx.Client(
+        follow_redirects=True,
+        timeout=timeout,
+        headers=req_headers,
+        verify=_httpx_verify_context(),
+    ) as client:
+        if mode is ClientMode.BROWSER_HEADERS:
+            _prime_host(parsed_url.netloc, client, timeout)
+        log.debug("download_url (%s): using headers: %s", log_label, client.headers)
+        with client.stream("GET", url, auth=auth, follow_redirects=True) as response:
+            response.raise_for_status()
+            response_headers = dict(response.headers)
+            total = int(response.headers.get("content-length", "0"))
+            _stream_to_file(target_filename, response.iter_bytes(), total, show_progress)
+    return response_headers
+
+
+def _is_tls_cert_error(exc: Exception) -> bool:
+    """
+    Heuristic detection of TLS/certificate verification errors coming from curl_cffi/libcurl.
+    """
+    s = str(exc).lower()
+    if "curl: (60)" in s:
+        return True
+    if "certificate verify failed" in s:
+        return True
+    if "ssl" in s and ("certificate" in s or "cert" in s or "handshake" in s):
+        return True
+    return False
+
+
 if TYPE_CHECKING:
     from curl_cffi.requests import Response as CurlCffiResponse
     from curl_cffi.requests import Session as CurlCffiSession
     from httpx import Client as HttpxClient
     from httpx import Response as HttpxResponse
 
-log = logging.getLogger(__name__)
-
 
 DEFAULT_TIMEOUT = 30
 CURL_CFFI_IMPERSONATE_VERSION = "chrome120"
@@ -199,54 +329,57 @@ def fetch_url(
 
         from curl_cffi.requests import Session
 
-
-
-
-
-
+        exc: Exception | None = None
+        try:
+            with Session() as client:
+                # Set headers on the session - they will be sent with all requests
+                client.headers.update(req_headers)
+                _prime_host(
+                    parsed_url.netloc, client, timeout, impersonate=CURL_CFFI_IMPERSONATE_VERSION
+                )
+                log.debug("fetch_url (curl_cffi): using session headers: %s", client.headers)
+                response = client.get(
+                    url,
+                    impersonate=CURL_CFFI_IMPERSONATE_VERSION,
+                    timeout=timeout,
+                    auth=auth,
+                    allow_redirects=True,
+                )
+                log.info(
+                    "Fetched (curl_cffi): %s (%s bytes): %s",
+                    response.status_code,
+                    len(response.content),
+                    url,
+                )
+                response.raise_for_status()
+                return response
+        except Exception as e:
+            exc = e
+
+        if exc and _is_tls_cert_error(exc):
+            log.warning(
+                "TLS/SSL verification failed with curl_cffi for %s: %s; falling back to httpx",
+                url,
+                exc,
             )
-
-
+            # Fallback to httpx with browser-like headers (uses system trust if available)
+            return _httpx_fetch(
                 url,
-                impersonate=CURL_CFFI_IMPERSONATE_VERSION,
                 timeout=timeout,
                 auth=auth,
-
-
-
-                "Fetched (curl_cffi): %s (%s bytes): %s",
-                response.status_code,
-                len(response.content),
-                url,
+                headers=headers,
+                mode=ClientMode.BROWSER_HEADERS,
+                log_label="httpx fallback",
             )
-
-
+
+        if exc:
+            raise exc
 
     # Handle httpx modes
     else:
-
-
-
-            follow_redirects=True,
-            timeout=timeout,
-            auth=auth,
-            headers=req_headers,
-        ) as client:
-            log.debug("fetch_url (httpx): using headers: %s", client.headers)
-
-            # Cookie priming only makes sense for the browser-like mode
-            if mode is ClientMode.BROWSER_HEADERS:
-                _prime_host(parsed_url.netloc, client, timeout)
-
-            response = client.get(url)
-            log.info(
-                "Fetched (httpx): %s (%s bytes): %s",
-                response.status_code,
-                len(response.content),
-                url,
-            )
-            response.raise_for_status()
-            return response
+        return _httpx_fetch(
+            url, timeout=timeout, auth=auth, headers=headers, mode=mode, log_label="httpx"
+        )
 
 
 @dataclass(frozen=True)
@@ -289,38 +422,13 @@ def download_url(
         copyfile_atomic(parsed_url.netloc + parsed_url.path, target_filename, make_parents=True)
         return None
     elif parsed_url.scheme == "s3":
-        import boto3  # pyright: ignore
-
-        s3 = boto3.resource("s3")
-        s3_path = parsed_url.path.lstrip("/")
         with atomic_output_file(target_filename, make_parents=True) as temp_filename:
-
+            s3_download_file(url, temp_filename)
         return None
 
     req_headers = _get_req_headers(mode, headers)
    response_headers = None
 
-    def stream_to_file(response_iterator, total_size):
-        with atomic_output_file(target_filename, make_parents=True) as temp_filename:
-            with open(temp_filename, "wb") as f:
-                if not show_progress:
-                    for chunk in response_iterator:
-                        if chunk:  # Skip empty chunks
-                            f.write(chunk)
-                else:
-                    from tqdm import tqdm
-
-                    with tqdm(
-                        total=total_size,
-                        unit="B",
-                        unit_scale=True,
-                        desc=f"Downloading {Path(target_filename).name}",
-                    ) as progress:
-                        for chunk in response_iterator:
-                            if chunk:  # Skip empty chunks
-                                f.write(chunk)
-                                progress.update(len(chunk))
-
     # Handle curl_cffi mode
     if mode is ClientMode.CURL_CFFI:
         if not _have_curl_cffi():
@@ -328,47 +436,111 @@ def download_url(
 
         from curl_cffi.requests import Session
 
-
-
-
-
-
-
-
+        exc: Exception | None = None
+        try:
+            with Session() as client:
+                # Set headers on the session; they will be sent with all requests
+                client.headers.update(req_headers)
+                _prime_host(
+                    parsed_url.netloc, client, timeout, impersonate=CURL_CFFI_IMPERSONATE_VERSION
+                )
+                log.debug("download_url (curl_cffi): using session headers: %s", client.headers)
+                response = client.get(
+                    url,
+                    impersonate=CURL_CFFI_IMPERSONATE_VERSION,
+                    timeout=timeout,
+                    auth=auth,
+                    allow_redirects=True,
+                    stream=True,
+                )
+                response.raise_for_status()
+                response_headers = dict(response.headers)
+                total = int(response.headers.get("content-length", "0"))
+
+                # Use iter_content for streaming; this is the standard method for curl_cffi
+                chunk_iterator = response.iter_content(chunk_size=8192)
+                _stream_to_file(target_filename, chunk_iterator, total, show_progress)
+        except Exception as e:
+            exc = e
 
-
+        if exc and _is_tls_cert_error(exc):
+            log.warning(
+                "TLS/SSL verification failed with curl_cffi for %s: %s; falling back to httpx",
                 url,
-
+                exc,
+            )
+            # Fallback to httpx streaming with browser-like headers (system trust store if available)
+            response_headers = _httpx_download(
+                url,
+                target_filename,
+                show_progress=show_progress,
                 timeout=timeout,
                 auth=auth,
-
-
+                headers=headers,
+                mode=ClientMode.BROWSER_HEADERS,
+                log_label="httpx fallback",
             )
-
-
-            total = int(response.headers.get("content-length", "0"))
-
-            # Use iter_content for streaming; this is the standard method for curl_cffi
-            chunk_iterator = response.iter_content(chunk_size=8192)
-            stream_to_file(chunk_iterator, total)
+        elif exc:
+            raise exc
 
     # Handle httpx modes
     else:
-
-
-
-
-
-
-
-
-
-
-            total = int(response.headers.get("content-length", "0"))
-            stream_to_file(response.iter_bytes(), total)
+        response_headers = _httpx_download(
+            url,
+            target_filename,
+            show_progress=show_progress,
+            timeout=timeout,
+            auth=auth,
+            headers=headers,
+            mode=mode,
+            log_label="httpx",
+        )
 
     # Filter out None values from headers for HttpHeaders type compatibility
     if response_headers:
         clean_headers = {k: v for k, v in response_headers.items() if v is not None}
         return HttpHeaders(clean_headers)
     return None
+
+
+def main() -> None:
+    """
+    Simple CLI test harness for fetch and download.
+
+    Usage examples:
+        uv run python -m kash.web_content.web_fetch
+        uv run python -m kash.web_content.web_fetch https://www.example.com
+    """
+    import sys
+    import traceback
+
+    # Try to use the system trust store for TLS like command-line curl
+    try:
+        import truststore  # type: ignore
+
+        truststore.inject_into_ssl()
+        log.warning("truststore initialized for test harness: using system TLS trust store")
+    except Exception as exc:
+        log.info("truststore not available for test harness; using default TLS trust (%s)", exc)
+
+    urls = [
+        "https://www.example.com",
+        "https://www.businessdefense.gov/ibr/mceip/dpai/dpat3/index.html",
+    ]
+
+    args = [a for a in sys.argv[1:] if a and a.strip()]
+    if args:
+        urls = args
+
+    for u in urls:
+        try:
+            log.warning("Testing fetch_url: %s", u)
+            r = fetch_url(Url(u))
+            log.warning("fetch_url OK: %s -> %s bytes", u, len(r.content))
+        except Exception as exc:
+            log.exception("fetch_url FAILED for %s: %s", u, exc)
+            traceback.print_exc()
+
+
+if __name__ == "__main__":
+    main()
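The heart of this change is TLS handling: _httpx_verify_context prefers the system trust store via truststore, falls back to the certifi bundle, then to httpx defaults, and curl_cffi certificate failures detected by _is_tls_cert_error now fall back to httpx. Below is a self-contained sketch of that verification strategy outside kash, assuming truststore and certifi may or may not be installed; the URL is only an example.

import ssl

import httpx


def system_verify() -> ssl.SSLContext | bool:
    # Prefer the OS trust store (like command-line curl), then certifi, then httpx defaults.
    try:
        import truststore

        return truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    except Exception:
        try:
            import certifi

            return ssl.create_default_context(cafile=certifi.where())
        except Exception:
            return True


with httpx.Client(follow_redirects=True, verify=system_verify()) as client:
    resp = client.get("https://www.example.com")
    resp.raise_for_status()
    print(len(resp.content), "bytes")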
kash/workspaces/workspaces.py
CHANGED
@@ -95,6 +95,8 @@ def get_ws(name_or_path: str | Path, auto_init: bool = True) -> FileStore:
     Get a workspace by name or path. Adds to the in-memory registry so we reuse it.
     With `auto_init` true, will initialize the workspace if it is not already initialized.
     """
+    if isinstance(name_or_path, Path):
+        name_or_path = name_or_path.expanduser().absolute()
     name = Path(name_or_path).name
     name = check_strict_workspace_name(name)
     info = resolve_ws(name_or_path)
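The two added lines normalize Path inputs before the workspace name is derived, so ~-prefixed and relative paths resolve consistently. A small standard-library illustration of what that normalization does (the path below is hypothetical):

from pathlib import Path

p = Path("~/kash/workspaces/demo")
normalized = p.expanduser().absolute()
print(normalized)       # e.g. /home/user/kash/workspaces/demo
print(normalized.name)  # "demo" -- the workspace name derived from the normalized path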
{kash_shell-0.3.34.dist-info → kash_shell-0.3.35.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kash-shell
-Version: 0.3.34
+Version: 0.3.35
 Summary: The knowledge agent shell (core)
 Project-URL: Repository, https://github.com/jlevy/kash-shell
 Author-email: Joshua Levy <joshua@cal.berkeley.edu>
@@ -72,6 +72,7 @@ Requires-Dist: thefuzz>=0.22.1
 Requires-Dist: tiktoken>=0.9.0
 Requires-Dist: tldr>=3.3.0
 Requires-Dist: tminify>=0.1.6
+Requires-Dist: truststore>=0.10.4
 Requires-Dist: typing-extensions>=4.12.2
 Requires-Dist: uvicorn>=0.34.0
 Requires-Dist: xonsh>=0.19.3