kash-shell 0.3.22__py3-none-any.whl → 0.3.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,11 @@ log = get_logger(__name__)
12
12
 
13
13
 
14
14
  def fetch_url_item(
15
- locator: Url | StorePath, *, save_content: bool = True, refetch: bool = False
15
+ locator: Url | StorePath,
16
+ *,
17
+ save_content: bool = True,
18
+ refetch: bool = False,
19
+ cache: bool = True,
16
20
  ) -> Item:
17
21
  from kash.workspaces import current_ws
18
22
 
@@ -28,17 +32,21 @@ def fetch_url_item(
28
32
  else:
29
33
  raise InvalidInput(f"Not a URL or URL resource: {fmt_loc(locator)}")
30
34
 
31
- return fetch_url_item_content(item, save_content=save_content, refetch=refetch)
35
+ return fetch_url_item_content(item, save_content=save_content, refetch=refetch, cache=cache)
32
36
 
33
37
 
34
- def fetch_url_item_content(item: Item, *, save_content: bool = True, refetch: bool = False) -> Item:
38
+ def fetch_url_item_content(
39
+ item: Item, *, save_content: bool = True, refetch: bool = False, cache: bool = True
40
+ ) -> Item:
35
41
  """
36
42
  Fetch content and metadata for a URL using a media service if we
37
43
  recognize the URL as a known media service. Otherwise, fetch and extract the
38
44
  metadata and content from the web page and save it to the URL item.
39
45
 
40
- If `save_content` is true, a copy of the content is also saved as
41
- a resource item.
46
+ If `save_content` is true, a copy of the content is also saved to the workspace
47
+ as a resource item.
48
+
49
+ If `cache` is true, the content is also cached in the local file cache.
42
50
 
43
51
  The content item is returned if content was saved. Otherwise, the updated
44
52
  URL item is returned.
@@ -49,7 +57,7 @@ def fetch_url_item_content(item: Item, *, save_content: bool = True, refetch: bo
49
57
 
50
58
  ws = current_ws()
51
59
  if not refetch and item.title and item.description and item.body:
52
- log.message(
60
+ log.info(
53
61
  "Already have title, description, and body, will not fetch: %s",
54
62
  item.fmt_loc(),
55
63
  )
@@ -59,7 +67,7 @@ def fetch_url_item_content(item: Item, *, save_content: bool = True, refetch: bo
59
67
  raise InvalidInput(f"No URL for item: {item.fmt_loc()}")
60
68
 
61
69
  url = canonicalize_url(item.url)
62
- log.message("No metadata for URL, will fetch: %s", url)
70
+ log.info("No metadata for URL, will fetch: %s", url)
63
71
 
64
72
  # Prefer fetching metadata from media using the media service if possible.
65
73
  # Data is cleaner and YouTube for example often blocks regular scraping.
@@ -73,12 +81,12 @@ def fetch_url_item_content(item: Item, *, save_content: bool = True, refetch: bo
73
81
  if slice:
74
82
  new_url = add_slice_to_url(media_metadata.url, slice)
75
83
  if new_url != item.url:
76
- log.message("Updated URL from metadata and added slice: %s", new_url)
84
+ log.info("Updated URL from metadata and added slice: %s", new_url)
77
85
  url_item.url = new_url
78
86
 
79
87
  url_item = item.merged_copy(url_item)
80
88
  else:
81
- page_data = fetch_page_content(url, refetch=refetch, cache=save_content)
89
+ page_data = fetch_page_content(url, refetch=refetch, cache=cache)
82
90
  url_item = item.new_copy_with(
83
91
  title=page_data.title or item.title,
84
92
  description=page_data.description or item.description,
@@ -289,7 +289,7 @@ class FileStore(Workspace):
289
289
  if self.exists(default_path):
290
290
  old_item = self.load(default_path)
291
291
  if old_item.item_id() == item_id:
292
- log.message(
292
+ log.info(
293
293
  "Item with the same id already saved (disk check):\n%s",
294
294
  fmt_lines([fmt_loc(default_path), item_id]),
295
295
  )
@@ -297,7 +297,7 @@ class FileStore(Workspace):
297
297
  self.id_map[item_id] = default_path
298
298
  return default_path
299
299
  if store_path and self.exists(store_path):
300
- log.message(
300
+ log.info(
301
301
  "Item with the same id already saved (disk check):\n%s",
302
302
  fmt_lines([fmt_loc(store_path), item_id]),
303
303
  )
@@ -536,7 +536,7 @@ class FileStore(Workspace):
536
536
  item = Item(item_type, url=url, format=Format.url)
537
537
  previous_store_path = self.find_by_id(item)
538
538
  if previous_store_path and not reimport:
539
- log.message(
539
+ log.info(
540
540
  "Workspace already has this URL:\n%s",
541
541
  fmt_lines([fmt_loc(previous_store_path), url]),
542
542
  )
@@ -3,6 +3,41 @@ from __future__ import annotations
3
3
  import random
4
4
  from collections.abc import Callable
5
5
  from dataclasses import dataclass
6
+ from enum import Enum
7
+
8
+
9
class HTTPRetryBehavior(Enum):
    """Classification of how the retry logic treats an HTTP status code."""

    # Fully retry these status codes (e.g., 429, 500, 502, 503, 504).
    FULL = "full"

    # Retry conservatively: the status may indicate rate limiting or a
    # temporary issue (e.g., 403, 408).
    CONSERVATIVE = "conservative"

    # Never retry these status codes (e.g., 400, 401, 404, 410).
    NEVER = "never"
20
+
21
+
22
# Default HTTP status code retry classifications, grouped by behavior.
DEFAULT_HTTP_RETRY_MAP: dict[int, HTTPRetryBehavior] = {
    # Fully retriable: server errors and explicit rate limiting
    **dict.fromkeys(
        (
            429,  # Too Many Requests
            500,  # Internal Server Error
            502,  # Bad Gateway
            503,  # Service Unavailable
            504,  # Gateway Timeout
        ),
        HTTPRetryBehavior.FULL,
    ),
    # Conservatively retriable: might be temporary
    **dict.fromkeys(
        (
            403,  # Forbidden (could be rate limiting)
            408,  # Request Timeout
        ),
        HTTPRetryBehavior.CONSERVATIVE,
    ),
    # Never retriable: client errors
    **dict.fromkeys(
        (
            400,  # Bad Request
            401,  # Unauthorized
            404,  # Not Found
            405,  # Method Not Allowed
            410,  # Gone
            422,  # Unprocessable Entity
        ),
        HTTPRetryBehavior.NEVER,
    ),
}
6
41
 
7
42
 
8
43
  class RetryException(RuntimeError):
@@ -27,9 +62,54 @@ class RetryExhaustedException(RetryException):
27
62
  )
28
63
 
29
64
 
65
def extract_http_status_code(exception: Exception) -> int | None:
    """
    Extract HTTP status code from various exception types.

    Sources are tried in order: `exception.response.status_code`
    (httpx/requests style), `exception.status` (aiohttp style), and finally
    status-like patterns in the exception message. A source that is present
    but does not hold an integer is skipped rather than returned, so the
    result is always an `int` or `None`.

    Args:
        exception: The exception to extract status code from

    Returns:
        HTTP status code or None if not found
    """
    import re

    def _as_status(value: object) -> int | None:
        # bool is a subclass of int; a True/False attribute is not a status code.
        if isinstance(value, int) and not isinstance(value, bool):
            return value
        return None

    # Check for httpx.HTTPStatusError and requests.HTTPError.
    # Compare against None explicitly instead of testing truthiness:
    # `requests.Response` is falsy for 4xx/5xx responses, which are exactly
    # the responses attached to HTTP error exceptions.
    response = getattr(exception, "response", None)
    if response is not None:
        code = _as_status(getattr(response, "status_code", None))
        if code is not None:
            return code

    # Check for aiohttp errors
    code = _as_status(getattr(exception, "status", None))
    if code is not None:
        return code

    # Parse from exception message as fallback
    exception_str = str(exception)

    # Pattern for "403 Forbidden", "HTTP 429", etc.
    status_patterns = [
        r"\b(\d{3})\s+(?:Forbidden|Unauthorized|Not Found|Too Many Requests|Internal Server Error|Bad Gateway|Service Unavailable|Gateway Timeout)\b",
        r"\bHTTP\s+(\d{3})\b",
        r"\b(\d{3})\s+error\b",
        r"status\s*(?:code)?:\s*(\d{3})\b",
    ]

    for pattern in status_patterns:
        match = re.search(pattern, exception_str, re.IGNORECASE)
        if match:
            # Every pattern captures exactly three digits in group 1, so the
            # conversion cannot fail.
            return int(match.group(1))

    return None
108
+
109
+
30
110
  def default_is_retriable(exception: Exception) -> bool:
31
111
  """
32
- Default retriable exception checker for common rate limit patterns.
112
+ Default retriable exception checker with HTTP status code awareness.
33
113
 
34
114
  Args:
35
115
  exception: The exception to check
@@ -51,12 +131,22 @@ def default_is_retriable(exception: Exception) -> bool:
51
131
  ):
52
132
  return True
53
133
  except ImportError:
54
- # LiteLLM not available, fall back to string-based detection
134
+ # LiteLLM not available, fall back to other detection methods
55
135
  pass
56
136
 
57
- # Fallback to string-based detection for general patterns
137
+ # Try to extract HTTP status code for more precise handling
138
+ status_code = extract_http_status_code(exception)
139
+ if status_code is not None:
140
+ return is_http_status_retriable(status_code, DEFAULT_HTTP_RETRY_MAP)
141
+
142
+ # Fallback to string-based detection for transient errors
58
143
  exception_str = str(exception).lower()
59
- rate_limit_indicators = [
144
+
145
+ # Check exception type names for common transient network errors
146
+ exception_type = type(exception).__name__.lower()
147
+
148
+ transient_error_indicators = [
149
+ # Rate limiting and quota errors
60
150
  "rate limit",
61
151
  "too many requests",
62
152
  "try again later",
@@ -65,9 +155,92 @@ def default_is_retriable(exception: Exception) -> bool:
65
155
  "throttled",
66
156
  "rate_limit_error",
67
157
  "ratelimiterror",
158
+ # Server errors
159
+ "server error",
160
+ "service unavailable",
161
+ "bad gateway",
162
+ "gateway timeout",
163
+ "internal server error",
164
+ "502",
165
+ "503",
166
+ "504",
167
+ "500",
168
+ # Network connectivity errors
169
+ "connection timeout",
170
+ "connection timed out",
171
+ "read timeout",
172
+ "timeout error",
173
+ "timed out",
174
+ "connection reset",
175
+ "connection refused",
176
+ "connection aborted",
177
+ "connection error",
178
+ "network error",
179
+ "network unreachable",
180
+ "network is unreachable",
181
+ "no route to host",
182
+ "temporary failure",
183
+ "name resolution failed",
184
+ "dns",
185
+ "resolver",
186
+ # SSL/TLS transient errors
187
+ "ssl error",
188
+ "certificate verify failed",
189
+ "handshake timeout",
190
+ # Common transient exception types
191
+ "connectionerror",
192
+ "timeouterror",
193
+ "connecttimeout",
194
+ "readtimeout",
195
+ "httperror",
196
+ "requestexception",
68
197
  ]
69
198
 
70
- return any(indicator in exception_str for indicator in rate_limit_indicators)
199
+ # Check both exception message and type name
200
+ return any(indicator in exception_str for indicator in transient_error_indicators) or any(
201
+ indicator in exception_type for indicator in transient_error_indicators
202
+ )
203
+
204
+
205
def is_http_status_retriable(
    status_code: int,
    retry_map: dict[int, HTTPRetryBehavior] | None = None,
) -> bool:
    """
    Decide whether a request that failed with `status_code` should be retried.

    Args:
        status_code: HTTP status code
        retry_map: Custom retry behavior map (uses default if None)

    Returns:
        True if the status code should be retried
    """
    table = DEFAULT_HTTP_RETRY_MAP if retry_map is None else retry_map
    classification = table.get(status_code)

    # Explicit classifications win. Conservative retries are enabled by
    # default, so CONSERVATIVE is treated the same as FULL here.
    if classification in (HTTPRetryBehavior.FULL, HTTPRetryBehavior.CONSERVATIVE):
        return True
    if classification == HTTPRetryBehavior.NEVER:
        return False

    # Unclassified status code: server errors (5xx) and rate limiting (429)
    # are retriable; every other code, including other 4xx, is not.
    return status_code == 429 or 500 <= status_code <= 599
71
244
 
72
245
 
73
246
  @dataclass(frozen=True)
@@ -94,6 +267,9 @@ class RetrySettings:
94
267
  is_retriable: Callable[[Exception], bool] = default_is_retriable
95
268
  """Function to determine if an exception should be retried"""
96
269
 
270
+ http_retry_map: dict[int, HTTPRetryBehavior] | None = None
271
+ """Custom HTTP status code retry behavior (None = use defaults)"""
272
+
97
273
 
98
274
  DEFAULT_RETRIES = RetrySettings(
99
275
  max_task_retries=10,
@@ -106,6 +282,48 @@ DEFAULT_RETRIES = RetrySettings(
106
282
  """Reasonable default retry settings with both per-task and global limits."""
107
283
 
108
284
 
285
# Preset configurations for different use cases
AGGRESSIVE_RETRIES = RetrySettings(
    max_task_retries=15,
    max_total_retries=200,
    initial_backoff=0.5,
    max_backoff=64.0,
    backoff_factor=1.8,
)
"""Aggressive retry settings - retry more often with shorter initial backoff."""


# Conservative mode keeps the default classification but turns every
# CONSERVATIVE code into NEVER. The table is derived from
# DEFAULT_HTTP_RETRY_MAP rather than repeated, so the two cannot drift apart
# when a status code is added or reclassified.
_CONSERVATIVE_HTTP_RETRY_MAP: dict[int, HTTPRetryBehavior] = {
    status: (
        HTTPRetryBehavior.NEVER if behavior is HTTPRetryBehavior.CONSERVATIVE else behavior
    )
    for status, behavior in DEFAULT_HTTP_RETRY_MAP.items()
}

# NOTE(review): default_is_retriable classifies status codes with
# DEFAULT_HTTP_RETRY_MAP directly. Confirm that whatever consumes RetrySettings
# actually consults `http_retry_map`; otherwise the map below has no effect.
CONSERVATIVE_RETRIES = RetrySettings(
    max_task_retries=5,
    max_total_retries=50,
    initial_backoff=2.0,
    max_backoff=60.0,
    backoff_factor=2.5,
    http_retry_map=_CONSERVATIVE_HTTP_RETRY_MAP,
)
"""Conservative retry settings - fewer retries, longer backoff, no conservative HTTP retries."""
325
+
326
+
109
327
  NO_RETRIES = RetrySettings(
110
328
  max_task_retries=0,
111
329
  max_total_retries=0,
@@ -190,9 +408,97 @@ def calculate_backoff(
190
408
  ## Tests
191
409
 
192
410
 
411
def test_extract_http_status_code():
    """Status codes are recovered from response objects, `status` attributes, and messages."""

    class MockHTTPXResponse:
        def __init__(self, status_code):
            self.status_code = status_code

    class MockHTTPXException(Exception):
        # Mimics httpx/requests: the code lives on `exception.response.status_code`.
        def __init__(self, status_code):
            self.response = MockHTTPXResponse(status_code)
            super().__init__(f"HTTP {status_code} error")

    class MockAioHTTPException(Exception):
        # Mimics aiohttp: the code lives directly on `exception.status`.
        def __init__(self, status):
            self.status = status
            super().__init__(f"HTTP {status} error")

    # httpx-style exceptions
    for code in (403, 429):
        assert extract_http_status_code(MockHTTPXException(code)) == code

    # aiohttp-style exceptions
    assert extract_http_status_code(MockAioHTTPException(500)) == 500

    # Message parsing is the fallback when the exception carries no attributes.
    parsed_from_message = {
        "Client error '403 Forbidden'": 403,
        "HTTP 429 Too Many Requests": 429,
        "500 error occurred": 500,
    }
    for message, expected in parsed_from_message.items():
        assert extract_http_status_code(Exception(message)) == expected

    # No status code anywhere
    assert extract_http_status_code(Exception("Network error")) is None
442
+
443
+
444
def test_is_http_status_retriable():
    """Check retry decisions for mapped codes, the conservative map, and unmapped codes."""

    # Fully retriable: 429 Too Many Requests and the common 5xx server errors.
    for code in (429, 500, 502, 503, 504):
        assert is_http_status_retriable(code)

    # Conservative codes (403 Forbidden, 408 Request Timeout) are retried
    # under the default map...
    conservative_codes = (403, 408)
    for code in conservative_codes:
        assert is_http_status_retriable(code)

    # ...but not when the conservative map is supplied.
    for code in conservative_codes:
        assert not is_http_status_retriable(code, _CONSERVATIVE_HTTP_RETRY_MAP)

    # Never retriable: 400, 401, 404, 410.
    for code in (400, 401, 404, 410):
        assert not is_http_status_retriable(code)

    # Unmapped codes fall back to heuristics: 5xx is retriable, the rest are not.
    assert is_http_status_retriable(599)
    for code in (499, 299):
        assert not is_http_status_retriable(code)
472
+
473
+
474
def test_default_is_retriable_with_http():
    """default_is_retriable honors HTTP status codes and keeps its string fallback."""

    class MockHTTPXResponse:
        def __init__(self, status_code):
            self.status_code = status_code

    class MockHTTPXException(Exception):
        def __init__(self, status_code):
            self.response = MockHTTPXResponse(status_code)
            super().__init__(f"HTTP {status_code} error")

    # Exceptions carrying a known status code.
    expected_by_status = {
        429: True,  # Rate limit
        500: True,  # Server error
        403: True,  # Conservative code, retried by default
        404: False,  # Not found
        401: False,  # Unauthorized
    }
    for status, should_retry in expected_by_status.items():
        assert bool(default_is_retriable(MockHTTPXException(status))) == should_retry

    # Plain exceptions still go through message-based detection.
    for message in ("Rate limit exceeded", "503 Service Unavailable"):
        assert default_is_retriable(Exception(message))
    assert not default_is_retriable(Exception("Authentication failed"))
497
+
498
+
193
499
  def test_default_is_retriable():
194
- """Test string-based rate limit detection."""
195
- # Positive cases
500
+ """Test string-based transient error detection."""
501
+ # Rate limiting cases
196
502
  assert default_is_retriable(Exception("Rate limit exceeded"))
197
503
  assert default_is_retriable(Exception("Too many requests"))
198
504
  assert default_is_retriable(Exception("HTTP 429 error"))
@@ -200,10 +506,30 @@ def test_default_is_retriable():
200
506
  assert default_is_retriable(Exception("throttled"))
201
507
  assert default_is_retriable(Exception("RateLimitError"))
202
508
 
203
- # Negative cases
509
+ # Network connectivity cases
510
+ assert default_is_retriable(Exception("Network error"))
511
+ assert default_is_retriable(Exception("Connection timeout"))
512
+ assert default_is_retriable(Exception("Connection timed out"))
513
+ assert default_is_retriable(Exception("Connection refused"))
514
+ assert default_is_retriable(Exception("Network unreachable"))
515
+ assert default_is_retriable(Exception("DNS resolution failed"))
516
+ assert default_is_retriable(Exception("SSL error"))
517
+
518
+ # Exception type-based detection
519
+ class ConnectionError(Exception):
520
+ pass
521
+
522
+ class TimeoutError(Exception):
523
+ pass
524
+
525
+ assert default_is_retriable(ConnectionError("Some connection issue"))
526
+ assert default_is_retriable(TimeoutError("Operation timed out"))
527
+
528
+ # Non-retriable cases
204
529
  assert not default_is_retriable(Exception("Authentication failed"))
205
530
  assert not default_is_retriable(Exception("Invalid API key"))
206
- assert not default_is_retriable(Exception("Network error"))
531
+ assert not default_is_retriable(Exception("Permission denied"))
532
+ assert not default_is_retriable(Exception("File not found"))
207
533
 
208
534
 
209
535
  def test_default_is_retriable_litellm():