kash-shell 0.3.22__py3-none-any.whl → 0.3.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/actions/core/combine_docs.py +52 -0
- kash/actions/core/concat_docs.py +47 -0
- kash/commands/workspace/workspace_commands.py +2 -2
- kash/config/logger.py +3 -2
- kash/config/settings.py +8 -0
- kash/docs/markdown/topics/a2_installation.md +2 -2
- kash/embeddings/embeddings.py +1 -1
- kash/exec/action_exec.py +1 -1
- kash/exec/fetch_url_items.py +52 -16
- kash/file_storage/file_store.py +3 -3
- kash/llm_utils/llm_completion.py +1 -1
- kash/mcp/mcp_cli.py +2 -2
- kash/utils/api_utils/api_retries.py +348 -14
- kash/utils/api_utils/gather_limited.py +366 -512
- kash/utils/api_utils/http_utils.py +46 -0
- kash/utils/api_utils/progress_protocol.py +49 -56
- kash/utils/rich_custom/multitask_status.py +70 -21
- kash/utils/text_handling/markdown_utils.py +14 -3
- kash/web_content/web_extract.py +13 -9
- kash/web_content/web_fetch.py +289 -60
- kash/web_content/web_page_model.py +5 -0
- {kash_shell-0.3.22.dist-info → kash_shell-0.3.24.dist-info}/METADATA +5 -3
- {kash_shell-0.3.22.dist-info → kash_shell-0.3.24.dist-info}/RECORD +26 -23
- {kash_shell-0.3.22.dist-info → kash_shell-0.3.24.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.22.dist-info → kash_shell-0.3.24.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.22.dist-info → kash_shell-0.3.24.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def _coerce_status_code(value: object) -> int | None:
    """
    Return `value` if it is a plain integer, otherwise None.
    """
    # bool is a subclass of int; True/False is never a real status code.
    if isinstance(value, int) and not isinstance(value, bool):
        return value
    return None


def extract_http_status_code(exception: Exception) -> int | None:
    """
    Extract HTTP status code from various exception types.

    Sources are tried in order, and the first one that yields an integer wins:

    1. `exception.response.status_code` (httpx.HTTPStatusError, requests.HTTPError)
    2. `exception.status` (aiohttp-style errors)
    3. A status code pattern found in `str(exception)`

    Args:
        exception: The exception to extract status code from

    Returns:
        HTTP status code or None if not found
    """
    import re

    # Compare against None instead of relying on truthiness: a
    # requests.Response is falsy whenever its status is 4xx/5xx, which is
    # exactly the case where the status code is needed.
    response = getattr(exception, "response", None)
    if response is not None:
        code = _coerce_status_code(getattr(response, "status_code", None))
        if code is not None:
            return code

    # aiohttp-style errors carry the code directly on the exception. Only
    # accept an integer, so an unrelated or unset `status` attribute does not
    # short-circuit the message-based fallback below.
    code = _coerce_status_code(getattr(exception, "status", None))
    if code is not None:
        return code

    # Parse from exception message as fallback.
    exception_str = str(exception)

    # Patterns for "403 Forbidden", "HTTP 429", "500 error", "status code: 404".
    status_patterns = [
        r"\b(\d{3})\s+(?:Forbidden|Unauthorized|Not Found|Too Many Requests|Internal Server Error|Bad Gateway|Service Unavailable|Gateway Timeout)\b",
        r"\bHTTP\s+(\d{3})\b",
        r"\b(\d{3})\s+error\b",
        r"status\s*(?:code)?:\s*(\d{3})\b",
    ]

    for pattern in status_patterns:
        match = re.search(pattern, exception_str, re.IGNORECASE)
        if match:
            # Group 1 is always exactly three digits, so int() cannot fail.
            return int(match.group(1))

    return None
|
|
@@ -6,7 +6,7 @@ from enum import Enum
|
|
|
6
6
|
from typing import Any, Protocol, TypeAlias, TypeVar
|
|
7
7
|
|
|
8
8
|
T = TypeVar("T")
|
|
9
|
-
|
|
9
|
+
TaskId = TypeVar("TaskId")
|
|
10
10
|
|
|
11
11
|
# Generic task spec types for labeler functions
|
|
12
12
|
TaskSpec = TypeVar("TaskSpec")
|
|
@@ -16,14 +16,16 @@ Labeler: TypeAlias = Callable[[int, TaskSpec], str]
|
|
|
16
16
|
EMOJI_SUCCESS = "[✔︎]"
|
|
17
17
|
EMOJI_FAILURE = "[✘]"
|
|
18
18
|
EMOJI_SKIP = "[-]"
|
|
19
|
-
|
|
20
|
-
|
|
19
|
+
EMOJI_WAITING = " ⧖ "
|
|
20
|
+
EMOJI_WARN = "∆"
|
|
21
|
+
EMOJI_RETRY = "⟲"
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
class TaskState(Enum):
|
|
24
25
|
"""Task execution states."""
|
|
25
26
|
|
|
26
27
|
QUEUED = "queued"
|
|
28
|
+
WAITING = "waiting"
|
|
27
29
|
RUNNING = "running"
|
|
28
30
|
COMPLETED = "completed"
|
|
29
31
|
FAILED = "failed"
|
|
@@ -38,7 +40,7 @@ class TaskInfo:
|
|
|
38
40
|
retry_count: int = 0
|
|
39
41
|
failures: list[str] = field(default_factory=list)
|
|
40
42
|
label: str = ""
|
|
41
|
-
|
|
43
|
+
steps_total: int = 1
|
|
42
44
|
|
|
43
45
|
|
|
44
46
|
@dataclass(frozen=True)
|
|
@@ -47,30 +49,9 @@ class TaskSummary:
|
|
|
47
49
|
|
|
48
50
|
task_states: list[TaskState]
|
|
49
51
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
return sum(1 for state in self.task_states if state == TaskState.QUEUED)
|
|
54
|
-
|
|
55
|
-
@property
|
|
56
|
-
def running(self) -> int:
|
|
57
|
-
"""Number of running tasks."""
|
|
58
|
-
return sum(1 for state in self.task_states if state == TaskState.RUNNING)
|
|
59
|
-
|
|
60
|
-
@property
|
|
61
|
-
def completed(self) -> int:
|
|
62
|
-
"""Number of completed tasks."""
|
|
63
|
-
return sum(1 for state in self.task_states if state == TaskState.COMPLETED)
|
|
64
|
-
|
|
65
|
-
@property
|
|
66
|
-
def failed(self) -> int:
|
|
67
|
-
"""Number of failed tasks."""
|
|
68
|
-
return sum(1 for state in self.task_states if state == TaskState.FAILED)
|
|
69
|
-
|
|
70
|
-
@property
|
|
71
|
-
def skipped(self) -> int:
|
|
72
|
-
"""Number of skipped tasks."""
|
|
73
|
-
return sum(1 for state in self.task_states if state == TaskState.SKIPPED)
|
|
52
|
+
def count(self, state: TaskState) -> int:
    """Return how many entries in `task_states` are equal to `state`."""
    matching = [s for s in self.task_states if s == state]
    return len(matching)
|
|
74
55
|
|
|
75
56
|
@property
|
|
76
57
|
def total(self) -> int:
|
|
@@ -84,30 +65,35 @@ class TaskSummary:
|
|
|
84
65
|
if not self.task_states:
|
|
85
66
|
return "No tasks to process"
|
|
86
67
|
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
68
|
+
completed = self.count(TaskState.COMPLETED)
|
|
69
|
+
skipped = self.count(TaskState.SKIPPED)
|
|
70
|
+
failed = self.count(TaskState.FAILED)
|
|
71
|
+
queued = self.count(TaskState.QUEUED)
|
|
72
|
+
|
|
73
|
+
if completed == self.total:
|
|
74
|
+
return f"All tasks successful: {completed}/{self.total} completed"
|
|
75
|
+
elif completed + skipped == self.total:
|
|
76
|
+
return f"All tasks successful: {completed}/{self.total} completed, {skipped} skipped"
|
|
77
|
+
elif failed == self.total:
|
|
78
|
+
return f"All tasks failed: {failed}/{self.total} failed"
|
|
93
79
|
else:
|
|
94
80
|
parts = []
|
|
95
|
-
if
|
|
96
|
-
parts.append(f"{
|
|
97
|
-
if
|
|
98
|
-
parts.append(f"{
|
|
99
|
-
if
|
|
100
|
-
parts.append(f"{
|
|
101
|
-
if
|
|
102
|
-
parts.append(f"{
|
|
103
|
-
|
|
104
|
-
if
|
|
81
|
+
if completed > 0:
|
|
82
|
+
parts.append(f"{completed}/{self.total} tasks completed")
|
|
83
|
+
if failed > 0:
|
|
84
|
+
parts.append(f"{failed} tasks failed")
|
|
85
|
+
if skipped > 0:
|
|
86
|
+
parts.append(f"{skipped} tasks skipped")
|
|
87
|
+
if queued > 0:
|
|
88
|
+
parts.append(f"{queued} tasks not yet run")
|
|
89
|
+
|
|
90
|
+
if queued > 0:
|
|
105
91
|
return "Tasks were interrupted: " + ", ".join(parts)
|
|
106
92
|
else:
|
|
107
93
|
return "Tasks had errors: " + ", ".join(parts)
|
|
108
94
|
|
|
109
95
|
|
|
110
|
-
class ProgressTracker(Protocol[
|
|
96
|
+
class ProgressTracker(Protocol[TaskId]):
|
|
111
97
|
"""
|
|
112
98
|
Protocol for progress tracking that gather_limited can depend on.
|
|
113
99
|
|
|
@@ -123,19 +109,20 @@ class ProgressTracker(Protocol[TaskID]):
|
|
|
123
109
|
"""
|
|
124
110
|
...
|
|
125
111
|
|
|
126
|
-
async def add(self, label: str,
|
|
112
|
+
async def add(self, label: str, steps_total: int = 1) -> TaskId:
|
|
127
113
|
"""Add a new task to track."""
|
|
128
114
|
...
|
|
129
115
|
|
|
130
|
-
async def start(self, task_id:
|
|
116
|
+
async def start(self, task_id: TaskId) -> None:
|
|
131
117
|
"""Mark task as started (after rate limiting/queuing)."""
|
|
132
118
|
...
|
|
133
119
|
|
|
134
120
|
async def update(
|
|
135
121
|
self,
|
|
136
|
-
task_id:
|
|
122
|
+
task_id: TaskId,
|
|
123
|
+
state: TaskState | None = None,
|
|
137
124
|
*,
|
|
138
|
-
|
|
125
|
+
steps_done: int | None = None,
|
|
139
126
|
label: str | None = None,
|
|
140
127
|
error_msg: str | None = None,
|
|
141
128
|
) -> None:
|
|
@@ -144,7 +131,8 @@ class ProgressTracker(Protocol[TaskID]):
|
|
|
144
131
|
|
|
145
132
|
Args:
|
|
146
133
|
task_id: Task ID from add()
|
|
147
|
-
|
|
134
|
+
state: New task state (None = no change)
|
|
135
|
+
steps_done: Steps to advance (None = no change)
|
|
148
136
|
label: New label (None = no change)
|
|
149
137
|
error_msg: Error message to record as retry (None = no retry)
|
|
150
138
|
"""
|
|
@@ -152,7 +140,7 @@ class ProgressTracker(Protocol[TaskID]):
|
|
|
152
140
|
|
|
153
141
|
async def finish(
|
|
154
142
|
self,
|
|
155
|
-
task_id:
|
|
143
|
+
task_id: TaskId,
|
|
156
144
|
state: TaskState,
|
|
157
145
|
message: str = "",
|
|
158
146
|
) -> None:
|
|
@@ -167,10 +155,10 @@ class ProgressTracker(Protocol[TaskID]):
|
|
|
167
155
|
...
|
|
168
156
|
|
|
169
157
|
|
|
170
|
-
class AsyncProgressContext(Protocol[
|
|
158
|
+
class AsyncProgressContext(Protocol[TaskId]):
|
|
171
159
|
"""Protocol for async context manager progress trackers."""
|
|
172
160
|
|
|
173
|
-
async def __aenter__(self) -> ProgressTracker[
|
|
161
|
+
async def __aenter__(self) -> ProgressTracker[TaskId]:
|
|
174
162
|
"""Start progress tracking."""
|
|
175
163
|
...
|
|
176
164
|
|
|
@@ -198,11 +186,11 @@ class SimpleProgressTracker:
|
|
|
198
186
|
"""Console-based tracker works with standard logging."""
|
|
199
187
|
return False
|
|
200
188
|
|
|
201
|
-
async def add(self, label: str,
|
|
189
|
+
async def add(self, label: str, steps_total: int = 1) -> int: # pyright: ignore[reportUnusedParameter]
|
|
202
190
|
task_id = self._next_id
|
|
203
191
|
self._next_id += 1
|
|
204
192
|
|
|
205
|
-
self._tasks[task_id] = TaskInfo(label=label)
|
|
193
|
+
self._tasks[task_id] = TaskInfo(label=label, steps_total=steps_total)
|
|
206
194
|
|
|
207
195
|
if self.verbose:
|
|
208
196
|
self.print_fn(f"Queued: {label}")
|
|
@@ -223,8 +211,9 @@ class SimpleProgressTracker:
|
|
|
223
211
|
async def update(
|
|
224
212
|
self,
|
|
225
213
|
task_id: int,
|
|
214
|
+
state: TaskState | None = None,
|
|
226
215
|
*,
|
|
227
|
-
|
|
216
|
+
steps_done: int | None = None, # pyright: ignore[reportUnusedParameter]
|
|
228
217
|
label: str | None = None,
|
|
229
218
|
error_msg: str | None = None,
|
|
230
219
|
) -> None:
|
|
@@ -232,6 +221,10 @@ class SimpleProgressTracker:
|
|
|
232
221
|
if not task_info:
|
|
233
222
|
return
|
|
234
223
|
|
|
224
|
+
# Update state if provided
|
|
225
|
+
if state is not None:
|
|
226
|
+
task_info.state = state
|
|
227
|
+
|
|
235
228
|
# Update label if provided
|
|
236
229
|
if label is not None:
|
|
237
230
|
task_info.label = label
|
|
@@ -22,6 +22,7 @@ from kash.utils.api_utils.progress_protocol import (
|
|
|
22
22
|
EMOJI_RETRY,
|
|
23
23
|
EMOJI_SKIP,
|
|
24
24
|
EMOJI_SUCCESS,
|
|
25
|
+
EMOJI_WAITING,
|
|
25
26
|
TaskInfo,
|
|
26
27
|
TaskState,
|
|
27
28
|
TaskSummary,
|
|
@@ -30,7 +31,7 @@ from kash.utils.api_utils.progress_protocol import (
|
|
|
30
31
|
T = TypeVar("T")
|
|
31
32
|
|
|
32
33
|
# Spinner configuration
|
|
33
|
-
SPINNER_NAME = "
|
|
34
|
+
SPINNER_NAME = "dots8Bit"
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
@dataclass(frozen=True)
|
|
@@ -45,6 +46,7 @@ class StatusStyles:
|
|
|
45
46
|
failure_symbol: str = EMOJI_FAILURE
|
|
46
47
|
skip_symbol: str = EMOJI_SKIP
|
|
47
48
|
retry_symbol: str = EMOJI_RETRY
|
|
49
|
+
wait_symbol: str = EMOJI_WAITING
|
|
48
50
|
|
|
49
51
|
# Status styles
|
|
50
52
|
retry_style: str = "red"
|
|
@@ -52,6 +54,7 @@ class StatusStyles:
|
|
|
52
54
|
failure_style: str = "red"
|
|
53
55
|
skip_style: str = "yellow"
|
|
54
56
|
running_style: str = "blue"
|
|
57
|
+
waiting_style: str = "yellow"
|
|
55
58
|
error_style: str = "dim red"
|
|
56
59
|
|
|
57
60
|
# Progress bar styles
|
|
@@ -114,12 +117,13 @@ class SpinnerStatusColumn(ProgressColumn):
|
|
|
114
117
|
self.spinner: Spinner = Spinner(spinner_name)
|
|
115
118
|
self.styles = styles
|
|
116
119
|
|
|
117
|
-
# Calculate fixed width for consistent column sizing
|
|
120
|
+
# Calculate fixed width for consistent column sizing, adding 2 for padding (space on each side)
|
|
118
121
|
self.column_width: int = max(
|
|
119
|
-
_get_spinner_width(spinner_name),
|
|
122
|
+
_get_spinner_width(spinner_name) + 2,
|
|
120
123
|
len(styles.success_symbol),
|
|
121
124
|
len(styles.failure_symbol),
|
|
122
125
|
len(styles.skip_symbol),
|
|
126
|
+
len(styles.wait_symbol),
|
|
123
127
|
)
|
|
124
128
|
|
|
125
129
|
@override
|
|
@@ -136,13 +140,18 @@ class SpinnerStatusColumn(ProgressColumn):
|
|
|
136
140
|
text = Text(self.styles.failure_symbol, style=self.styles.failure_style)
|
|
137
141
|
elif task_info.state == TaskState.SKIPPED:
|
|
138
142
|
text = Text(self.styles.skip_symbol, style=self.styles.skip_style)
|
|
139
|
-
|
|
140
|
-
|
|
143
|
+
elif task_info.state == TaskState.WAITING:
|
|
144
|
+
text = Text(self.styles.wait_symbol, style=self.styles.waiting_style)
|
|
145
|
+
elif task_info.state == TaskState.RUNNING:
|
|
146
|
+
# Running: show spinner with padding
|
|
141
147
|
spinner_result = self.spinner.render(task.get_time())
|
|
142
148
|
if isinstance(spinner_result, Text):
|
|
143
|
-
text = spinner_result
|
|
149
|
+
text = Text(" ") + spinner_result + Text(" ")
|
|
144
150
|
else:
|
|
145
|
-
text = Text(str(spinner_result))
|
|
151
|
+
text = Text(" " + str(spinner_result) + " ")
|
|
152
|
+
else:
|
|
153
|
+
# Should not happen, but return empty space
|
|
154
|
+
return Text(" " * self.column_width)
|
|
146
155
|
|
|
147
156
|
# Ensure consistent width
|
|
148
157
|
current_len = len(text.plain)
|
|
@@ -376,13 +385,13 @@ class MultiTaskStatus(AbstractAsyncContextManager):
|
|
|
376
385
|
summary = self.get_summary()
|
|
377
386
|
self.console.print(summary)
|
|
378
387
|
|
|
379
|
-
async def add(self, label: str,
|
|
388
|
+
async def add(self, label: str, steps_total: int | None = None) -> int:
|
|
380
389
|
"""
|
|
381
390
|
Add a new task to the display. Task won't appear until start() is called.
|
|
382
391
|
|
|
383
392
|
Args:
|
|
384
393
|
label: Human-readable task description
|
|
385
|
-
|
|
394
|
+
steps_total: Total steps for progress bar (None for no default bar)
|
|
386
395
|
|
|
387
396
|
Returns:
|
|
388
397
|
Task ID for subsequent updates
|
|
@@ -392,7 +401,7 @@ class MultiTaskStatus(AbstractAsyncContextManager):
|
|
|
392
401
|
task_id: int = self._next_id
|
|
393
402
|
self._next_id += 1
|
|
394
403
|
|
|
395
|
-
task_info = TaskInfo(label=label,
|
|
404
|
+
task_info = TaskInfo(label=label, steps_total=steps_total or 1)
|
|
396
405
|
self._task_info[task_id] = task_info
|
|
397
406
|
return task_id
|
|
398
407
|
|
|
@@ -413,7 +422,7 @@ class MultiTaskStatus(AbstractAsyncContextManager):
|
|
|
413
422
|
# Now add to Rich Progress display
|
|
414
423
|
rich_task_id = self._progress.add_task(
|
|
415
424
|
"",
|
|
416
|
-
total=task_info.
|
|
425
|
+
total=task_info.steps_total,
|
|
417
426
|
label=task_info.label,
|
|
418
427
|
task_info=task_info,
|
|
419
428
|
progress_display=None,
|
|
@@ -440,8 +449,9 @@ class MultiTaskStatus(AbstractAsyncContextManager):
|
|
|
440
449
|
async def update(
|
|
441
450
|
self,
|
|
442
451
|
task_id: int,
|
|
452
|
+
state: TaskState | None = None,
|
|
443
453
|
*,
|
|
444
|
-
|
|
454
|
+
steps_done: int | None = None,
|
|
445
455
|
label: str | None = None,
|
|
446
456
|
error_msg: str | None = None,
|
|
447
457
|
) -> None:
|
|
@@ -450,7 +460,8 @@ class MultiTaskStatus(AbstractAsyncContextManager):
|
|
|
450
460
|
|
|
451
461
|
Args:
|
|
452
462
|
task_id: Task ID from add()
|
|
453
|
-
|
|
463
|
+
state: New task state (None = no change)
|
|
464
|
+
steps_done: Steps to advance (None = no change)
|
|
454
465
|
label: New label (None = no change)
|
|
455
466
|
error_msg: Error message to record as retry (None = no retry)
|
|
456
467
|
"""
|
|
@@ -461,6 +472,12 @@ class MultiTaskStatus(AbstractAsyncContextManager):
|
|
|
461
472
|
task_info = self._task_info[task_id]
|
|
462
473
|
rich_task_id = self._rich_task_ids.get(task_id)
|
|
463
474
|
|
|
475
|
+
# Update state if provided
|
|
476
|
+
if state is not None:
|
|
477
|
+
task_info.state = state
|
|
478
|
+
if rich_task_id is not None:
|
|
479
|
+
self._progress.update(rich_task_id, task_info=task_info)
|
|
480
|
+
|
|
464
481
|
# Update label if provided
|
|
465
482
|
if label is not None:
|
|
466
483
|
task_info.label = label
|
|
@@ -468,8 +485,8 @@ class MultiTaskStatus(AbstractAsyncContextManager):
|
|
|
468
485
|
self._progress.update(rich_task_id, label=label, task_info=task_info)
|
|
469
486
|
|
|
470
487
|
# Advance progress if provided
|
|
471
|
-
if
|
|
472
|
-
self._progress.advance(rich_task_id, advance=
|
|
488
|
+
if steps_done is not None and rich_task_id is not None:
|
|
489
|
+
self._progress.advance(rich_task_id, advance=steps_done)
|
|
473
490
|
|
|
474
491
|
# Record retry if error message provided
|
|
475
492
|
if error_msg is not None:
|
|
@@ -511,9 +528,9 @@ class MultiTaskStatus(AbstractAsyncContextManager):
|
|
|
511
528
|
# Task was never started, but we still need to add it to show completion
|
|
512
529
|
rich_task_id = self._progress.add_task(
|
|
513
530
|
"",
|
|
514
|
-
total=task_info.
|
|
531
|
+
total=task_info.steps_total,
|
|
515
532
|
label=task_info.label,
|
|
516
|
-
completed=task_info.
|
|
533
|
+
completed=task_info.steps_total,
|
|
517
534
|
task_info=task_info,
|
|
518
535
|
)
|
|
519
536
|
self._rich_task_ids[task_id] = rich_task_id
|
|
@@ -573,9 +590,9 @@ def test_task_status_with_progress():
|
|
|
573
590
|
settings=StatusSettings(show_progress=True),
|
|
574
591
|
) as status:
|
|
575
592
|
# Traditional progress bar
|
|
576
|
-
download_task = await status.add("Downloading",
|
|
593
|
+
download_task = await status.add("Downloading", steps_total=100)
|
|
577
594
|
for i in range(0, 101, 10):
|
|
578
|
-
await status.update(download_task,
|
|
595
|
+
await status.update(download_task, steps_done=10)
|
|
579
596
|
await asyncio.sleep(0.1)
|
|
580
597
|
await status.finish(download_task, TaskState.COMPLETED)
|
|
581
598
|
|
|
@@ -608,14 +625,14 @@ def test_task_status_mixed():
|
|
|
608
625
|
settings=StatusSettings(show_progress=True, transient=True),
|
|
609
626
|
) as status:
|
|
610
627
|
# Multiple concurrent tasks
|
|
611
|
-
install_task = await status.add("Installing packages",
|
|
628
|
+
install_task = await status.add("Installing packages", steps_total=50)
|
|
612
629
|
test_task = await status.add("Running tests")
|
|
613
630
|
build_task = await status.add("Building project")
|
|
614
631
|
optional_task = await status.add("Optional feature")
|
|
615
632
|
|
|
616
633
|
# Simulate concurrent work
|
|
617
634
|
for i in range(5):
|
|
618
|
-
await status.update(install_task,
|
|
635
|
+
await status.update(install_task, steps_done=10)
|
|
619
636
|
await status.set_progress_display(test_task, f"Test {i + 1}/10")
|
|
620
637
|
await status.set_progress_display(build_task, Text(f"Step {i + 1}", style="blue"))
|
|
621
638
|
await asyncio.sleep(0.2)
|
|
@@ -629,3 +646,35 @@ def test_task_status_mixed():
|
|
|
629
646
|
await status.finish(optional_task, TaskState.SKIPPED, "Feature disabled in config")
|
|
630
647
|
|
|
631
648
|
asyncio.run(_test_impl())
|
|
649
|
+
|
|
650
|
+
|
|
651
|
+
def test_task_status_retry_states():
    """Exercise the WAITING -> RUNNING transitions a task goes through while retrying."""
    print("Testing TaskStatus with retry wait states...")

    # Each entry is one retry cycle: (error recorded, backoff sleep, execution sleep).
    retry_cycles = (
        ("Connection timeout", 1.0, 0.5),
        ("Rate limit exceeded", 1.0, 0.5),
    )

    async def _test_impl():
        settings = StatusSettings(show_progress=False, transient=True)
        async with MultiTaskStatus(settings=settings) as status:
            # Task that will demonstrate retry wait state
            retry_task = await status.add("API call with retries")
            await status.start(retry_task)

            for error_msg, backoff_sec, run_sec in retry_cycles:
                # Record the failure and show the task as waiting during backoff.
                await status.update(retry_task, error_msg=error_msg, state=TaskState.WAITING)
                await asyncio.sleep(backoff_sec)

                # Back to running for the next attempt.
                await status.update(retry_task, state=TaskState.RUNNING)
                await asyncio.sleep(run_sec)

            await status.finish(retry_task, TaskState.COMPLETED)

    asyncio.run(_test_impl())
|
|
@@ -78,13 +78,23 @@ def _tree_links(element, include_internal=False):
|
|
|
78
78
|
|
|
79
79
|
def extract_links(content: str, include_internal=False) -> list[str]:
    """
    Return the links found in Markdown content, each listed once, in
    order of first appearance.

    Raises:
        marko.ParseError: If the markdown content contains invalid syntax that cannot be parsed.
    """
    parsed = marko.parse(content)
    # dict keys keep insertion order, so this drops repeats while leaving
    # every link at the position of its first occurrence.
    return list(dict.fromkeys(_tree_links(parsed, include_internal)))
|
|
88
98
|
|
|
89
99
|
|
|
90
100
|
def extract_file_links(file_path: Path, include_internal=False) -> list[str]:
|
|
@@ -813,11 +823,12 @@ def test_extract_reference_style_links() -> None:
|
|
|
813
823
|
assert len(result) == 2
|
|
814
824
|
|
|
815
825
|
|
|
816
|
-
def
|
|
826
|
+
def test_extract_links_and_dups() -> None:
|
|
817
827
|
"""Test that internal fragment links are excluded by default but included when requested."""
|
|
818
828
|
content = dedent("""
|
|
819
829
|
See [this section](#introduction) and [external link](https://example.com).
|
|
820
830
|
Also check [another section](#conclusion) here.
|
|
831
|
+
Adding a [duplicate](https://example.com).
|
|
821
832
|
""")
|
|
822
833
|
|
|
823
834
|
# Default behavior: exclude internal links
|
kash/web_content/web_extract.py
CHANGED
|
@@ -8,7 +8,7 @@ from kash.web_content.web_extract_justext import extract_text_justext
|
|
|
8
8
|
from kash.web_content.web_page_model import PageExtractor, WebPageData
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
@log_calls(level="
|
|
11
|
+
@log_calls(level="info")
|
|
12
12
|
def fetch_page_content(
|
|
13
13
|
url: Url,
|
|
14
14
|
*,
|
|
@@ -17,19 +17,22 @@ def fetch_page_content(
|
|
|
17
17
|
text_extractor: PageExtractor = extract_text_justext,
|
|
18
18
|
) -> WebPageData:
|
|
19
19
|
"""
|
|
20
|
-
Fetches a URL and extracts the title, description, and content
|
|
21
|
-
|
|
20
|
+
Fetches a URL and extracts the title, description, and content,
|
|
21
|
+
with optional caching.
|
|
22
|
+
|
|
23
|
+
Always uses the content cache for fetching. Cached file path is
|
|
24
|
+
returned in the content, unless `cache` is false, in which case
|
|
25
|
+
the cached content is deleted.
|
|
22
26
|
|
|
23
27
|
Force re-fetching and updating the cache by setting `refetch` to true.
|
|
24
|
-
|
|
25
|
-
in case the cached content is deleted.
|
|
28
|
+
|
|
26
29
|
|
|
27
30
|
For HTML and other text files, uses the `text_extractor` to extract
|
|
28
31
|
clean text and page metadata.
|
|
29
32
|
"""
|
|
30
33
|
expiration_sec = 0 if refetch else None
|
|
31
|
-
|
|
32
|
-
path =
|
|
34
|
+
cache_result = cache_file(url, expiration_sec=expiration_sec)
|
|
35
|
+
path = cache_result.content.path
|
|
33
36
|
format_info = file_format_info(path)
|
|
34
37
|
|
|
35
38
|
content = None
|
|
@@ -40,13 +43,14 @@ def fetch_page_content(
|
|
|
40
43
|
page_data = WebPageData(url)
|
|
41
44
|
|
|
42
45
|
# Add file format info (for both HTML/text and all other file types).
|
|
43
|
-
|
|
44
46
|
page_data.format_info = format_info
|
|
45
47
|
|
|
46
48
|
# Add a thumbnail, if known for this URL.
|
|
47
49
|
page_data.thumbnail_url = thumbnail_url(url)
|
|
48
50
|
|
|
49
|
-
# Return
|
|
51
|
+
# Return whether this is from cache and the local cache path
|
|
52
|
+
# if we will be keeping it.
|
|
53
|
+
page_data.cache_result = cache_result
|
|
50
54
|
if cache:
|
|
51
55
|
page_data.saved_content = path
|
|
52
56
|
else:
|