kash-shell 0.3.18__py3-none-any.whl → 0.3.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,13 +11,10 @@ from kash.web_content.web_extract_readabilipy import extract_text_readabilipy
11
11
  log = get_logger(__name__)
12
12
 
13
13
 
14
- @kash_action(
15
- precondition=is_url_resource | has_html_body,
16
- mcp_tool=True,
17
- )
18
- def markdownify(item: Item) -> Item:
14
+ @kash_action(precondition=is_url_resource | has_html_body, mcp_tool=True)
15
+ def markdownify_html(item: Item) -> Item:
19
16
  """
20
- Converts a URL or raw HTML item to Markdown, fetching with the content
17
+ Converts raw HTML or the URL of an HTML page to Markdown, fetching with the content
21
18
  cache if needed. Also uses readability to clean up the HTML.
22
19
  """
23
20
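For context, here is a minimal sketch (not from the package) of declaring an action in the single-line decorator style shown above. The `kash_action`, `is_url_resource`, and `Item` import paths follow other hunks in this diff; the action body is a placeholder.

```python
# Minimal sketch of declaring a kash action in the style shown above.
# Import paths follow this diff; the body is a placeholder, not real logic.
from kash.exec import kash_action
from kash.exec.preconditions import is_url_resource
from kash.model.items_model import Item


@kash_action(precondition=is_url_resource, mcp_tool=True)
def my_markdownify(item: Item) -> Item:
    """
    Convert an HTML item (or the URL of an HTML page) to Markdown.
    """
    # A real action would fetch and convert the body, returning a new Item.
    return item
```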
 
@@ -23,14 +23,12 @@ from kash.exec import (
23
23
  resolve_locator_arg,
24
24
  )
25
25
  from kash.exec.action_registry import get_all_actions_defaults
26
- from kash.exec.fetch_url_metadata import fetch_url_metadata
26
+ from kash.exec.fetch_url_items import fetch_url_item
27
27
  from kash.exec.precondition_checks import actions_matching_paths
28
28
  from kash.exec.precondition_registry import get_all_preconditions
29
- from kash.exec.preconditions import is_url_resource
30
29
  from kash.exec_model.shell_model import ShellResult
31
30
  from kash.local_server.local_url_formatters import local_url_formatter
32
31
  from kash.media_base import media_tools
33
- from kash.media_base.media_services import is_media_url
34
32
  from kash.model.items_model import Item, ItemType
35
33
  from kash.model.params_model import GLOBAL_PARAMS
36
34
  from kash.model.paths_model import StorePath, fmt_store_path
@@ -54,12 +52,11 @@ from kash.utils.common.format_utils import fmt_loc
54
52
  from kash.utils.common.obj_replace import remove_values
55
53
  from kash.utils.common.parse_key_vals import parse_key_value
56
54
  from kash.utils.common.type_utils import not_none
57
- from kash.utils.common.url import Url, is_url, parse_http_url
55
+ from kash.utils.common.url import Url
58
56
  from kash.utils.errors import InvalidInput
59
57
  from kash.utils.file_formats.chat_format import tail_chat_history
60
58
  from kash.utils.file_utils.dir_info import is_nonempty_dir
61
59
  from kash.utils.file_utils.file_formats_model import Format
62
- from kash.utils.text_handling.doc_normalization import can_normalize
63
60
  from kash.web_content.file_cache_utils import cache_file
64
61
  from kash.workspaces import (
65
62
  current_ws,
@@ -189,85 +186,6 @@ def cache_content(*urls_or_paths: str, refetch: bool = False) -> None:
189
186
  PrintHooks.spacer()
190
187
 
191
188
 
192
- @kash_command
193
- def download(*urls_or_paths: str, refetch: bool = False, no_format: bool = False) -> ShellResult:
194
- """
195
- Download a URL or resource. Uses cached content if available, unless `refetch` is true.
196
- Inputs can be URLs or paths to URL resources.
197
- Creates both resource and document versions for text content.
198
-
199
- :param no_format: If true, do not also normalize Markdown content.
200
- """
201
- ws = current_ws()
202
- saved_paths = []
203
-
204
- for url_or_path in urls_or_paths:
205
- locator = resolve_locator_arg(url_or_path)
206
- url: Url | None = None
207
-
208
- # Get the URL from the locator
209
- if not isinstance(locator, Path) and is_url(locator):
210
- url = Url(locator)
211
- elif isinstance(locator, StorePath):
212
- url_item = ws.load(locator)
213
- if is_url_resource(url_item):
214
- url = url_item.url
215
-
216
- if not url:
217
- raise InvalidInput(f"Not a URL or URL resource: {fmt_loc(locator)}")
218
-
219
- # Handle media URLs differently
220
- if is_media_url(url):
221
- log.message(
222
- "URL is a media URL, so adding as a resource and will cache media: %s", fmt_loc(url)
223
- )
224
- store_path = ws.import_item(url, as_type=ItemType.resource, reimport=refetch)
225
- saved_paths.append(store_path)
226
- media_tools.cache_media(url)
227
- else:
228
- # Cache the content first
229
- expiration_sec = 0 if refetch else None
230
- cache_result = cache_file(url, expiration_sec=expiration_sec)
231
- original_filename = Path(parse_http_url(url).path).name
232
- mime_type = cache_result.content.headers and cache_result.content.headers.mime_type
233
-
234
- # Create a resource item
235
- resource_item = Item.from_external_path(
236
- cache_result.content.path,
237
- ItemType.resource,
238
- url=url,
239
- mime_type=mime_type,
240
- original_filename=original_filename,
241
- )
242
- # For initial content, do not format or add frontmatter.
243
- store_path = ws.save(resource_item, no_frontmatter=True, no_format=True)
244
- saved_paths.append(store_path)
245
- select(store_path)
246
-
247
- # Also create a doc version for text content if we want to normalize formatting.
248
- if resource_item.format and can_normalize(resource_item.format) and not no_format:
249
- doc_item = Item.from_external_path(
250
- cache_result.content.path,
251
- ItemType.doc,
252
- url=url,
253
- mime_type=mime_type,
254
- original_filename=original_filename,
255
- )
256
- # Now use default formatting and frontmatter.
257
- doc_store_path = ws.save(doc_item)
258
- saved_paths.append(doc_store_path)
259
- select(doc_store_path)
260
-
261
- print_status(
262
- "Downloaded %s %s:\n%s",
263
- len(saved_paths),
264
- plural("item", len(saved_paths)),
265
- fmt_lines(saved_paths),
266
- )
267
-
268
- return ShellResult(show_selection=True)
269
-
270
-
271
189
  @kash_command
272
190
  def history(max: int = 30, raw: bool = False) -> None:
273
191
  """
@@ -536,10 +454,14 @@ def save_clipboard(
536
454
 
537
455
 
538
456
  @kash_command
539
- def fetch_metadata(*files_or_urls: str, refetch: bool = False) -> ShellResult:
457
+ def fetch_url(*files_or_urls: str, refetch: bool = False) -> ShellResult:
540
458
  """
541
- Fetch metadata for the given URLs or resources. Imports new URLs and saves back
542
- the fetched metadata for existing resources.
459
+ Fetch content and metadata for the given URLs or resources, saving to the
460
+ current workspace.
461
+
462
+ Imports new URLs and saves back the fetched metadata for existing resources.
463
+ Also saves a resource item with the content of the URL, whether HTML, text, or
464
+ any other type.
543
465
 
544
466
  Skips items that already have a title and description, unless `refetch` is true.
545
467
  Skips (with a warning) items that are not URL resources.
@@ -552,7 +474,7 @@ def fetch_metadata(*files_or_urls: str, refetch: bool = False) -> ShellResult:
552
474
  store_paths = []
553
475
  for locator in locators:
554
476
  try:
555
- fetched_item = fetch_url_metadata(locator, refetch=refetch)
477
+ fetched_item = fetch_url_item(locator, refetch=refetch)
556
478
  store_paths.append(fetched_item.store_path)
557
479
  except InvalidInput as e:
558
480
  log.warning(
@@ -34,7 +34,7 @@ the Python framework, a few core utilities, and the Kash command-line shell.
34
34
  Additional actions for handling more complex tasks like converting documents and
35
35
  transcribing, researching, or annotating videos, are in the
36
36
  [kash-docs](https://github.com/jlevy/kash-docs) and
37
- [kash-media](https://github.com/jlevy/kash-docs) packages, all available on PyPI and
37
+ [kash-media](https://github.com/jlevy/kash-media) packages, all available on PyPI and
38
38
  quick to install via uv.
39
39
 
40
40
  ### Key Concepts
kash/exec/__init__.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from kash.exec.action_decorators import kash_action, kash_action_class
2
2
  from kash.exec.action_exec import SkipItem, prepare_action_input, run_action_with_shell_context
3
3
  from kash.exec.command_registry import kash_command
4
+ from kash.exec.fetch_url_items import fetch_url_item, fetch_url_item_content
4
5
  from kash.exec.importing import import_and_register
5
6
  from kash.exec.llm_transforms import llm_transform_item, llm_transform_str
6
7
  from kash.exec.precondition_registry import kash_precondition
@@ -21,6 +22,8 @@ __all__ = [
21
22
  "prepare_action_input",
22
23
  "run_action_with_shell_context",
23
24
  "kash_command",
25
+ "fetch_url_item",
26
+ "fetch_url_item_content",
24
27
  "kash_runtime",
25
28
  "current_runtime_settings",
26
29
  "import_and_register",
kash/exec/action_exec.py CHANGED
@@ -43,7 +43,7 @@ def prepare_action_input(*input_args: CommandArg, refetch: bool = False) -> Acti
43
43
  URL or file resources, either finding them in the workspace or importing them.
44
44
  Also fetches metadata for URLs if they don't already have title and description.
45
45
  """
46
- from kash.exec.fetch_url_metadata import fetch_url_item_metadata
46
+ from kash.exec.fetch_url_items import fetch_url_item_content
47
47
 
48
48
  ws = current_ws()
49
49
 
@@ -55,7 +55,7 @@ def prepare_action_input(*input_args: CommandArg, refetch: bool = False) -> Acti
55
55
  if input_items:
56
56
  log.message("Assembling metadata for input items:\n%s", fmt_lines(input_items))
57
57
  input_items = [
58
- fetch_url_item_metadata(item, refetch=refetch) if is_url_resource(item) else item
58
+ fetch_url_item_content(item, refetch=refetch) if is_url_resource(item) else item
59
59
  for item in input_items
60
60
  ]
61
61
 
@@ -11,7 +11,9 @@ from kash.utils.errors import InvalidInput
11
11
  log = get_logger(__name__)
12
12
 
13
13
 
14
- def fetch_url_metadata(locator: Url | StorePath, refetch: bool = False) -> Item:
14
+ def fetch_url_item(
15
+ locator: Url | StorePath, *, save_content: bool = True, refetch: bool = False
16
+ ) -> Item:
15
17
  from kash.workspaces import current_ws
16
18
 
17
19
  ws = current_ws()
@@ -26,16 +28,23 @@ def fetch_url_metadata(locator: Url | StorePath, refetch: bool = False) -> Item:
26
28
  else:
27
29
  raise InvalidInput(f"Not a URL or URL resource: {fmt_loc(locator)}")
28
30
 
29
- return fetch_url_item_metadata(item, refetch=refetch)
31
+ return fetch_url_item_content(item, save_content=save_content, refetch=refetch)
30
32
 
31
33
 
32
- def fetch_url_item_metadata(item: Item, refetch: bool = False) -> Item:
34
+ def fetch_url_item_content(item: Item, *, save_content: bool = True, refetch: bool = False) -> Item:
33
35
  """
34
- Fetch metadata for a URL using a media service if we recognize the URL,
35
- and otherwise fetching and extracting it from the web page HTML.
36
+ Fetch content and metadata for a URL, using a media service if we
37
+ recognize it as a known media service. Otherwise, fetch and extract the
38
+ metadata and content from the web page and save it to the URL item.
39
+
40
+ If `save_content` is true, a copy of the content is also saved as
41
+ a resource item.
42
+
43
+ The content item is returned if content was saved. Otherwise, the updated
44
+ URL item is returned.
36
45
  """
37
46
  from kash.web_content.canon_url import canonicalize_url
38
- from kash.web_content.web_extract import fetch_extract
47
+ from kash.web_content.web_extract import fetch_page_content
39
48
  from kash.workspaces import current_ws
40
49
 
41
50
  ws = current_ws()
@@ -54,28 +63,47 @@ def fetch_url_item_metadata(item: Item, refetch: bool = False) -> Item:
54
63
  # Prefer fetching metadata from media using the media service if possible.
55
64
  # Data is cleaner and YouTube for example often blocks regular scraping.
56
65
  media_metadata = get_media_metadata(url)
66
+ url_item: Item | None = None
67
+ content_item: Item | None = None
57
68
  if media_metadata:
58
- fetched_item = Item.from_media_metadata(media_metadata)
69
+ url_item = Item.from_media_metadata(media_metadata)
59
70
  # Preserve and canonicalize any slice suffix on the URL.
60
71
  _base_url, slice = parse_url_slice(item.url)
61
72
  if slice:
62
73
  new_url = add_slice_to_url(media_metadata.url, slice)
63
74
  if new_url != item.url:
64
75
  log.message("Updated URL from metadata and added slice: %s", new_url)
65
- fetched_item.url = new_url
76
+ url_item.url = new_url
66
77
 
67
- fetched_item = item.merged_copy(fetched_item)
78
+ url_item = item.merged_copy(url_item)
68
79
  else:
69
- page_data = fetch_extract(url, refetch=refetch)
70
- fetched_item = item.new_copy_with(
80
+ page_data = fetch_page_content(url, refetch=refetch, cache=save_content)
81
+ url_item = item.new_copy_with(
71
82
  title=page_data.title or item.title,
72
83
  description=page_data.description or item.description,
73
84
  thumbnail_url=page_data.thumbnail_url or item.thumbnail_url,
74
85
  )
86
+ if save_content:
87
+ assert page_data.saved_content
88
+ assert page_data.format_info
89
+ content_item = url_item.new_copy_with(
90
+ external_path=str(page_data.saved_content),
91
+ # Use the original filename, not the local cache filename (which has a hash suffix).
92
+ original_filename=item.get_filename(),
93
+ format=page_data.format_info.format,
94
+ )
95
+ ws.save(content_item)
75
96
 
76
- if not fetched_item.title:
97
+ if not url_item.title:
77
98
  log.warning("Failed to fetch page data: title is missing: %s", item.url)
78
99
 
79
- ws.save(fetched_item)
100
+ # Now save the updated URL item and also the content item if we have one.
101
+ ws.save(url_item)
102
+ assert url_item.store_path
103
+ log.debug("Saved URL item: %s", url_item.fmt_loc())
104
+ if content_item:
105
+ ws.save(content_item)
106
+ assert content_item.store_path
107
+ log.debug("Saved content item: %s", content_item.fmt_loc())
80
108
 
81
- return fetched_item
109
+ return content_item or url_item
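As a rough usage sketch (not from the package), the renamed helper can be called directly once a workspace is active; `fetch_url_item` is exported from `kash.exec` per the `__init__.py` hunk above, and it calls `current_ws()` internally.

```python
# Hedged usage sketch: fetch a URL into the current workspace.
# Assumes a workspace is already set up; the URL is an example.
from kash.exec import fetch_url_item
from kash.utils.common.url import Url

item = fetch_url_item(Url("https://example.com/page.html"), save_content=True)
# Returns the saved content item, or the updated URL item if save_content is False.
print(item.store_path)
```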
@@ -405,6 +405,7 @@ class FileStore(Workspace):
405
405
  # If external path already exists and is within the workspace, the file was
406
406
  # already saved (e.g. by an action that wrote the item directly to the store).
407
407
  external_path = item.external_path and Path(item.external_path).resolve()
408
+ skipped_save = False
408
409
  if external_path and self._is_in_store(external_path):
409
410
  log.info("Item with external_path already saved: %s", fmt_loc(external_path))
410
411
  rel_path = external_path.relative_to(self.base_dir)
@@ -480,12 +481,17 @@ class FileStore(Workspace):
480
481
  )
481
482
  os.unlink(full_path)
482
483
  store_path = old_store_path
484
+ skipped_save = True
483
485
 
484
486
  # Update in-memory store_path only after successful save.
485
487
  item.store_path = str(store_path)
486
488
  self._id_index_item(store_path)
487
489
 
488
- log.message("%s Saved item: %s", EMOJI_SAVED, fmt_loc(store_path))
490
+ if not skipped_save:
491
+ log.message("%s Saved item: %s", EMOJI_SAVED, fmt_loc(store_path))
492
+ else:
493
+ log.info("%s Already saved: %s", EMOJI_SAVED, fmt_loc(store_path))
494
+
489
495
  return store_path
490
496
 
491
497
  @log_calls(level="debug")
@@ -30,6 +30,10 @@ def folder_for_type(item_type: ItemType) -> Path:
30
30
 
31
31
 
32
32
  def join_suffix(base_slug: str, full_suffix: str) -> str:
33
+ """
34
+ Create a store filename by joining a base slug and a full suffix, i.e. a filename
35
+ extension with or without an item type (`.html` or `.resource.html`, for example).
36
+ """
33
37
  return f"{base_slug}.{full_suffix.lstrip('.')}"
34
38
 
35
39
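A quick illustration of the behavior the new docstring describes; the import path is inferred from this package's file layout (`kash/file_storage/store_filenames.py`) and is an assumption.

```python
# join_suffix strips any leading dot from the suffix before joining.
from kash.file_storage.store_filenames import join_suffix

assert join_suffix("my-page", ".resource.html") == "my-page.resource.html"
assert join_suffix("my-page", "html") == "my-page.html"
```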
 
@@ -2,9 +2,9 @@ from collections.abc import Callable
2
2
  from dataclasses import replace
3
3
  from typing import Any
4
4
 
5
- from kash.help.docstring_utils import parse_docstring
6
5
  from kash.model.params_model import ALL_COMMON_PARAMS, Param
7
6
  from kash.utils.common.function_inspect import FuncParam, inspect_function_params
7
+ from kash.utils.common.parse_docstring import parse_docstring
8
8
 
9
9
 
10
10
  def _look_up_param_docs(func: Callable[..., Any], kw_params: list[FuncParam]) -> list[Param]:
kash/help/help_pages.py CHANGED
@@ -3,7 +3,6 @@ from rich.text import Text
3
3
  from kash.config.logger import get_logger
4
4
  from kash.config.text_styles import STYLE_HINT
5
5
  from kash.docs.all_docs import DocSelection, all_docs
6
- from kash.help.docstring_utils import parse_docstring
7
6
  from kash.shell.output.shell_formatting import format_name_and_value
8
7
  from kash.shell.output.shell_output import (
9
8
  PrintHooks,
@@ -12,6 +11,7 @@ from kash.shell.output.shell_output import (
12
11
  print_hrule,
13
12
  print_markdown,
14
13
  )
14
+ from kash.utils.common.parse_docstring import parse_docstring
15
15
 
16
16
  log = get_logger(__name__)
17
17
 
@@ -6,7 +6,6 @@ from kash.docs.all_docs import DocSelection
6
6
  from kash.exec.action_registry import look_up_action_class
7
7
  from kash.exec.command_registry import CommandFunction, look_up_command
8
8
  from kash.help.assistant import assist_preamble, assistance_unstructured
9
- from kash.help.docstring_utils import parse_docstring
10
9
  from kash.help.function_param_info import annotate_param_info
11
10
  from kash.help.help_lookups import look_up_faq
12
11
  from kash.help.tldr_help import tldr_help
@@ -22,6 +21,7 @@ from kash.shell.output.shell_output import (
22
21
  print_help,
23
22
  print_markdown,
24
23
  )
24
+ from kash.utils.common.parse_docstring import parse_docstring
25
25
  from kash.utils.errors import InvalidInput, NoMatch
26
26
  from kash.utils.file_formats.chat_format import ChatHistory, ChatMessage, ChatRole
27
27
 
kash/model/items_model.py CHANGED
@@ -675,9 +675,21 @@ class Item:
675
675
  raise FileFormatError(f"Config item is not YAML: {self.format}: {self}")
676
676
  return from_yaml_string(self.body)
677
677
 
678
+ def get_filename(self) -> str | None:
679
+ """
680
+ Get the store or external path filename of the item, including the
681
+ file extension.
682
+ """
683
+ if self.store_path:
684
+ return Path(self.store_path).name
685
+ elif self.external_path:
686
+ return Path(self.external_path).name
687
+ else:
688
+ return None
689
+
678
690
  def get_file_ext(self) -> FileExt:
679
691
  """
680
- Get or infer file extension.
692
+ Get or infer the base file extension for the item.
681
693
  """
682
694
  if self.file_ext:
683
695
  return self.file_ext
@@ -688,7 +700,8 @@ class Item:
688
700
 
689
701
  def get_full_suffix(self) -> str:
690
702
  """
691
- Get the full file extension suffix (e.g. "note.md") for this item.
703
+ Assemble the full file extension suffix (e.g. "resource.yml") for this item,
704
+ without a leading dot.
692
705
  """
693
706
  if self.type == ItemType.extension:
694
707
  # Python files cannot have more than one . in them.
@@ -892,12 +905,14 @@ class Item:
892
905
 
893
906
  def fmt_loc(self) -> str:
894
907
  """
895
- Formatted store path, external path, or title. For error messages etc.
908
+ Formatted store path, external path, URL, or title. Use for logging etc.
896
909
  """
897
910
  if self.store_path:
898
911
  return fmt_store_path(self.store_path)
899
912
  elif self.external_path:
900
913
  return fmt_loc(self.external_path)
914
+ elif self.url:
915
+ return fmt_loc(self.url)
901
916
  else:
902
917
  return repr(self.pick_title())
903
918
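To make the fallback order of the new `get_filename` helper concrete, here is a small standalone sketch (not the package code) mirroring the logic above: store path first, then external path, otherwise nothing.

```python
# Standalone sketch mirroring Item.get_filename's fallback order.
from pathlib import Path


def filename_of(store_path: str | None, external_path: str | None) -> str | None:
    if store_path:
        return Path(store_path).name
    if external_path:
        return Path(external_path).name
    return None


assert filename_of("resources/page.resource.html", None) == "page.resource.html"
assert filename_of(None, "/tmp/cache/abc123/page.html") == "page.html"
assert filename_of(None, None) is None
```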
 
@@ -0,0 +1,347 @@
1
+ import re
2
+ from dataclasses import dataclass, field
3
+ from textwrap import dedent
4
+
5
+
6
+ @dataclass
7
+ class Docstring:
8
+ """
9
+ A parsed docstring.
10
+ """
11
+
12
+ body: str = ""
13
+ param: dict[str, str] = field(default_factory=dict)
14
+ type: dict[str, str] = field(default_factory=dict)
15
+ returns: str = ""
16
+ rtype: str = ""
17
+
18
+
19
+ def parse_docstring(docstring: str) -> Docstring:
20
+ """
21
+ Parse a docstring in either reStructuredText or Google style format.
22
+
23
+ Supports two formats:
24
+ - reStructuredText style: `:param name: description`, `:type name: type`, etc.
25
+ - Google style: `Args:` section with `name (type): description` format
26
+
27
+ The parser automatically detects which format is used based on the presence
28
+ of `:param` directives or `Args:` sections.
29
+ """
30
+ docstring = dedent(docstring).strip()
31
+
32
+ if not docstring:
33
+ return Docstring()
34
+
35
+ # Detect format based on content
36
+ if ":param " in docstring or ":type " in docstring or ":return" in docstring:
37
+ return _parse_rst_docstring(docstring)
38
+ elif re.search(r"\b(Args|Arguments|Returns?):", docstring):
39
+ return _parse_google_docstring(docstring)
40
+ else:
41
+ # No special formatting, just treat as body
42
+ return Docstring(body=docstring)
43
+
44
+
45
+ def _parse_rst_docstring(docstring: str) -> Docstring:
46
+ """
47
+ Parse reStructuredText-style docstring with :param: and :type: directives.
48
+ """
49
+ lines = docstring.split("\n")
50
+
51
+ result = Docstring()
52
+ body_lines = []
53
+
54
+ for line in lines:
55
+ if line.strip().startswith(":"):
56
+ break
57
+ body_lines.append(line)
58
+
59
+ result.body = "\n".join(body_lines).strip()
60
+ _parse_rst_fields(lines[len(body_lines) :], result)
61
+ return result
62
+
63
+
64
+ def _parse_google_docstring(docstring: str) -> Docstring:
65
+ """
66
+ Parse Google-style docstring with Args: and Returns: sections.
67
+ """
68
+ lines = docstring.split("\n")
69
+ result = Docstring()
70
+
71
+ # Find sections using regex
72
+ sections = {}
73
+ for i, line in enumerate(lines):
74
+ stripped = line.strip()
75
+ if re.match(r"^(Args|Arguments):\s*$", stripped, re.IGNORECASE):
76
+ sections["args"] = i
77
+ elif re.match(r"^Returns?:\s*$", stripped, re.IGNORECASE):
78
+ sections["returns"] = i
79
+
80
+ # Body is everything before the first section
81
+ body_end = min(sections.values()) if sections else len(lines)
82
+ result.body = "\n".join(lines[:body_end]).strip()
83
+
84
+ # Parse each section
85
+ if "args" in sections:
86
+ _parse_google_args_section(lines, sections["args"] + 1, result, sections)
87
+ if "returns" in sections:
88
+ _parse_google_returns_section(lines, sections["returns"] + 1, result, sections)
89
+
90
+ return result
91
+
92
+
93
+ def _parse_google_args_section(
94
+ lines: list[str], start_idx: int, result: Docstring, sections: dict[str, int]
95
+ ) -> None:
96
+ """
97
+ Parse the Args: section of a Google-style docstring.
98
+ """
99
+ # Find the end of this section
100
+ end_idx = len(lines)
101
+ for section_start in sections.values():
102
+ if section_start > start_idx:
103
+ end_idx = min(end_idx, section_start)
104
+
105
+ # Determine base indentation from first non-empty line
106
+ base_indent = None
107
+ for i in range(start_idx, end_idx):
108
+ line = lines[i]
109
+ if line.strip():
110
+ base_indent = len(line) - len(line.lstrip())
111
+ break
112
+
113
+ if base_indent is None:
114
+ return
115
+
116
+ i = start_idx
117
+ while i < end_idx:
118
+ line = lines[i]
119
+
120
+ # Skip empty lines
121
+ if not line.strip():
122
+ i += 1
123
+ continue
124
+
125
+ # Check if this line is at the base indentation level (parameter line)
126
+ line_indent = len(line) - len(line.lstrip())
127
+ if line_indent == base_indent:
128
+ param_line = line.strip()
129
+
130
+ # More robust regex that allows underscores and handles various formats
131
+ # Match: name (type): description
132
+ match = re.match(r"([a-zA-Z_]\w*)\s*\(([^)]+)\)\s*:\s*(.*)", param_line)
133
+ if match:
134
+ name, param_type, description = match.groups()
135
+ result.param[name] = description.strip()
136
+ result.type[name] = param_type.strip()
137
+ else:
138
+ # Match: name: description
139
+ match = re.match(r"([a-zA-Z_]\w*)\s*:\s*(.*)", param_line)
140
+ if match:
141
+ name, description = match.groups()
142
+ result.param[name] = description.strip()
143
+
144
+ # Collect continuation lines (more indented than base)
145
+ i += 1
146
+ continuation_lines = []
147
+ while i < end_idx:
148
+ if not lines[i].strip():
149
+ i += 1
150
+ continue
151
+ next_indent = len(lines[i]) - len(lines[i].lstrip())
152
+ if next_indent > base_indent:
153
+ continuation_lines.append(lines[i].strip())
154
+ i += 1
155
+ else:
156
+ break
157
+
158
+ # Add continuation to the last parameter
159
+ if continuation_lines and result.param:
160
+ last_param = list(result.param.keys())[-1]
161
+ result.param[last_param] += " " + " ".join(continuation_lines)
162
+ else:
163
+ i += 1
164
+
165
+
166
+ def _parse_google_returns_section(
167
+ lines: list[str], start_idx: int, result: Docstring, sections: dict[str, int]
168
+ ) -> None:
169
+ """
170
+ Parse the Returns: section of a Google-style docstring.
171
+ """
172
+ # Find the end of this section
173
+ end_idx = len(lines)
174
+ for section_start in sections.values():
175
+ if section_start > start_idx:
176
+ end_idx = min(end_idx, section_start)
177
+
178
+ # Collect all content from this section
179
+ content_lines = []
180
+ for i in range(start_idx, end_idx):
181
+ line = lines[i]
182
+ if line.strip():
183
+ content_lines.append(line.strip())
184
+
185
+ if content_lines:
186
+ content = " ".join(content_lines).strip()
187
+
188
+ # Try to parse "type: description" format
189
+ if ":" in content and not content.startswith(":"):
190
+ parts = content.split(":", 1)
191
+ if len(parts) == 2 and parts[0].strip():
192
+ result.rtype = parts[0].strip()
193
+ result.returns = parts[1].strip()
194
+ else:
195
+ result.returns = content
196
+ else:
197
+ result.returns = content
198
+
199
+
200
+ def _parse_rst_fields(lines: list[str], result: Docstring) -> None:
201
+ """Parse reStructuredText-style field directives."""
202
+ current_field = None
203
+ current_content = []
204
+
205
+ def save_current_field():
206
+ if current_field and current_content:
207
+ content = " ".join(current_content).strip()
208
+ if current_field.startswith("param "):
209
+ result.param[current_field[6:]] = content
210
+ elif current_field.startswith("type "):
211
+ result.type[current_field[5:]] = content
212
+ elif current_field == "return":
213
+ result.returns = content
214
+ elif current_field == "rtype":
215
+ result.rtype = content
216
+
217
+ for line in lines:
218
+ if line.strip().startswith(":"):
219
+ save_current_field()
220
+ current_field, _, content = line.strip()[1:].partition(":")
221
+ current_content = [content.strip()]
222
+ else:
223
+ current_content.append(line.strip())
224
+
225
+ save_current_field()
226
+
227
+
228
+ ## Tests
229
+
230
+
231
+ def test_parse_rst_docstring():
232
+ rst_docstring = """
233
+ Search for a string in files at the given paths and return their store paths.
234
+ Useful to find all docs or resources matching a string or regex.
235
+
236
+ :param sort: How to sort results. Can be `path` or `score`.
237
+ :param ignore_case: Ignore case when searching.
238
+ :type sort: str
239
+ :type ignore_case: bool
240
+ :return: The search results.
241
+ :rtype: CommandOutput
242
+ """
243
+
244
+ parsed = parse_docstring(rst_docstring)
245
+
246
+ assert (
247
+ parsed.body
248
+ == "Search for a string in files at the given paths and return their store paths.\nUseful to find all docs or resources matching a string or regex."
249
+ )
250
+ assert parsed.param == {
251
+ "sort": "How to sort results. Can be `path` or `score`.",
252
+ "ignore_case": "Ignore case when searching.",
253
+ }
254
+ assert parsed.type == {"sort": "str", "ignore_case": "bool"}
255
+ assert parsed.returns == "The search results."
256
+ assert parsed.rtype == "CommandOutput"
257
+
258
+
259
+ def test_parse_google_docstring_with_types():
260
+ google_docstring = """
261
+ Search for a string in files at the given paths and return their store paths.
262
+ Useful to find all docs or resources matching a string or regex.
263
+
264
+ Args:
265
+ sort (str): How to sort results. Can be `path` or `score`.
266
+ ignore_case (bool): Ignore case when searching.
267
+
268
+ Returns:
269
+ CommandOutput: The search results.
270
+ """
271
+
272
+ parsed = parse_docstring(google_docstring)
273
+
274
+ assert (
275
+ parsed.body
276
+ == "Search for a string in files at the given paths and return their store paths.\nUseful to find all docs or resources matching a string or regex."
277
+ )
278
+ assert parsed.param == {
279
+ "sort": "How to sort results. Can be `path` or `score`.",
280
+ "ignore_case": "Ignore case when searching.",
281
+ }
282
+ assert parsed.type == {"sort": "str", "ignore_case": "bool"}
283
+ assert parsed.returns == "The search results."
284
+ assert parsed.rtype == "CommandOutput"
285
+
286
+
287
+ def test_parse_google_docstring_without_types():
288
+ google_no_types = """
289
+ Process the data.
290
+
291
+ Args:
292
+ data: The input data to process.
293
+ verbose: Whether to print verbose output.
294
+
295
+ Returns:
296
+ The processed result.
297
+ """
298
+
299
+ parsed = parse_docstring(google_no_types)
300
+
301
+ assert parsed.body == "Process the data."
302
+ assert parsed.param == {
303
+ "data": "The input data to process.",
304
+ "verbose": "Whether to print verbose output.",
305
+ }
306
+ assert parsed.type == {}
307
+ assert parsed.returns == "The processed result."
308
+ assert parsed.rtype == ""
309
+
310
+
311
+ def test_parse_simple_docstring():
312
+ simple_docstring = """Some text."""
313
+ parsed = parse_docstring(simple_docstring)
314
+
315
+ assert parsed.body == "Some text."
316
+ assert parsed.param == {}
317
+ assert parsed.type == {}
318
+ assert parsed.returns == ""
319
+ assert parsed.rtype == ""
320
+
321
+
322
+ def test_parse_docstring_with_underscores():
323
+ docstring = """
324
+ Test function.
325
+
326
+ Args:
327
+ some_param (str): A parameter with underscores.
328
+ another_param_name: Another parameter without type.
329
+ """
330
+
331
+ parsed = parse_docstring(docstring)
332
+
333
+ assert parsed.param == {
334
+ "some_param": "A parameter with underscores.",
335
+ "another_param_name": "Another parameter without type.",
336
+ }
337
+ assert parsed.type == {"some_param": "str"}
338
+
339
+
340
+ def test_parse_empty_docstring():
341
+ """Test empty docstring handling."""
342
+ parsed = parse_docstring("")
343
+ assert parsed.body == ""
344
+ assert parsed.param == {}
345
+ assert parsed.type == {}
346
+ assert parsed.returns == ""
347
+ assert parsed.rtype == ""
@@ -1,38 +1,57 @@
1
1
  from funlog import log_calls
2
2
 
3
3
  from kash.utils.common.url import Url
4
+ from kash.utils.file_utils.file_formats_model import file_format_info
4
5
  from kash.web_content.canon_url import thumbnail_url
5
6
  from kash.web_content.file_cache_utils import cache_file
6
7
  from kash.web_content.web_extract_justext import extract_text_justext
7
- from kash.web_content.web_fetch import fetch_url
8
8
  from kash.web_content.web_page_model import PageExtractor, WebPageData
9
9
 
10
10
 
11
11
  @log_calls(level="message")
12
- def fetch_extract(
12
+ def fetch_page_content(
13
13
  url: Url,
14
+ *,
14
15
  refetch: bool = False,
15
- use_cache: bool = True,
16
- extractor: PageExtractor = extract_text_justext,
16
+ cache: bool = True,
17
+ text_extractor: PageExtractor = extract_text_justext,
17
18
  ) -> WebPageData:
18
19
  """
19
20
  Fetches a URL and extracts the title, description, and content.
20
- By default, uses the content cache if available. Can force re-fetching and
21
- updating the cache by setting `refetch` to true.
21
+ Always uses the content cache, at least temporarily.
22
+
23
+ Force re-fetching and updating the cache by setting `refetch` to true.
24
+ The cached file path is returned in the result, unless `cache` is false,
25
+ in which case the cached content is deleted.
26
+
27
+ For HTML and other text files, uses the `text_extractor` to extract
28
+ clean text and page metadata.
22
29
  """
23
30
  expiration_sec = 0 if refetch else None
24
- if use_cache:
25
- path = cache_file(url, expiration_sec=expiration_sec).content.path
26
- with open(path, "rb") as file:
27
- content = file.read()
28
- page_data = extractor(url, content)
31
+
32
+ path = cache_file(url, expiration_sec=expiration_sec).content.path
33
+ format_info = file_format_info(path)
34
+
35
+ content = None
36
+ if format_info.format and format_info.format.is_text:
37
+ content = path.read_bytes()
38
+ page_data = text_extractor(url, content)
29
39
  else:
30
- response = fetch_url(url)
31
- page_data = extractor(url, response.content)
40
+ page_data = WebPageData(url)
32
41
 
33
- # Add a thumbnail, if available.
42
+ # Add file format info (for both HTML/text and all other file types).
43
+
44
+ page_data.format_info = format_info
45
+
46
+ # Add a thumbnail, if known for this URL.
34
47
  page_data.thumbnail_url = thumbnail_url(url)
35
48
 
49
+ # Return the local cache path if we will be keeping it.
50
+ if cache:
51
+ page_data.saved_content = path
52
+ else:
53
+ path.unlink()
54
+
36
55
  return page_data
37
56
 
38
57
 
@@ -53,5 +72,5 @@ if __name__ == "__main__":
53
72
 
54
73
  for url in sample_urls:
55
74
  print(f"URL: {url}")
56
- print(fetch_extract(Url(url)))
75
+ print(fetch_page_content(Url(url)))
57
76
  print()
@@ -5,12 +5,19 @@ from prettyfmt import abbrev_obj
5
5
  from pydantic.dataclasses import dataclass
6
6
 
7
7
  from kash.utils.common.url import Url
8
+ from kash.utils.file_utils.file_formats_model import FileFormatInfo
8
9
 
9
10
 
10
11
  @dataclass
11
12
  class WebPageData:
12
13
  """
13
- Data about a web page, including URL, title and optionally description and extracted content.
14
+ Data about a web page, including URL, title and optionally description and
15
+ extracted content.
16
+
17
+ The `text` field should be a clean text version of the page, if available.
18
+ The `clean_html` field should be a clean HTML version of the page, if available.
19
+ The `saved_content` is optional but can be used to reference the original content,
20
+ especially for large or non-text content.
14
21
  """
15
22
 
16
23
  locator: Url | Path
@@ -19,6 +26,8 @@ class WebPageData:
19
26
  description: str | None = None
20
27
  text: str | None = None
21
28
  clean_html: str | None = None
29
+ saved_content: Path | None = None
30
+ format_info: FileFormatInfo | None = None
22
31
  thumbnail_url: Url | None = None
23
32
 
24
33
  def __repr__(self):
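A hedged sketch of constructing the extended model with the new `saved_content` and `format_info` fields. Field names follow this diff; the `title` keyword and field defaults are assumptions.

```python
# Sketch of building a WebPageData with the new fields.
# Field names come from the diff above; defaults/validation are assumptions.
from pathlib import Path

from kash.utils.common.url import Url
from kash.web_content.web_page_model import WebPageData

page = WebPageData(
    Url("https://example.com/page.html"),
    title="Example page",
    saved_content=Path("/tmp/content_cache/page.html"),
)
print(page.saved_content, page.format_info)
```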
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kash-shell
3
- Version: 0.3.18
3
+ Version: 0.3.20
4
4
  Summary: The knowledge agent shell (core)
5
5
  Project-URL: Repository, https://github.com/jlevy/kash-shell
6
6
  Author-email: Joshua Levy <joshua@cal.berkeley.edu>
@@ -115,7 +115,7 @@ the Python framework, a few core utilities, and the Kash command-line shell.
115
115
  Additional actions for handling more complex tasks like converting documents and
116
116
  transcribing, researching, or annotating videos, are in the
117
117
  [kash-docs](https://github.com/jlevy/kash-docs) and
118
- [kash-media](https://github.com/jlevy/kash-docs) packages, all available on PyPI and
118
+ [kash-media](https://github.com/jlevy/kash-media) packages, all available on PyPI and
119
119
  quick to install via uv.
120
120
 
121
121
  ### Key Concepts
@@ -4,7 +4,7 @@ kash/actions/__init__.py,sha256=a4pQw8O-Y3q5N4Qg2jUV0xEZLX6d164FQhZ6zizY9fE,1357
4
4
  kash/actions/core/assistant_chat.py,sha256=28G20cSr7Z94cltouTPve5TXY3km0lACrRvpLE27fK8,1837
5
5
  kash/actions/core/chat.py,sha256=yCannBFa0cSpR_in-XSSuMm1x2ZZQUCKmlqzhsUfpOo,2696
6
6
  kash/actions/core/format_markdown_template.py,sha256=ZJbtyTSypPo2ewLiGRSyIpVf711vQMhI_-Ng-FgCs80,2991
7
- kash/actions/core/markdownify.py,sha256=KjdUeY4c9EhZ5geQrn22IoBv0P_p62q4zyyOYE0NRHM,1270
7
+ kash/actions/core/markdownify_html.py,sha256=RXsC59rhDoLssRujkS32PETN1zzncIofFO7J3qTrnJc,1277
8
8
  kash/actions/core/minify_html.py,sha256=99r3SjpI2NQP7e5MnMixAiT5lxPx7t2nyJvJi6Yps6w,1365
9
9
  kash/actions/core/readability.py,sha256=ljdB2rOpzfKU2FpEJ2UELIzcdOAWvdUjFsxoHRTE3xo,989
10
10
  kash/actions/core/render_as_html.py,sha256=CIPGKCjUEVNsnXmpqHCUnjGwTfEfOyCXxlYFUN8mahY,1870
@@ -35,7 +35,7 @@ kash/commands/help/help_commands.py,sha256=eJTpIhXck123PAUq2k-D3Q6UL6IQ8atOVYurL
35
35
  kash/commands/help/logo.py,sha256=W8SUach9FjoTqpHZwTGS582ry4ZluxbBp86ZCiAtDkY,3505
36
36
  kash/commands/help/welcome.py,sha256=F4QBgj3e1dM9Pf0H4TSzCrkVfXQVKUIl0b6Qmofbdo4,905
37
37
  kash/commands/workspace/selection_commands.py,sha256=nZzA-H7Pk8kqSJVRlX7j1m6cZX-e0X8isOryDU41vqU,8156
38
- kash/commands/workspace/workspace_commands.py,sha256=ZJ3aPsnQ0FOkaA6stpV4YPEOQRCOKTazbMCIQkk9Cmk,25119
38
+ kash/commands/workspace/workspace_commands.py,sha256=_2TcthGOu-nU9E_-jjf4kba9ldLRA6qe6Do6zV06EKc,21960
39
39
  kash/config/__init__.py,sha256=ytly9Typ1mWV4CXfV9G3CIPtPQ02u2rpZ304L3GlFro,148
40
40
  kash/config/capture_output.py,sha256=ud3uUVNuDicHj3mI_nBUBO-VmOrxtBdA3z-I3D1lSCU,2398
41
41
  kash/config/colors.py,sha256=qUUUE-x8srSp1IdJePNAUtNQbOZKBLix8FeOe-Rxmgg,13421
@@ -61,7 +61,7 @@ kash/docs/markdown/assistant_instructions_template.md,sha256=Fzeledd_nr3bKhvQ1qZ
61
61
  kash/docs/markdown/readme_template.md,sha256=iGx9IjSni1t_9BuYD5d2GgkxkNIkqvE3k78IufHF6Yg,409
62
62
  kash/docs/markdown/warning.md,sha256=bG0T3UFqAkzF8217J8AbIbQ7ftJ_GKMRilIGq9eLdYc,162
63
63
  kash/docs/markdown/welcome.md,sha256=yp_tmGXGIb8043ZDIL97Q1uaKVys7-ArHCprgDlon7w,611
64
- kash/docs/markdown/topics/a1_what_is_kash.md,sha256=zR1UNXSXwbk9vMmSTSyYADRNTxMcuYU6C0mzF7KS8PM,6757
64
+ kash/docs/markdown/topics/a1_what_is_kash.md,sha256=rgVrv6tRXEwdqQ54DAfHP3BSAuq8Ux4wCNeluTwpkhU,6758
65
65
  kash/docs/markdown/topics/a2_installation.md,sha256=DSzaniHjOYPC3soGLPTGOGDVvbiPTROtb3S8zYUCPEs,5736
66
66
  kash/docs/markdown/topics/a3_getting_started.md,sha256=xOMevEXMIpVJvTGuuwI9Cc9sun3tQM3OqCgynSgMpeM,9376
67
67
  kash/docs/markdown/topics/a4_elements.md,sha256=XNJRw-iqnytiIHOAshp1YnUpHM5KBgFAhuOdp_fekxQ,4615
@@ -81,14 +81,14 @@ kash/docs_base/recipes/tldr_standard_commands.sh,sha256=7nPES55aT45HF3eDhQRrEUiW
81
81
  kash/embeddings/cosine.py,sha256=QTWPWUHivXjxCM6APSqij_-4mywM2BVVm0xb0hu7FHA,1587
82
82
  kash/embeddings/embeddings.py,sha256=v6RmrEHsx5PuE3fPrY15RK4fgW0K_VlNWDTjCVr11zY,4451
83
83
  kash/embeddings/text_similarity.py,sha256=BOo9Vcs5oi2Zs5La56uTkPMHo65XSd4qz_yr6GTfUA4,1924
84
- kash/exec/__init__.py,sha256=rdSsKzTaXfSZmD5JvmUSSwmpfvl-moNv9PUgtE_WUpQ,1148
84
+ kash/exec/__init__.py,sha256=Najls8No143yoj_KAaOQgo8ufC2LWCB_DwwEQ-8nDM0,1277
85
85
  kash/exec/action_decorators.py,sha256=VOSCnFiev2_DuFoSk0i_moejwM4wJ1j6QfsQd93uetI,16480
86
- kash/exec/action_exec.py,sha256=k0HtAvpfISzEN6GQE-iMU36EihmwCzuOa679bk7b8jQ,19022
86
+ kash/exec/action_exec.py,sha256=O_4UB_Vt7QRxltviMeBwNIfw9ten06n4fQ39MregacE,19017
87
87
  kash/exec/action_registry.py,sha256=numU9pH_W5RgIrYmfi0iYMYy_kLJl6vup8PMrhxAfdc,2627
88
88
  kash/exec/combiners.py,sha256=AJ6wgPUHsmwanObsUw64B83XzU26yuh5t4l7igLn82I,4291
89
89
  kash/exec/command_exec.py,sha256=zc-gWm7kyB5J5Kp8xhULQ9Jj9AL927KkDPXXk-Yr1Bw,1292
90
90
  kash/exec/command_registry.py,sha256=1s2ogU8b8nqK_AEtslbr1eYrXCGDkeT30UrB7L0BRoM,2027
91
- kash/exec/fetch_url_metadata.py,sha256=-ojwEpOpDNIS5xBSVJ7oHSgoue_ABywlZ2Y2A5fh0Kg,3159
91
+ kash/exec/fetch_url_items.py,sha256=UUj3wrP3adzZHV_Icx7zSB9zRtl0S7EC54Ios8fuvbg,4404
92
92
  kash/exec/history.py,sha256=l2XwHGBR1UgTGSFPSBE9mltmxvjR_5qFFO6d-Z008nc,1208
93
93
  kash/exec/importing.py,sha256=xunmBapeUMNc6Zox7y6e_DZkidyWeouiFZpphajwSzc,1843
94
94
  kash/exec/llm_transforms.py,sha256=p_aLp70VoIgheW4v8uoweeuEVWj06AzQekvn_jM3B-g,4378
@@ -104,22 +104,21 @@ kash/exec_model/commands_model.py,sha256=iM8QhzA0tAas5OwF5liUfHtm45XIH1LcvCviuh3
104
104
  kash/exec_model/script_model.py,sha256=1VG3LhkTmlKzHOYouZ92ZpOSKSCcsz3-tHNcFMQF788,5031
105
105
  kash/exec_model/shell_model.py,sha256=LUhQivbpXlerM-DUzNY7BtctNBbn08Wto8CSSxQDxRU,568
106
106
  kash/file_storage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
- kash/file_storage/file_store.py,sha256=5Hkw_fE-WbJyu8OeZoA8hCpVOnNYclsWY0iotHSnv10,30690
107
+ kash/file_storage/file_store.py,sha256=_OxcExZOQ-ef9Sm2sdD28BSMcQ64KsSikq3CJPUKYUU,30886
108
108
  kash/file_storage/item_file_format.py,sha256=_o2CjWstk_Z__qMr-Inct9wJm2VEUK0GZvF-fDZ8bcc,5377
109
109
  kash/file_storage/metadata_dirs.py,sha256=9AqO3S3SSY1dtvP2iLX--E4ui0VIzXttG8R040otfyg,3820
110
110
  kash/file_storage/persisted_yaml.py,sha256=4-4RkFqdlBUkTOwkdA4vRKUywEE9TaDo13OGaDUyU9M,1309
111
111
  kash/file_storage/store_cache_warmer.py,sha256=cQ_KwxkBPWT3lMmYOCTkXgo7CKaGINns2YzIH32ExSU,1013
112
- kash/file_storage/store_filenames.py,sha256=RmuZ3hHuo95bV9Jv5qtUqN8NdJU8qI_7SYqgc5B5UL4,1720
112
+ kash/file_storage/store_filenames.py,sha256=zvmVQxQy8QOHkUeh-6iFKlBSk0GzqUt9xmmqkh45Zdo,1909
113
113
  kash/help/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
114
  kash/help/assistant.py,sha256=R0XHNi-h51QoQ7rGouD2chrDPGomYaPQUqJdvpjlCs8,11535
115
115
  kash/help/assistant_instructions.py,sha256=jW5XAsmLx8YZMKSDJgWnqo9Vwe7VuiTURQHjKBqr_L8,2549
116
116
  kash/help/assistant_output.py,sha256=9sM-OVLc6eMSOkxyovB88dNlsknFpf8Wz89Zp5PuEA8,1668
117
- kash/help/docstring_utils.py,sha256=80vcEGR05G1wwIfzaKhX5QzU38mkKna-gN6_wTNHjXg,3208
118
- kash/help/function_param_info.py,sha256=IVi6dtRjAWUCxpy9rZ1fRqKLCciK_kuMybTwCZ9XiYg,1692
117
+ kash/help/function_param_info.py,sha256=yGuFLVZoDF1E1YsXqGwJhuY2uJB3R1B1EC77MEMSc-U,1700
119
118
  kash/help/help_embeddings.py,sha256=le7yqxGOtzIU_LUkl-Ef-7WQGuWa0wAOx9umZGva8zg,2787
120
119
  kash/help/help_lookups.py,sha256=0dtuLWEXncqhJCijC98IA9stBDNNcJewt1JYqMLkTx4,2029
121
- kash/help/help_pages.py,sha256=XyV3SN1cvGjpS8xAbZ_X_6Z9hTIH7vXdgK8DCX2cwNg,3886
122
- kash/help/help_printing.py,sha256=NAJa8qfaZGce5NKtUqdyDE8Ey0ag2ywjL2LP8ACT060,6078
120
+ kash/help/help_pages.py,sha256=TaKsE26R-pZTrK4Pa593DK5osdJodFHaVm5pZpjqgaI,3894
121
+ kash/help/help_printing.py,sha256=eZbZdyJC158JiXcEk2zvUmqYbYzbYOpHvxEhC1kIN-Q,6086
123
122
  kash/help/help_types.py,sha256=xo0AXfaNjlMrWp6dkGixo6P61n1tIhFhlvv9sWhNBrI,7909
124
123
  kash/help/recommended_commands.py,sha256=jqc3TjWFBqDJ-iSzXn8vTOplb4uHndwvdAGJfcUV_qs,2486
125
124
  kash/help/tldr_help.py,sha256=bcu__MIF4vYlZEeqQqieGIBcRhNCTK5u8jPV08ObzCI,9654
@@ -165,7 +164,7 @@ kash/model/compound_actions_model.py,sha256=HiDK5wwCu3WwZYHATZoLEguiqwR9V6V296wi
165
164
  kash/model/concept_model.py,sha256=we2qOcy9Mv1q7XPfkDLp_CyO_-8DwAUfUYlpgy_jrFs,1011
166
165
  kash/model/exec_model.py,sha256=IlfvtQyoFRRWhWju7vdXp9J-w_NGcGtL5DhDLy9gRd8,2250
167
166
  kash/model/graph_model.py,sha256=jnctrPiBZ0xwAR8D54JMAJPanA1yZdaxSFQoIpe8anA,2662
168
- kash/model/items_model.py,sha256=B2YYIppOiPX-n5dseMqdp_PPMBJfie-EZ1yZGWeq-iQ,35201
167
+ kash/model/items_model.py,sha256=ZWAsqTHA4p6GSNz3QjPr46LRwY7pbv7d73_KqU3gsO0,35686
169
168
  kash/model/language_list.py,sha256=I3RIbxTseVmPdhExQimimEv18Gmy2ImMbpXe0-_t1Qw,450
170
169
  kash/model/llm_actions_model.py,sha256=a29uXVNfS2CiqvM7HPdC6H9A23rSQQihAideuBLMH8g,2110
171
170
  kash/model/media_model.py,sha256=ZnlZ-FkswbAIGpUAuNqLce1WDZK-WbnwHn2ipg8x7-0,3511
@@ -206,6 +205,7 @@ kash/utils/common/function_inspect.py,sha256=gczPhFlrF4NotkJKw8rDcl1DFlWfHdur_J4
206
205
  kash/utils/common/import_utils.py,sha256=zyCa5sG_vTxzgIgjOS98xAwqkSeCQzN-8UkM6k9ZZOI,4615
207
206
  kash/utils/common/lazyobject.py,sha256=9dmOfSheblOXgo2RRakMwgfPIKdTgtyrlm6dCKAze04,5157
208
207
  kash/utils/common/obj_replace.py,sha256=AuiXptUOnuDNcWDgAJ3jEHkLh89XIqCP_SOkgaVyFIQ,2075
208
+ kash/utils/common/parse_docstring.py,sha256=oM1ecGGySRA3L_poddjReJ_qPY5506Le7E8_CDUrU8k,10922
209
209
  kash/utils/common/parse_key_vals.py,sha256=yZRZIa5GD9SlnBSn2YNZm8PRVKoSJMY8DCmdGujQj_I,2418
210
210
  kash/utils/common/parse_shell_args.py,sha256=UZXTZDbV5m5Jy39jdAQ6W8uilr1TNa0__RqnE8UmQ_M,10604
211
211
  kash/utils/common/stack_traces.py,sha256=a2NwlK_0xxnjMCDC4LrQu7ueFylF-OImFG3bAAHpPwY,1392
@@ -245,11 +245,11 @@ kash/web_content/dir_store.py,sha256=BJc-s-RL5CC-GwhFTC_lhLXSMWluPPnLVmVBx-66DiM
245
245
  kash/web_content/file_cache_utils.py,sha256=JRXUCAmrc83iAgdiICU2EYGWcoORflWNl6GAVq-O80I,5529
246
246
  kash/web_content/file_processing.py,sha256=cQC-MnJMM5qG9-y0S4yobkmRi6A75qhHjV6xTwbtYDY,1904
247
247
  kash/web_content/local_file_cache.py,sha256=PEDKU5VIwhCnSC-HXG4EkO2OzrOUDuuDBMuo3lP2EN0,9466
248
- kash/web_content/web_extract.py,sha256=LbuG4AFEeIiXyUrN9CAxX0ret41Fqu_iTJSjIWyk3Bg,2296
248
+ kash/web_content/web_extract.py,sha256=FLn4LPAQHe79kx9LhSbGNnvoDM59X-AqeyBMkyLdyZo,2847
249
249
  kash/web_content/web_extract_justext.py,sha256=74HLJBKDGKatwxyRDX6za70bZG9LrVmtj9jLX7UJzg4,2540
250
250
  kash/web_content/web_extract_readabilipy.py,sha256=IT7ET5IoU2-Nf37-Neh6CkKMvLL3WTNVJjq7ZMOx6OM,808
251
251
  kash/web_content/web_fetch.py,sha256=J8DLFP1vzp7aScanFq0Bd7xCP6AVL4JgMMBqyRPtZjQ,4720
252
- kash/web_content/web_page_model.py,sha256=9bPuqZxXo6hSUB_llEcz8bs3W1lW0r-Y3Q7pZgknlQU,693
252
+ kash/web_content/web_page_model.py,sha256=aPpgC1fH2z2LTzGJhEDvZgq_mYwgsQIZaDS3UE7v98w,1147
253
253
  kash/web_gen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
254
254
  kash/web_gen/simple_webpage.py,sha256=ks_0ljxCeS2-gAAEaUc1JEnzY3JY0nzqGFiyyqyRuZs,1537
255
255
  kash/web_gen/tabbed_webpage.py,sha256=DiZV48TVvcjOf31g3nzTAtGKpH5Cek1Unksr7Cwcwog,4949
@@ -286,8 +286,8 @@ kash/xonsh_custom/xonsh_modern_tools.py,sha256=mj_b34LZXfE8MJe9EpDmp5JZ0tDM1biYN
286
286
  kash/xonsh_custom/xonsh_ranking_completer.py,sha256=ZRGiAfoEgqgnlq2-ReUVEaX5oOgW1DQ9WxIv2OJLuTo,5620
287
287
  kash/xontrib/fnm.py,sha256=V2tsOdmIDgbFbZSfMLpsvDIwwJJqiYnOkOySD1cXNXw,3700
288
288
  kash/xontrib/kash_extension.py,sha256=FLIMlgR3C_6A1fwKE-Ul0nmmpJSszVPbAriinUyQ8Zg,1896
289
- kash_shell-0.3.18.dist-info/METADATA,sha256=5Lnqsbx7FMfH3GPjaANtJ5cBdONspmqPUf4oxvfeoG0,32585
290
- kash_shell-0.3.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
291
- kash_shell-0.3.18.dist-info/entry_points.txt,sha256=SQraWDAo8SqYpthLXThei0mf_hGGyhYBUO-Er_0HcwI,85
292
- kash_shell-0.3.18.dist-info/licenses/LICENSE,sha256=rCh2PsfYeiU6FK_0wb58kHGm_Fj5c43fdcHEexiVzIo,34562
293
- kash_shell-0.3.18.dist-info/RECORD,,
289
+ kash_shell-0.3.20.dist-info/METADATA,sha256=aImcO_J_PXzx57HrdQOq66nSLoqCp5oBg6__ZzSwN8k,32586
290
+ kash_shell-0.3.20.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
291
+ kash_shell-0.3.20.dist-info/entry_points.txt,sha256=SQraWDAo8SqYpthLXThei0mf_hGGyhYBUO-Er_0HcwI,85
292
+ kash_shell-0.3.20.dist-info/licenses/LICENSE,sha256=rCh2PsfYeiU6FK_0wb58kHGm_Fj5c43fdcHEexiVzIo,34562
293
+ kash_shell-0.3.20.dist-info/RECORD,,
@@ -1,111 +0,0 @@
1
- from dataclasses import field
2
- from textwrap import dedent
3
-
4
- from pydantic.dataclasses import dataclass
5
-
6
-
7
- @dataclass
8
- class Docstring:
9
- body: str = ""
10
- param: dict[str, str] = field(default_factory=dict)
11
- type: dict[str, str] = field(default_factory=dict)
12
- returns: str = ""
13
- rtype: str = ""
14
-
15
-
16
- def parse_docstring(docstring: str) -> Docstring:
17
- """
18
- Parse a reStructuredText-style docstring.
19
- """
20
- # TODO: Support other standard docstring formats too.
21
- docstring = dedent(docstring).strip()
22
-
23
- lines = docstring.split("\n")
24
-
25
- result = Docstring()
26
- body_lines = []
27
-
28
- for line in lines:
29
- if line.strip().startswith(":"):
30
- break
31
- body_lines.append(line)
32
-
33
- result.body = "\n".join(body_lines).strip()
34
-
35
- parse_fields(lines[len(body_lines) :], result)
36
-
37
- return result
38
-
39
-
40
- def parse_fields(lines: list[str], result: Docstring):
41
- current_field = None
42
- current_content = []
43
-
44
- def save_current_field():
45
- if current_field and current_content:
46
- content = " ".join(current_content).strip()
47
- if current_field.startswith("param "):
48
- result.param[current_field[6:]] = content
49
- elif current_field.startswith("type "):
50
- result.type[current_field[5:]] = content
51
- elif current_field == "return":
52
- result.returns = content
53
- elif current_field == "rtype":
54
- result.rtype = content
55
-
56
- for line in lines:
57
- if line.strip().startswith(":"):
58
- save_current_field()
59
- current_field, _, content = line.strip()[1:].partition(":")
60
- current_content = [content.strip()]
61
- else:
62
- current_content.append(line.strip())
63
-
64
- save_current_field()
65
-
66
-
67
- ## Tests
68
-
69
-
70
- def test_parse_docstring():
71
- docstring1 = """
72
- Search for a string in files at the given paths and return their store paths.
73
- Useful to find all docs or resources matching a string or regex.
74
-
75
- :param sort: How to sort results. Can be `path` or `score`.
76
- :param ignore_case: Ignore case when searching.
77
- :type sort: str
78
- :type ignore_case: bool
79
- :return: The search results.
80
- :rtype: CommandOutput
81
- """
82
-
83
- parsed1 = parse_docstring(docstring1)
84
-
85
- print(f"Body: {parsed1.body}")
86
- print(f"Params: {parsed1.param}")
87
- print(f"Types: {parsed1.type}")
88
- print(f"Returns: {parsed1.returns}")
89
- print(f"Return type: {parsed1.rtype}")
90
-
91
- assert (
92
- parsed1.body
93
- == "Search for a string in files at the given paths and return their store paths.\nUseful to find all docs or resources matching a string or regex."
94
- )
95
- assert parsed1.param == {
96
- "sort": "How to sort results. Can be `path` or `score`.",
97
- "ignore_case": "Ignore case when searching.",
98
- }
99
- assert parsed1.type == {"sort": "str", "ignore_case": "bool"}
100
- assert parsed1.returns == "The search results."
101
- assert parsed1.rtype == "CommandOutput"
102
-
103
- docstring2 = """Some text."""
104
-
105
- parsed2 = parse_docstring(docstring2)
106
-
107
- assert parsed2.body == "Some text."
108
- assert parsed2.param == {}
109
- assert parsed2.type == {}
110
- assert parsed2.returns == ""
111
- assert parsed2.rtype == ""