kash-shell 0.3.13__py3-none-any.whl → 0.3.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,9 @@
1
1
  from kash.config.logger import get_logger
2
2
  from kash.exec import kash_action
3
3
  from kash.exec.preconditions import has_html_body, is_url_resource
4
+ from kash.exec.runtime_settings import current_runtime_settings
4
5
  from kash.model import Format, Item
5
- from kash.model.params_model import common_params
6
+ from kash.model.items_model import ItemType
6
7
  from kash.utils.text_handling.markdownify_utils import markdownify_custom
7
8
  from kash.web_content.file_cache_utils import get_url_html
8
9
  from kash.web_content.web_extract_readabilipy import extract_text_readabilipy
@@ -12,20 +13,22 @@ log = get_logger(__name__)
12
13
 
13
14
  @kash_action(
14
15
  precondition=is_url_resource | has_html_body,
15
- params=common_params("refetch"),
16
16
  mcp_tool=True,
17
17
  )
18
- def markdownify(item: Item, refetch: bool = False) -> Item:
18
+ def markdownify(item: Item) -> Item:
19
19
  """
20
20
  Converts a URL or raw HTML item to Markdown, fetching with the content
21
21
  cache if needed. Also uses readability to clean up the HTML.
22
22
  """
23
23
 
24
+ refetch = current_runtime_settings().refetch
24
25
  expiration_sec = 0 if refetch else None
25
26
  url, html_content = get_url_html(item, expiration_sec=expiration_sec)
26
27
  page_data = extract_text_readabilipy(url, html_content)
27
28
  assert page_data.clean_html
28
29
  markdown_content = markdownify_custom(page_data.clean_html)
29
30
 
30
- output_item = item.derived_copy(format=Format.markdown, body=markdown_content)
31
+ output_item = item.derived_copy(
32
+ type=ItemType.doc, format=Format.markdown, body=markdown_content
33
+ )
31
34
  return output_item
@@ -1,8 +1,8 @@
1
1
  from kash.config.logger import get_logger
2
2
  from kash.exec import kash_action
3
3
  from kash.exec.preconditions import has_html_body, is_url_resource
4
+ from kash.exec.runtime_settings import current_runtime_settings
4
5
  from kash.model import Format, Item
5
- from kash.model.params_model import common_params
6
6
  from kash.web_content.file_cache_utils import get_url_html
7
7
  from kash.web_content.web_extract_readabilipy import extract_text_readabilipy
8
8
 
@@ -11,14 +11,15 @@ log = get_logger(__name__)
11
11
 
12
12
  @kash_action(
13
13
  precondition=is_url_resource | has_html_body,
14
- params=common_params("refetch"),
15
14
  mcp_tool=True,
16
15
  )
17
- def readability(item: Item, refetch: bool = False) -> Item:
16
+ def readability(item: Item) -> Item:
18
17
  """
19
18
  Extracts clean HTML from a raw HTML item.
20
19
  See `markdownify` to also convert to Markdown.
21
20
  """
21
+
22
+ refetch = current_runtime_settings().refetch
22
23
  expiration_sec = 0 if refetch else None
23
24
  locator, html_content = get_url_html(item, expiration_sec=expiration_sec)
24
25
  page_data = extract_text_readabilipy(locator, html_content)
@@ -1,7 +1,7 @@
1
1
  from kash.actions.core.tabbed_webpage_config import tabbed_webpage_config
2
2
  from kash.actions.core.tabbed_webpage_generate import tabbed_webpage_generate
3
3
  from kash.exec import kash_action
4
- from kash.exec.preconditions import has_full_html_page_body, has_html_body, has_simple_text_body
4
+ from kash.exec.preconditions import has_fullpage_html_body, has_html_body, has_simple_text_body
5
5
  from kash.exec_model.args_model import ONE_OR_MORE_ARGS
6
6
  from kash.model import ActionInput, ActionResult, Param
7
7
  from kash.model.items_model import ItemType
@@ -11,7 +11,7 @@ from kash.web_gen.simple_webpage import simple_webpage_render
11
11
 
12
12
  @kash_action(
13
13
  expected_args=ONE_OR_MORE_ARGS,
14
- precondition=(has_html_body | has_simple_text_body) & ~has_full_html_page_body,
14
+ precondition=(has_html_body | has_simple_text_body) & ~has_fullpage_html_body,
15
15
  params=(Param("no_title", "Don't add a title to the page body.", type=bool),),
16
16
  )
17
17
  def render_as_html(input: ActionInput, no_title: bool = False) -> ActionResult:
@@ -1,7 +1,7 @@
1
1
  from kash.actions.core.render_as_html import render_as_html
2
2
  from kash.commands.base.show_command import show
3
3
  from kash.exec import kash_action
4
- from kash.exec.preconditions import has_full_html_page_body, has_html_body, has_simple_text_body
4
+ from kash.exec.preconditions import has_fullpage_html_body, has_html_body, has_simple_text_body
5
5
  from kash.exec_model.args_model import ONE_OR_MORE_ARGS
6
6
  from kash.exec_model.commands_model import Command
7
7
  from kash.exec_model.shell_model import ShellResult
@@ -10,7 +10,7 @@ from kash.model import ActionInput, ActionResult
10
10
 
11
11
  @kash_action(
12
12
  expected_args=ONE_OR_MORE_ARGS,
13
- precondition=(has_html_body | has_simple_text_body) & ~has_full_html_page_body,
13
+ precondition=(has_html_body | has_simple_text_body) & ~has_fullpage_html_body,
14
14
  )
15
15
  def show_webpage(input: ActionInput) -> ActionResult:
16
16
  """
@@ -193,49 +193,68 @@ def download(*urls_or_paths: str, refetch: bool = False) -> ShellResult:
193
193
  """
194
194
  Download a URL or resource. Uses cached content if available, unless `refetch` is true.
195
195
  Inputs can be URLs or paths to URL resources.
196
+ Creates both resource and document versions for text content.
196
197
  """
197
- expiration_sec = 0 if refetch else None
198
-
199
- # TODO: Add option to include frontmatter metadata for text files.
200
198
  ws = current_ws()
201
199
  saved_paths = []
200
+
202
201
  for url_or_path in urls_or_paths:
203
202
  locator = resolve_locator_arg(url_or_path)
204
203
  url: Url | None = None
204
+
205
+ # Get the URL from the locator
205
206
  if not isinstance(locator, Path) and is_url(locator):
206
207
  url = Url(locator)
207
- if isinstance(locator, StorePath):
208
+ elif isinstance(locator, StorePath):
208
209
  url_item = ws.load(locator)
209
210
  if is_url_resource(url_item):
210
211
  url = url_item.url
212
+
211
213
  if not url:
212
214
  raise InvalidInput(f"Not a URL or URL resource: {fmt_loc(locator)}")
213
215
 
216
+ # Handle media URLs differently
214
217
  if is_media_url(url):
215
218
  log.message(
216
219
  "URL is a media URL, so adding as a resource and will cache media: %s", fmt_loc(url)
217
220
  )
218
- store_path = ws.import_item(locator, as_type=ItemType.resource)
221
+ store_path = ws.import_item(url, as_type=ItemType.resource, reimport=refetch)
222
+ saved_paths.append(store_path)
219
223
  media_tools.cache_media(url)
220
224
  else:
221
- log.message("Will cache file and save to workspace: %s", fmt_loc(url))
222
- original_filename = Path(parse_http_url(url).path).name
225
+ # Cache the content first
226
+ expiration_sec = 0 if refetch else None
223
227
  cache_result = cache_file(url, expiration_sec=expiration_sec)
224
- # If available, use the mime type to help set item file extension.
228
+ original_filename = Path(parse_http_url(url).path).name
225
229
  mime_type = cache_result.content.headers and cache_result.content.headers.mime_type
226
- item = Item.from_external_path(
230
+
231
+ # Create a resource item
232
+ resource_item = Item.from_external_path(
227
233
  cache_result.content.path,
228
234
  ItemType.resource,
235
+ url=url,
229
236
  mime_type=mime_type,
230
237
  original_filename=original_filename,
231
238
  )
232
- store_path = ws.save(item)
239
+ store_path = ws.save(resource_item, no_frontmatter=True, no_format=True)
233
240
  saved_paths.append(store_path)
234
241
 
242
+ # Also create a doc version for text content
243
+ if resource_item.format and resource_item.format.supports_frontmatter:
244
+ doc_item = Item.from_external_path(
245
+ cache_result.content.path,
246
+ ItemType.doc,
247
+ url=url,
248
+ mime_type=mime_type,
249
+ original_filename=original_filename,
250
+ )
251
+ doc_store_path = ws.save(doc_item, no_frontmatter=False, no_format=False)
252
+ saved_paths.append(doc_store_path)
253
+
235
254
  print_status(
236
255
  "Downloaded %s %s:\n%s",
237
- len(urls_or_paths),
238
- plural("item", len(urls_or_paths)),
256
+ len(saved_paths),
257
+ plural("item", len(saved_paths)),
239
258
  fmt_lines(saved_paths),
240
259
  )
241
260
  select(*saved_paths)
@@ -483,7 +502,7 @@ def import_item(
483
502
 
484
503
 
485
504
  @kash_command
486
- def import_clipboard(
505
+ def save_clipboard(
487
506
  title: str | None = "pasted_text",
488
507
  type: ItemType = ItemType.resource,
489
508
  format: Format = Format.plaintext,
@@ -518,8 +537,6 @@ def fetch_metadata(*files_or_urls: str, refetch: bool = False) -> ShellResult:
518
537
 
519
538
  Skips items that already have a title and description, unless `refetch` is true.
520
539
  Skips (with a warning) items that are not URL resources.
521
-
522
- :param use_cache: If true, also save page in content cache.
523
540
  """
524
541
  if not files_or_urls:
525
542
  locators = assemble_store_path_args()
@@ -529,12 +546,12 @@ def fetch_metadata(*files_or_urls: str, refetch: bool = False) -> ShellResult:
529
546
  store_paths = []
530
547
  for locator in locators:
531
548
  try:
532
- if isinstance(locator, Path):
533
- raise InvalidInput()
534
549
  fetched_item = fetch_url_metadata(locator, refetch=refetch)
535
550
  store_paths.append(fetched_item.store_path)
536
- except InvalidInput:
537
- log.warning("Not a URL or URL resource, will not fetch metadata: %s", fmt_loc(locator))
551
+ except InvalidInput as e:
552
+ log.warning(
553
+ "Not a URL or URL resource, will not fetch metadata: %s: %s", fmt_loc(locator), e
554
+ )
538
555
 
539
556
  if store_paths:
540
557
  select(*store_paths)
@@ -716,7 +733,7 @@ def reset_ignore_file(append: bool = False) -> None:
716
733
  ignore_path = ws.base_dir / ws.dirs.ignore_file
717
734
  write_ignore(ignore_path, append=append)
718
735
 
719
- log.message("Rewrote kash ignore file: %s", fmt_loc(ignore_path))
736
+ log.message("Rewritten kash ignore file: %s", fmt_loc(ignore_path))
720
737
 
721
738
 
722
739
  @kash_command
kash/exec/action_exec.py CHANGED
@@ -32,7 +32,6 @@ from kash.utils.common.task_stack import task_stack
32
32
  from kash.utils.common.type_utils import not_none
33
33
  from kash.utils.errors import ContentError, InvalidOutput, get_nonfatal_exceptions
34
34
  from kash.workspaces import Selection, current_ws
35
- from kash.workspaces.workspace_importing import import_and_load
36
35
 
37
36
  log = get_logger(__name__)
38
37
 
@@ -49,7 +48,7 @@ def prepare_action_input(*input_args: CommandArg, refetch: bool = False) -> Acti
49
48
 
50
49
  # Ensure input items are already saved in the workspace and load the corresponding items.
51
50
  # This also imports any URLs.
52
- input_items = [import_and_load(ws, arg) for arg in input_args]
51
+ input_items = [ws.import_and_load(arg) for arg in input_args]
53
52
 
54
53
  # URLs should have metadata like a title and be valid, so we fetch them.
55
54
  if input_items:
@@ -383,7 +382,7 @@ def run_action_with_caching(
383
382
 
384
383
  PrintHooks.before_done_message()
385
384
  log.message(
386
- "%s Done: `%s` completed with %s %s",
385
+ "%s Action: `%s` completed with %s %s",
387
386
  EMOJI_SUCCESS,
388
387
  action.name,
389
388
  len(result.items),
@@ -7,7 +7,7 @@ from chopdiff.html import has_timestamp
7
7
 
8
8
  from kash.exec.precondition_registry import kash_precondition
9
9
  from kash.model.items_model import Item, ItemType
10
- from kash.utils.file_utils.file_formats import is_full_html_page
10
+ from kash.utils.file_utils.file_formats import is_fullpage_html
11
11
  from kash.utils.file_utils.file_formats_model import Format
12
12
  from kash.utils.text_handling.markdown_utils import extract_bullet_points
13
13
 
@@ -22,9 +22,14 @@ def is_doc_resource(item: Item) -> bool:
22
22
  return bool(is_resource(item) and item.format and item.format.is_doc)
23
23
 
24
24
 
25
+ @kash_precondition
26
+ def is_markdown_resource(item: Item) -> bool:
27
+ return bool(is_resource(item) and item.format and item.format.is_markdown)
28
+
29
+
25
30
  @kash_precondition
26
31
  def is_html_resource(item: Item) -> bool:
27
- return bool(is_resource(item) and item.format and item.format == Format.html)
32
+ return bool(is_resource(item) and item.format and item.format.is_html)
28
33
 
29
34
 
30
35
  @kash_precondition
@@ -100,8 +105,18 @@ def has_html_body(item: Item) -> bool:
100
105
 
101
106
 
102
107
  @kash_precondition
103
- def has_full_html_page_body(item: Item) -> bool:
104
- return bool(has_html_body(item) and item.body and is_full_html_page(item.body))
108
+ def has_markdown_body(item: Item) -> bool:
109
+ return bool(has_body(item) and item.format and item.format.is_markdown)
110
+
111
+
112
+ @kash_precondition
113
+ def has_markdown_with_html_body(item: Item) -> bool:
114
+ return bool(has_body(item) and item.format and item.format.is_markdown_with_html)
115
+
116
+
117
+ @kash_precondition
118
+ def has_fullpage_html_body(item: Item) -> bool:
119
+ return bool(has_html_body(item) and item.body and is_fullpage_html(item.body))
105
120
 
106
121
 
107
122
  @kash_precondition
@@ -114,6 +129,11 @@ def is_markdown(item: Item) -> bool:
114
129
  return bool(has_body(item) and item.format and item.format.is_markdown)
115
130
 
116
131
 
132
+ @kash_precondition
133
+ def is_markdown_with_html(item: Item) -> bool:
134
+ return bool(has_body(item) and item.format and item.format.is_markdown_with_html)
135
+
136
+
117
137
  @kash_precondition
118
138
  def is_markdown_template(item: Item) -> bool:
119
139
  return is_markdown(item) and contains_curly_vars(item)
@@ -22,7 +22,7 @@ from kash.model.paths_model import StorePath
22
22
  from kash.shell.output.shell_output import PrintHooks
23
23
  from kash.utils.common.format_utils import fmt_loc
24
24
  from kash.utils.common.uniquifier import Uniquifier
25
- from kash.utils.common.url import Locator, Url, is_url
25
+ from kash.utils.common.url import Locator, UnresolvedLocator, Url, is_url
26
26
  from kash.utils.errors import FileExists, FileNotFound, InvalidFilename, SkippableError
27
27
  from kash.utils.file_utils.file_formats_model import Format
28
28
  from kash.utils.file_utils.file_walk import walk_by_dir
@@ -290,7 +290,7 @@ class FileStore(Workspace):
290
290
  elif item_id in self.id_map and self.exists(self.id_map[item_id]):
291
291
  # If this item has an identity and we've saved under that id before, use the same store path.
292
292
  store_path = self.id_map[item_id]
293
- log.warning(
293
+ log.info(
294
294
  "Found existing item with same id:\n%s",
295
295
  fmt_lines([fmt_loc(store_path), item_id]),
296
296
  )
@@ -334,6 +334,7 @@ class FileStore(Workspace):
334
334
  skip_dup_names: bool = False,
335
335
  as_tmp: bool = False,
336
336
  no_format: bool = False,
337
+ no_frontmatter: bool = False,
337
338
  ) -> StorePath:
338
339
  """
339
340
  Save the item. Uses the `store_path` if it's already set or generates a new one.
@@ -342,6 +343,8 @@ class FileStore(Workspace):
342
343
  Unless `no_format` is true, also normalizes body text formatting (for Markdown)
343
344
  and updates the item's body to match.
344
345
 
346
+ If `no_frontmatter` is true, will not add frontmatter metadata to the item.
347
+
345
348
  If `overwrite` is true, will overwrite a file that has the same path.
346
349
 
347
350
  If `as_tmp` is true, will save the item to a temporary file.
@@ -390,9 +393,14 @@ class FileStore(Workspace):
390
393
 
391
394
  # Now save the new item.
392
395
  try:
393
- if item.external_path:
396
+ supports_frontmatter = item.format and item.format.supports_frontmatter
397
+ # For binary or unknown formats or if we're not adding frontmatter, copy the file exactly.
398
+ if item.external_path and (no_frontmatter or not supports_frontmatter):
394
399
  copyfile_atomic(item.external_path, full_path, make_parents=True)
395
400
  else:
401
+ # Save as a text item with frontmatter.
402
+ if item.external_path:
403
+ item.body = Path(item.external_path).read_text()
396
404
  if overwrite and full_path.exists():
397
405
  log.info(
398
406
  "Overwrite is enabled and a previous file exists so will archive it: %s",
@@ -448,7 +456,7 @@ class FileStore(Workspace):
448
456
 
449
457
  def import_item(
450
458
  self,
451
- locator: Locator,
459
+ locator: UnresolvedLocator,
452
460
  *,
453
461
  as_type: ItemType | None = None,
454
462
  reimport: bool = False,
@@ -462,7 +470,10 @@ class FileStore(Workspace):
462
470
  """
463
471
  from kash.web_content.canon_url import canonicalize_url
464
472
 
465
- if is_url(locator):
473
+ if isinstance(locator, StorePath) and not reimport:
474
+ log.info("Store path already imported: %s", fmt_loc(locator))
475
+ return locator
476
+ elif is_url(locator):
466
477
  # Import a URL as a resource.
467
478
  orig_url = Url(str(locator))
468
479
  url = canonicalize_url(orig_url)
@@ -480,9 +491,6 @@ class FileStore(Workspace):
480
491
  else:
481
492
  store_path = self.save(item)
482
493
  return store_path
483
- elif isinstance(locator, StorePath) and not reimport:
484
- log.info("Store path already imported: %s", fmt_loc(locator))
485
- return locator
486
494
  else:
487
495
  # We have a path, possibly outside of or inside of the store.
488
496
  path = Path(locator).resolve()
@@ -553,6 +561,13 @@ class FileStore(Workspace):
553
561
  self.import_item(locator, as_type=as_type, reimport=reimport) for locator in locators
554
562
  ]
555
563
 
564
+ def import_and_load(self, locator: UnresolvedLocator) -> Item:
565
+ """
566
+ Import a locator and return the item.
567
+ """
568
+ store_path = self.import_item(locator)
569
+ return self.load(store_path)
570
+
556
571
  def _filter_selection_paths(self):
557
572
  """
558
573
  Filter out any paths that don't exist from all selections.
@@ -695,14 +710,20 @@ class FileStore(Workspace):
695
710
  dirs_ignored,
696
711
  )
697
712
 
698
- def normalize(self, store_path: StorePath) -> StorePath:
713
+ def normalize(
714
+ self,
715
+ store_path: StorePath,
716
+ *,
717
+ no_format: bool = False,
718
+ no_frontmatter: bool = False,
719
+ ) -> StorePath:
699
720
  """
700
721
  Normalize an item or all items in a folder to make sure contents are in current
701
- format.
722
+ format. This is the same as loading and saving the item.
702
723
  """
703
724
  log.info("Normalizing item: %s", fmt_path(store_path))
704
725
 
705
726
  item = self.load(store_path)
706
- new_store_path = self.save(item)
727
+ new_store_path = self.save(item, no_format=no_format, no_frontmatter=no_frontmatter)
707
728
 
708
729
  return new_store_path
kash/model/items_model.py CHANGED
@@ -181,7 +181,7 @@ class ItemId:
181
181
  item_id = ItemId(item.type, IdType.url, canonicalize_url(item.url))
182
182
  elif item.type == ItemType.concept and item.title:
183
183
  item_id = ItemId(item.type, IdType.concept, canonicalize_concept(item.title))
184
- elif item.source and item.source.cacheable:
184
+ elif item.source and item.source.cacheable and item.source.operation.has_known_inputs:
185
185
  # We know the source of this and if the action was cacheable, we can create
186
186
  # an identity based on the source.
187
187
  item_id = ItemId(item.type, IdType.source, item.source.as_str())
@@ -363,21 +363,24 @@ class Item:
363
363
  *,
364
364
  title: str | None = None,
365
365
  original_filename: str | None = None,
366
+ url: Url | None = None,
366
367
  mime_type: MimeType | None = None,
367
368
  ) -> Item:
368
369
  """
369
370
  Create a resource Item for a file with a format inferred from the file extension
370
371
  or the content. Only sets basic metadata. Does not read the content. Will set
371
372
  `format` and `file_ext` if possible but will leave them as None if unrecognized.
372
- If `mime_type` is provided, it can help determine the file extension.
373
+ If `mime_type` is provided, it can help determine the file extension if the
374
+ extension isn't recognized from the filename or URL.
373
375
  """
374
376
  from kash.file_storage.store_filenames import parse_item_filename
375
- from kash.utils.file_utils.file_formats_model import choose_file_ext, detect_file_format
377
+ from kash.utils.file_utils.file_formats_model import file_format_info
376
378
 
377
379
  # Will raise error for unrecognized file ext.
378
380
  _name, filename_item_type, format, file_ext = parse_item_filename(path)
381
+ format_info = file_format_info(path, suggested_mime_type=mime_type)
379
382
  if not format:
380
- format = detect_file_format(path)
383
+ format = format_info.format
381
384
  if not item_type and filename_item_type:
382
385
  item_type = filename_item_type
383
386
  if not item_type:
@@ -385,9 +388,10 @@ class Item:
385
388
  item_type = (
386
389
  ItemType.doc if format and format.supports_frontmatter else ItemType.resource
387
390
  )
388
- # Do our best to determine a good file extension if it's not already on the filename.
389
- if not file_ext and mime_type:
390
- file_ext = choose_file_ext(path, mime_type)
391
+
392
+ # Try to determine a good file extension if it's not already on the filename.
393
+ if not file_ext:
394
+ file_ext = format_info.suggested_file_ext
391
395
 
392
396
  item = cls(
393
397
  type=item_type,
@@ -396,6 +400,7 @@ class Item:
396
400
  format=format,
397
401
  external_path=str(path),
398
402
  original_filename=original_filename,
403
+ url=url,
399
404
  )
400
405
 
401
406
  # Update modified time from the file system.
@@ -66,6 +66,13 @@ class Input:
66
66
  else:
67
67
  return "[input info missing]"
68
68
 
69
+ @property
70
+ def is_known(self) -> bool:
71
+ """
72
+ Whether the input is known, i.e. we had saved inputs with hashes.
73
+ """
74
+ return bool(self.path and self.hash)
75
+
69
76
  # Inputs are equal if the hashes match (even if the paths have changed).
70
77
 
71
78
  def __hash__(self):
@@ -117,6 +124,13 @@ class Operation:
117
124
 
118
125
  return d
119
126
 
127
+ @property
128
+ def has_known_inputs(self) -> bool:
129
+ """
130
+ Whether the operation has known inputs, i.e. all inputs have hashes.
131
+ """
132
+ return all(arg.is_known for arg in self.arguments)
133
+
120
134
  def summary(self) -> OperationSummary:
121
135
  return OperationSummary(self.action_name)
122
136
 
@@ -23,7 +23,7 @@ from kash.shell.output.shell_output import cprint
23
23
  from kash.utils.common.format_utils import fmt_loc
24
24
  from kash.utils.common.url import as_file_url, is_file_url, is_url
25
25
  from kash.utils.errors import FileNotFound, SetupError
26
- from kash.utils.file_utils.file_formats import is_full_html_page, read_partial_text
26
+ from kash.utils.file_utils.file_formats import is_fullpage_html, read_partial_text
27
27
  from kash.utils.file_utils.file_formats_model import file_format_info
28
28
 
29
29
  log = get_logger(__name__)
@@ -88,7 +88,7 @@ def _detect_view_mode(file_or_url: str) -> ViewMode:
88
88
  path = Path(file_or_url)
89
89
  if path.is_file(): # File or symlink.
90
90
  content = read_partial_text(path)
91
- if content and is_full_html_page(content):
91
+ if content and is_fullpage_html(content):
92
92
  return ViewMode.browser
93
93
 
94
94
  info = file_format_info(path)
kash/utils/common/url.py CHANGED
@@ -47,7 +47,9 @@ def check_if_url(
47
47
  if only_schemes:
48
48
  return result if result.scheme in only_schemes else None
49
49
  else:
50
- return result if result.scheme != "" else None
50
+ # Consider it a URL if the scheme is present and longer than a single character.
51
+ # This helps avoid misinterpreting Windows drive letters (e.g., "C:\foo") as schemes.
52
+ return result if result.scheme and len(result.scheme) > 1 else None
51
53
  except ValueError:
52
54
  return None
53
55
 
@@ -145,6 +147,41 @@ def normalize_url(
145
147
  return Url(normalized_url)
146
148
 
147
149
 
150
+ def is_valid_path(text: UnresolvedLocator) -> bool:
151
+ """
152
+ Sanity check if the input is plausibly a file path, i.e. not a URL or malformed in
153
+ an obvious way. Does not check for existence or OS-specific naming restrictions.
154
+ For a more thorough check there are other more complex options like:
155
+ https://github.com/thombashi/pathvalidate
156
+ """
157
+ if isinstance(text, Path):
158
+ return True
159
+ elif isinstance(text, str):
160
+ path_str = text
161
+ else:
162
+ return False
163
+
164
+ # Check for empty or whitespace-only strings or null characters
165
+ # (never acceptable paths).
166
+ if not path_str or path_str.isspace():
167
+ return False
168
+ if "\0" in path_str:
169
+ return False
170
+
171
+ # Explicitly disallow URLs.
172
+ if is_url(path_str):
173
+ return False
174
+
175
+ # As a final lightweight check, ensure it can be instantiated as a Path object
176
+ # This doesn't validate existence or character restrictions.
177
+ try:
178
+ _ = Path(path_str)
179
+ except (TypeError, ValueError):
180
+ return False
181
+
182
+ return True
183
+
184
+
148
185
  ## Tests
149
186
 
150
187
 
@@ -155,13 +192,19 @@ def test_is_url():
155
192
  assert is_url("ftp://example.com") == True
156
193
  assert is_url("file:///path/to/file") == True
157
194
  assert is_url("file://hostname/path/to/file") == True
158
- assert is_url("invalid-url") == False
159
- assert is_url("www.example.com") == False
160
195
  assert is_url("http://example.com", only_schemes=HTTP_ONLY) == True
161
196
  assert is_url("https://example.com", only_schemes=HTTP_ONLY) == True
197
+
198
+ assert is_url("invalid-url") == False
199
+ assert is_url("www.example.com") == False
162
200
  assert is_url("ftp://example.com", only_schemes=HTTP_ONLY) == False
163
201
  assert is_url("file:///path/to/file", only_schemes=HTTP_ONLY) == False
164
202
 
203
+ assert is_url("www.example.com") is False
204
+ assert is_url("c:\\path\\to\\file") is False
205
+ assert is_url("/foo/bar") is False
206
+ assert is_url("//foo") is False
207
+
165
208
 
166
209
  def test_as_file_url():
167
210
  assert as_file_url("file:///path/to/file") == "file:///path/to/file"
@@ -205,3 +248,37 @@ def test_normalize_url():
205
248
  str(e)
206
249
  == "Scheme 'ftp' not in allowed schemes: ['http', 'https', 'file']: ftp://example.com"
207
250
  )
251
+
252
+
253
+ def test_is_path():
254
+ assert is_valid_path("foo/bar") is True
255
+ assert is_valid_path("/foo/bar") is True
256
+ assert is_valid_path("./foo/bar") is True
257
+ assert is_valid_path("../foo/bar") is True
258
+ assert is_valid_path("foo.txt") is True
259
+ assert is_valid_path(Path("foo/bar")) is True
260
+ assert is_valid_path(Path()) is True
261
+ assert is_valid_path(".") is True
262
+ assert is_valid_path("..") is True
263
+ assert is_valid_path("C:\\Users\\name") is True # Windows-style
264
+ assert is_valid_path("file_with:colon.txt") is True # Valid on POSIX
265
+ assert is_valid_path(Url("relative/path")) is True # Url type with relative content
266
+
267
+ assert is_valid_path("http://example.com") is False
268
+ assert is_valid_path("https://example.com/path") is False
269
+ assert is_valid_path("file:///path/to/file") is False
270
+ assert is_valid_path(Url("http://example.com")) is False
271
+ assert is_valid_path("") is False
272
+ assert is_valid_path(" ") is False
273
+ assert is_valid_path("foo\0bar.txt") is False
274
+ assert is_valid_path(None) is False # pyright: ignore
275
+ assert is_valid_path(123) is False # pyright: ignore
276
+
277
+ # Edge cases
278
+ assert is_valid_path("www.example.com") is True # No scheme
279
+ assert str(Path("")) == "."
280
+ assert str(Path(" ")) == " "
281
+ assert is_valid_path(Path(" ")) is True # A bad idea but allowed
282
+ assert is_valid_path(Path("")) is True
283
+ assert is_valid_path(" ") is False
284
+ assert is_valid_path("") is False
@@ -11,9 +11,10 @@ from kash.config.logger import get_logger
11
11
  log = get_logger(__name__)
12
12
 
13
13
 
14
- def is_full_html_page(content: str) -> bool:
14
+ def is_fullpage_html(content: str) -> bool:
15
15
  """
16
- A full HTML document that is probably best rendered in a browser.
16
+ A full HTML document that is a full page (headers, footers, etc.) and
17
+ so probably best rendered in a browser.
17
18
  """
18
19
  return bool(re.search(r"<!DOCTYPE html>|<html>|<body>|<head>", content[:2048], re.IGNORECASE))
19
20
 
@@ -4,7 +4,7 @@ from dataclasses import dataclass
4
4
  from enum import Enum
5
5
  from pathlib import Path
6
6
 
7
- from kash.utils.common.url import Url, is_file_url, is_url, parse_file_url
7
+ from kash.utils.common.url import is_valid_path
8
8
  from kash.utils.file_utils.file_ext import FileExt
9
9
  from kash.utils.file_utils.file_formats import (
10
10
  MIME_EMPTY,
@@ -143,7 +143,13 @@ class Format(Enum):
143
143
 
144
144
  @property
145
145
  def is_markdown(self) -> bool:
146
- return self in [self.markdown, self.md_html]
146
+ """Is in pure Markdown (no HTML)."""
147
+ return self in [self.markdown]
148
+
149
+ @property
150
+ def is_markdown_with_html(self) -> bool:
151
+ """Is in Markdown with HTML."""
152
+ return self in [self.md_html]
147
153
 
148
154
  @property
149
155
  def is_html(self) -> bool:
@@ -406,15 +412,6 @@ class FileFormatInfo:
406
412
  return self.as_str()
407
413
 
408
414
 
409
- def _guess_format(file_ext: FileExt | None, mime_type: MimeType | None) -> Format | None:
410
- format = None
411
- if file_ext:
412
- format = Format.guess_by_file_ext(file_ext)
413
- if not format and mime_type:
414
- format = Format.from_mime_type(mime_type)
415
- return format
416
-
417
-
418
415
  def guess_format_by_name(path: str | Path) -> Format | None:
419
416
  """
420
417
  Fast guess of file format by the file name only.
@@ -423,22 +420,39 @@ def guess_format_by_name(path: str | Path) -> Format | None:
423
420
  return Format.guess_by_file_ext(file_ext) if file_ext else None
424
421
 
425
422
 
426
- def file_format_info(path: str | Path, always_check_content: bool = False) -> FileFormatInfo:
423
+ def file_format_info(
424
+ path: str | Path,
425
+ suggested_mime_type: MimeType | None = None,
426
+ ) -> FileFormatInfo:
427
427
  """
428
428
  Get info on the file format path and content (file extension and file content).
429
429
  Looks at the file extension first and then the file content if needed.
430
- If `always_check_content` is True, look at the file content even if we
431
- recognize the file extension.
430
+ If `suggested_mime_type` is provided, it will be used as the detected mime type
431
+ instead of detecting it from the file content.
432
432
  """
433
+ if not is_valid_path(path):
434
+ raise ValueError(f"Expected a file path but got: {path!r}")
435
+
433
436
  path = Path(path)
434
437
  file_ext = parse_file_ext(path)
435
- if always_check_content or not file_ext:
438
+ if not suggested_mime_type and not file_ext:
436
439
  # Look at the file content.
437
440
  detected_mime_type = detect_mime_type(path)
441
+ elif suggested_mime_type:
442
+ detected_mime_type = suggested_mime_type
438
443
  else:
439
444
  detected_mime_type = None
440
- format = _guess_format(file_ext, detected_mime_type)
445
+
446
+ # Pick format first by file extension, then by detected mime type.
447
+ format = None
448
+ if file_ext:
449
+ format = Format.guess_by_file_ext(file_ext)
450
+ if not format and detected_mime_type:
451
+ format = Format.from_mime_type(detected_mime_type)
452
+
453
+ # Attempt to canonicalize the mime type to match the format.
441
454
  final_mime_type = format.mime_type if format else detected_mime_type
455
+
442
456
  return FileFormatInfo(file_ext, format, final_mime_type)
443
457
 
444
458
 
@@ -456,35 +470,3 @@ def detect_media_type(filename: str | Path) -> MediaType:
456
470
  fmt = detect_file_format(filename)
457
471
  media_type = fmt.media_type if fmt else MediaType.binary
458
472
  return media_type
459
-
460
-
461
- def choose_file_ext(
462
- url_or_path: Url | Path | str, mime_type: MimeType | None = None
463
- ) -> FileExt | None:
464
- """
465
- Pick a file extension to reflect the type of the content. First tries from any
466
- provided content type (e.g. if this item was just downloaded). Then
467
- recognizes known file extensions on the filename or URL, then tries looking
468
- at the content with libmagic and heuristics, then gives up.
469
- """
470
- if mime_type:
471
- fmt = Format.from_mime_type(mime_type)
472
- if fmt:
473
- return fmt.file_ext
474
-
475
- # First check if it's a known standard extension.
476
- filename_ext = parse_file_ext(url_or_path)
477
- if filename_ext:
478
- return filename_ext
479
-
480
- local_path = None
481
- if isinstance(url_or_path, str) and is_file_url(url_or_path):
482
- local_path = parse_file_url(url_or_path)
483
- elif not is_url(url_or_path):
484
- local_path = Path(url_or_path)
485
-
486
- # If it's local based the extension on the file content.
487
- if local_path:
488
- return file_format_info(local_path).suggested_file_ext
489
-
490
- return None
@@ -10,10 +10,17 @@ from funlog import log_if_modifies
10
10
  from prettyfmt import fmt_path
11
11
  from strif import atomic_output_file, copyfile_atomic
12
12
 
13
- from kash.utils.common.url import Url, is_file_url, is_url, normalize_url, parse_file_url
13
+ from kash.utils.common.url import (
14
+ Url,
15
+ is_file_url,
16
+ is_url,
17
+ is_valid_path,
18
+ normalize_url,
19
+ parse_file_url,
20
+ )
14
21
  from kash.utils.errors import FileNotFound
15
- from kash.utils.file_utils.file_formats import MimeType
16
- from kash.utils.file_utils.file_formats_model import choose_file_ext
22
+ from kash.utils.file_utils.file_formats_model import file_format_info
23
+ from kash.utils.file_utils.filename_parsing import parse_file_ext
17
24
  from kash.web_content.dir_store import DirStore
18
25
  from kash.web_content.web_fetch import HttpHeaders, download_url
19
26
 
@@ -91,9 +98,25 @@ class CacheResult:
91
98
  was_cached: bool
92
99
 
93
100
 
94
- def _suffix_for(cacheable: Cacheable, mime_type: MimeType | None = None) -> str | None:
101
+ def _suffix_for(cacheable: Cacheable) -> str | None:
95
102
  key = cacheable.key if isinstance(cacheable, Loadable) else cacheable
96
- file_ext = choose_file_ext(key, mime_type)
103
+
104
+ # Check for recognized file extensions on URLs and Paths.
105
+ filename_ext = parse_file_ext(str(key))
106
+ if filename_ext:
107
+ return filename_ext.dot_ext
108
+
109
+ # Handle local paths
110
+ if is_file_url(str(key)):
111
+ path = parse_file_url(str(key))
112
+ elif is_valid_path(str(key)):
113
+ path = Path(str(key))
114
+ else:
115
+ # A non-local path with no recognized extension.
116
+ return None
117
+
118
+ # If it's a local file, check the file content too.
119
+ file_ext = file_format_info(path).suggested_file_ext
97
120
  return file_ext.dot_ext if file_ext else None
98
121
 
99
122
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kash-shell
3
- Version: 0.3.13
3
+ Version: 0.3.14
4
4
  Summary: The knowledge agent shell (core)
5
5
  Project-URL: Repository, https://github.com/jlevy/kash-shell
6
6
  Author-email: Joshua Levy <joshua@cal.berkeley.edu>
@@ -4,10 +4,10 @@ kash/actions/__init__.py,sha256=a4pQw8O-Y3q5N4Qg2jUV0xEZLX6d164FQhZ6zizY9fE,1357
4
4
  kash/actions/core/assistant_chat.py,sha256=28G20cSr7Z94cltouTPve5TXY3km0lACrRvpLE27fK8,1837
5
5
  kash/actions/core/chat.py,sha256=yCannBFa0cSpR_in-XSSuMm1x2ZZQUCKmlqzhsUfpOo,2696
6
6
  kash/actions/core/format_markdown_template.py,sha256=ZJbtyTSypPo2ewLiGRSyIpVf711vQMhI_-Ng-FgCs80,2991
7
- kash/actions/core/markdownify.py,sha256=sCfPXQCylQjrVbZxeRR11kZ8CtFw-tMsUSrRT6gimVU,1190
8
- kash/actions/core/readability.py,sha256=C71mHruXDoTn0Y6Q5v5FnUfUn1Q4WVuWQ5PbOGpZEuk,985
9
- kash/actions/core/render_as_html.py,sha256=JdrVvMo3DC1xDQ1SXrTDCLzm3S-Z9V6SSrjG-_z5Stw,1826
10
- kash/actions/core/show_webpage.py,sha256=052HG80kSBP98qITp0gfKJ0G4PxGGS0o0N-L64JxWP0,889
7
+ kash/actions/core/markdownify.py,sha256=KjdUeY4c9EhZ5geQrn22IoBv0P_p62q4zyyOYE0NRHM,1270
8
+ kash/actions/core/readability.py,sha256=ljdB2rOpzfKU2FpEJ2UELIzcdOAWvdUjFsxoHRTE3xo,989
9
+ kash/actions/core/render_as_html.py,sha256=bSyZdX9nZnP33QBdGSzWhInRREWXWayMG2oyiKn4rxw,1824
10
+ kash/actions/core/show_webpage.py,sha256=Ggba9jkx9U-FZOcuL0lkS-SwtPNUyxVsGdeQrqwWs1s,887
11
11
  kash/actions/core/strip_html.py,sha256=FDLN_4CKB11q5cU4NixTf7PGrAq92AjQNbKAdvQDwCY,849
12
12
  kash/actions/core/summarize_as_bullets.py,sha256=Zwr8lNzL77pwpnW_289LQjNBijNDpTPANfFdOJA-PZ4,2070
13
13
  kash/actions/core/tabbed_webpage_config.py,sha256=rIbzEhBTmnkbSiRZC-Rj46T1J6c0jOztiKE9Usa4nsc,980
@@ -34,7 +34,7 @@ kash/commands/help/help_commands.py,sha256=eJTpIhXck123PAUq2k-D3Q6UL6IQ8atOVYurL
34
34
  kash/commands/help/logo.py,sha256=W8SUach9FjoTqpHZwTGS582ry4ZluxbBp86ZCiAtDkY,3505
35
35
  kash/commands/help/welcome.py,sha256=F4QBgj3e1dM9Pf0H4TSzCrkVfXQVKUIl0b6Qmofbdo4,905
36
36
  kash/commands/workspace/selection_commands.py,sha256=yr0fFPlFIJUPHyFni1byXz8UDvYstIw4oRpOMa8iOBo,7428
37
- kash/commands/workspace/workspace_commands.py,sha256=0dyZ2EkThWArW5MfSjGVottgXKtibKXAVfI41NGvsUM,24177
37
+ kash/commands/workspace/workspace_commands.py,sha256=smPNGmY8y7gcmh0hAFOf4GYYMuNAoqkf3kRMJamYcMQ,24768
38
38
  kash/config/__init__.py,sha256=ytly9Typ1mWV4CXfV9G3CIPtPQ02u2rpZ304L3GlFro,148
39
39
  kash/config/capture_output.py,sha256=ud3uUVNuDicHj3mI_nBUBO-VmOrxtBdA3z-I3D1lSCU,2398
40
40
  kash/config/colors.py,sha256=6lqrB2RQYF2OLw-njfOqVHO9Bwiq7bW6K1ROCOAd1EM,9949
@@ -82,7 +82,7 @@ kash/embeddings/embeddings.py,sha256=v6RmrEHsx5PuE3fPrY15RK4fgW0K_VlNWDTjCVr11zY
82
82
  kash/embeddings/text_similarity.py,sha256=BOo9Vcs5oi2Zs5La56uTkPMHo65XSd4qz_yr6GTfUA4,1924
83
83
  kash/exec/__init__.py,sha256=rdSsKzTaXfSZmD5JvmUSSwmpfvl-moNv9PUgtE_WUpQ,1148
84
84
  kash/exec/action_decorators.py,sha256=VOSCnFiev2_DuFoSk0i_moejwM4wJ1j6QfsQd93uetI,16480
85
- kash/exec/action_exec.py,sha256=RXuTvsnkVqnE_PdbFqCWZ94morLUd06folT0lcmwCwk,18563
85
+ kash/exec/action_exec.py,sha256=wndn9WsH9dGIzRjbiNCLfHHKZPlTzlFp-eogDvqkfbI,18500
86
86
  kash/exec/action_registry.py,sha256=numU9pH_W5RgIrYmfi0iYMYy_kLJl6vup8PMrhxAfdc,2627
87
87
  kash/exec/combiners.py,sha256=AJ6wgPUHsmwanObsUw64B83XzU26yuh5t4l7igLn82I,4291
88
88
  kash/exec/command_exec.py,sha256=zc-gWm7kyB5J5Kp8xhULQ9Jj9AL927KkDPXXk-Yr1Bw,1292
@@ -93,7 +93,7 @@ kash/exec/importing.py,sha256=xunmBapeUMNc6Zox7y6e_DZkidyWeouiFZpphajwSzc,1843
93
93
  kash/exec/llm_transforms.py,sha256=p_aLp70VoIgheW4v8uoweeuEVWj06AzQekvn_jM3B-g,4378
94
94
  kash/exec/precondition_checks.py,sha256=HymxL7qm4Yz8V76Um5pKdIRnQ2N-p9rpQQi1fI38bNA,2139
95
95
  kash/exec/precondition_registry.py,sha256=cmp0mUfLS42AbAByDhwGx8GWz9PuZNR7z5rPZW9WQE4,1244
96
- kash/exec/preconditions.py,sha256=yJSQ1MWnejxQHPH4ULb6mEPPsMUK_ViLkUaFMW09z_w,4375
96
+ kash/exec/preconditions.py,sha256=kJXJQwqwsGBmzbrYy8s-soJeY8-gXx5ahbBPSqo7UvY,4965
97
97
  kash/exec/resolve_args.py,sha256=yGU6Jjzn5yyAN9pNZx8Qfc9oBrosFEdazIs5g9pjWTs,4410
98
98
  kash/exec/runtime_settings.py,sha256=aK6nGbZhKSIDVmV6AqV68hQkiaIGWnCiNzHtwwZ5V0w,3960
99
99
  kash/exec/shell_callable_action.py,sha256=x-Hs4EqpsZfKEcwhWkhc27HCIfoI91b-DrbG40BLxRY,4350
@@ -103,7 +103,7 @@ kash/exec_model/commands_model.py,sha256=iM8QhzA0tAas5OwF5liUfHtm45XIH1LcvCviuh3
103
103
  kash/exec_model/script_model.py,sha256=1VG3LhkTmlKzHOYouZ92ZpOSKSCcsz3-tHNcFMQF788,5031
104
104
  kash/exec_model/shell_model.py,sha256=LUhQivbpXlerM-DUzNY7BtctNBbn08Wto8CSSxQDxRU,568
105
105
  kash/file_storage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
106
- kash/file_storage/file_store.py,sha256=_Up5TkgnIekVfPZ4mCyhjMUxLtGL_GsAhcpo1m3-Dj0,28428
106
+ kash/file_storage/file_store.py,sha256=sqhNFZtWDgaJbR8ah-yCYuaVbLKzs7XpAcYRfU9rtss,29425
107
107
  kash/file_storage/item_file_format.py,sha256=YAz7VqyfIoiSLQOoFdWsp-FI_2tTLXAPi8V8QXbo5ag,5475
108
108
  kash/file_storage/metadata_dirs.py,sha256=9AqO3S3SSY1dtvP2iLX--E4ui0VIzXttG8R040otfyg,3820
109
109
  kash/file_storage/persisted_yaml.py,sha256=4-4RkFqdlBUkTOwkdA4vRKUywEE9TaDo13OGaDUyU9M,1309
@@ -164,11 +164,11 @@ kash/model/compound_actions_model.py,sha256=HiDK5wwCu3WwZYHATZoLEguiqwR9V6V296wi
164
164
  kash/model/concept_model.py,sha256=we2qOcy9Mv1q7XPfkDLp_CyO_-8DwAUfUYlpgy_jrFs,1011
165
165
  kash/model/exec_model.py,sha256=IlfvtQyoFRRWhWju7vdXp9J-w_NGcGtL5DhDLy9gRd8,2250
166
166
  kash/model/graph_model.py,sha256=jnctrPiBZ0xwAR8D54JMAJPanA1yZdaxSFQoIpe8anA,2662
167
- kash/model/items_model.py,sha256=OZ88M15qp0m2OnUqu1pvrJrGrP-hANdXUVbCXdKKrqQ,34700
167
+ kash/model/items_model.py,sha256=429FXlEsKxUFCqT_Z5t2zAFcfVEpjOGMdvz7q4hMEtw,34891
168
168
  kash/model/language_list.py,sha256=I3RIbxTseVmPdhExQimimEv18Gmy2ImMbpXe0-_t1Qw,450
169
169
  kash/model/llm_actions_model.py,sha256=a29uXVNfS2CiqvM7HPdC6H9A23rSQQihAideuBLMH8g,2110
170
170
  kash/model/media_model.py,sha256=64Zic4cRjQpgf_-tOuZlZZe59mz_qu0s6OQSU0YlDUI,3357
171
- kash/model/operations_model.py,sha256=dPgccwh6HwWhag_MkhEfEwByuZamcJEFrvq4w4NtrII,6112
171
+ kash/model/operations_model.py,sha256=WmU-xeWGsqMLVN369dQEyVGU8T7G_KyLLsj6YFc5sVw,6517
172
172
  kash/model/params_model.py,sha256=qGhsGvtDQoSqWkrKk9QZZfEh-jO1q2V-s-p6X-F37_M,14939
173
173
  kash/model/paths_model.py,sha256=KDFm7wan7hjObHbnV2rR8-jsyLTVqbKcwFdKeLFRtdM,15889
174
174
  kash/model/preconditions_model.py,sha256=-IfsVR0NkQhq_3hUTXzK2bFYAd--3YjSwUiDKHVQQqk,2887
@@ -195,7 +195,7 @@ kash/shell/ui/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
195
195
  kash/shell/ui/shell_results.py,sha256=mvFHxK_oz3bNfF5_Twt6VqDO44TA1b256Bjf5oco804,4130
196
196
  kash/shell/ui/shell_syntax.py,sha256=1fuDqcCV16AAWwWS4w4iT-tlSnl-Ywdrf68Ge8XIfmQ,751
197
197
  kash/shell/utils/exception_printing.py,sha256=UizjOkBPhW6YbkiFP965BE5FrCwn04MXGDbxyTuyvOk,1908
198
- kash/shell/utils/native_utils.py,sha256=FtIjjB1sOgNifTx1u7nKSAwg9A8wc9-fkACw2_xWnNg,9153
198
+ kash/shell/utils/native_utils.py,sha256=pAiuqqrjfNTesdArSya6CVavKVsuAXOcX3_XAIQrWtE,9151
199
199
  kash/shell/utils/shell_function_wrapper.py,sha256=fgUuVhocYMKLkGJJQJOER5nFMAvM0ZVpfGu7iJPJI9s,7385
200
200
  kash/utils/__init__.py,sha256=4Jl_AtgRADdGORimWhYZwbSfQSpQ6SiexNIZzmbcngI,111
201
201
  kash/utils/errors.py,sha256=2lPL0fxI8pPOiDvjl0j-rvwY8uhmWetsrYYIc2-x1WY,3906
@@ -211,13 +211,13 @@ kash/utils/common/stack_traces.py,sha256=a2NwlK_0xxnjMCDC4LrQu7ueFylF-OImFG3bAAH
211
211
  kash/utils/common/task_stack.py,sha256=XkeBz3BwYY1HxxTqd3f7CulV0s61PePAKw1Irrtvf5o,4536
212
212
  kash/utils/common/type_utils.py,sha256=SJirXhPilQom_-OKkFToDLm_82ZwpjcNjRy8U1HaQ0Q,3829
213
213
  kash/utils/common/uniquifier.py,sha256=75OY4KIVF8u1eoO0FCPbEGTyVpPOtM-0ctoG_s_jahM,3082
214
- kash/utils/common/url.py,sha256=hEDC0ImO3DLvaPRflcmiUZ1wK_Ilsm6_9fLaG23sUfo,6515
214
+ kash/utils/common/url.py,sha256=R_P-CkOUiFxVdo9COcaL7YFvFIoAULj5-XxvmlFLvzo,9416
215
215
  kash/utils/file_formats/chat_format.py,sha256=Onby7Zany1UQSUo_JzLs6MIfmoXViZeOAacRTMVe92M,11818
216
216
  kash/utils/file_utils/__init__.py,sha256=loL_iW0oOZs0mJ5GelBPptBcqzYKSWdsGcHrpRyxitQ,43
217
217
  kash/utils/file_utils/dir_info.py,sha256=HamMr58k_DanTLifj7A2JDxTGWXEZZx2pQuE6Hjcm8g,1856
218
218
  kash/utils/file_utils/file_ext.py,sha256=-H63vlrVI3pfE2Cn_9qF7-QLDaUIu_njc4TieNgAHSY,1860
219
- kash/utils/file_utils/file_formats.py,sha256=tmrmqM5YTxfvlpvqmeOVz0yVRuPxxIAkVZf1-D0fp5Y,4902
220
- kash/utils/file_utils/file_formats_model.py,sha256=pwCkt84xOmt2cvnPk4uB5lB6q9pKy_ARr-o4OE2t8A0,15801
219
+ kash/utils/file_utils/file_formats.py,sha256=vnihRFLl85G1uzpqDc_uiGH9SIvbFTYVszz3srdSSz0,4949
220
+ kash/utils/file_utils/file_formats_model.py,sha256=0rwWlkgMaZu5Ap7IFPt3poA1owdwFCfgGGtvu__15GY,15099
221
221
  kash/utils/file_utils/file_sort_filter.py,sha256=_k1chT3dJl5lSmKA2PW90KaoG4k4zftGdtwWoNEljP4,7136
222
222
  kash/utils/file_utils/file_walk.py,sha256=cpwVDPuaVm95_ZwFJiAdIuZAGhASI3gJ3ZUsCGP75b8,5527
223
223
  kash/utils/file_utils/filename_parsing.py,sha256=drHrH2B9W_5yAbXURNGJxNqj9GmTe8FayH6Gjw9e4-U,4194
@@ -241,7 +241,7 @@ kash/web_content/canon_url.py,sha256=Zv2q7xQdIHBFkxxwyJn3_ME-qqMFRi_fKxE_IgV2Z50
241
241
  kash/web_content/dir_store.py,sha256=BJc-s-RL5CC-GwhFTC_lhLXSMWluPPnLVmVBx-66DiM,3425
242
242
  kash/web_content/file_cache_utils.py,sha256=JRXUCAmrc83iAgdiICU2EYGWcoORflWNl6GAVq-O80I,5529
243
243
  kash/web_content/file_processing.py,sha256=cQC-MnJMM5qG9-y0S4yobkmRi6A75qhHjV6xTwbtYDY,1904
244
- kash/web_content/local_file_cache.py,sha256=zajvWajDx-TYMGn3p8-K4KZOo2C2PQOomQRLbT3808o,8968
244
+ kash/web_content/local_file_cache.py,sha256=PEDKU5VIwhCnSC-HXG4EkO2OzrOUDuuDBMuo3lP2EN0,9466
245
245
  kash/web_content/web_extract.py,sha256=LbuG4AFEeIiXyUrN9CAxX0ret41Fqu_iTJSjIWyk3Bg,2296
246
246
  kash/web_content/web_extract_justext.py,sha256=74HLJBKDGKatwxyRDX6za70bZG9LrVmtj9jLX7UJzg4,2540
247
247
  kash/web_content/web_extract_readabilipy.py,sha256=IT7ET5IoU2-Nf37-Neh6CkKMvLL3WTNVJjq7ZMOx6OM,808
@@ -263,7 +263,6 @@ kash/workspaces/param_state.py,sha256=vT_eGWqg2SRviIM5jqEAauznX2B5Xt2nHHu2oRxTcI
263
263
  kash/workspaces/selections.py,sha256=rEUuQlrQ3C_54bzBSKDTTptgX8oZPqN0Ao4uaXSWA-Q,12003
264
264
  kash/workspaces/source_items.py,sha256=Pwnw3OhjR2IJEMEeHf6hpKloj-ellM5vsY7LgkGevRY,2861
265
265
  kash/workspaces/workspace_dirs.py,sha256=kjuY4t7mSSXq00fZmln7p9TWq4kAZoPTCDM0DG7uEaI,1545
266
- kash/workspaces/workspace_importing.py,sha256=4IJo713Kuoynhd_lcZF9M_DZ0rrMK_IDfhTVgwKmVyQ,1934
267
266
  kash/workspaces/workspace_output.py,sha256=MMg_KumkHKFGc0DOUFaW5ImpgqIfdlsLtvXbLEt1hwI,5692
268
267
  kash/workspaces/workspace_registry.py,sha256=SQt2DZgBEu95Zj9fpy67XdJPgJyKFDCU2laSuiZswNo,2200
269
268
  kash/workspaces/workspaces.py,sha256=kQyS3F57Y9A9xVT_Ss7HzJhDGlI-UXHKvRDnEVkBnik,6764
@@ -280,8 +279,8 @@ kash/xonsh_custom/xonsh_modern_tools.py,sha256=mj_b34LZXfE8MJe9EpDmp5JZ0tDM1biYN
280
279
  kash/xonsh_custom/xonsh_ranking_completer.py,sha256=ZRGiAfoEgqgnlq2-ReUVEaX5oOgW1DQ9WxIv2OJLuTo,5620
281
280
  kash/xontrib/fnm.py,sha256=V2tsOdmIDgbFbZSfMLpsvDIwwJJqiYnOkOySD1cXNXw,3700
282
281
  kash/xontrib/kash_extension.py,sha256=JRRJC3cZSMOl4sSWEdKAQ_dVRMubWaOltKr8G0dWt6Y,1876
283
- kash_shell-0.3.13.dist-info/METADATA,sha256=X_tk6xWJni7Il0DKb_fAD8VCQR4D6y64XFyy5VZNap4,31258
284
- kash_shell-0.3.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
285
- kash_shell-0.3.13.dist-info/entry_points.txt,sha256=SQraWDAo8SqYpthLXThei0mf_hGGyhYBUO-Er_0HcwI,85
286
- kash_shell-0.3.13.dist-info/licenses/LICENSE,sha256=rCh2PsfYeiU6FK_0wb58kHGm_Fj5c43fdcHEexiVzIo,34562
287
- kash_shell-0.3.13.dist-info/RECORD,,
282
+ kash_shell-0.3.14.dist-info/METADATA,sha256=w_L4jxifwPdsDvYqMRJZSbhc3u9bV5mYORRcbXHDj9k,31258
283
+ kash_shell-0.3.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
284
+ kash_shell-0.3.14.dist-info/entry_points.txt,sha256=SQraWDAo8SqYpthLXThei0mf_hGGyhYBUO-Er_0HcwI,85
285
+ kash_shell-0.3.14.dist-info/licenses/LICENSE,sha256=rCh2PsfYeiU6FK_0wb58kHGm_Fj5c43fdcHEexiVzIo,34562
286
+ kash_shell-0.3.14.dist-info/RECORD,,
@@ -1,56 +0,0 @@
1
- from pathlib import Path
2
-
3
- from kash.config.logger import get_logger
4
- from kash.file_storage.file_store import FileStore
5
- from kash.model.items_model import Item, ItemType
6
- from kash.model.paths_model import StorePath
7
- from kash.utils.common.url import Locator, Url, is_url
8
- from kash.utils.errors import InvalidInput
9
- from kash.utils.file_utils.file_formats_model import Format
10
- from kash.web_content.canon_url import canonicalize_url
11
-
12
- # TODO: Clean this up, move into FileStore.
13
-
14
- log = get_logger(__name__)
15
-
16
-
17
- def import_url(ws: FileStore, url: Url) -> Item:
18
- """
19
- Import a URL as a resource. Does not fetch metadata.
20
- """
21
- canon_url = canonicalize_url(url)
22
- log.message(
23
- "Importing URL: %s%s", canon_url, f" canonicalized from {url}" if url != canon_url else ""
24
- )
25
- item = Item(ItemType.resource, url=canon_url, format=Format.url)
26
- # No need to overwrite any resource we already have for the identical URL.
27
- store_path = ws.save(item, skip_dup_names=True)
28
- # Load to fill in any metadata we may already have.
29
- item = ws.load(store_path)
30
- return item
31
-
32
-
33
- def import_and_load(ws: FileStore, locator: Locator | str) -> Item:
34
- """
35
- Ensure that a URL or file path is imported into the workspace and
36
- return the Item.
37
- """
38
-
39
- if isinstance(locator, str) and is_url(locator):
40
- log.message("Importing locator as URL: %r", locator)
41
- item = import_url(ws, Url(locator))
42
- else:
43
- if isinstance(locator, StorePath):
44
- log.info("Locator is in the file store: %r", locator)
45
- # It's already a StorePath.
46
- item = ws.load(locator)
47
- else:
48
- log.info("Importing locator as local path: %r", locator)
49
- path = Path(locator)
50
- if not path.exists():
51
- raise InvalidInput(f"File not found: {path}")
52
-
53
- store_path = ws.import_item(path)
54
- item = ws.load(store_path)
55
-
56
- return item