kash-shell 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. kash/actions/core/markdownify.py +5 -4
  2. kash/actions/core/readability.py +4 -4
  3. kash/actions/core/render_as_html.py +8 -6
  4. kash/actions/core/show_webpage.py +2 -2
  5. kash/actions/core/strip_html.py +2 -2
  6. kash/commands/base/basic_file_commands.py +24 -3
  7. kash/commands/base/diff_commands.py +38 -3
  8. kash/commands/base/files_command.py +5 -4
  9. kash/commands/base/reformat_command.py +1 -1
  10. kash/commands/base/show_command.py +1 -1
  11. kash/commands/extras/parse_uv_lock.py +12 -3
  12. kash/commands/workspace/selection_commands.py +1 -1
  13. kash/commands/workspace/workspace_commands.py +62 -16
  14. kash/config/env_settings.py +2 -42
  15. kash/config/logger.py +30 -25
  16. kash/config/logger_basic.py +6 -6
  17. kash/config/settings.py +23 -7
  18. kash/config/setup.py +33 -5
  19. kash/config/text_styles.py +25 -22
  20. kash/docs/load_source_code.py +1 -1
  21. kash/embeddings/cosine.py +12 -4
  22. kash/embeddings/embeddings.py +16 -6
  23. kash/embeddings/text_similarity.py +10 -4
  24. kash/exec/__init__.py +3 -0
  25. kash/exec/action_decorators.py +4 -19
  26. kash/exec/action_exec.py +46 -27
  27. kash/exec/fetch_url_metadata.py +8 -5
  28. kash/exec/importing.py +4 -4
  29. kash/exec/llm_transforms.py +2 -2
  30. kash/exec/preconditions.py +11 -19
  31. kash/exec/runtime_settings.py +134 -0
  32. kash/exec/shell_callable_action.py +5 -3
  33. kash/file_storage/file_store.py +91 -53
  34. kash/file_storage/item_file_format.py +6 -3
  35. kash/file_storage/store_filenames.py +7 -3
  36. kash/help/help_embeddings.py +2 -2
  37. kash/llm_utils/clean_headings.py +1 -1
  38. kash/{text_handling → llm_utils}/custom_sliding_transforms.py +0 -3
  39. kash/llm_utils/init_litellm.py +16 -0
  40. kash/llm_utils/llm_api_keys.py +6 -2
  41. kash/llm_utils/llm_completion.py +12 -5
  42. kash/local_server/__init__.py +1 -1
  43. kash/local_server/local_server_commands.py +2 -1
  44. kash/mcp/__init__.py +1 -1
  45. kash/mcp/mcp_cli.py +3 -2
  46. kash/mcp/mcp_server_commands.py +8 -2
  47. kash/mcp/mcp_server_routes.py +11 -12
  48. kash/media_base/media_cache.py +10 -3
  49. kash/media_base/transcription_deepgram.py +15 -2
  50. kash/model/__init__.py +1 -1
  51. kash/model/actions_model.py +9 -54
  52. kash/model/exec_model.py +79 -0
  53. kash/model/items_model.py +131 -81
  54. kash/model/operations_model.py +38 -15
  55. kash/model/paths_model.py +2 -0
  56. kash/shell/output/shell_output.py +10 -8
  57. kash/shell/shell_main.py +2 -2
  58. kash/shell/ui/shell_results.py +2 -1
  59. kash/shell/utils/exception_printing.py +2 -2
  60. kash/utils/common/format_utils.py +0 -14
  61. kash/utils/common/import_utils.py +46 -18
  62. kash/utils/common/task_stack.py +4 -15
  63. kash/utils/errors.py +14 -9
  64. kash/utils/file_utils/file_formats_model.py +61 -26
  65. kash/utils/file_utils/file_sort_filter.py +10 -3
  66. kash/utils/file_utils/filename_parsing.py +41 -16
  67. kash/{text_handling → utils/text_handling}/doc_normalization.py +23 -13
  68. kash/utils/text_handling/escape_html_tags.py +156 -0
  69. kash/{text_handling → utils/text_handling}/markdown_utils.py +82 -4
  70. kash/utils/text_handling/markdownify_utils.py +87 -0
  71. kash/{text_handling → utils/text_handling}/unified_diffs.py +1 -44
  72. kash/web_content/file_cache_utils.py +42 -34
  73. kash/web_content/local_file_cache.py +29 -12
  74. kash/web_content/web_extract.py +1 -1
  75. kash/web_content/web_extract_readabilipy.py +4 -2
  76. kash/web_content/web_fetch.py +42 -7
  77. kash/web_content/web_page_model.py +2 -1
  78. kash/web_gen/simple_webpage.py +1 -1
  79. kash/web_gen/templates/base_styles.css.jinja +139 -16
  80. kash/web_gen/templates/simple_webpage.html.jinja +1 -1
  81. kash/workspaces/__init__.py +12 -3
  82. kash/workspaces/selections.py +2 -2
  83. kash/workspaces/workspace_dirs.py +58 -0
  84. kash/workspaces/workspace_importing.py +2 -2
  85. kash/workspaces/workspace_output.py +2 -2
  86. kash/workspaces/workspaces.py +26 -90
  87. kash/xonsh_custom/load_into_xonsh.py +4 -2
  88. {kash_shell-0.3.11.dist-info → kash_shell-0.3.13.dist-info}/METADATA +4 -4
  89. {kash_shell-0.3.11.dist-info → kash_shell-0.3.13.dist-info}/RECORD +93 -89
  90. kash/shell/utils/argparse_utils.py +0 -20
  91. kash/utils/lang_utils/inflection.py +0 -18
  92. kash/{text_handling → utils/text_handling}/markdown_render.py +0 -0
  93. {kash_shell-0.3.11.dist-info → kash_shell-0.3.13.dist-info}/WHEEL +0 -0
  94. {kash_shell-0.3.11.dist-info → kash_shell-0.3.13.dist-info}/entry_points.txt +0 -0
  95. {kash_shell-0.3.11.dist-info → kash_shell-0.3.13.dist-info}/licenses/LICENSE +0 -0
kash/media_base/media_cache.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import os
2
+ from functools import cache
2
3
  from pathlib import Path
3
4
 
4
5
  from prettyfmt import fmt_lines, fmt_path
@@ -11,7 +12,6 @@ from kash.media_base.media_services import (
11
12
  download_media_by_service,
12
13
  get_media_services,
13
14
  )
14
- from kash.media_base.transcription_deepgram import deepgram_transcribe_audio
15
15
  from kash.utils.common.format_utils import fmt_loc
16
16
  from kash.utils.common.url import Url, as_file_url, is_url
17
17
  from kash.utils.errors import FileNotFound, InvalidInput, UnexpectedError
@@ -22,7 +22,14 @@ log = get_logger(__name__)
22
22
 
23
23
  # FIXME: Hard-coded dependency for now. Would be better to make it settable.
24
24
  # transcribe_audio = whisper_transcribe_audio_small
25
- transcribe_audio = deepgram_transcribe_audio
25
+
26
+
27
+ @cache
28
+ def get_transcriber():
29
+ from kash.media_base.transcription_deepgram import deepgram_transcribe_audio
30
+
31
+ transcribe_audio = deepgram_transcribe_audio
32
+ return transcribe_audio
26
33
 
27
34
 
28
35
  # For simplicity we assume all audio is converted to mp3.
@@ -83,7 +90,7 @@ class MediaCache(DirStore):
83
90
  url,
84
91
  fmt_path(downsampled_audio_file),
85
92
  )
86
- transcript = transcribe_audio(downsampled_audio_file, language=language)
93
+ transcript = get_transcriber()(downsampled_audio_file, language=language)
87
94
  self._write_transcript(url, transcript)
88
95
  return transcript
89
96
 
kash/media_base/transcription_deepgram.py CHANGED
@@ -1,8 +1,10 @@
1
+ from __future__ import annotations
2
+
1
3
  from os.path import getsize
2
4
  from pathlib import Path
5
+ from typing import TYPE_CHECKING
3
6
 
4
7
  from clideps.env_vars.dotenv_utils import load_dotenv_paths
5
- from deepgram import ListenRESTClient, PrerecordedResponse
6
8
  from httpx import Timeout
7
9
 
8
10
  from kash.config.logger import CustomLogger, get_logger
@@ -10,6 +12,9 @@ from kash.config.settings import global_settings
10
12
  from kash.media_base.transcription_format import SpeakerSegment, format_speaker_segments
11
13
  from kash.utils.errors import ApiError, ContentError
12
14
 
15
+ if TYPE_CHECKING:
16
+ from deepgram import PrerecordedResponse
17
+
13
18
  log: CustomLogger = get_logger(__name__)
14
19
 
15
20
 
@@ -19,7 +24,15 @@ def deepgram_transcribe_raw(
19
24
  """
20
25
  Transcribe an audio file using Deepgram and return the raw response.
21
26
  """
22
- from deepgram import ClientOptionsFromEnv, DeepgramClient, FileSource, PrerecordedOptions
27
+ # Slow import, do lazily.
28
+ from deepgram import (
29
+ ClientOptionsFromEnv,
30
+ DeepgramClient,
31
+ FileSource,
32
+ ListenRESTClient,
33
+ PrerecordedOptions,
34
+ PrerecordedResponse,
35
+ )
23
36
 
24
37
  size = getsize(audio_file_path)
25
38
  log.info(
kash/model/__init__.py CHANGED
@@ -20,7 +20,6 @@ from kash.model.actions_model import (
20
20
  Action,
21
21
  ActionInput,
22
22
  ActionResult,
23
- ExecContext,
24
23
  LLMOptions,
25
24
  PathOp,
26
25
  PathOpType,
@@ -33,6 +32,7 @@ from kash.model.compound_actions_model import (
33
32
  look_up_actions,
34
33
  )
35
34
  from kash.model.concept_model import Concept, canonicalize_concept, normalize_concepts
35
+ from kash.model.exec_model import ExecContext
36
36
  from kash.model.graph_model import GraphData, Link, Node
37
37
  from kash.model.items_model import (
38
38
  SLUG_MAX_LEN,
kash/model/actions_model.py CHANGED
@@ -4,7 +4,6 @@ from abc import ABC, abstractmethod
4
4
  from dataclasses import Field as DataclassField
5
5
  from dataclasses import field, replace
6
6
  from enum import Enum
7
- from pathlib import Path
8
7
  from textwrap import dedent
9
8
  from typing import Any, TypeVar, cast
10
9
 
@@ -20,10 +19,10 @@ from typing_extensions import override
20
19
  from kash.config.logger import get_logger
21
20
  from kash.exec_model.args_model import NO_ARGS, ONE_ARG, ArgCount, ArgType, Signature
22
21
  from kash.exec_model.shell_model import ShellResult
23
- from kash.file_storage.file_store import FileStore
24
22
  from kash.llm_utils import LLM, LLMName
25
23
  from kash.llm_utils.llm_messages import Message, MessageTemplate
26
- from kash.model.items_model import UNTITLED, Item, ItemType, State
24
+ from kash.model.exec_model import ExecContext
25
+ from kash.model.items_model import UNTITLED, Item, ItemType
27
26
  from kash.model.operations_model import Operation, Source
28
27
  from kash.model.params_model import (
29
28
  ALL_COMMON_PARAMS,
@@ -38,7 +37,6 @@ from kash.model.preconditions_model import Precondition
38
37
  from kash.utils.common.parse_key_vals import format_key_value
39
38
  from kash.utils.common.type_utils import not_none
40
39
  from kash.utils.errors import InvalidDefinition, InvalidInput
41
- from kash.workspaces.workspaces import get_ws
42
40
 
43
41
  log = get_logger(__name__)
44
42
 
@@ -64,53 +62,6 @@ class ActionInput:
64
62
  return ActionInput(items=[])
65
63
 
66
64
 
67
- @dataclass(frozen=True)
68
- class ExecContext:
69
- """
70
- An action and its context for execution. This is a good place for settings
71
- that apply to any action and are bothersome to pass as parameters.
72
- """
73
-
74
- action: Action
75
- """The action being executed."""
76
-
77
- workspace_dir: Path
78
- """The workspace directory in which the action is being executed."""
79
-
80
- rerun: bool = False
81
- """If True, always run actions, even cacheable ones that have results."""
82
-
83
- refetch: bool = False
84
- """If True, will refetch items even if they are already in the content caches."""
85
-
86
- override_state: State | None = None
87
- """If specified, override the state of result items. Useful to mark items as transient."""
88
-
89
- tmp_output: bool = False
90
- """If True, will save output items to a temporary file."""
91
-
92
- no_format: bool = False
93
- """If True, will not normalize the output item's body text formatting (for Markdown)."""
94
-
95
- @property
96
- def workspace(self) -> FileStore:
97
- return get_ws(self.workspace_dir)
98
-
99
- @property
100
- def runtime_options(self) -> dict[str, str]:
101
- """Return non-default runtime options."""
102
- opts: dict[str, str] = {}
103
- # Only these two settings directly affect the output:
104
- if self.no_format:
105
- opts["no_format"] = "true"
106
- if self.override_state:
107
- opts["override_state"] = self.override_state.name
108
- return opts
109
-
110
- def __repr__(self):
111
- return abbrev_obj(self, field_max_len=80)
112
-
113
-
114
65
  class PathOpType(Enum):
115
66
  archive = "archive"
116
67
  select = "select"
@@ -139,6 +90,9 @@ class ActionResult:
139
90
  replaces_input: bool = False
140
91
  """If True, a hint to archive the input items."""
141
92
 
93
+ overwrite: bool = False
94
+ """If True, will not pick unique output paths to save to, overwriting existing files of the same name."""
95
+
142
96
  skip_duplicates: bool = False
143
97
  """If True, do not save duplicate items (based on identity)."""
144
98
 
@@ -365,8 +319,8 @@ class Action(ABC):
365
319
  """
366
320
  Declaration sanity checks.
367
321
  """
368
- if not self.name or not self.description:
369
- raise InvalidDefinition("Action must have a name and description")
322
+ if not self.name:
323
+ raise InvalidDefinition("Action must have a name")
370
324
 
371
325
  for param in self.params:
372
326
  if not self.has_param(param.name):
@@ -535,7 +489,7 @@ class Action(ABC):
535
489
  log.info("Ignoring parameter for action `%s`: `%s`", self.name, param_name)
536
490
 
537
491
  if overrides:
538
- log.message(
492
+ log.info(
539
493
  "Overriding parameters for action `%s`:\n%s",
540
494
  self.name,
541
495
  fmt_lines(overrides),
@@ -677,3 +631,4 @@ class PerItemAction(Action, ABC):
677
631
 
678
632
  # Handle circular dependency in Python dataclasses.
679
633
  rebuild_dataclass(Item) # pyright: ignore
634
+ rebuild_dataclass(ExecContext) # pyright: ignore
kash/model/exec_model.py ADDED
@@ -0,0 +1,79 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from typing import TYPE_CHECKING
5
+
6
+ from prettyfmt import abbrev_obj
7
+ from pydantic.dataclasses import dataclass
8
+
9
+ from kash.config.logger import get_logger
10
+ from kash.model.items_model import State
11
+
12
+ if TYPE_CHECKING:
13
+ from kash.file_storage.file_store import FileStore
14
+ from kash.model.actions_model import Action
15
+
16
+
17
+ log = get_logger(__name__)
18
+
19
+
20
+ @dataclass(frozen=True)
21
+ class RuntimeSettings:
22
+ """
23
+ Workspace and other runtime settings that may be set across runs of
24
+ one or more actions.
25
+ """
26
+
27
+ workspace_dir: Path
28
+ """The workspace directory in which the action is being executed."""
29
+
30
+ rerun: bool = False
31
+ """If True, always run actions, even cacheable ones that have results."""
32
+
33
+ refetch: bool = False
34
+ """If True, will refetch items even if they are already in the content caches."""
35
+
36
+ override_state: State | None = None
37
+ """If specified, override the state of result items. Useful to mark items as transient."""
38
+
39
+ tmp_output: bool = False
40
+ """If True, will save output items to a temporary file."""
41
+
42
+ no_format: bool = False
43
+ """If True, will not normalize the output item's body text formatting (for Markdown)."""
44
+
45
+ @property
46
+ def workspace(self) -> FileStore:
47
+ from kash.workspaces.workspaces import get_ws
48
+
49
+ return get_ws(self.workspace_dir)
50
+
51
+ @property
52
+ def non_default_options(self) -> dict[str, str]:
53
+ """
54
+ Summarize non-default runtime options as a dict.
55
+ """
56
+ opts: dict[str, str] = {}
57
+ # Only these two settings directly affect the output:
58
+ if self.no_format:
59
+ opts["no_format"] = "true"
60
+ if self.override_state:
61
+ opts["override_state"] = self.override_state.name
62
+ return opts
63
+
64
+ def __repr__(self):
65
+ return abbrev_obj(self, field_max_len=80)
66
+
67
+
68
+ @dataclass(frozen=True)
69
+ class ExecContext:
70
+ """
71
+ An action and its context for execution. This is a good place for settings
72
+ that apply to any action and are bothersome to pass as parameters.
73
+ """
74
+
75
+ action: Action
76
+ """The action being executed."""
77
+
78
+ settings: RuntimeSettings
79
+ """The workspace and other run-time settings for the action."""
kash/model/items_model.py CHANGED
@@ -24,15 +24,17 @@ from kash.model.concept_model import canonicalize_concept
24
24
  from kash.model.media_model import MediaMetadata
25
25
  from kash.model.operations_model import OperationSummary, Source
26
26
  from kash.model.paths_model import StorePath, fmt_store_path
27
- from kash.text_handling.markdown_render import markdown_to_html
28
27
  from kash.utils.common.format_utils import fmt_loc, html_to_plaintext, plaintext_to_html
29
28
  from kash.utils.common.url import Locator, Url
30
29
  from kash.utils.errors import FileFormatError
31
30
  from kash.utils.file_formats.chat_format import ChatHistory
31
+ from kash.utils.file_utils.file_formats import MimeType
32
32
  from kash.utils.file_utils.file_formats_model import FileExt, Format
33
+ from kash.utils.text_handling.markdown_render import markdown_to_html
34
+ from kash.utils.text_handling.markdown_utils import first_heading
33
35
 
34
36
  if TYPE_CHECKING:
35
- from kash.model.actions_model import ExecContext
37
+ from kash.model.exec_model import ExecContext
36
38
  from kash.workspaces import Workspace
37
39
 
38
40
  log = get_logger(__name__)
@@ -178,9 +180,7 @@ class ItemId:
178
180
  if item.type == ItemType.resource and item.format == Format.url and item.url:
179
181
  item_id = ItemId(item.type, IdType.url, canonicalize_url(item.url))
180
182
  elif item.type == ItemType.concept and item.title:
181
- item_id = ItemId(
182
- item.type, IdType.concept, canonicalize_concept(item.title)
183
- )
183
+ item_id = ItemId(item.type, IdType.concept, canonicalize_concept(item.title))
184
184
  elif item.source and item.source.cacheable:
185
185
  # We know the source of this and if the action was cacheable, we can create
186
186
  # an identity based on the source.
@@ -281,11 +281,9 @@ class Item:
281
281
  """
282
282
  item_dict = {**item_dict, **kwargs}
283
283
 
284
- info_prefix = (
285
- f"{fmt_store_path(item_dict['store_path'])}: "
286
- if "store_path" in item_dict
287
- else ""
288
- )
284
+ info_prefix = ""
285
+ if "store_path" in item_dict and item_dict["store_path"]:
286
+ info_prefix = f"{fmt_store_path(item_dict['store_path'])}: "
289
287
 
290
288
  # Metadata formats might change over time so it's important to gracefully handle issues.
291
289
  def set_field(key: str, default: Any, cls_: type[T]) -> T:
@@ -314,9 +312,7 @@ class Item:
314
312
  body = item_dict.get("body")
315
313
  history = [OperationSummary(**op) for op in item_dict.get("history", [])]
316
314
  relations = (
317
- ItemRelations(**item_dict["relations"])
318
- if "relations" in item_dict
319
- else ItemRelations()
315
+ ItemRelations(**item_dict["relations"]) if "relations" in item_dict else ItemRelations()
320
316
  )
321
317
  store_path = item_dict.get("store_path")
322
318
 
@@ -334,9 +330,7 @@ class Item:
334
330
  ]
335
331
  all_fields = [f.name for f in cls.__dataclass_fields__.values()]
336
332
  allowed_fields = [f for f in all_fields if f not in excluded_fields]
337
- other_metadata = {
338
- key: value for key, value in item_dict.items() if key in allowed_fields
339
- }
333
+ other_metadata = {key: value for key, value in item_dict.items() if key in allowed_fields}
340
334
  unexpected_metadata = {
341
335
  key: value for key, value in item_dict.items() if key not in all_fields
342
336
  }
@@ -366,15 +360,19 @@ class Item:
366
360
  cls,
367
361
  path: Path | str,
368
362
  item_type: ItemType | None = None,
363
+ *,
369
364
  title: str | None = None,
365
+ original_filename: str | None = None,
366
+ mime_type: MimeType | None = None,
370
367
  ) -> Item:
371
368
  """
372
369
  Create a resource Item for a file with a format inferred from the file extension
373
370
  or the content. Only sets basic metadata. Does not read the content. Will set
374
371
  `format` and `file_ext` if possible but will leave them as None if unrecognized.
372
+ If `mime_type` is provided, it can help determine the file extension.
375
373
  """
376
374
  from kash.file_storage.store_filenames import parse_item_filename
377
- from kash.utils.file_utils.file_formats_model import detect_file_format
375
+ from kash.utils.file_utils.file_formats_model import choose_file_ext, detect_file_format
378
376
 
379
377
  # Will raise error for unrecognized file ext.
380
378
  _name, filename_item_type, format, file_ext = parse_item_filename(path)
@@ -385,16 +383,19 @@ class Item:
385
383
  if not item_type:
386
384
  # Default to doc for general text files and resource for everything else.
387
385
  item_type = (
388
- ItemType.doc
389
- if format and format.supports_frontmatter
390
- else ItemType.resource
386
+ ItemType.doc if format and format.supports_frontmatter else ItemType.resource
391
387
  )
388
+ # Do our best to determine a good file extension if it's not already on the filename.
389
+ if not file_ext and mime_type:
390
+ file_ext = choose_file_ext(path, mime_type)
391
+
392
392
  item = cls(
393
393
  type=item_type,
394
394
  title=title,
395
395
  file_ext=file_ext,
396
396
  format=format,
397
397
  external_path=str(path),
398
+ original_filename=original_filename,
398
399
  )
399
400
 
400
401
  # Update modified time from the file system.
@@ -438,11 +439,9 @@ class Item:
438
439
  if not self.format:
439
440
  raise ValueError(f"Item has no format: {self}")
440
441
  if self.type.expects_body and self.format.has_body and not self.body:
441
- raise ValueError(
442
- f"Item type `{self.type.value}` is text but has no body: {self}"
443
- )
442
+ raise ValueError(f"Item type `{self.type.value}` is text but has no body: {self}")
444
443
 
445
- def absolute_path(self, ws: "Workspace | None" = None) -> Path: # noqa: UP037
444
+ def absolute_path(self, ws: Workspace | None = None) -> Path:
446
445
  """
447
446
  Get the absolute path to the item. Throws `ValueError` if the item has no
448
447
  store path. If no workspace is provided, uses the current workspace.
@@ -493,9 +492,7 @@ class Item:
493
492
  return {k: serialize(v) for k, v in v.items()}
494
493
  elif isinstance(v, Enum):
495
494
  return v.value
496
- elif hasattr(
497
- v, "as_dict"
498
- ): # Handle Operation or any object with as_dict method.
495
+ elif hasattr(v, "as_dict"): # Handle Operation or any object with as_dict method.
499
496
  return v.as_dict()
500
497
  elif is_dataclass(v) and not isinstance(v, type):
501
498
  # Handle Python and Pydantic dataclasses.
@@ -520,49 +517,87 @@ class Item:
520
517
 
521
518
  return item_dict
522
519
 
523
- def display_title(self) -> str:
520
+ def filename_stem(self) -> str | None:
524
521
  """
525
- A display title for this item. Same as abbrev_title() but will fall back
526
- to the filename if it is available.
522
+ If the item has an existing or previous filename, return its stem,
523
+ for use in picking new filenames.
527
524
  """
528
- display_title = self.title
529
- if not display_title and self.store_path:
530
- display_title = Path(self.store_path).name
531
- if not display_title:
532
- display_title = self.abbrev_title()
533
- return display_title
525
+ from kash.file_storage.store_filenames import parse_item_filename
534
526
 
535
- def abbrev_title(self, max_len: int = 100, add_ops_suffix: bool = True) -> str:
527
+ # Prefer original to external, e.g. if we know the original but the external might
528
+ # be a cache filename.
529
+ path = self.store_path or self.original_filename or self.external_path
530
+ if path:
531
+ path_name, _item_type, _format, _file_ext = parse_item_filename(Path(path).name)
532
+ else:
533
+ path_name = None
534
+ return path_name
535
+
536
+ def slug_name(self, max_len: int = SLUG_MAX_LEN, prefer_title: bool = False) -> str:
537
+ """
538
+ Get a readable slugified name for this item, either from a previous filename
539
+ or from slugifying the title or content. May not be unique.
540
+ """
541
+ filename_stem = self.filename_stem()
542
+ if filename_stem and not prefer_title:
543
+ return slugify_snake(filename_stem)
544
+ else:
545
+ return slugify_snake(self.abbrev_title(max_len=max_len, add_ops_suffix=True))
546
+
547
+ def default_filename(self) -> str:
536
548
  """
537
- Get or infer a title for this item, falling back to the filename, URL,
538
- description, or finally body text.
539
- Optionally, include the last operation as a parenthetical at the end of the title.
549
+ Get the default filename for an item based on slugifying its title or other
550
+ metadata. May not be unique.
540
551
  """
541
- # Special case for URLs with no title..
552
+ from kash.file_storage.store_filenames import join_suffix
553
+
554
+ slug = self.slug_name()
555
+ full_suffix = self.get_full_suffix()
556
+ return join_suffix(slug, full_suffix)
557
+
558
+ def abbrev_title(
559
+ self,
560
+ *,
561
+ max_len: int = 100,
562
+ add_ops_suffix: bool = False,
563
+ pull_body_heading: bool = False,
564
+ ) -> str:
565
+ """
566
+ Get or infer a title for this item, falling back to the filename, URL, description, or
567
+ finally body text. Optionally, include the last operation as a parenthetical at the end
568
+ of the title. Will use "Untitled" if all else fails.
569
+ """
570
+ # First special case: if we are pulling the title from the body header, check
571
+ # that.
572
+ if not self.title and pull_body_heading:
573
+ heading = self.body_heading()
574
+ if heading:
575
+ return heading
576
+
577
+ # Next special case: URLs with no title use the url itself.
542
578
  if not self.title and self.url:
543
579
  return abbrev_str(self.url, max_len)
544
580
 
545
- # Special case for filenames with no title.
546
- path_name = (
547
- (self.store_path and Path(self.store_path).name)
548
- or (self.external_path and Path(self.external_path).name)
549
- or (self.original_filename and Path(self.original_filename).name)
550
- )
581
+ filename_stem = self.filename_stem()
551
582
 
552
583
  # Use the title or the path if possible, falling back to description or even body text.
553
584
  title_raw_text = (
554
585
  self.title
555
- or path_name
586
+ or filename_stem
556
587
  or self.description
557
588
  or (not self.is_binary and self.abbrev_body(max_len))
558
589
  or UNTITLED
559
590
  )
560
591
 
561
592
  suffix = ""
562
- if add_ops_suffix and self.type not in [ItemType.concept, ItemType.resource]:
563
- # For notes, exports, etc but not for concepts, add a parenthical note
564
- # indicating the last operation, if there was one. This makes filename slugs
565
- # more readable.
593
+ # For docs, etc but not for concepts/resources/exports, add a parenthical note
594
+ # indicating the last operation, if there was one. This makes filename slugs
595
+ # more readable.
596
+ if add_ops_suffix and self.type not in [
597
+ ItemType.concept,
598
+ ItemType.resource,
599
+ ItemType.export,
600
+ ]:
566
601
  last_op = self.history and self.history[-1].action_name
567
602
  if last_op:
568
603
  step_num = len(self.history) + 1 if self.history else 1
@@ -579,9 +614,36 @@ class Item:
579
614
 
580
615
  return final_text
581
616
 
617
+ def display_title(self) -> str:
618
+ """
619
+ A display title for this item. Same as abbrev_title() but will fall back
620
+ to the filename if it is available.
621
+ """
622
+ display_title = self.title
623
+ if not display_title and self.store_path:
624
+ display_title = Path(self.store_path).name
625
+ if not display_title:
626
+ display_title = self.abbrev_title()
627
+ return display_title
628
+
629
+ def abbrev_description(self, max_len: int = 1000) -> str:
630
+ """
631
+ Get or infer description.
632
+ """
633
+ return abbrev_on_words(html_to_plaintext(self.description or self.body or ""), max_len)
634
+
635
+ def body_heading(self) -> str | None:
636
+ """
637
+ Get the first h1 or h2 heading from the body text, if present.
638
+ """
639
+ if self.format in [Format.markdown, Format.md_html]:
640
+ return first_heading(self.body_text(), allowed_tags=("h1", "h2"))
641
+ # TODO: Support HTML <h1> and <h2> as well.
642
+ return None
643
+
582
644
  def abbrev_body(self, max_len: int) -> str:
583
645
  """
584
- Get a cut off version of the body text. Must not be a binary Item.
646
+ Get an abbreviated version of the body text. Must not be a binary Item.
585
647
  Abbreviates YAML bodies like {"role": "user", "content": "Hello"} to "user Hello".
586
648
  """
587
649
  body_text = self.body_text()[:max_len]
@@ -604,23 +666,6 @@ class Item:
604
666
  """
605
667
  return bool(self.body and self.body.strip())
606
668
 
607
- def slug_name(self, max_len: int = SLUG_MAX_LEN) -> str:
608
- """
609
- Get a readable slugified version of the title or filename or content
610
- appropriate for this item. May not be unique.
611
- """
612
- title = self.abbrev_title(max_len=max_len)
613
- slug = slugify_snake(title)
614
- return slug
615
-
616
- def abbrev_description(self, max_len: int = 1000) -> str:
617
- """
618
- Get or infer description.
619
- """
620
- return abbrev_on_words(
621
- html_to_plaintext(self.description or self.body or ""), max_len
622
- )
623
-
624
669
  def read_as_config(self) -> Any:
625
670
  """
626
671
  If it is a config Item, return the parsed YAML.
@@ -639,8 +684,6 @@ class Item:
639
684
  """
640
685
  if self.file_ext:
641
686
  return self.file_ext
642
- if self.is_binary and not self.file_ext:
643
- raise ValueError(f"Binary Items must have a file extension: {self}")
644
687
  inferred_ext = self.format and self.format.file_ext
645
688
  if not inferred_ext:
646
689
  raise ValueError(f"Cannot infer file extension for Item: {self}")
@@ -656,6 +699,9 @@ class Item:
656
699
  elif self.type == ItemType.script:
657
700
  # Same for kash/xonsh scripts.
658
701
  return f"{self.type.value}.{FileExt.xsh.value}"
702
+ elif self.type == ItemType.export:
703
+ # For exports, skip the item type to keep it maximally compatible for external tools.
704
+ return f"{self.get_file_ext().value}"
659
705
  else:
660
706
  return f"{self.type.value}.{self.get_file_ext().value}"
661
707
 
@@ -668,11 +714,19 @@ class Item:
668
714
  return "\n\n".join(part for part in parts if part)
669
715
 
670
716
  def body_text(self) -> str:
717
+ """
718
+ Body text of the item, also validating that the item is not binary.
719
+ """
671
720
  if self.is_binary:
672
721
  raise ValueError("Cannot get text content of a binary Item")
673
722
  return self.body or ""
674
723
 
675
724
  def body_as_html(self) -> str:
725
+ """
726
+ Body of the item, converted to HTML format. Validates that the body format can be
727
+ converted and then converts plaintext or Markdown to HTML. Simply returns the body
728
+ if it is already HTML.
729
+ """
676
730
  if self.format == Format.html:
677
731
  return self.body_text()
678
732
  elif self.format == Format.plaintext:
@@ -708,12 +762,10 @@ class Item:
708
762
  self, update_timestamp: bool = True, **other_updates: Unpack[ItemUpdateOptions]
709
763
  ) -> Item:
710
764
  """
711
- Copy item with the given field updates. Resets store_path to None. Updates
712
- created time if requested.
765
+ Copy item with the given field updates. Resets `store_path` to None but preserves
766
+ other fields, including the body. Updates created time if requested.
713
767
  """
714
- new_fields = self._copy_and_update(
715
- update_timestamp=update_timestamp, **other_updates
716
- )
768
+ new_fields = self._copy_and_update(update_timestamp=update_timestamp, **other_updates)
717
769
  return Item(**new_fields)
718
770
 
719
771
  def merged_copy(self, other: Item) -> Item:
@@ -734,7 +786,7 @@ class Item:
734
786
  if self.relations.derived_from:
735
787
  log.message(
736
788
  "Deriving from an item that has not been saved so using "
737
- "its derived_from relation: %s on %s",
789
+ "upstream derived_from relation: %s on %s",
738
790
  self.relations.derived_from,
739
791
  self,
740
792
  )
@@ -768,9 +820,7 @@ class Item:
768
820
 
769
821
  # Fall back to action title template if we have it and title wasn't explicitly set.
770
822
  if "title" not in updates:
771
- prev_title = self.title or (
772
- Path(self.store_path).stem if self.store_path else UNTITLED
773
- )
823
+ prev_title = self.title or (Path(self.store_path).stem if self.store_path else UNTITLED)
774
824
  if self.context:
775
825
  action = self.context.action
776
826
  new_item.title = action.title_template.format(