kash-shell 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/actions/core/markdownify.py +5 -4
- kash/actions/core/readability.py +4 -4
- kash/actions/core/render_as_html.py +8 -6
- kash/actions/core/show_webpage.py +2 -2
- kash/actions/core/strip_html.py +2 -2
- kash/commands/base/basic_file_commands.py +24 -3
- kash/commands/base/diff_commands.py +38 -3
- kash/commands/base/files_command.py +5 -4
- kash/commands/base/reformat_command.py +1 -1
- kash/commands/base/show_command.py +1 -1
- kash/commands/extras/parse_uv_lock.py +12 -3
- kash/commands/workspace/selection_commands.py +1 -1
- kash/commands/workspace/workspace_commands.py +62 -16
- kash/config/env_settings.py +2 -42
- kash/config/logger.py +30 -25
- kash/config/logger_basic.py +6 -6
- kash/config/settings.py +23 -7
- kash/config/setup.py +33 -5
- kash/config/text_styles.py +25 -22
- kash/docs/load_source_code.py +1 -1
- kash/embeddings/cosine.py +12 -4
- kash/embeddings/embeddings.py +16 -6
- kash/embeddings/text_similarity.py +10 -4
- kash/exec/__init__.py +3 -0
- kash/exec/action_decorators.py +4 -19
- kash/exec/action_exec.py +46 -27
- kash/exec/fetch_url_metadata.py +8 -5
- kash/exec/importing.py +4 -4
- kash/exec/llm_transforms.py +2 -2
- kash/exec/preconditions.py +11 -19
- kash/exec/runtime_settings.py +134 -0
- kash/exec/shell_callable_action.py +5 -3
- kash/file_storage/file_store.py +91 -53
- kash/file_storage/item_file_format.py +6 -3
- kash/file_storage/store_filenames.py +7 -3
- kash/help/help_embeddings.py +2 -2
- kash/llm_utils/clean_headings.py +1 -1
- kash/{text_handling → llm_utils}/custom_sliding_transforms.py +0 -3
- kash/llm_utils/init_litellm.py +16 -0
- kash/llm_utils/llm_api_keys.py +6 -2
- kash/llm_utils/llm_completion.py +12 -5
- kash/local_server/__init__.py +1 -1
- kash/local_server/local_server_commands.py +2 -1
- kash/mcp/__init__.py +1 -1
- kash/mcp/mcp_cli.py +3 -2
- kash/mcp/mcp_server_commands.py +8 -2
- kash/mcp/mcp_server_routes.py +11 -12
- kash/media_base/media_cache.py +10 -3
- kash/media_base/transcription_deepgram.py +15 -2
- kash/model/__init__.py +1 -1
- kash/model/actions_model.py +9 -54
- kash/model/exec_model.py +79 -0
- kash/model/items_model.py +131 -81
- kash/model/operations_model.py +38 -15
- kash/model/paths_model.py +2 -0
- kash/shell/output/shell_output.py +10 -8
- kash/shell/shell_main.py +2 -2
- kash/shell/ui/shell_results.py +2 -1
- kash/shell/utils/exception_printing.py +2 -2
- kash/utils/common/format_utils.py +0 -14
- kash/utils/common/import_utils.py +46 -18
- kash/utils/common/task_stack.py +4 -15
- kash/utils/errors.py +14 -9
- kash/utils/file_utils/file_formats_model.py +61 -26
- kash/utils/file_utils/file_sort_filter.py +10 -3
- kash/utils/file_utils/filename_parsing.py +41 -16
- kash/{text_handling → utils/text_handling}/doc_normalization.py +23 -13
- kash/utils/text_handling/escape_html_tags.py +156 -0
- kash/{text_handling → utils/text_handling}/markdown_utils.py +82 -4
- kash/utils/text_handling/markdownify_utils.py +87 -0
- kash/{text_handling → utils/text_handling}/unified_diffs.py +1 -44
- kash/web_content/file_cache_utils.py +42 -34
- kash/web_content/local_file_cache.py +29 -12
- kash/web_content/web_extract.py +1 -1
- kash/web_content/web_extract_readabilipy.py +4 -2
- kash/web_content/web_fetch.py +42 -7
- kash/web_content/web_page_model.py +2 -1
- kash/web_gen/simple_webpage.py +1 -1
- kash/web_gen/templates/base_styles.css.jinja +139 -16
- kash/web_gen/templates/simple_webpage.html.jinja +1 -1
- kash/workspaces/__init__.py +12 -3
- kash/workspaces/selections.py +2 -2
- kash/workspaces/workspace_dirs.py +58 -0
- kash/workspaces/workspace_importing.py +2 -2
- kash/workspaces/workspace_output.py +2 -2
- kash/workspaces/workspaces.py +26 -90
- kash/xonsh_custom/load_into_xonsh.py +4 -2
- {kash_shell-0.3.11.dist-info → kash_shell-0.3.13.dist-info}/METADATA +4 -4
- {kash_shell-0.3.11.dist-info → kash_shell-0.3.13.dist-info}/RECORD +93 -89
- kash/shell/utils/argparse_utils.py +0 -20
- kash/utils/lang_utils/inflection.py +0 -18
- kash/{text_handling → utils/text_handling}/markdown_render.py +0 -0
- {kash_shell-0.3.11.dist-info → kash_shell-0.3.13.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.11.dist-info → kash_shell-0.3.13.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.11.dist-info → kash_shell-0.3.13.dist-info}/licenses/LICENSE +0 -0
kash/media_base/media_cache.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from functools import cache
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
|
|
4
5
|
from prettyfmt import fmt_lines, fmt_path
|
|
@@ -11,7 +12,6 @@ from kash.media_base.media_services import (
|
|
|
11
12
|
download_media_by_service,
|
|
12
13
|
get_media_services,
|
|
13
14
|
)
|
|
14
|
-
from kash.media_base.transcription_deepgram import deepgram_transcribe_audio
|
|
15
15
|
from kash.utils.common.format_utils import fmt_loc
|
|
16
16
|
from kash.utils.common.url import Url, as_file_url, is_url
|
|
17
17
|
from kash.utils.errors import FileNotFound, InvalidInput, UnexpectedError
|
|
@@ -22,7 +22,14 @@ log = get_logger(__name__)
|
|
|
22
22
|
|
|
23
23
|
# FIXME: Hard-coded dependency for now. Would be better to make it settable.
|
|
24
24
|
# transcribe_audio = whisper_transcribe_audio_small
|
|
25
|
-
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@cache
|
|
28
|
+
def get_transcriber():
|
|
29
|
+
from kash.media_base.transcription_deepgram import deepgram_transcribe_audio
|
|
30
|
+
|
|
31
|
+
transcribe_audio = deepgram_transcribe_audio
|
|
32
|
+
return transcribe_audio
|
|
26
33
|
|
|
27
34
|
|
|
28
35
|
# For simplicity we assume all audio is converted to mp3.
|
|
@@ -83,7 +90,7 @@ class MediaCache(DirStore):
|
|
|
83
90
|
url,
|
|
84
91
|
fmt_path(downsampled_audio_file),
|
|
85
92
|
)
|
|
86
|
-
transcript =
|
|
93
|
+
transcript = get_transcriber()(downsampled_audio_file, language=language)
|
|
87
94
|
self._write_transcript(url, transcript)
|
|
88
95
|
return transcript
|
|
89
96
|
|
kash/media_base/transcription_deepgram.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from os.path import getsize
|
|
2
4
|
from pathlib import Path
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
3
6
|
|
|
4
7
|
from clideps.env_vars.dotenv_utils import load_dotenv_paths
|
|
5
|
-
from deepgram import ListenRESTClient, PrerecordedResponse
|
|
6
8
|
from httpx import Timeout
|
|
7
9
|
|
|
8
10
|
from kash.config.logger import CustomLogger, get_logger
|
|
@@ -10,6 +12,9 @@ from kash.config.settings import global_settings
|
|
|
10
12
|
from kash.media_base.transcription_format import SpeakerSegment, format_speaker_segments
|
|
11
13
|
from kash.utils.errors import ApiError, ContentError
|
|
12
14
|
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from deepgram import PrerecordedResponse
|
|
17
|
+
|
|
13
18
|
log: CustomLogger = get_logger(__name__)
|
|
14
19
|
|
|
15
20
|
|
|
@@ -19,7 +24,15 @@ def deepgram_transcribe_raw(
|
|
|
19
24
|
"""
|
|
20
25
|
Transcribe an audio file using Deepgram and return the raw response.
|
|
21
26
|
"""
|
|
22
|
-
|
|
27
|
+
# Slow import, do lazily.
|
|
28
|
+
from deepgram import (
|
|
29
|
+
ClientOptionsFromEnv,
|
|
30
|
+
DeepgramClient,
|
|
31
|
+
FileSource,
|
|
32
|
+
ListenRESTClient,
|
|
33
|
+
PrerecordedOptions,
|
|
34
|
+
PrerecordedResponse,
|
|
35
|
+
)
|
|
23
36
|
|
|
24
37
|
size = getsize(audio_file_path)
|
|
25
38
|
log.info(
|
kash/model/__init__.py
CHANGED
|
@@ -20,7 +20,6 @@ from kash.model.actions_model import (
|
|
|
20
20
|
Action,
|
|
21
21
|
ActionInput,
|
|
22
22
|
ActionResult,
|
|
23
|
-
ExecContext,
|
|
24
23
|
LLMOptions,
|
|
25
24
|
PathOp,
|
|
26
25
|
PathOpType,
|
|
@@ -33,6 +32,7 @@ from kash.model.compound_actions_model import (
|
|
|
33
32
|
look_up_actions,
|
|
34
33
|
)
|
|
35
34
|
from kash.model.concept_model import Concept, canonicalize_concept, normalize_concepts
|
|
35
|
+
from kash.model.exec_model import ExecContext
|
|
36
36
|
from kash.model.graph_model import GraphData, Link, Node
|
|
37
37
|
from kash.model.items_model import (
|
|
38
38
|
SLUG_MAX_LEN,
|
kash/model/actions_model.py
CHANGED
|
@@ -4,7 +4,6 @@ from abc import ABC, abstractmethod
|
|
|
4
4
|
from dataclasses import Field as DataclassField
|
|
5
5
|
from dataclasses import field, replace
|
|
6
6
|
from enum import Enum
|
|
7
|
-
from pathlib import Path
|
|
8
7
|
from textwrap import dedent
|
|
9
8
|
from typing import Any, TypeVar, cast
|
|
10
9
|
|
|
@@ -20,10 +19,10 @@ from typing_extensions import override
|
|
|
20
19
|
from kash.config.logger import get_logger
|
|
21
20
|
from kash.exec_model.args_model import NO_ARGS, ONE_ARG, ArgCount, ArgType, Signature
|
|
22
21
|
from kash.exec_model.shell_model import ShellResult
|
|
23
|
-
from kash.file_storage.file_store import FileStore
|
|
24
22
|
from kash.llm_utils import LLM, LLMName
|
|
25
23
|
from kash.llm_utils.llm_messages import Message, MessageTemplate
|
|
26
|
-
from kash.model.
|
|
24
|
+
from kash.model.exec_model import ExecContext
|
|
25
|
+
from kash.model.items_model import UNTITLED, Item, ItemType
|
|
27
26
|
from kash.model.operations_model import Operation, Source
|
|
28
27
|
from kash.model.params_model import (
|
|
29
28
|
ALL_COMMON_PARAMS,
|
|
@@ -38,7 +37,6 @@ from kash.model.preconditions_model import Precondition
|
|
|
38
37
|
from kash.utils.common.parse_key_vals import format_key_value
|
|
39
38
|
from kash.utils.common.type_utils import not_none
|
|
40
39
|
from kash.utils.errors import InvalidDefinition, InvalidInput
|
|
41
|
-
from kash.workspaces.workspaces import get_ws
|
|
42
40
|
|
|
43
41
|
log = get_logger(__name__)
|
|
44
42
|
|
|
@@ -64,53 +62,6 @@ class ActionInput:
|
|
|
64
62
|
return ActionInput(items=[])
|
|
65
63
|
|
|
66
64
|
|
|
67
|
-
@dataclass(frozen=True)
|
|
68
|
-
class ExecContext:
|
|
69
|
-
"""
|
|
70
|
-
An action and its context for execution. This is a good place for settings
|
|
71
|
-
that apply to any action and are bothersome to pass as parameters.
|
|
72
|
-
"""
|
|
73
|
-
|
|
74
|
-
action: Action
|
|
75
|
-
"""The action being executed."""
|
|
76
|
-
|
|
77
|
-
workspace_dir: Path
|
|
78
|
-
"""The workspace directory in which the action is being executed."""
|
|
79
|
-
|
|
80
|
-
rerun: bool = False
|
|
81
|
-
"""If True, always run actions, even cacheable ones that have results."""
|
|
82
|
-
|
|
83
|
-
refetch: bool = False
|
|
84
|
-
"""If True, will refetch items even if they are already in the content caches."""
|
|
85
|
-
|
|
86
|
-
override_state: State | None = None
|
|
87
|
-
"""If specified, override the state of result items. Useful to mark items as transient."""
|
|
88
|
-
|
|
89
|
-
tmp_output: bool = False
|
|
90
|
-
"""If True, will save output items to a temporary file."""
|
|
91
|
-
|
|
92
|
-
no_format: bool = False
|
|
93
|
-
"""If True, will not normalize the output item's body text formatting (for Markdown)."""
|
|
94
|
-
|
|
95
|
-
@property
|
|
96
|
-
def workspace(self) -> FileStore:
|
|
97
|
-
return get_ws(self.workspace_dir)
|
|
98
|
-
|
|
99
|
-
@property
|
|
100
|
-
def runtime_options(self) -> dict[str, str]:
|
|
101
|
-
"""Return non-default runtime options."""
|
|
102
|
-
opts: dict[str, str] = {}
|
|
103
|
-
# Only these two settings directly affect the output:
|
|
104
|
-
if self.no_format:
|
|
105
|
-
opts["no_format"] = "true"
|
|
106
|
-
if self.override_state:
|
|
107
|
-
opts["override_state"] = self.override_state.name
|
|
108
|
-
return opts
|
|
109
|
-
|
|
110
|
-
def __repr__(self):
|
|
111
|
-
return abbrev_obj(self, field_max_len=80)
|
|
112
|
-
|
|
113
|
-
|
|
114
65
|
class PathOpType(Enum):
|
|
115
66
|
archive = "archive"
|
|
116
67
|
select = "select"
|
|
@@ -139,6 +90,9 @@ class ActionResult:
|
|
|
139
90
|
replaces_input: bool = False
|
|
140
91
|
"""If True, a hint to archive the input items."""
|
|
141
92
|
|
|
93
|
+
overwrite: bool = False
|
|
94
|
+
"""If True, will not pick unique output paths to save to, overwriting existing files of the same name."""
|
|
95
|
+
|
|
142
96
|
skip_duplicates: bool = False
|
|
143
97
|
"""If True, do not save duplicate items (based on identity)."""
|
|
144
98
|
|
|
@@ -365,8 +319,8 @@ class Action(ABC):
|
|
|
365
319
|
"""
|
|
366
320
|
Declaration sanity checks.
|
|
367
321
|
"""
|
|
368
|
-
if not self.name
|
|
369
|
-
raise InvalidDefinition("Action must have a name
|
|
322
|
+
if not self.name:
|
|
323
|
+
raise InvalidDefinition("Action must have a name")
|
|
370
324
|
|
|
371
325
|
for param in self.params:
|
|
372
326
|
if not self.has_param(param.name):
|
|
@@ -535,7 +489,7 @@ class Action(ABC):
|
|
|
535
489
|
log.info("Ignoring parameter for action `%s`: `%s`", self.name, param_name)
|
|
536
490
|
|
|
537
491
|
if overrides:
|
|
538
|
-
log.
|
|
492
|
+
log.info(
|
|
539
493
|
"Overriding parameters for action `%s`:\n%s",
|
|
540
494
|
self.name,
|
|
541
495
|
fmt_lines(overrides),
|
|
@@ -677,3 +631,4 @@ class PerItemAction(Action, ABC):
|
|
|
677
631
|
|
|
678
632
|
# Handle circular dependency in Python dataclasses.
|
|
679
633
|
rebuild_dataclass(Item) # pyright: ignore
|
|
634
|
+
rebuild_dataclass(ExecContext) # pyright: ignore
|
kash/model/exec_model.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from prettyfmt import abbrev_obj
|
|
7
|
+
from pydantic.dataclasses import dataclass
|
|
8
|
+
|
|
9
|
+
from kash.config.logger import get_logger
|
|
10
|
+
from kash.model.items_model import State
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from kash.file_storage.file_store import FileStore
|
|
14
|
+
from kash.model.actions_model import Action
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
log = get_logger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(frozen=True)
|
|
21
|
+
class RuntimeSettings:
|
|
22
|
+
"""
|
|
23
|
+
Workspace and other runtime settings that may be set across runs of
|
|
24
|
+
one or more actions.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
workspace_dir: Path
|
|
28
|
+
"""The workspace directory in which the action is being executed."""
|
|
29
|
+
|
|
30
|
+
rerun: bool = False
|
|
31
|
+
"""If True, always run actions, even cacheable ones that have results."""
|
|
32
|
+
|
|
33
|
+
refetch: bool = False
|
|
34
|
+
"""If True, will refetch items even if they are already in the content caches."""
|
|
35
|
+
|
|
36
|
+
override_state: State | None = None
|
|
37
|
+
"""If specified, override the state of result items. Useful to mark items as transient."""
|
|
38
|
+
|
|
39
|
+
tmp_output: bool = False
|
|
40
|
+
"""If True, will save output items to a temporary file."""
|
|
41
|
+
|
|
42
|
+
no_format: bool = False
|
|
43
|
+
"""If True, will not normalize the output item's body text formatting (for Markdown)."""
|
|
44
|
+
|
|
45
|
+
@property
|
|
46
|
+
def workspace(self) -> FileStore:
|
|
47
|
+
from kash.workspaces.workspaces import get_ws
|
|
48
|
+
|
|
49
|
+
return get_ws(self.workspace_dir)
|
|
50
|
+
|
|
51
|
+
@property
|
|
52
|
+
def non_default_options(self) -> dict[str, str]:
|
|
53
|
+
"""
|
|
54
|
+
Summarize non-default runtime options as a dict.
|
|
55
|
+
"""
|
|
56
|
+
opts: dict[str, str] = {}
|
|
57
|
+
# Only these two settings directly affect the output:
|
|
58
|
+
if self.no_format:
|
|
59
|
+
opts["no_format"] = "true"
|
|
60
|
+
if self.override_state:
|
|
61
|
+
opts["override_state"] = self.override_state.name
|
|
62
|
+
return opts
|
|
63
|
+
|
|
64
|
+
def __repr__(self):
|
|
65
|
+
return abbrev_obj(self, field_max_len=80)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass(frozen=True)
|
|
69
|
+
class ExecContext:
|
|
70
|
+
"""
|
|
71
|
+
An action and its context for execution. This is a good place for settings
|
|
72
|
+
that apply to any action and are bothersome to pass as parameters.
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
action: Action
|
|
76
|
+
"""The action being executed."""
|
|
77
|
+
|
|
78
|
+
settings: RuntimeSettings
|
|
79
|
+
"""The workspace and other run-time settings for the action."""
|
kash/model/items_model.py
CHANGED
|
@@ -24,15 +24,17 @@ from kash.model.concept_model import canonicalize_concept
|
|
|
24
24
|
from kash.model.media_model import MediaMetadata
|
|
25
25
|
from kash.model.operations_model import OperationSummary, Source
|
|
26
26
|
from kash.model.paths_model import StorePath, fmt_store_path
|
|
27
|
-
from kash.text_handling.markdown_render import markdown_to_html
|
|
28
27
|
from kash.utils.common.format_utils import fmt_loc, html_to_plaintext, plaintext_to_html
|
|
29
28
|
from kash.utils.common.url import Locator, Url
|
|
30
29
|
from kash.utils.errors import FileFormatError
|
|
31
30
|
from kash.utils.file_formats.chat_format import ChatHistory
|
|
31
|
+
from kash.utils.file_utils.file_formats import MimeType
|
|
32
32
|
from kash.utils.file_utils.file_formats_model import FileExt, Format
|
|
33
|
+
from kash.utils.text_handling.markdown_render import markdown_to_html
|
|
34
|
+
from kash.utils.text_handling.markdown_utils import first_heading
|
|
33
35
|
|
|
34
36
|
if TYPE_CHECKING:
|
|
35
|
-
from kash.model.
|
|
37
|
+
from kash.model.exec_model import ExecContext
|
|
36
38
|
from kash.workspaces import Workspace
|
|
37
39
|
|
|
38
40
|
log = get_logger(__name__)
|
|
@@ -178,9 +180,7 @@ class ItemId:
|
|
|
178
180
|
if item.type == ItemType.resource and item.format == Format.url and item.url:
|
|
179
181
|
item_id = ItemId(item.type, IdType.url, canonicalize_url(item.url))
|
|
180
182
|
elif item.type == ItemType.concept and item.title:
|
|
181
|
-
item_id = ItemId(
|
|
182
|
-
item.type, IdType.concept, canonicalize_concept(item.title)
|
|
183
|
-
)
|
|
183
|
+
item_id = ItemId(item.type, IdType.concept, canonicalize_concept(item.title))
|
|
184
184
|
elif item.source and item.source.cacheable:
|
|
185
185
|
# We know the source of this and if the action was cacheable, we can create
|
|
186
186
|
# an identity based on the source.
|
|
@@ -281,11 +281,9 @@ class Item:
|
|
|
281
281
|
"""
|
|
282
282
|
item_dict = {**item_dict, **kwargs}
|
|
283
283
|
|
|
284
|
-
info_prefix =
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
else ""
|
|
288
|
-
)
|
|
284
|
+
info_prefix = ""
|
|
285
|
+
if "store_path" in item_dict and item_dict["store_path"]:
|
|
286
|
+
info_prefix = f"{fmt_store_path(item_dict['store_path'])}: "
|
|
289
287
|
|
|
290
288
|
# Metadata formats might change over time so it's important to gracefully handle issues.
|
|
291
289
|
def set_field(key: str, default: Any, cls_: type[T]) -> T:
|
|
@@ -314,9 +312,7 @@ class Item:
|
|
|
314
312
|
body = item_dict.get("body")
|
|
315
313
|
history = [OperationSummary(**op) for op in item_dict.get("history", [])]
|
|
316
314
|
relations = (
|
|
317
|
-
ItemRelations(**item_dict["relations"])
|
|
318
|
-
if "relations" in item_dict
|
|
319
|
-
else ItemRelations()
|
|
315
|
+
ItemRelations(**item_dict["relations"]) if "relations" in item_dict else ItemRelations()
|
|
320
316
|
)
|
|
321
317
|
store_path = item_dict.get("store_path")
|
|
322
318
|
|
|
@@ -334,9 +330,7 @@ class Item:
|
|
|
334
330
|
]
|
|
335
331
|
all_fields = [f.name for f in cls.__dataclass_fields__.values()]
|
|
336
332
|
allowed_fields = [f for f in all_fields if f not in excluded_fields]
|
|
337
|
-
other_metadata = {
|
|
338
|
-
key: value for key, value in item_dict.items() if key in allowed_fields
|
|
339
|
-
}
|
|
333
|
+
other_metadata = {key: value for key, value in item_dict.items() if key in allowed_fields}
|
|
340
334
|
unexpected_metadata = {
|
|
341
335
|
key: value for key, value in item_dict.items() if key not in all_fields
|
|
342
336
|
}
|
|
@@ -366,15 +360,19 @@ class Item:
|
|
|
366
360
|
cls,
|
|
367
361
|
path: Path | str,
|
|
368
362
|
item_type: ItemType | None = None,
|
|
363
|
+
*,
|
|
369
364
|
title: str | None = None,
|
|
365
|
+
original_filename: str | None = None,
|
|
366
|
+
mime_type: MimeType | None = None,
|
|
370
367
|
) -> Item:
|
|
371
368
|
"""
|
|
372
369
|
Create a resource Item for a file with a format inferred from the file extension
|
|
373
370
|
or the content. Only sets basic metadata. Does not read the content. Will set
|
|
374
371
|
`format` and `file_ext` if possible but will leave them as None if unrecognized.
|
|
372
|
+
If `mime_type` is provided, it can help determine the file extension.
|
|
375
373
|
"""
|
|
376
374
|
from kash.file_storage.store_filenames import parse_item_filename
|
|
377
|
-
from kash.utils.file_utils.file_formats_model import detect_file_format
|
|
375
|
+
from kash.utils.file_utils.file_formats_model import choose_file_ext, detect_file_format
|
|
378
376
|
|
|
379
377
|
# Will raise error for unrecognized file ext.
|
|
380
378
|
_name, filename_item_type, format, file_ext = parse_item_filename(path)
|
|
@@ -385,16 +383,19 @@ class Item:
|
|
|
385
383
|
if not item_type:
|
|
386
384
|
# Default to doc for general text files and resource for everything else.
|
|
387
385
|
item_type = (
|
|
388
|
-
ItemType.doc
|
|
389
|
-
if format and format.supports_frontmatter
|
|
390
|
-
else ItemType.resource
|
|
386
|
+
ItemType.doc if format and format.supports_frontmatter else ItemType.resource
|
|
391
387
|
)
|
|
388
|
+
# Do our best to determine a good file extension if it's not already on the filename.
|
|
389
|
+
if not file_ext and mime_type:
|
|
390
|
+
file_ext = choose_file_ext(path, mime_type)
|
|
391
|
+
|
|
392
392
|
item = cls(
|
|
393
393
|
type=item_type,
|
|
394
394
|
title=title,
|
|
395
395
|
file_ext=file_ext,
|
|
396
396
|
format=format,
|
|
397
397
|
external_path=str(path),
|
|
398
|
+
original_filename=original_filename,
|
|
398
399
|
)
|
|
399
400
|
|
|
400
401
|
# Update modified time from the file system.
|
|
@@ -438,11 +439,9 @@ class Item:
|
|
|
438
439
|
if not self.format:
|
|
439
440
|
raise ValueError(f"Item has no format: {self}")
|
|
440
441
|
if self.type.expects_body and self.format.has_body and not self.body:
|
|
441
|
-
raise ValueError(
|
|
442
|
-
f"Item type `{self.type.value}` is text but has no body: {self}"
|
|
443
|
-
)
|
|
442
|
+
raise ValueError(f"Item type `{self.type.value}` is text but has no body: {self}")
|
|
444
443
|
|
|
445
|
-
def absolute_path(self, ws:
|
|
444
|
+
def absolute_path(self, ws: Workspace | None = None) -> Path:
|
|
446
445
|
"""
|
|
447
446
|
Get the absolute path to the item. Throws `ValueError` if the item has no
|
|
448
447
|
store path. If no workspace is provided, uses the current workspace.
|
|
@@ -493,9 +492,7 @@ class Item:
|
|
|
493
492
|
return {k: serialize(v) for k, v in v.items()}
|
|
494
493
|
elif isinstance(v, Enum):
|
|
495
494
|
return v.value
|
|
496
|
-
elif hasattr(
|
|
497
|
-
v, "as_dict"
|
|
498
|
-
): # Handle Operation or any object with as_dict method.
|
|
495
|
+
elif hasattr(v, "as_dict"): # Handle Operation or any object with as_dict method.
|
|
499
496
|
return v.as_dict()
|
|
500
497
|
elif is_dataclass(v) and not isinstance(v, type):
|
|
501
498
|
# Handle Python and Pydantic dataclasses.
|
|
@@ -520,49 +517,87 @@ class Item:
|
|
|
520
517
|
|
|
521
518
|
return item_dict
|
|
522
519
|
|
|
523
|
-
def
|
|
520
|
+
def filename_stem(self) -> str | None:
|
|
524
521
|
"""
|
|
525
|
-
|
|
526
|
-
|
|
522
|
+
If the item has an existing or previous filename, return its stem,
|
|
523
|
+
for use in picking new filenames.
|
|
527
524
|
"""
|
|
528
|
-
|
|
529
|
-
if not display_title and self.store_path:
|
|
530
|
-
display_title = Path(self.store_path).name
|
|
531
|
-
if not display_title:
|
|
532
|
-
display_title = self.abbrev_title()
|
|
533
|
-
return display_title
|
|
525
|
+
from kash.file_storage.store_filenames import parse_item_filename
|
|
534
526
|
|
|
535
|
-
|
|
527
|
+
# Prefer original to external, e.g. if we know the original but the external might
|
|
528
|
+
# be a cache filename.
|
|
529
|
+
path = self.store_path or self.original_filename or self.external_path
|
|
530
|
+
if path:
|
|
531
|
+
path_name, _item_type, _format, _file_ext = parse_item_filename(Path(path).name)
|
|
532
|
+
else:
|
|
533
|
+
path_name = None
|
|
534
|
+
return path_name
|
|
535
|
+
|
|
536
|
+
def slug_name(self, max_len: int = SLUG_MAX_LEN, prefer_title: bool = False) -> str:
|
|
537
|
+
"""
|
|
538
|
+
Get a readable slugified name for this item, either from a previous filename
|
|
539
|
+
or from slugifying the title or content. May not be unique.
|
|
540
|
+
"""
|
|
541
|
+
filename_stem = self.filename_stem()
|
|
542
|
+
if filename_stem and not prefer_title:
|
|
543
|
+
return slugify_snake(filename_stem)
|
|
544
|
+
else:
|
|
545
|
+
return slugify_snake(self.abbrev_title(max_len=max_len, add_ops_suffix=True))
|
|
546
|
+
|
|
547
|
+
def default_filename(self) -> str:
|
|
536
548
|
"""
|
|
537
|
-
Get
|
|
538
|
-
|
|
539
|
-
Optionally, include the last operation as a parenthetical at the end of the title.
|
|
549
|
+
Get the default filename for an item based on slugifying its title or other
|
|
550
|
+
metadata. May not be unique.
|
|
540
551
|
"""
|
|
541
|
-
|
|
552
|
+
from kash.file_storage.store_filenames import join_suffix
|
|
553
|
+
|
|
554
|
+
slug = self.slug_name()
|
|
555
|
+
full_suffix = self.get_full_suffix()
|
|
556
|
+
return join_suffix(slug, full_suffix)
|
|
557
|
+
|
|
558
|
+
def abbrev_title(
|
|
559
|
+
self,
|
|
560
|
+
*,
|
|
561
|
+
max_len: int = 100,
|
|
562
|
+
add_ops_suffix: bool = False,
|
|
563
|
+
pull_body_heading: bool = False,
|
|
564
|
+
) -> str:
|
|
565
|
+
"""
|
|
566
|
+
Get or infer a title for this item, falling back to the filename, URL, description, or
|
|
567
|
+
finally body text. Optionally, include the last operation as a parenthetical at the end
|
|
568
|
+
of the title. Will use "Untitled" if all else fails.
|
|
569
|
+
"""
|
|
570
|
+
# First special case: if we are pulling the title from the body header, check
|
|
571
|
+
# that.
|
|
572
|
+
if not self.title and pull_body_heading:
|
|
573
|
+
heading = self.body_heading()
|
|
574
|
+
if heading:
|
|
575
|
+
return heading
|
|
576
|
+
|
|
577
|
+
# Next special case: URLs with no title use the url itself.
|
|
542
578
|
if not self.title and self.url:
|
|
543
579
|
return abbrev_str(self.url, max_len)
|
|
544
580
|
|
|
545
|
-
|
|
546
|
-
path_name = (
|
|
547
|
-
(self.store_path and Path(self.store_path).name)
|
|
548
|
-
or (self.external_path and Path(self.external_path).name)
|
|
549
|
-
or (self.original_filename and Path(self.original_filename).name)
|
|
550
|
-
)
|
|
581
|
+
filename_stem = self.filename_stem()
|
|
551
582
|
|
|
552
583
|
# Use the title or the path if possible, falling back to description or even body text.
|
|
553
584
|
title_raw_text = (
|
|
554
585
|
self.title
|
|
555
|
-
or
|
|
586
|
+
or filename_stem
|
|
556
587
|
or self.description
|
|
557
588
|
or (not self.is_binary and self.abbrev_body(max_len))
|
|
558
589
|
or UNTITLED
|
|
559
590
|
)
|
|
560
591
|
|
|
561
592
|
suffix = ""
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
593
|
+
# For docs, etc but not for concepts/resources/exports, add a parenthetical note
|
|
594
|
+
# indicating the last operation, if there was one. This makes filename slugs
|
|
595
|
+
# more readable.
|
|
596
|
+
if add_ops_suffix and self.type not in [
|
|
597
|
+
ItemType.concept,
|
|
598
|
+
ItemType.resource,
|
|
599
|
+
ItemType.export,
|
|
600
|
+
]:
|
|
566
601
|
last_op = self.history and self.history[-1].action_name
|
|
567
602
|
if last_op:
|
|
568
603
|
step_num = len(self.history) + 1 if self.history else 1
|
|
@@ -579,9 +614,36 @@ class Item:
|
|
|
579
614
|
|
|
580
615
|
return final_text
|
|
581
616
|
|
|
617
|
+
def display_title(self) -> str:
|
|
618
|
+
"""
|
|
619
|
+
A display title for this item. Same as abbrev_title() but will fall back
|
|
620
|
+
to the filename if it is available.
|
|
621
|
+
"""
|
|
622
|
+
display_title = self.title
|
|
623
|
+
if not display_title and self.store_path:
|
|
624
|
+
display_title = Path(self.store_path).name
|
|
625
|
+
if not display_title:
|
|
626
|
+
display_title = self.abbrev_title()
|
|
627
|
+
return display_title
|
|
628
|
+
|
|
629
|
+
def abbrev_description(self, max_len: int = 1000) -> str:
|
|
630
|
+
"""
|
|
631
|
+
Get or infer description.
|
|
632
|
+
"""
|
|
633
|
+
return abbrev_on_words(html_to_plaintext(self.description or self.body or ""), max_len)
|
|
634
|
+
|
|
635
|
+
def body_heading(self) -> str | None:
|
|
636
|
+
"""
|
|
637
|
+
Get the first h1 or h2 heading from the body text, if present.
|
|
638
|
+
"""
|
|
639
|
+
if self.format in [Format.markdown, Format.md_html]:
|
|
640
|
+
return first_heading(self.body_text(), allowed_tags=("h1", "h2"))
|
|
641
|
+
# TODO: Support HTML <h1> and <h2> as well.
|
|
642
|
+
return None
|
|
643
|
+
|
|
582
644
|
def abbrev_body(self, max_len: int) -> str:
|
|
583
645
|
"""
|
|
584
|
-
Get
|
|
646
|
+
Get an abbreviated version of the body text. Must not be a binary Item.
|
|
585
647
|
Abbreviates YAML bodies like {"role": "user", "content": "Hello"} to "user Hello".
|
|
586
648
|
"""
|
|
587
649
|
body_text = self.body_text()[:max_len]
|
|
@@ -604,23 +666,6 @@ class Item:
|
|
|
604
666
|
"""
|
|
605
667
|
return bool(self.body and self.body.strip())
|
|
606
668
|
|
|
607
|
-
def slug_name(self, max_len: int = SLUG_MAX_LEN) -> str:
|
|
608
|
-
"""
|
|
609
|
-
Get a readable slugified version of the title or filename or content
|
|
610
|
-
appropriate for this item. May not be unique.
|
|
611
|
-
"""
|
|
612
|
-
title = self.abbrev_title(max_len=max_len)
|
|
613
|
-
slug = slugify_snake(title)
|
|
614
|
-
return slug
|
|
615
|
-
|
|
616
|
-
def abbrev_description(self, max_len: int = 1000) -> str:
|
|
617
|
-
"""
|
|
618
|
-
Get or infer description.
|
|
619
|
-
"""
|
|
620
|
-
return abbrev_on_words(
|
|
621
|
-
html_to_plaintext(self.description or self.body or ""), max_len
|
|
622
|
-
)
|
|
623
|
-
|
|
624
669
|
def read_as_config(self) -> Any:
|
|
625
670
|
"""
|
|
626
671
|
If it is a config Item, return the parsed YAML.
|
|
@@ -639,8 +684,6 @@ class Item:
|
|
|
639
684
|
"""
|
|
640
685
|
if self.file_ext:
|
|
641
686
|
return self.file_ext
|
|
642
|
-
if self.is_binary and not self.file_ext:
|
|
643
|
-
raise ValueError(f"Binary Items must have a file extension: {self}")
|
|
644
687
|
inferred_ext = self.format and self.format.file_ext
|
|
645
688
|
if not inferred_ext:
|
|
646
689
|
raise ValueError(f"Cannot infer file extension for Item: {self}")
|
|
@@ -656,6 +699,9 @@ class Item:
|
|
|
656
699
|
elif self.type == ItemType.script:
|
|
657
700
|
# Same for kash/xonsh scripts.
|
|
658
701
|
return f"{self.type.value}.{FileExt.xsh.value}"
|
|
702
|
+
elif self.type == ItemType.export:
|
|
703
|
+
# For exports, skip the item type to keep it maximally compatible for external tools.
|
|
704
|
+
return f"{self.get_file_ext().value}"
|
|
659
705
|
else:
|
|
660
706
|
return f"{self.type.value}.{self.get_file_ext().value}"
|
|
661
707
|
|
|
@@ -668,11 +714,19 @@ class Item:
|
|
|
668
714
|
return "\n\n".join(part for part in parts if part)
|
|
669
715
|
|
|
670
716
|
def body_text(self) -> str:
|
|
717
|
+
"""
|
|
718
|
+
Body text of the item, also validating that the item is not binary.
|
|
719
|
+
"""
|
|
671
720
|
if self.is_binary:
|
|
672
721
|
raise ValueError("Cannot get text content of a binary Item")
|
|
673
722
|
return self.body or ""
|
|
674
723
|
|
|
675
724
|
def body_as_html(self) -> str:
|
|
725
|
+
"""
|
|
726
|
+
Body of the item, converted to HTML format. Validates that the body format can be
|
|
727
|
+
converted and then converts plaintext or Markdown to HTML. Simply returns the body
|
|
728
|
+
if it is already HTML.
|
|
729
|
+
"""
|
|
676
730
|
if self.format == Format.html:
|
|
677
731
|
return self.body_text()
|
|
678
732
|
elif self.format == Format.plaintext:
|
|
@@ -708,12 +762,10 @@ class Item:
|
|
|
708
762
|
self, update_timestamp: bool = True, **other_updates: Unpack[ItemUpdateOptions]
|
|
709
763
|
) -> Item:
|
|
710
764
|
"""
|
|
711
|
-
Copy item with the given field updates. Resets store_path to None
|
|
712
|
-
created time if requested.
|
|
765
|
+
Copy item with the given field updates. Resets `store_path` to None but preserves
|
|
766
|
+
other fields, including the body. Updates created time if requested.
|
|
713
767
|
"""
|
|
714
|
-
new_fields = self._copy_and_update(
|
|
715
|
-
update_timestamp=update_timestamp, **other_updates
|
|
716
|
-
)
|
|
768
|
+
new_fields = self._copy_and_update(update_timestamp=update_timestamp, **other_updates)
|
|
717
769
|
return Item(**new_fields)
|
|
718
770
|
|
|
719
771
|
def merged_copy(self, other: Item) -> Item:
|
|
@@ -734,7 +786,7 @@ class Item:
|
|
|
734
786
|
if self.relations.derived_from:
|
|
735
787
|
log.message(
|
|
736
788
|
"Deriving from an item that has not been saved so using "
|
|
737
|
-
"
|
|
789
|
+
"upstream derived_from relation: %s on %s",
|
|
738
790
|
self.relations.derived_from,
|
|
739
791
|
self,
|
|
740
792
|
)
|
|
@@ -768,9 +820,7 @@ class Item:
|
|
|
768
820
|
|
|
769
821
|
# Fall back to action title template if we have it and title wasn't explicitly set.
|
|
770
822
|
if "title" not in updates:
|
|
771
|
-
prev_title = self.title or (
|
|
772
|
-
Path(self.store_path).stem if self.store_path else UNTITLED
|
|
773
|
-
)
|
|
823
|
+
prev_title = self.title or (Path(self.store_path).stem if self.store_path else UNTITLED)
|
|
774
824
|
if self.context:
|
|
775
825
|
action = self.context.action
|
|
776
826
|
new_item.title = action.title_template.format(
|