kash-shell 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/actions/__init__.py +4 -4
- kash/actions/core/markdownify.py +5 -2
- kash/actions/core/readability.py +5 -2
- kash/actions/core/render_as_html.py +18 -0
- kash/actions/core/webpage_config.py +12 -4
- kash/commands/__init__.py +8 -20
- kash/commands/base/basic_file_commands.py +15 -0
- kash/commands/base/debug_commands.py +15 -2
- kash/commands/base/general_commands.py +27 -18
- kash/commands/base/logs_commands.py +1 -4
- kash/commands/base/model_commands.py +8 -8
- kash/commands/base/search_command.py +3 -2
- kash/commands/base/show_command.py +5 -3
- kash/commands/extras/parse_uv_lock.py +186 -0
- kash/commands/help/doc_commands.py +2 -31
- kash/commands/help/welcome.py +33 -0
- kash/commands/workspace/selection_commands.py +11 -6
- kash/commands/workspace/workspace_commands.py +19 -16
- kash/config/colors.py +2 -0
- kash/config/env_settings.py +72 -0
- kash/config/init.py +2 -2
- kash/config/logger.py +61 -59
- kash/config/logger_basic.py +12 -5
- kash/config/server_config.py +6 -6
- kash/config/settings.py +117 -67
- kash/config/setup.py +35 -9
- kash/config/suppress_warnings.py +30 -12
- kash/config/text_styles.py +3 -13
- kash/docs/load_api_docs.py +2 -1
- kash/docs/markdown/topics/a2_installation.md +7 -3
- kash/docs/markdown/topics/a3_getting_started.md +3 -2
- kash/docs/markdown/warning.md +3 -8
- kash/docs/markdown/welcome.md +4 -0
- kash/docs_base/load_recipe_snippets.py +1 -1
- kash/docs_base/recipes/{general_system_commands.ksh → general_system_commands.sh} +1 -1
- kash/{concepts → embeddings}/cosine.py +2 -1
- kash/embeddings/text_similarity.py +57 -0
- kash/exec/__init__.py +20 -3
- kash/exec/action_decorators.py +18 -4
- kash/exec/action_exec.py +41 -23
- kash/exec/action_registry.py +13 -48
- kash/exec/command_registry.py +2 -1
- kash/exec/fetch_url_metadata.py +4 -6
- kash/exec/importing.py +56 -0
- kash/exec/llm_transforms.py +6 -6
- kash/exec/precondition_registry.py +2 -1
- kash/exec/preconditions.py +16 -1
- kash/exec/shell_callable_action.py +33 -19
- kash/file_storage/file_store.py +23 -14
- kash/file_storage/item_file_format.py +13 -3
- kash/file_storage/metadata_dirs.py +11 -2
- kash/help/assistant.py +2 -2
- kash/help/assistant_instructions.py +2 -1
- kash/help/help_embeddings.py +2 -2
- kash/help/help_printing.py +14 -10
- kash/help/tldr_help.py +5 -3
- kash/llm_utils/clean_headings.py +1 -1
- kash/llm_utils/llm_api_keys.py +4 -4
- kash/llm_utils/llm_completion.py +2 -2
- kash/llm_utils/llm_features.py +68 -0
- kash/llm_utils/llm_messages.py +1 -2
- kash/llm_utils/llm_names.py +1 -1
- kash/llm_utils/llms.py +17 -12
- kash/local_server/__init__.py +5 -2
- kash/local_server/local_server.py +56 -46
- kash/local_server/local_server_commands.py +15 -15
- kash/local_server/local_server_routes.py +2 -2
- kash/local_server/local_url_formatters.py +1 -1
- kash/mcp/__init__.py +5 -2
- kash/mcp/mcp_cli.py +54 -17
- kash/mcp/mcp_server_commands.py +5 -6
- kash/mcp/mcp_server_routes.py +14 -11
- kash/mcp/mcp_server_sse.py +61 -34
- kash/mcp/mcp_server_stdio.py +0 -8
- kash/media_base/audio_processing.py +81 -7
- kash/media_base/media_cache.py +18 -18
- kash/media_base/media_services.py +1 -1
- kash/media_base/media_tools.py +6 -6
- kash/media_base/services/local_file_media.py +2 -2
- kash/media_base/{speech_transcription.py → transcription_deepgram.py} +25 -109
- kash/media_base/transcription_format.py +73 -0
- kash/media_base/transcription_whisper.py +38 -0
- kash/model/__init__.py +73 -5
- kash/model/actions_model.py +38 -4
- kash/model/concept_model.py +30 -0
- kash/model/items_model.py +56 -13
- kash/model/params_model.py +24 -0
- kash/shell/completions/completion_scoring.py +37 -5
- kash/shell/output/kerm_codes.py +1 -2
- kash/shell/output/shell_formatting.py +14 -4
- kash/shell/shell_main.py +2 -2
- kash/shell/utils/exception_printing.py +6 -0
- kash/shell/utils/native_utils.py +26 -20
- kash/text_handling/custom_sliding_transforms.py +12 -4
- kash/text_handling/doc_normalization.py +6 -2
- kash/text_handling/markdown_render.py +117 -0
- kash/text_handling/markdown_utils.py +204 -0
- kash/utils/common/import_utils.py +12 -3
- kash/utils/common/type_utils.py +0 -29
- kash/utils/common/url.py +80 -28
- kash/utils/errors.py +6 -0
- kash/utils/file_utils/{dir_size.py → dir_info.py} +25 -4
- kash/utils/file_utils/file_ext.py +2 -3
- kash/utils/file_utils/file_formats.py +28 -2
- kash/utils/file_utils/file_formats_model.py +50 -19
- kash/utils/file_utils/filename_parsing.py +10 -4
- kash/web_content/dir_store.py +1 -2
- kash/web_content/file_cache_utils.py +37 -10
- kash/web_content/file_processing.py +68 -0
- kash/web_content/local_file_cache.py +12 -9
- kash/web_content/web_extract.py +8 -3
- kash/web_content/web_fetch.py +12 -4
- kash/web_gen/tabbed_webpage.py +5 -2
- kash/web_gen/templates/base_styles.css.jinja +120 -14
- kash/web_gen/templates/base_webpage.html.jinja +60 -13
- kash/web_gen/templates/content_styles.css.jinja +4 -2
- kash/web_gen/templates/item_view.html.jinja +2 -2
- kash/web_gen/templates/tabbed_webpage.html.jinja +1 -2
- kash/workspaces/__init__.py +15 -2
- kash/workspaces/selections.py +18 -3
- kash/workspaces/source_items.py +4 -2
- kash/workspaces/workspace_output.py +11 -4
- kash/workspaces/workspaces.py +5 -11
- kash/xonsh_custom/command_nl_utils.py +40 -19
- kash/xonsh_custom/custom_shell.py +44 -12
- kash/xonsh_custom/customize_prompt.py +39 -21
- kash/xonsh_custom/load_into_xonsh.py +26 -27
- kash/xonsh_custom/shell_load_commands.py +2 -2
- kash/xonsh_custom/xonsh_completers.py +2 -249
- kash/xonsh_custom/xonsh_keybindings.py +282 -0
- kash/xonsh_custom/xonsh_modern_tools.py +3 -3
- kash/xontrib/kash_extension.py +5 -6
- {kash_shell-0.3.8.dist-info → kash_shell-0.3.10.dist-info}/METADATA +26 -12
- {kash_shell-0.3.8.dist-info → kash_shell-0.3.10.dist-info}/RECORD +140 -140
- {kash_shell-0.3.8.dist-info → kash_shell-0.3.10.dist-info}/entry_points.txt +1 -1
- kash/concepts/concept_formats.py +0 -23
- kash/concepts/text_similarity.py +0 -112
- kash/shell/clideps/api_keys.py +0 -99
- kash/shell/clideps/dotenv_setup.py +0 -114
- kash/shell/clideps/dotenv_utils.py +0 -89
- kash/shell/clideps/pkg_deps.py +0 -232
- kash/shell/clideps/platforms.py +0 -11
- kash/shell/clideps/terminal_features.py +0 -56
- kash/shell/utils/osc_utils.py +0 -95
- kash/shell/utils/terminal_images.py +0 -133
- kash/text_handling/markdown_util.py +0 -167
- kash/utils/common/atomic_var.py +0 -158
- kash/utils/common/string_replace.py +0 -93
- kash/utils/common/string_template.py +0 -101
- /kash/docs_base/recipes/{python_dev_commands.ksh → python_dev_commands.sh} +0 -0
- /kash/docs_base/recipes/{tldr_standard_commands.ksh → tldr_standard_commands.sh} +0 -0
- /kash/{concepts → embeddings}/embeddings.py +0 -0
- {kash_shell-0.3.8.dist-info → kash_shell-0.3.10.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.8.dist-info → kash_shell-0.3.10.dist-info}/licenses/LICENSE +0 -0
kash/media_base/media_cache.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
-
from prettyfmt import fmt_lines
|
|
4
|
+
from prettyfmt import fmt_lines, fmt_path
|
|
5
5
|
from strif import atomic_output_file
|
|
6
6
|
|
|
7
7
|
from kash.config.logger import get_logger
|
|
@@ -11,7 +11,7 @@ from kash.media_base.media_services import (
|
|
|
11
11
|
download_media_by_service,
|
|
12
12
|
get_media_services,
|
|
13
13
|
)
|
|
14
|
-
from kash.media_base.
|
|
14
|
+
from kash.media_base.transcription_deepgram import deepgram_transcribe_audio
|
|
15
15
|
from kash.utils.common.format_utils import fmt_loc
|
|
16
16
|
from kash.utils.common.url import Url, as_file_url, is_url
|
|
17
17
|
from kash.utils.errors import FileNotFound, InvalidInput, UnexpectedError
|
|
@@ -48,12 +48,12 @@ class MediaCache(DirStore):
|
|
|
48
48
|
with atomic_output_file(transcript_path) as temp_output:
|
|
49
49
|
with open(temp_output, "w") as f:
|
|
50
50
|
f.write(content)
|
|
51
|
-
log.message("Transcript saved to cache: %s",
|
|
51
|
+
log.message("Transcript saved to cache: %s", fmt_path(transcript_path))
|
|
52
52
|
|
|
53
53
|
def _read_transcript(self, url: Url) -> str | None:
|
|
54
54
|
transcript_file = self.find(url, suffix=SUFFIX_TRANSCRIPT)
|
|
55
55
|
if transcript_file:
|
|
56
|
-
log.message("Video transcript already in cache: %s: %s", url,
|
|
56
|
+
log.message("Video transcript already in cache: %s: %s", url, fmt_path(transcript_file))
|
|
57
57
|
with open(transcript_file) as f:
|
|
58
58
|
return f.read()
|
|
59
59
|
return None
|
|
@@ -67,8 +67,8 @@ class MediaCache(DirStore):
|
|
|
67
67
|
downsampled_audio_file = self.path_for(url, suffix=SUFFIX_16KMP3)
|
|
68
68
|
log.message(
|
|
69
69
|
"Downsampling audio: %s -> %s",
|
|
70
|
-
|
|
71
|
-
|
|
70
|
+
fmt_path(full_audio_file),
|
|
71
|
+
fmt_path(downsampled_audio_file),
|
|
72
72
|
)
|
|
73
73
|
downsample_to_16khz(full_audio_file, downsampled_audio_file)
|
|
74
74
|
return downsampled_audio_file
|
|
@@ -81,35 +81,35 @@ class MediaCache(DirStore):
|
|
|
81
81
|
log.message(
|
|
82
82
|
"Transcribing audio: %s: %s",
|
|
83
83
|
url,
|
|
84
|
-
|
|
84
|
+
fmt_path(downsampled_audio_file),
|
|
85
85
|
)
|
|
86
86
|
transcript = transcribe_audio(downsampled_audio_file, language=language)
|
|
87
87
|
self._write_transcript(url, transcript)
|
|
88
88
|
return transcript
|
|
89
89
|
|
|
90
90
|
def cache(
|
|
91
|
-
self, url: Url,
|
|
91
|
+
self, url: Url, refetch=False, media_types: list[MediaType] | None = None
|
|
92
92
|
) -> dict[MediaType, Path]:
|
|
93
93
|
"""
|
|
94
94
|
Cache the media files for the given media URL. Returns paths to cached copies
|
|
95
95
|
for each media type (video or audio). Returns cached copies if available,
|
|
96
|
-
unless `
|
|
96
|
+
unless `refetch` is True.
|
|
97
97
|
"""
|
|
98
98
|
cached_paths: dict[MediaType, Path] = {}
|
|
99
99
|
|
|
100
100
|
if not media_types:
|
|
101
101
|
media_types = [MediaType.audio, MediaType.video]
|
|
102
102
|
|
|
103
|
-
if not
|
|
103
|
+
if not refetch:
|
|
104
104
|
if MediaType.audio in media_types:
|
|
105
105
|
audio_file = self.find(url, suffix=SUFFIX_MP3)
|
|
106
106
|
if audio_file:
|
|
107
|
-
log.message("Audio already in cache: %s: %s", url,
|
|
107
|
+
log.message("Audio already in cache: %s: %s", url, fmt_path(audio_file))
|
|
108
108
|
cached_paths[MediaType.audio] = audio_file
|
|
109
109
|
if MediaType.video in media_types:
|
|
110
110
|
video_file = self.find(url, suffix=SUFFIX_MP4)
|
|
111
111
|
if video_file:
|
|
112
|
-
log.message("Video already in cache: %s: %s", url,
|
|
112
|
+
log.message("Video already in cache: %s: %s", url, fmt_path(video_file))
|
|
113
113
|
cached_paths[MediaType.video] = video_file
|
|
114
114
|
if set(media_types).issubset(cached_paths.keys()):
|
|
115
115
|
return cached_paths
|
|
@@ -133,7 +133,7 @@ class MediaCache(DirStore):
|
|
|
133
133
|
|
|
134
134
|
log.message(
|
|
135
135
|
"Downloaded media and saved to cache:\n%s",
|
|
136
|
-
fmt_lines([f"{t.name}: {
|
|
136
|
+
fmt_lines([f"{t.name}: {fmt_path(p)}" for (t, p) in cached_paths.items()]),
|
|
137
137
|
)
|
|
138
138
|
|
|
139
139
|
self._downsample_audio(url)
|
|
@@ -141,11 +141,11 @@ class MediaCache(DirStore):
|
|
|
141
141
|
return cached_paths
|
|
142
142
|
|
|
143
143
|
def transcribe(
|
|
144
|
-
self, url_or_path: Url | Path,
|
|
144
|
+
self, url_or_path: Url | Path, refetch=False, language: str | None = None
|
|
145
145
|
) -> str:
|
|
146
146
|
"""
|
|
147
147
|
Transcribe the audio file, caching audio, downsampled audio, and the transcription.
|
|
148
|
-
Return the cached transcript if available, unless `
|
|
148
|
+
Return the cached transcript if available, unless `refetch` is True.
|
|
149
149
|
"""
|
|
150
150
|
if not isinstance(url_or_path, Path) and is_url(url_or_path):
|
|
151
151
|
# If it is a URL, cache it locally.
|
|
@@ -156,12 +156,12 @@ class MediaCache(DirStore):
|
|
|
156
156
|
raise InvalidInput(
|
|
157
157
|
"Unrecognized media URL (is this media service configured?): %s" % url_or_path
|
|
158
158
|
)
|
|
159
|
-
if not
|
|
159
|
+
if not refetch:
|
|
160
160
|
transcript = self._read_transcript(url)
|
|
161
161
|
if transcript:
|
|
162
162
|
return transcript
|
|
163
163
|
# Cache all formats since we usually will want them.
|
|
164
|
-
self.cache(url,
|
|
164
|
+
self.cache(url, refetch)
|
|
165
165
|
elif isinstance(url_or_path, Path):
|
|
166
166
|
# Treat local media files as file:// URLs.
|
|
167
167
|
# Don't need to cache originals but we still will cache audio and transcriptions.
|
|
@@ -169,7 +169,7 @@ class MediaCache(DirStore):
|
|
|
169
169
|
raise FileNotFound(f"File not found: {fmt_loc(url_or_path)}")
|
|
170
170
|
url = as_file_url(url_or_path)
|
|
171
171
|
else:
|
|
172
|
-
raise InvalidInput(f"Not a media URL or path: {url_or_path}")
|
|
172
|
+
raise InvalidInput(f"Not a media URL or path: {fmt_loc(url_or_path)}")
|
|
173
173
|
|
|
174
174
|
# Now do the transcription.
|
|
175
175
|
transcript = self._do_transcription(url, language=language)
|
|
@@ -2,10 +2,10 @@ import logging
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
4
|
from funlog import log_calls
|
|
5
|
+
from strif import AtomicVar
|
|
5
6
|
|
|
6
7
|
from kash.media_base.services.local_file_media import LocalFileMedia
|
|
7
8
|
from kash.model.media_model import MediaMetadata, MediaService
|
|
8
|
-
from kash.utils.common.atomic_var import AtomicVar
|
|
9
9
|
from kash.utils.common.url import Url
|
|
10
10
|
from kash.utils.errors import InvalidInput
|
|
11
11
|
from kash.utils.file_utils.file_formats_model import MediaType
|
kash/media_base/media_tools.py
CHANGED
|
@@ -28,20 +28,20 @@ def reset_media_cache_dir(path: Path):
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
def cache_and_transcribe(
|
|
31
|
-
url_or_path: Url | Path,
|
|
31
|
+
url_or_path: Url | Path, refetch=False, language: str | None = None
|
|
32
32
|
) -> str:
|
|
33
33
|
"""
|
|
34
|
-
Download and transcribe audio or video, saving in cache. If
|
|
34
|
+
Download and transcribe audio or video, saving in cache. If `refetch` is
|
|
35
35
|
True, force fresh download.
|
|
36
36
|
"""
|
|
37
|
-
return _media_cache.transcribe(url_or_path,
|
|
37
|
+
return _media_cache.transcribe(url_or_path, refetch=refetch, language=language)
|
|
38
38
|
|
|
39
39
|
|
|
40
40
|
def cache_media(
|
|
41
|
-
url: Url,
|
|
41
|
+
url: Url, refetch=False, media_types: list[MediaType] | None = None
|
|
42
42
|
) -> dict[MediaType, Path]:
|
|
43
43
|
"""
|
|
44
|
-
Download audio and video (if available), saving in cache. If
|
|
44
|
+
Download audio and video (if available), saving in cache. If refetch is
|
|
45
45
|
True, force fresh download.
|
|
46
46
|
"""
|
|
47
|
-
return _media_cache.cache(url,
|
|
47
|
+
return _media_cache.cache(url, refetch, media_types)
|
|
@@ -4,13 +4,13 @@ import subprocess # Add this import
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from urllib.parse import urlparse
|
|
6
6
|
|
|
7
|
+
from clideps.pkgs.pkg_check import pkg_check
|
|
7
8
|
from strif import copyfile_atomic
|
|
8
9
|
from typing_extensions import override
|
|
9
10
|
|
|
10
11
|
from kash.config.logger import get_log_file_stream, get_logger
|
|
11
12
|
from kash.file_storage.store_filenames import parse_item_filename
|
|
12
13
|
from kash.model.media_model import MediaMetadata, MediaService, MediaUrlType
|
|
13
|
-
from kash.shell.clideps.pkg_deps import Pkg, pkg_check
|
|
14
14
|
from kash.utils.common.format_utils import fmt_loc
|
|
15
15
|
from kash.utils.common.url import Url
|
|
16
16
|
from kash.utils.errors import FileNotFound, InvalidInput
|
|
@@ -20,7 +20,7 @@ log = get_logger(__name__)
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def _run_ffmpeg(cmdline: list[str]) -> None:
|
|
23
|
-
pkg_check().require(
|
|
23
|
+
pkg_check().require("ffmpeg")
|
|
24
24
|
log.message("Running: %s", " ".join([shlex.quote(arg) for arg in cmdline]))
|
|
25
25
|
subprocess.run(
|
|
26
26
|
cmdline,
|
|
@@ -1,60 +1,23 @@
|
|
|
1
1
|
from os.path import getsize
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from typing import NamedTuple
|
|
4
3
|
|
|
4
|
+
from clideps.env_vars.dotenv_utils import load_dotenv_paths
|
|
5
|
+
from deepgram import ListenRESTClient, PrerecordedResponse
|
|
5
6
|
from httpx import Timeout
|
|
6
|
-
from openai import OpenAI
|
|
7
7
|
|
|
8
8
|
from kash.config.logger import CustomLogger, get_logger
|
|
9
|
-
from kash.
|
|
10
|
-
from kash.
|
|
11
|
-
from kash.utils.errors import ContentError
|
|
9
|
+
from kash.config.settings import global_settings
|
|
10
|
+
from kash.media_base.transcription_format import SpeakerSegment, format_speaker_segments
|
|
11
|
+
from kash.utils.errors import ApiError, ContentError
|
|
12
12
|
|
|
13
13
|
log: CustomLogger = get_logger(__name__)
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
def
|
|
16
|
+
def deepgram_transcribe_raw(
|
|
17
|
+
audio_file_path: Path, language: str | None = None
|
|
18
|
+
) -> PrerecordedResponse:
|
|
17
19
|
"""
|
|
18
|
-
Transcribe an audio file
|
|
19
|
-
OpenAI's version does not support diarization and must be under 25MB.
|
|
20
|
-
|
|
21
|
-
https://help.openai.com/en/articles/7031512-whisper-api-faq
|
|
22
|
-
"""
|
|
23
|
-
WHISPER_MAX_SIZE = 25 * 1024 * 1024
|
|
24
|
-
|
|
25
|
-
size = getsize(audio_file_path)
|
|
26
|
-
if size > WHISPER_MAX_SIZE:
|
|
27
|
-
raise ValueError("Audio file too large for Whisper (%s > %s)" % (size, WHISPER_MAX_SIZE))
|
|
28
|
-
log.info(
|
|
29
|
-
"Transcribing via Whisper: %s (size %s)",
|
|
30
|
-
audio_file_path,
|
|
31
|
-
size,
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
client = OpenAI()
|
|
35
|
-
with open(audio_file_path, "rb") as audio_file:
|
|
36
|
-
transcription = client.audio.transcriptions.create(
|
|
37
|
-
model="whisper-1",
|
|
38
|
-
file=audio_file,
|
|
39
|
-
# For when we want timestamps:
|
|
40
|
-
# response_format="verbose_json",
|
|
41
|
-
# timestamp_granularities=["word"]
|
|
42
|
-
)
|
|
43
|
-
text = transcription.text
|
|
44
|
-
return text
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
class SpeakerSegment(NamedTuple):
|
|
48
|
-
words: list[tuple[float, str]]
|
|
49
|
-
start: float
|
|
50
|
-
end: float
|
|
51
|
-
speaker: int
|
|
52
|
-
average_confidence: float
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def deepgram_transcribe_audio(audio_file_path: Path, language: str | None = None) -> str:
|
|
56
|
-
"""
|
|
57
|
-
Transcribe an audio file using Deepgram.
|
|
20
|
+
Transcribe an audio file using Deepgram and return the raw response.
|
|
58
21
|
"""
|
|
59
22
|
from deepgram import ClientOptionsFromEnv, DeepgramClient, FileSource, PrerecordedOptions
|
|
60
23
|
|
|
@@ -63,7 +26,7 @@ def deepgram_transcribe_audio(audio_file_path: Path, language: str | None = None
|
|
|
63
26
|
"Transcribing via Deepgram (language %r): %s (size %s)", language, audio_file_path, size
|
|
64
27
|
)
|
|
65
28
|
|
|
66
|
-
load_dotenv_paths()
|
|
29
|
+
load_dotenv_paths(True, True, global_settings().system_config_dir)
|
|
67
30
|
deepgram = DeepgramClient("", ClientOptionsFromEnv())
|
|
68
31
|
|
|
69
32
|
with open(audio_file_path, "rb") as audio_file:
|
|
@@ -74,7 +37,17 @@ def deepgram_transcribe_audio(audio_file_path: Path, language: str | None = None
|
|
|
74
37
|
}
|
|
75
38
|
|
|
76
39
|
options = PrerecordedOptions(model="nova-2", smart_format=True, diarize=True, language=language)
|
|
77
|
-
|
|
40
|
+
client: ListenRESTClient = deepgram.listen.rest.v("1") # pyright: ignore
|
|
41
|
+
|
|
42
|
+
response = client.transcribe_file(payload, options, timeout=Timeout(500))
|
|
43
|
+
if not isinstance(response, PrerecordedResponse):
|
|
44
|
+
raise ApiError("Deepgram returned an unexpected response type")
|
|
45
|
+
|
|
46
|
+
return response
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def deepgram_transcribe_audio(audio_file_path: Path, language: str | None = None) -> str:
|
|
50
|
+
response = deepgram_transcribe_raw(audio_file_path, language)
|
|
78
51
|
|
|
79
52
|
log.save_object("Deepgram response", None, response)
|
|
80
53
|
|
|
@@ -86,13 +59,15 @@ def deepgram_transcribe_audio(audio_file_path: Path, language: str | None = None
|
|
|
86
59
|
f"No speaker segments found in Deepgram response (are voices silent or missing?): {audio_file_path}"
|
|
87
60
|
)
|
|
88
61
|
|
|
89
|
-
formatted_segments = format_speaker_segments(diarized_segments)
|
|
62
|
+
formatted_segments = format_speaker_segments(diarized_segments) # noqa: F821
|
|
90
63
|
|
|
91
64
|
return formatted_segments
|
|
92
65
|
|
|
93
66
|
|
|
94
67
|
def _deepgram_diarized_segments(data, confidence_threshold=0.3) -> list[SpeakerSegment]:
|
|
95
|
-
"""
|
|
68
|
+
"""
|
|
69
|
+
Process Deepgram diarized results into text segments per speaker.
|
|
70
|
+
"""
|
|
96
71
|
|
|
97
72
|
speaker_segments: list[SpeakerSegment] = []
|
|
98
73
|
current_speaker = 0
|
|
@@ -163,62 +138,3 @@ def _deepgram_diarized_segments(data, confidence_threshold=0.3) -> list[SpeakerS
|
|
|
163
138
|
)
|
|
164
139
|
|
|
165
140
|
return speaker_segments
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
def _is_new_sentence(word: str, next_word: str | None) -> bool:
|
|
169
|
-
return (
|
|
170
|
-
(word.endswith(".") or word.endswith("?") or word.endswith("!"))
|
|
171
|
-
and next_word is not None
|
|
172
|
-
and next_word[0].isupper()
|
|
173
|
-
)
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
def _format_words(words: list[tuple[float, str]], include_sentence_timestamps=True) -> str:
|
|
177
|
-
"""Format words with timestamps added in spans."""
|
|
178
|
-
|
|
179
|
-
if not words:
|
|
180
|
-
return ""
|
|
181
|
-
|
|
182
|
-
sentences = []
|
|
183
|
-
current_sentence = []
|
|
184
|
-
for i, (timestamp, word) in enumerate(words):
|
|
185
|
-
current_sentence.append(word)
|
|
186
|
-
next_word = words[i + 1][1] if i + 1 < len(words) else None
|
|
187
|
-
if _is_new_sentence(word, next_word):
|
|
188
|
-
sentences.append((timestamp, current_sentence))
|
|
189
|
-
current_sentence = []
|
|
190
|
-
|
|
191
|
-
if current_sentence:
|
|
192
|
-
sentences.append((words[-1][0], current_sentence))
|
|
193
|
-
|
|
194
|
-
formatted_text = []
|
|
195
|
-
for timestamp, sentence in sentences:
|
|
196
|
-
formatted_sentence = " ".join(sentence)
|
|
197
|
-
if include_sentence_timestamps:
|
|
198
|
-
formatted_text.append(html_timestamp_span(formatted_sentence, timestamp))
|
|
199
|
-
else:
|
|
200
|
-
formatted_text.append(formatted_sentence)
|
|
201
|
-
|
|
202
|
-
return "\n".join(formatted_text)
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
def format_speaker_segments(speaker_segments: list[SpeakerSegment]) -> str:
|
|
206
|
-
"""
|
|
207
|
-
Format speaker segments in a simple HTML format with <span> tags including speaker
|
|
208
|
-
ids and timestamps.
|
|
209
|
-
"""
|
|
210
|
-
|
|
211
|
-
# Use \n\n for readability between segments so each speaker is its own
|
|
212
|
-
# paragraph.
|
|
213
|
-
SEGMENT_SEP = "\n\n"
|
|
214
|
-
|
|
215
|
-
speakers = set(segment.speaker for segment in speaker_segments)
|
|
216
|
-
if len(speakers) > 1:
|
|
217
|
-
lines = []
|
|
218
|
-
for segment in speaker_segments:
|
|
219
|
-
lines.append(
|
|
220
|
-
f"{html_speaker_id_span(f'SPEAKER {segment.speaker}:', str(segment.speaker))}\n{_format_words(segment.words)}"
|
|
221
|
-
)
|
|
222
|
-
return SEGMENT_SEP.join(lines)
|
|
223
|
-
else:
|
|
224
|
-
return SEGMENT_SEP.join(_format_words(segment.words) for segment in speaker_segments)
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from typing import NamedTuple
|
|
2
|
+
|
|
3
|
+
from kash.config.logger import CustomLogger, get_logger
|
|
4
|
+
from kash.media_base.timestamp_citations import html_speaker_id_span, html_timestamp_span
|
|
5
|
+
|
|
6
|
+
log: CustomLogger = get_logger(__name__)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _is_new_sentence(word: str, next_word: str | None) -> bool:
|
|
10
|
+
return (
|
|
11
|
+
(word.endswith(".") or word.endswith("?") or word.endswith("!"))
|
|
12
|
+
and next_word is not None
|
|
13
|
+
and next_word[0].isupper()
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _format_words(words: list[tuple[float, str]], include_sentence_timestamps=True) -> str:
|
|
18
|
+
"""Format words with timestamps added in spans."""
|
|
19
|
+
|
|
20
|
+
if not words:
|
|
21
|
+
return ""
|
|
22
|
+
|
|
23
|
+
sentences = []
|
|
24
|
+
current_sentence = []
|
|
25
|
+
for i, (timestamp, word) in enumerate(words):
|
|
26
|
+
current_sentence.append(word)
|
|
27
|
+
next_word = words[i + 1][1] if i + 1 < len(words) else None
|
|
28
|
+
if _is_new_sentence(word, next_word):
|
|
29
|
+
sentences.append((timestamp, current_sentence))
|
|
30
|
+
current_sentence = []
|
|
31
|
+
|
|
32
|
+
if current_sentence:
|
|
33
|
+
sentences.append((words[-1][0], current_sentence))
|
|
34
|
+
|
|
35
|
+
formatted_text = []
|
|
36
|
+
for timestamp, sentence in sentences:
|
|
37
|
+
formatted_sentence = " ".join(sentence)
|
|
38
|
+
if include_sentence_timestamps:
|
|
39
|
+
formatted_text.append(html_timestamp_span(formatted_sentence, timestamp))
|
|
40
|
+
else:
|
|
41
|
+
formatted_text.append(formatted_sentence)
|
|
42
|
+
|
|
43
|
+
return "\n".join(formatted_text)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class SpeakerSegment(NamedTuple):
|
|
47
|
+
words: list[tuple[float, str]]
|
|
48
|
+
start: float
|
|
49
|
+
end: float
|
|
50
|
+
speaker: int
|
|
51
|
+
average_confidence: float
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def format_speaker_segments(speaker_segments: list[SpeakerSegment]) -> str:
|
|
55
|
+
"""
|
|
56
|
+
Format speaker segments in a simple HTML format with <span> tags including speaker
|
|
57
|
+
ids and timestamps.
|
|
58
|
+
"""
|
|
59
|
+
|
|
60
|
+
# Use \n\n for readability between segments so each speaker is its own
|
|
61
|
+
# paragraph.
|
|
62
|
+
SEGMENT_SEP = "\n\n"
|
|
63
|
+
|
|
64
|
+
speakers = set(segment.speaker for segment in speaker_segments)
|
|
65
|
+
if len(speakers) > 1:
|
|
66
|
+
lines = []
|
|
67
|
+
for segment in speaker_segments:
|
|
68
|
+
lines.append(
|
|
69
|
+
f"{html_speaker_id_span(f'SPEAKER {segment.speaker}:', str(segment.speaker))}\n{_format_words(segment.words)}"
|
|
70
|
+
)
|
|
71
|
+
return SEGMENT_SEP.join(lines)
|
|
72
|
+
else:
|
|
73
|
+
return SEGMENT_SEP.join(_format_words(segment.words) for segment in speaker_segments)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from os.path import getsize
|
|
2
|
+
|
|
3
|
+
from openai import OpenAI
|
|
4
|
+
|
|
5
|
+
from kash.config.logger import CustomLogger, get_logger
|
|
6
|
+
|
|
7
|
+
log: CustomLogger = get_logger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def openai_whisper_transcribe_audio_small(audio_file_path: str) -> str:
|
|
11
|
+
"""
|
|
12
|
+
Transcribe an audio file. Whisper is very good quality but (as of 2024-05)
|
|
13
|
+
OpenAI's version does not support diarization and must be under 25MB.
|
|
14
|
+
|
|
15
|
+
https://help.openai.com/en/articles/7031512-whisper-api-faq
|
|
16
|
+
"""
|
|
17
|
+
WHISPER_MAX_SIZE = 25 * 1024 * 1024
|
|
18
|
+
|
|
19
|
+
size = getsize(audio_file_path)
|
|
20
|
+
if size > WHISPER_MAX_SIZE:
|
|
21
|
+
raise ValueError("Audio file too large for Whisper (%s > %s)" % (size, WHISPER_MAX_SIZE))
|
|
22
|
+
log.info(
|
|
23
|
+
"Transcribing via Whisper: %s (size %s)",
|
|
24
|
+
audio_file_path,
|
|
25
|
+
size,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
client = OpenAI()
|
|
29
|
+
with open(audio_file_path, "rb") as audio_file:
|
|
30
|
+
transcription = client.audio.transcriptions.create(
|
|
31
|
+
model="whisper-1",
|
|
32
|
+
file=audio_file,
|
|
33
|
+
# For when we want timestamps:
|
|
34
|
+
# response_format="verbose_json",
|
|
35
|
+
# timestamp_granularities=["word"]
|
|
36
|
+
)
|
|
37
|
+
text = transcription.text
|
|
38
|
+
return text
|
kash/model/__init__.py
CHANGED
|
@@ -1,11 +1,7 @@
|
|
|
1
1
|
"""
|
|
2
2
|
The core classes for modeling kash's framework.
|
|
3
|
-
|
|
4
|
-
We include essential logic here but try to keep logic and dependencies minimal.
|
|
5
3
|
"""
|
|
6
4
|
|
|
7
|
-
# flake8: noqa: F401
|
|
8
|
-
|
|
9
5
|
from kash.exec_model.args_model import (
|
|
10
6
|
ANY_ARGS,
|
|
11
7
|
NO_ARGS,
|
|
@@ -31,7 +27,12 @@ from kash.model.actions_model import (
|
|
|
31
27
|
PerItemAction,
|
|
32
28
|
TitleTemplate,
|
|
33
29
|
)
|
|
34
|
-
from kash.model.compound_actions_model import
|
|
30
|
+
from kash.model.compound_actions_model import (
|
|
31
|
+
ComboAction,
|
|
32
|
+
SequenceAction,
|
|
33
|
+
look_up_actions,
|
|
34
|
+
)
|
|
35
|
+
from kash.model.concept_model import Concept, canonicalize_concept, normalize_concepts
|
|
35
36
|
from kash.model.graph_model import GraphData, Link, Node
|
|
36
37
|
from kash.model.items_model import (
|
|
37
38
|
SLUG_MAX_LEN,
|
|
@@ -69,3 +70,70 @@ from kash.model.paths_model import StorePath
|
|
|
69
70
|
from kash.model.preconditions_model import Precondition
|
|
70
71
|
from kash.utils.common.format_utils import fmt_loc
|
|
71
72
|
from kash.utils.file_utils.file_formats_model import FileExt, Format, MediaType
|
|
73
|
+
|
|
74
|
+
__all__ = [
|
|
75
|
+
"ANY_ARGS",
|
|
76
|
+
"NO_ARGS",
|
|
77
|
+
"ONE_ARG",
|
|
78
|
+
"ONE_OR_MORE_ARGS",
|
|
79
|
+
"ONE_OR_NO_ARGS",
|
|
80
|
+
"TWO_ARGS",
|
|
81
|
+
"TWO_OR_MORE_ARGS",
|
|
82
|
+
"ArgCount",
|
|
83
|
+
"CommandArg",
|
|
84
|
+
"Command",
|
|
85
|
+
"CommentedCommand",
|
|
86
|
+
"BareComment",
|
|
87
|
+
"Script",
|
|
88
|
+
"ShellResult",
|
|
89
|
+
"Action",
|
|
90
|
+
"ActionInput",
|
|
91
|
+
"ActionResult",
|
|
92
|
+
"ExecContext",
|
|
93
|
+
"LLMOptions",
|
|
94
|
+
"PathOp",
|
|
95
|
+
"PathOpType",
|
|
96
|
+
"PerItemAction",
|
|
97
|
+
"TitleTemplate",
|
|
98
|
+
"ComboAction",
|
|
99
|
+
"SequenceAction",
|
|
100
|
+
"look_up_actions",
|
|
101
|
+
"Concept",
|
|
102
|
+
"canonicalize_concept",
|
|
103
|
+
"normalize_concepts",
|
|
104
|
+
"GraphData",
|
|
105
|
+
"Link",
|
|
106
|
+
"Node",
|
|
107
|
+
"SLUG_MAX_LEN",
|
|
108
|
+
"UNTITLED",
|
|
109
|
+
"IdType",
|
|
110
|
+
"Item",
|
|
111
|
+
"ItemId",
|
|
112
|
+
"ItemRelations",
|
|
113
|
+
"ItemType",
|
|
114
|
+
"State",
|
|
115
|
+
"SERVICE_APPLE_PODCASTS",
|
|
116
|
+
"SERVICE_VIMEO",
|
|
117
|
+
"SERVICE_YOUTUBE",
|
|
118
|
+
"HeatmapValue",
|
|
119
|
+
"MediaMetadata",
|
|
120
|
+
"MediaService",
|
|
121
|
+
"MediaUrlType",
|
|
122
|
+
"ALL_COMMON_PARAMS",
|
|
123
|
+
"COMMON_ACTION_PARAMS",
|
|
124
|
+
"GLOBAL_PARAMS",
|
|
125
|
+
"RUNTIME_ACTION_PARAMS",
|
|
126
|
+
"USER_SETTABLE_PARAMS",
|
|
127
|
+
"Param",
|
|
128
|
+
"ParamDeclarations",
|
|
129
|
+
"RawParamValues",
|
|
130
|
+
"TypedParamValues",
|
|
131
|
+
"common_param",
|
|
132
|
+
"common_params",
|
|
133
|
+
"StorePath",
|
|
134
|
+
"Precondition",
|
|
135
|
+
"fmt_loc",
|
|
136
|
+
"FileExt",
|
|
137
|
+
"Format",
|
|
138
|
+
"MediaType",
|
|
139
|
+
]
|
kash/model/actions_model.py
CHANGED
|
@@ -9,12 +9,12 @@ from textwrap import dedent
|
|
|
9
9
|
from typing import Any, TypeVar, cast
|
|
10
10
|
|
|
11
11
|
from chopdiff.docs import DiffFilter
|
|
12
|
-
from chopdiff.docs.token_diffs import DIFF_FILTER_NONE
|
|
13
12
|
from chopdiff.transforms import WINDOW_NONE, WindowSettings
|
|
14
13
|
from flowmark import fill_text
|
|
15
14
|
from prettyfmt import abbrev_obj, fmt_lines
|
|
16
15
|
from pydantic.dataclasses import dataclass, rebuild_dataclass
|
|
17
16
|
from pydantic.json_schema import JsonSchemaValue
|
|
17
|
+
from strif import StringTemplate
|
|
18
18
|
from typing_extensions import override
|
|
19
19
|
|
|
20
20
|
from kash.config.logger import get_logger
|
|
@@ -27,13 +27,15 @@ from kash.model.items_model import UNTITLED, Item, ItemType, State
|
|
|
27
27
|
from kash.model.operations_model import Operation, Source
|
|
28
28
|
from kash.model.params_model import (
|
|
29
29
|
ALL_COMMON_PARAMS,
|
|
30
|
+
COMMON_SHELL_PARAMS,
|
|
31
|
+
RUNTIME_ACTION_PARAMS,
|
|
32
|
+
Param,
|
|
30
33
|
ParamDeclarations,
|
|
31
34
|
TypedParamValues,
|
|
32
35
|
)
|
|
33
36
|
from kash.model.paths_model import StorePath
|
|
34
37
|
from kash.model.preconditions_model import Precondition
|
|
35
38
|
from kash.utils.common.parse_key_vals import format_key_value
|
|
36
|
-
from kash.utils.common.string_template import StringTemplate
|
|
37
39
|
from kash.utils.common.type_utils import not_none
|
|
38
40
|
from kash.utils.errors import InvalidDefinition, InvalidInput
|
|
39
41
|
from kash.workspaces.workspaces import get_ws
|
|
@@ -65,7 +67,8 @@ class ActionInput:
|
|
|
65
67
|
@dataclass(frozen=True)
|
|
66
68
|
class ExecContext:
|
|
67
69
|
"""
|
|
68
|
-
An action and its context for execution.
|
|
70
|
+
An action and its context for execution. This is a good place for settings
|
|
71
|
+
that apply to any action and are bothersome to pass as parameters.
|
|
69
72
|
"""
|
|
70
73
|
|
|
71
74
|
action: Action
|
|
@@ -77,13 +80,33 @@ class ExecContext:
|
|
|
77
80
|
rerun: bool = False
|
|
78
81
|
"""If True, always run actions, even cacheable ones that have results."""
|
|
79
82
|
|
|
83
|
+
refetch: bool = False
|
|
84
|
+
"""If True, will refetch items even if they are already in the content caches."""
|
|
85
|
+
|
|
80
86
|
override_state: State | None = None
|
|
81
87
|
"""If specified, override the state of result items. Useful to mark items as transient."""
|
|
82
88
|
|
|
89
|
+
tmp_output: bool = False
|
|
90
|
+
"""If True, will save output items to a temporary file."""
|
|
91
|
+
|
|
92
|
+
no_format: bool = False
|
|
93
|
+
"""If True, will not normalize the output item's body text formatting (for Markdown)."""
|
|
94
|
+
|
|
83
95
|
@property
|
|
84
96
|
def workspace(self) -> FileStore:
|
|
85
97
|
return get_ws(self.workspace_dir)
|
|
86
98
|
|
|
99
|
+
@property
|
|
100
|
+
def runtime_options(self) -> dict[str, str]:
|
|
101
|
+
"""Return non-default runtime options."""
|
|
102
|
+
opts: dict[str, str] = {}
|
|
103
|
+
# Only these two settings directly affect the output:
|
|
104
|
+
if self.no_format:
|
|
105
|
+
opts["no_format"] = "true"
|
|
106
|
+
if self.override_state:
|
|
107
|
+
opts["override_state"] = self.override_state.name
|
|
108
|
+
return opts
|
|
109
|
+
|
|
87
110
|
def __repr__(self):
|
|
88
111
|
return abbrev_obj(self, field_max_len=80)
|
|
89
112
|
|
|
@@ -175,7 +198,7 @@ class LLMOptions:
|
|
|
175
198
|
system_message: Message = Message("")
|
|
176
199
|
body_template: MessageTemplate = MessageTemplate("{body}")
|
|
177
200
|
windowing: WindowSettings = WINDOW_NONE
|
|
178
|
-
diff_filter: DiffFilter =
|
|
201
|
+
diff_filter: DiffFilter | None = None
|
|
179
202
|
|
|
180
203
|
def updated_with(self, param_name: str, value: Any) -> LLMOptions:
|
|
181
204
|
"""Update option from an action parameter."""
|
|
@@ -409,6 +432,17 @@ class Action(ABC):
|
|
|
409
432
|
# Update corresponding LLM option if appropriate.
|
|
410
433
|
self.llm_options = self.llm_options.updated_with(param_name, value)
|
|
411
434
|
|
|
435
|
+
@property
|
|
436
|
+
def shell_params(self) -> list[Param]:
|
|
437
|
+
"""
|
|
438
|
+
List of parameters that are relevant to shell usage.
|
|
439
|
+
"""
|
|
440
|
+
return (
|
|
441
|
+
list(self.params)
|
|
442
|
+
+ list(RUNTIME_ACTION_PARAMS.values())
|
|
443
|
+
+ list(COMMON_SHELL_PARAMS.values())
|
|
444
|
+
)
|
|
445
|
+
|
|
412
446
|
def param_value_summary(self) -> dict[str, str]:
|
|
413
447
|
"""
|
|
414
448
|
Readable, serializable summary of the action's non-default parameters, to include in
|