kash-shell 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/actions/__init__.py +4 -4
- kash/actions/core/format_markdown_template.py +2 -5
- kash/actions/core/markdownify.py +7 -6
- kash/actions/core/readability.py +7 -6
- kash/actions/core/render_as_html.py +37 -0
- kash/actions/core/show_webpage.py +6 -11
- kash/actions/core/strip_html.py +2 -6
- kash/actions/core/tabbed_webpage_config.py +31 -0
- kash/actions/core/{webpage_generate.py → tabbed_webpage_generate.py} +5 -4
- kash/commands/__init__.py +8 -20
- kash/commands/base/basic_file_commands.py +15 -0
- kash/commands/base/debug_commands.py +13 -0
- kash/commands/base/files_command.py +28 -10
- kash/commands/base/general_commands.py +21 -16
- kash/commands/base/logs_commands.py +4 -2
- kash/commands/base/model_commands.py +8 -8
- kash/commands/base/search_command.py +3 -2
- kash/commands/base/show_command.py +5 -3
- kash/commands/extras/parse_uv_lock.py +186 -0
- kash/commands/help/doc_commands.py +2 -31
- kash/commands/help/welcome.py +33 -0
- kash/commands/workspace/selection_commands.py +11 -6
- kash/commands/workspace/workspace_commands.py +19 -17
- kash/config/colors.py +3 -1
- kash/config/env_settings.py +14 -1
- kash/config/init.py +2 -2
- kash/config/logger.py +59 -56
- kash/config/logger_basic.py +3 -3
- kash/config/settings.py +116 -57
- kash/config/setup.py +28 -12
- kash/config/text_styles.py +3 -13
- kash/docs/load_api_docs.py +2 -1
- kash/docs/markdown/topics/a3_getting_started.md +3 -2
- kash/{concepts → embeddings}/text_similarity.py +2 -2
- kash/exec/__init__.py +20 -3
- kash/exec/action_decorators.py +24 -10
- kash/exec/action_exec.py +41 -23
- kash/exec/action_registry.py +13 -48
- kash/exec/command_registry.py +2 -1
- kash/exec/fetch_url_metadata.py +4 -6
- kash/exec/importing.py +56 -0
- kash/exec/llm_transforms.py +12 -10
- kash/exec/precondition_registry.py +2 -1
- kash/exec/preconditions.py +22 -1
- kash/exec/resolve_args.py +4 -0
- kash/exec/shell_callable_action.py +33 -19
- kash/file_storage/file_store.py +42 -27
- kash/file_storage/item_file_format.py +5 -2
- kash/file_storage/metadata_dirs.py +11 -2
- kash/help/assistant.py +1 -1
- kash/help/assistant_instructions.py +2 -1
- kash/help/function_param_info.py +1 -1
- kash/help/help_embeddings.py +2 -2
- kash/help/help_printing.py +7 -11
- kash/llm_utils/clean_headings.py +1 -1
- kash/llm_utils/llm_api_keys.py +4 -4
- kash/llm_utils/llm_features.py +68 -0
- kash/llm_utils/llm_messages.py +1 -2
- kash/llm_utils/llm_names.py +1 -1
- kash/llm_utils/llms.py +8 -3
- kash/local_server/__init__.py +5 -2
- kash/local_server/local_server.py +8 -5
- kash/local_server/local_server_commands.py +2 -2
- kash/local_server/local_server_routes.py +1 -7
- kash/local_server/local_url_formatters.py +1 -1
- kash/mcp/__init__.py +5 -2
- kash/mcp/mcp_cli.py +5 -5
- kash/mcp/mcp_server_commands.py +5 -5
- kash/mcp/mcp_server_routes.py +5 -5
- kash/mcp/mcp_server_sse.py +4 -2
- kash/media_base/media_cache.py +8 -8
- kash/media_base/media_services.py +1 -1
- kash/media_base/media_tools.py +6 -6
- kash/media_base/services/local_file_media.py +2 -2
- kash/media_base/{speech_transcription.py → transcription_deepgram.py} +25 -110
- kash/media_base/transcription_format.py +73 -0
- kash/media_base/transcription_whisper.py +38 -0
- kash/model/__init__.py +73 -5
- kash/model/actions_model.py +38 -4
- kash/model/concept_model.py +30 -0
- kash/model/items_model.py +115 -32
- kash/model/params_model.py +24 -0
- kash/shell/completions/completion_scoring.py +37 -5
- kash/shell/output/kerm_codes.py +1 -2
- kash/shell/output/shell_formatting.py +14 -4
- kash/shell/shell_main.py +2 -2
- kash/shell/utils/exception_printing.py +6 -0
- kash/shell/utils/native_utils.py +26 -20
- kash/shell/utils/shell_function_wrapper.py +15 -15
- kash/text_handling/custom_sliding_transforms.py +12 -4
- kash/text_handling/doc_normalization.py +6 -2
- kash/text_handling/markdown_render.py +118 -0
- kash/text_handling/markdown_utils.py +226 -0
- kash/utils/common/function_inspect.py +360 -110
- kash/utils/common/import_utils.py +12 -3
- kash/utils/common/type_utils.py +0 -29
- kash/utils/common/url.py +27 -3
- kash/utils/errors.py +6 -0
- kash/utils/file_utils/file_ext.py +4 -0
- kash/utils/file_utils/file_formats.py +2 -2
- kash/utils/file_utils/file_formats_model.py +20 -1
- kash/web_content/dir_store.py +1 -2
- kash/web_content/file_cache_utils.py +37 -10
- kash/web_content/file_processing.py +68 -0
- kash/web_content/local_file_cache.py +12 -9
- kash/web_content/web_extract.py +8 -3
- kash/web_content/web_fetch.py +12 -4
- kash/web_gen/__init__.py +0 -4
- kash/web_gen/simple_webpage.py +52 -0
- kash/web_gen/tabbed_webpage.py +24 -14
- kash/web_gen/template_render.py +37 -2
- kash/web_gen/templates/base_styles.css.jinja +169 -43
- kash/web_gen/templates/base_webpage.html.jinja +110 -45
- kash/web_gen/templates/content_styles.css.jinja +4 -2
- kash/web_gen/templates/item_view.html.jinja +49 -39
- kash/web_gen/templates/simple_webpage.html.jinja +24 -0
- kash/web_gen/templates/tabbed_webpage.html.jinja +42 -33
- kash/workspaces/__init__.py +15 -2
- kash/workspaces/selections.py +18 -3
- kash/workspaces/source_items.py +0 -1
- kash/workspaces/workspaces.py +5 -11
- kash/xonsh_custom/command_nl_utils.py +40 -19
- kash/xonsh_custom/custom_shell.py +43 -11
- kash/xonsh_custom/customize_prompt.py +39 -21
- kash/xonsh_custom/load_into_xonsh.py +22 -25
- kash/xonsh_custom/shell_load_commands.py +2 -2
- kash/xonsh_custom/xonsh_completers.py +2 -249
- kash/xonsh_custom/xonsh_keybindings.py +282 -0
- kash/xonsh_custom/xonsh_modern_tools.py +3 -3
- kash/xontrib/kash_extension.py +5 -6
- {kash_shell-0.3.9.dist-info → kash_shell-0.3.11.dist-info}/METADATA +10 -8
- {kash_shell-0.3.9.dist-info → kash_shell-0.3.11.dist-info}/RECORD +137 -136
- kash/actions/core/webpage_config.py +0 -21
- kash/concepts/concept_formats.py +0 -23
- kash/shell/clideps/api_keys.py +0 -100
- kash/shell/clideps/dotenv_setup.py +0 -115
- kash/shell/clideps/dotenv_utils.py +0 -98
- kash/shell/clideps/pkg_deps.py +0 -257
- kash/shell/clideps/platforms.py +0 -11
- kash/shell/clideps/terminal_features.py +0 -56
- kash/shell/utils/osc_utils.py +0 -95
- kash/shell/utils/terminal_images.py +0 -133
- kash/text_handling/markdown_util.py +0 -167
- kash/utils/common/atomic_var.py +0 -171
- kash/utils/common/string_replace.py +0 -93
- kash/utils/common/string_template.py +0 -101
- /kash/{concepts → embeddings}/cosine.py +0 -0
- /kash/{concepts → embeddings}/embeddings.py +0 -0
- {kash_shell-0.3.9.dist-info → kash_shell-0.3.11.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.9.dist-info → kash_shell-0.3.11.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.9.dist-info → kash_shell-0.3.11.dist-info}/licenses/LICENSE +0 -0
kash/shell/utils/native_utils.py
CHANGED
|
@@ -11,15 +11,15 @@ import webbrowser
|
|
|
11
11
|
from enum import Enum
|
|
12
12
|
from pathlib import Path
|
|
13
13
|
|
|
14
|
+
from clideps.pkgs.pkg_check import pkg_check
|
|
15
|
+
from clideps.pkgs.platform_checks import Platform, get_platform
|
|
16
|
+
from clideps.terminal.terminal_images import terminal_show_image
|
|
14
17
|
from flowmark import Wrap
|
|
15
18
|
from funlog import log_calls
|
|
16
19
|
|
|
17
20
|
from kash.config.logger import get_logger
|
|
18
|
-
from kash.config.text_styles import BAT_STYLE, BAT_THEME, COLOR_ERROR
|
|
19
|
-
from kash.shell.clideps.pkg_deps import Pkg, pkg_check
|
|
20
|
-
from kash.shell.clideps.platforms import PLATFORM, Platform
|
|
21
|
+
from kash.config.text_styles import BAT_STYLE, BAT_STYLE_PLAIN, BAT_THEME, COLOR_ERROR
|
|
21
22
|
from kash.shell.output.shell_output import cprint
|
|
22
|
-
from kash.shell.utils.terminal_images import terminal_show_image
|
|
23
23
|
from kash.utils.common.format_utils import fmt_loc
|
|
24
24
|
from kash.utils.common.url import as_file_url, is_file_url, is_url
|
|
25
25
|
from kash.utils.errors import FileNotFound, SetupError
|
|
@@ -49,11 +49,11 @@ def file_size_check(
|
|
|
49
49
|
def native_open(filename: str | Path):
|
|
50
50
|
filename = str(filename)
|
|
51
51
|
log.message("Opening file: %s", filename)
|
|
52
|
-
if
|
|
52
|
+
if get_platform() == Platform.Darwin:
|
|
53
53
|
subprocess.run(["open", filename])
|
|
54
|
-
elif
|
|
54
|
+
elif get_platform() == Platform.Linux:
|
|
55
55
|
subprocess.run(["xdg-open", filename])
|
|
56
|
-
elif
|
|
56
|
+
elif get_platform() == Platform.Windows:
|
|
57
57
|
subprocess.run(["start", shlex.quote(filename)], shell=True)
|
|
58
58
|
else:
|
|
59
59
|
raise NotImplementedError("Unsupported platform")
|
|
@@ -110,12 +110,14 @@ def _detect_view_mode(file_or_url: str) -> ViewMode:
|
|
|
110
110
|
def view_file_native(
|
|
111
111
|
file_or_url: str | Path,
|
|
112
112
|
view_mode: ViewMode = ViewMode.auto,
|
|
113
|
+
plain: bool = False,
|
|
113
114
|
):
|
|
114
115
|
"""
|
|
115
116
|
Open a file or URL in the console or a native app. If `view_mode` is auto,
|
|
116
117
|
automatically determine whether to use console, web browser, or the user's
|
|
117
118
|
preferred native application. For images, also tries terminal-based image
|
|
118
|
-
display.
|
|
119
|
+
display. The `--plain` flag will disable line numbers, grid, etc. in `bat`
|
|
120
|
+
and force `ViewMode.console`.
|
|
119
121
|
"""
|
|
120
122
|
file_or_url = str(file_or_url)
|
|
121
123
|
path = None
|
|
@@ -124,6 +126,9 @@ def view_file_native(
|
|
|
124
126
|
if not path.exists():
|
|
125
127
|
raise FileNotFound(fmt_loc(path))
|
|
126
128
|
|
|
129
|
+
if plain:
|
|
130
|
+
view_mode = ViewMode.console
|
|
131
|
+
|
|
127
132
|
if view_mode == ViewMode.auto:
|
|
128
133
|
view_mode = _detect_view_mode(file_or_url)
|
|
129
134
|
|
|
@@ -133,7 +138,7 @@ def view_file_native(
|
|
|
133
138
|
webbrowser.open(url)
|
|
134
139
|
elif view_mode == ViewMode.console and path:
|
|
135
140
|
file_size, min_lines = file_size_check(path)
|
|
136
|
-
view_file_console(path, use_pager=min_lines > 40 or file_size > 20 * 1024)
|
|
141
|
+
view_file_console(path, use_pager=min_lines > 40 or file_size > 20 * 1024, plain=plain)
|
|
137
142
|
elif view_mode == ViewMode.terminal_image and path:
|
|
138
143
|
try:
|
|
139
144
|
terminal_show_image(path)
|
|
@@ -187,11 +192,11 @@ def tail_file(
|
|
|
187
192
|
if follow:
|
|
188
193
|
max_lines = follow_max_lines
|
|
189
194
|
|
|
190
|
-
pkg_check().require(
|
|
191
|
-
pkg_check().warn_if_missing(
|
|
195
|
+
pkg_check().require("tail")
|
|
196
|
+
pkg_check().warn_if_missing("bat")
|
|
192
197
|
|
|
193
198
|
if follow:
|
|
194
|
-
if pkg_check().
|
|
199
|
+
if pkg_check().is_found("bat"):
|
|
195
200
|
# Follow the file in real-time.
|
|
196
201
|
command = (
|
|
197
202
|
f"tail -{max_lines} -f {all_paths_str} | "
|
|
@@ -202,8 +207,8 @@ def tail_file(
|
|
|
202
207
|
command = f"tail -f {all_paths_str}"
|
|
203
208
|
cprint("Following file: `%s`", command, text_wrap=Wrap.NONE)
|
|
204
209
|
else:
|
|
205
|
-
pkg_check().require(
|
|
206
|
-
if pkg_check().
|
|
210
|
+
pkg_check().require("less")
|
|
211
|
+
if pkg_check().is_found("bat"):
|
|
207
212
|
command = (
|
|
208
213
|
f"tail -{max_lines} {all_paths_str} | "
|
|
209
214
|
f"bat --paging=never --color=always --style=plain --theme={BAT_THEME} -l log | "
|
|
@@ -216,7 +221,7 @@ def tail_file(
|
|
|
216
221
|
subprocess.run(command, shell=True, check=True)
|
|
217
222
|
|
|
218
223
|
|
|
219
|
-
def view_file_console(filename: str | Path, use_pager: bool = True):
|
|
224
|
+
def view_file_console(filename: str | Path, use_pager: bool = True, plain: bool = False):
|
|
220
225
|
"""
|
|
221
226
|
Displays a file in the console with pagination and syntax highlighting.
|
|
222
227
|
"""
|
|
@@ -226,18 +231,19 @@ def view_file_console(filename: str | Path, use_pager: bool = True):
|
|
|
226
231
|
# TODO: Visualize YAML frontmatter with different syntax/style than Markdown content.
|
|
227
232
|
|
|
228
233
|
is_text = file_format_info(filename).is_text
|
|
234
|
+
bat_style = BAT_STYLE_PLAIN if plain else BAT_STYLE
|
|
229
235
|
if is_text:
|
|
230
|
-
pkg_check().require(
|
|
231
|
-
if pkg_check().
|
|
236
|
+
pkg_check().require("less")
|
|
237
|
+
if pkg_check().is_found("bat"):
|
|
232
238
|
pager_str = "--pager=always --pager=less " if use_pager else ""
|
|
233
|
-
command = f"bat {pager_str}--color=always --style={
|
|
239
|
+
command = f"bat {pager_str}--color=always --style={bat_style} --theme={BAT_THEME} {quoted_filename}"
|
|
234
240
|
else:
|
|
235
|
-
pkg_check().require(
|
|
241
|
+
pkg_check().require("pygmentize")
|
|
236
242
|
command = f"pygmentize -g {quoted_filename}"
|
|
237
243
|
if use_pager:
|
|
238
244
|
command = f"{command} | less -R"
|
|
239
245
|
else:
|
|
240
|
-
pkg_check().require(
|
|
246
|
+
pkg_check().require("hexyl")
|
|
241
247
|
command = f"hexyl {quoted_filename}"
|
|
242
248
|
if use_pager:
|
|
243
249
|
command = f"{command} | less -R"
|
|
@@ -27,7 +27,7 @@ def _map_positional(
|
|
|
27
27
|
keywords_consumed = 0
|
|
28
28
|
|
|
29
29
|
for param in pos_params:
|
|
30
|
-
param_type = param.
|
|
30
|
+
param_type = param.effective_type or str
|
|
31
31
|
if param.is_varargs:
|
|
32
32
|
pos_values.extend([param_type(arg) for arg in pos_args[i:]])
|
|
33
33
|
return pos_values, 0 # All remaining args are consumed, so we can return early.
|
|
@@ -39,7 +39,7 @@ def _map_positional(
|
|
|
39
39
|
|
|
40
40
|
# If there are remaining positional arguments, they will go toward keyword arguments.
|
|
41
41
|
for param in kw_params:
|
|
42
|
-
param_type = param.
|
|
42
|
+
param_type = param.effective_type or str
|
|
43
43
|
if not param.is_varargs and i < len(pos_args):
|
|
44
44
|
pos_values.append(param_type(pos_args[i]))
|
|
45
45
|
i += 1
|
|
@@ -70,30 +70,30 @@ def _map_keyword(kw_args: Mapping[str, str | bool], kw_params: list[FuncParam])
|
|
|
70
70
|
for key, value in kw_args.items():
|
|
71
71
|
matching_param = next((param for param in kw_params if param.name == key), None)
|
|
72
72
|
if matching_param:
|
|
73
|
-
|
|
73
|
+
param_type = matching_param.effective_type or str
|
|
74
74
|
|
|
75
75
|
# Handle UnionType (str | None) specially
|
|
76
|
-
if hasattr(types, "UnionType") and isinstance(
|
|
77
|
-
args = get_args(
|
|
76
|
+
if hasattr(types, "UnionType") and isinstance(param_type, types.UnionType):
|
|
77
|
+
args = get_args(param_type)
|
|
78
78
|
non_none_args = [arg for arg in args if arg is not type(None)]
|
|
79
79
|
if len(non_none_args) == 1 and isinstance(non_none_args[0], type):
|
|
80
|
-
|
|
80
|
+
param_type = non_none_args[0]
|
|
81
81
|
|
|
82
|
-
if isinstance(value, bool) and not issubclass(
|
|
82
|
+
if isinstance(value, bool) and not issubclass(param_type, bool):
|
|
83
83
|
raise InvalidCommand(f"Option `--{key}` expects a value")
|
|
84
|
-
if not isinstance(value, bool) and issubclass(
|
|
84
|
+
if not isinstance(value, bool) and issubclass(param_type, bool):
|
|
85
85
|
raise InvalidCommand(f"Option `--{key}` is boolean and does not take a value")
|
|
86
86
|
|
|
87
87
|
try:
|
|
88
|
-
kw_values[key] = instantiate_as_type(
|
|
89
|
-
value, matching_param_type, accept_enum_names=True
|
|
90
|
-
)
|
|
88
|
+
kw_values[key] = instantiate_as_type(value, param_type, accept_enum_names=True)
|
|
91
89
|
except Exception as e:
|
|
92
90
|
valid_values = ""
|
|
93
|
-
if isinstance(
|
|
94
|
-
valid_values =
|
|
91
|
+
if isinstance(param_type, type) and issubclass(param_type, Enum):
|
|
92
|
+
valid_values = (
|
|
93
|
+
f" (valid values are: {', '.join('`' + v.name + '`' for v in param_type)})"
|
|
94
|
+
)
|
|
95
95
|
raise InvalidCommand(
|
|
96
|
-
f"Invalid value for parameter `{key}` of type {
|
|
96
|
+
f"Invalid value for parameter `{key}` of type {param_type}: {value!r}{valid_values}"
|
|
97
97
|
) from e
|
|
98
98
|
elif var_kw_param:
|
|
99
99
|
var_kw_values[key] = value
|
|
@@ -117,7 +117,7 @@ def wrap_for_shell_args(func: Callable[..., R]) -> Callable[[list[str]], R | Non
|
|
|
117
117
|
from kash.commands.help import help_commands
|
|
118
118
|
|
|
119
119
|
params = inspect_function_params(func)
|
|
120
|
-
pos_params = [p for p in params if p.
|
|
120
|
+
pos_params = [p for p in params if p.is_pure_positional]
|
|
121
121
|
kw_params = [p for p in params if p not in pos_params]
|
|
122
122
|
|
|
123
123
|
@wraps(func)
|
|
@@ -1,10 +1,17 @@
|
|
|
1
1
|
from collections.abc import Callable
|
|
2
2
|
from math import ceil
|
|
3
3
|
|
|
4
|
-
from chopdiff.docs import
|
|
4
|
+
from chopdiff.docs import (
|
|
5
|
+
DIFF_FILTER_NONE,
|
|
6
|
+
DiffFilter,
|
|
7
|
+
Paragraph,
|
|
8
|
+
TextDoc,
|
|
9
|
+
TextUnit,
|
|
10
|
+
diff_docs,
|
|
11
|
+
join_wordtoks,
|
|
12
|
+
)
|
|
5
13
|
from chopdiff.transforms import (
|
|
6
14
|
WindowSettings,
|
|
7
|
-
accept_all,
|
|
8
15
|
remove_window_br,
|
|
9
16
|
sliding_para_window,
|
|
10
17
|
sliding_window_transform,
|
|
@@ -31,7 +38,7 @@ def filtered_transform(
|
|
|
31
38
|
doc: TextDoc,
|
|
32
39
|
transform_func: TextDocTransform,
|
|
33
40
|
windowing: WindowSettings | None,
|
|
34
|
-
diff_filter: DiffFilter =
|
|
41
|
+
diff_filter: DiffFilter | None = None,
|
|
35
42
|
) -> TextDoc:
|
|
36
43
|
"""
|
|
37
44
|
Apply a transform with sliding window across the input doc, enforcing the changes it's
|
|
@@ -39,7 +46,7 @@ def filtered_transform(
|
|
|
39
46
|
|
|
40
47
|
If windowing is None, apply the transform to the entire document at once.
|
|
41
48
|
"""
|
|
42
|
-
has_filter = diff_filter !=
|
|
49
|
+
has_filter = bool(diff_filter and diff_filter != DIFF_FILTER_NONE)
|
|
43
50
|
|
|
44
51
|
if not windowing or not windowing.size:
|
|
45
52
|
transformed_doc = transform_func(doc)
|
|
@@ -52,6 +59,7 @@ def filtered_transform(
|
|
|
52
59
|
transformed_doc = transform_func(input_doc)
|
|
53
60
|
|
|
54
61
|
if has_filter:
|
|
62
|
+
assert diff_filter
|
|
55
63
|
# Check the transform did what it should have.
|
|
56
64
|
diff = diff_docs(input_doc, transformed_doc)
|
|
57
65
|
accepted_diff, rejected_diff = diff.filter(diff_filter)
|
|
@@ -21,7 +21,11 @@ def normalize_formatting_ansi(text: str, format: Format | None, width=DEFAULT_WR
|
|
|
21
21
|
text, width=width, word_splitter=simple_word_splitter, len_fn=ansi_cell_len
|
|
22
22
|
)
|
|
23
23
|
elif format == Format.markdown or format == Format.md_html:
|
|
24
|
-
return fill_markdown(
|
|
24
|
+
return fill_markdown(
|
|
25
|
+
text,
|
|
26
|
+
line_wrapper=line_wrap_by_sentence(len_fn=ansi_cell_len, is_markdown=True),
|
|
27
|
+
cleanups=True, # Safe cleanups like unbolding section headers.
|
|
28
|
+
)
|
|
25
29
|
elif format == Format.html:
|
|
26
30
|
# We don't currently auto-format HTML as we sometimes use HTML with specifically chosen line breaks.
|
|
27
31
|
return text
|
|
@@ -52,7 +56,7 @@ def normalize_text_file(
|
|
|
52
56
|
|
|
53
57
|
|
|
54
58
|
def test_osc8_link():
|
|
55
|
-
from
|
|
59
|
+
from clideps.terminal.osc_utils import osc8_link
|
|
56
60
|
|
|
57
61
|
link = osc8_link("https://example.com/" + "x" * 50, "Example")
|
|
58
62
|
assert ansi_cell_len(link) == 7
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from textwrap import dedent
|
|
2
|
+
|
|
3
|
+
import marko
|
|
4
|
+
import regex
|
|
5
|
+
from marko.block import HTMLBlock
|
|
6
|
+
from marko.ext.gfm import GFM
|
|
7
|
+
from marko.helpers import MarkoExtension
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# When we use divs in Markdown we usually want them to be standalone paragraphs,
|
|
11
|
+
# so it doesn't break other wrapping with flowmark etc. This handles that.
|
|
12
|
+
class CustomHTMLBlockMixin:
    """
    Renderer mixin that sets `<div>` HTML blocks apart with a leading and
    trailing newline, leaving every other HTML block as rendered by the next
    renderer in the MRO.
    """

    div_pattern = regex.compile(r"^\s*<div\b", regex.IGNORECASE)

    def render_html_block(self, element: HTMLBlock) -> str:
        """
        Render an HTML block, wrapping the result in newlines when the
        original block body starts with a `<div` tag.
        """
        # Let the next renderer in the MRO (GFM filtering) produce the body first.
        rendered = super().render_html_block(element)  # pyright: ignore

        # The decision is made on the *original* body, not the filtered output.
        is_div = self.div_pattern.match(element.body.strip()) is not None
        if not is_div:
            # Anything that isn't a div passes through exactly as filtered.
            return rendered

        # Divs become standalone: trimmed, then framed by single newlines.
        return f"\n{rendered.strip()}\n"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# GFM first, adding our custom override as an extension to handle divs our way.
|
|
29
|
+
# Extensions later in this list are earlier in MRO.
|
|
30
|
+
MARKO_GFM = marko.Markdown(
|
|
31
|
+
extensions=["footnote", GFM, MarkoExtension(renderer_mixins=[CustomHTMLBlockMixin])]
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
FOOTNOTE_UP_ARROW = " ↑ "
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def html_postprocess(html: str) -> str:
    """
    Apply last-step adjustments to rendered HTML: the footnote back-reference
    arrow is swapped for `FOOTNOTE_UP_ARROW`.
    """
    # TODO: Improve rendering of footnote defs to put the up arrow next to the number instead?
    back_reference = """class="footnote">↩</a>"""
    up_reference = f"""class="footnote">{FOOTNOTE_UP_ARROW}</a>"""
    return html.replace(back_reference, up_reference)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def markdown_to_html(markdown: str, converter: marko.Markdown = MARKO_GFM) -> str:
    """
    Convert Markdown to HTML.

    Wraps div blocks with newlines for better Markdown compatibility.

    Output passes through raw HTML! Note per GFM, unsafe script tags etc
    are [allowed in some cases](https://github.github.com/gfm/#example-140) so
    additional sanitization is needed if input isn't trusted.

    `converter` is the marko converter to use (defaults to `MARKO_GFM`). The
    converted HTML is passed through `html_postprocess()` before it is returned.
    """
    html = converter.convert(markdown)
    # Single exit point: the former second `return html` was unreachable.
    return html_postprocess(html)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
## Tests
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def test_markdown_to_html():
|
|
68
|
+
markdown = dedent(
|
|
69
|
+
"""
|
|
70
|
+
# Heading
|
|
71
|
+
|
|
72
|
+
This is a paragraph and a [link](https://example.com).
|
|
73
|
+
|
|
74
|
+
- Item 1
|
|
75
|
+
- Item 2
|
|
76
|
+
|
|
77
|
+
## Subheading
|
|
78
|
+
|
|
79
|
+
This is a paragraph with a <span>span</span> tag.
|
|
80
|
+
This is a paragraph with a <div>div</div> tag.
|
|
81
|
+
This is a paragraph with an <a href='https://example.com'>example link</a>.
|
|
82
|
+
|
|
83
|
+
<div class="div1">This is a div.</div>
|
|
84
|
+
|
|
85
|
+
<div class="div2">This is a second div.
|
|
86
|
+
<iframe src="https://example.com">Inline iframe, note this is sanitized</iframe>
|
|
87
|
+
</div>
|
|
88
|
+
|
|
89
|
+
<!-- Script tag in a block, note this isn't sanitized -->
|
|
90
|
+
<script>console.log("Javascript block!");</script>
|
|
91
|
+
"""
|
|
92
|
+
)
|
|
93
|
+
print(markdown_to_html(markdown))
|
|
94
|
+
|
|
95
|
+
expected_html = dedent(
|
|
96
|
+
"""
|
|
97
|
+
<h1>Heading</h1>
|
|
98
|
+
<p>This is a paragraph and a <a href="https://example.com">link</a>.</p>
|
|
99
|
+
<ul>
|
|
100
|
+
<li>Item 1</li>
|
|
101
|
+
<li>Item 2</li>
|
|
102
|
+
</ul>
|
|
103
|
+
<h2>Subheading</h2>
|
|
104
|
+
<p>This is a paragraph with a <span>span</span> tag.
|
|
105
|
+
This is a paragraph with a <div>div</div> tag.
|
|
106
|
+
This is a paragraph with an <a href='https://example.com'>example link</a>.</p>
|
|
107
|
+
|
|
108
|
+
<div class="div1">This is a div.</div>
|
|
109
|
+
|
|
110
|
+
<div class="div2">This is a second div.
|
|
111
|
+
<iframe src="https://example.com">Inline iframe, note this is sanitized</iframe>
|
|
112
|
+
</div>
|
|
113
|
+
<!-- Script tag in a block, note this isn't sanitized -->
|
|
114
|
+
<script>console.log("Javascript block!");</script>
|
|
115
|
+
"""
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
assert markdown_to_html(markdown).strip() == expected_html.strip()
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
import marko
|
|
5
|
+
import regex
|
|
6
|
+
from marko.block import Heading, ListItem
|
|
7
|
+
from marko.inline import Link
|
|
8
|
+
|
|
9
|
+
from kash.config.logger import get_logger
|
|
10
|
+
from kash.utils.common.url import Url
|
|
11
|
+
|
|
12
|
+
log = get_logger(__name__)
|
|
13
|
+
|
|
14
|
+
# Characters that commonly need escaping in Markdown inline text.
MARKDOWN_ESCAPE_CHARS = r"([\\`*_{}\[\]()#+.!-])"
MARKDOWN_ESCAPE_RE = re.compile(MARKDOWN_ESCAPE_CHARS)


def escape_markdown(text: str) -> str:
    """
    Return `text` with a backslash inserted before every character matched
    by `MARKDOWN_ESCAPE_RE`.
    """
    # Replacement template: a literal backslash followed by the matched character.
    backslash_then_match = r"\\\1"
    escaped = MARKDOWN_ESCAPE_RE.sub(backslash_then_match, text)
    return escaped
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def as_bullet_points(values: list[Any]) -> str:
    """
    Convert a list of values to a Markdown bullet-point list. If a value is a string,
    it is treated like Markdown. If it's something else it's converted to a string
    and also escaped for Markdown.

    Newlines inside each value are replaced with spaces and surrounding whitespace
    is stripped, so every value ends up on a single bullet line. Bullets are
    separated by a blank line. An empty list yields an empty string.
    """
    points: list[str] = []
    for value in values:
        if isinstance(value, str):
            # Strings are taken as Markdown already; only flatten them to one line.
            points.append(value.replace("\n", " ").strip())
        else:
            # Convert to text *before* calling string methods: non-string values
            # (ints, paths, etc.) have no `.replace()`, which previously raised
            # AttributeError before the escaping branch could ever run.
            flattened = str(value).replace("\n", " ").strip()
            points.append(escape_markdown(flattened))

    return "\n\n".join(f"- {point}" for point in points)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def markdown_link(text: str, url: str | Url) -> str:
    """
    Build an inline Markdown link of the form `[text](url)`, backslash-escaping
    any square brackets that appear in the link text.
    """
    bracket_escapes = str.maketrans({"[": "\\[", "]": "\\]"})
    label = text.translate(bracket_escapes)
    return f"[{label}]({url})"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def is_markdown_header(markdown: str) -> bool:
    """
    Report whether `markdown` begins with a header marker, i.e. one or more
    `#` characters immediately followed by a space.
    """
    after_hashes = markdown.lstrip("#")
    has_hashes = len(after_hashes) < len(markdown)
    return has_hashes and after_hashes.startswith(" ")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _tree_links(element, include_internal=False):
    """
    Collect the `dest` of every `Link` found under `element`, in depth-first
    order. Links whose destination starts with `#` are left out unless
    `include_internal` is true. The children of a `Link` are not searched.
    """

    def _walk(node):
        if isinstance(node, Link):
            if include_internal or not node.dest.startswith("#"):
                yield node.dest
        elif hasattr(node, "children"):
            for child in node.children:
                yield from _walk(child)

    return list(_walk(element))
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def extract_links(file_path: str, include_internal=False) -> list[str]:
    """
    Read the Markdown file at `file_path`, parse it, and return the link
    destinations found by `_tree_links()`. Future: Include textual and section context.
    """
    with open(file_path) as markdown_file:
        parsed = marko.parse(markdown_file.read())
        return _tree_links(parsed, include_internal)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def extract_first_header(content: str) -> str | None:
    """
    Return the text of the header that opens `content`, or None when the
    first parsed block is not a heading (or there are no blocks at all).
    Only the text is kept, with inline formatting dropped, so the result can
    serve as a document title.
    """
    blocks = marko.parse(content).children
    if not blocks:
        return None

    first_block = blocks[0]
    if not isinstance(first_block, Heading):
        return None

    return _extract_text(first_block).strip()
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _extract_text(element: Any) -> str:
|
|
100
|
+
if isinstance(element, str):
|
|
101
|
+
return element
|
|
102
|
+
elif hasattr(element, "children"):
|
|
103
|
+
return "".join(_extract_text(child) for child in element.children)
|
|
104
|
+
else:
|
|
105
|
+
return ""
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _tree_bullet_points(element: marko.block.Document) -> list[str]:
    """
    Gather the stripped text of every `ListItem` reachable from `element`, in
    depth-first order. A `ListItem` is not searched further once found, so
    anything nested inside it is part of that item's extracted text.
    """

    def _walk(node):
        if isinstance(node, ListItem):
            yield _extract_text(node).strip()
        elif hasattr(node, "children"):
            for child in node.children:
                yield from _walk(child)

    return list(_walk(element))
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def extract_bullet_points(content: str) -> list[str]:
    """
    Parse Markdown `content` and return the text of its list items.
    """
    parsed = marko.parse(content)
    items = _tree_bullet_points(parsed)
    return items
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _type_from_heading(heading: Heading) -> str:
|
|
132
|
+
if heading.level in [1, 2, 3, 4, 5, 6]:
|
|
133
|
+
return f"h{heading.level}"
|
|
134
|
+
else:
|
|
135
|
+
raise ValueError(f"Unsupported heading: {heading}: level {heading.level}")
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _last_unescaped_bracket(text: str, index: int) -> str | None:
|
|
139
|
+
escaped = False
|
|
140
|
+
for i in range(index - 1, -1, -1):
|
|
141
|
+
ch = text[i]
|
|
142
|
+
if ch == "\\":
|
|
143
|
+
escaped = not escaped # Toggle escaping chain
|
|
144
|
+
continue
|
|
145
|
+
if ch in "[]":
|
|
146
|
+
if not escaped:
|
|
147
|
+
return ch
|
|
148
|
+
# Reset escape status after any non‑backslash char
|
|
149
|
+
escaped = False
|
|
150
|
+
return None
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def find_markdown_text(
|
|
154
|
+
pattern: re.Pattern[str], text: str, *, start_pos: int = 0
|
|
155
|
+
) -> re.Match[str] | None:
|
|
156
|
+
"""
|
|
157
|
+
Return first regex `pattern` match in `text` not inside an existing link.
|
|
158
|
+
|
|
159
|
+
A match is considered inside a link when the most recent unescaped square
|
|
160
|
+
bracket preceding the match start is an opening bracket "[".
|
|
161
|
+
"""
|
|
162
|
+
|
|
163
|
+
pos = start_pos
|
|
164
|
+
while True:
|
|
165
|
+
match = pattern.search(text, pos)
|
|
166
|
+
if match is None:
|
|
167
|
+
return None
|
|
168
|
+
|
|
169
|
+
last_bracket = _last_unescaped_bracket(text, match.start())
|
|
170
|
+
if last_bracket != "[":
|
|
171
|
+
return match
|
|
172
|
+
|
|
173
|
+
# Skip this match and continue searching
|
|
174
|
+
pos = match.end()
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
## Tests
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def test_escape_markdown() -> None:
    """
    Check `escape_markdown()` against input/expected pairs covering each kind
    of escaped character plus a combined example.
    """
    cases = [
        ("", ""),
        ("Hello world", "Hello world"),
        ("`code`", "\\`code\\`"),
        ("*italic*", "\\*italic\\*"),
        ("_bold_", "\\_bold\\_"),
        ("{braces}", "\\{braces\\}"),
        ("# header", "\\# header"),
        ("1. item", "1\\. item"),
        ("line+break", "line\\+break"),
        ("dash-", "dash\\-"),
        ("!bang", "\\!bang"),
        ("backslash\\", "backslash\\\\"),
        (
            "Multiple *special* chars [here](#anchor).",
            "Multiple \\*special\\* chars \\[here\\]\\(\\#anchor\\)\\.",
        ),
    ]
    for raw, expected in cases:
        assert escape_markdown(raw) == expected
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def test_extract_first_header() -> None:
    """
    Check `extract_first_header()` on a leading header, on content with no
    leading header, and on a header containing inline formatting.
    """
    plain_title = extract_first_header("# Header 1")
    assert plain_title == "Header 1"

    # No header at the very start means no title.
    for headerless in ("Not a header\n# Header later", ""):
        assert extract_first_header(headerless) is None

    formatted_title = extract_first_header("## *Formatted* _Header_ [link](#anchor)")
    assert formatted_title == "Formatted Header link"
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def test_find_markdown_text() -> None:  # pragma: no cover
    """
    Exercise `find_markdown_text()` for three situations: a plain match, a
    match that must skip an occurrence inside link text, and no match outside
    link text.
    """
    # Match is returned when the term is not inside a link.
    text = "Foo bar baz"
    pattern = re.compile("Foo Bar", re.IGNORECASE)
    match = find_markdown_text(pattern, text)
    assert match is not None and match.group(0) == "Foo bar"

    # Skips occurrence inside link and returns the first one outside.
    text = "[Foo](http://example.com) something Foo"
    pattern = re.compile("Foo", re.IGNORECASE)
    match = find_markdown_text(pattern, text)
    assert match is not None
    # The accepted match must lie after the closing parenthesis of the link.
    assert match.start() > text.index(") ")
    assert text[match.start() : match.end()] == "Foo"

    # Returns None when the only occurrences are inside links.
    text = "prefix [bar](http://example.com) suffix"
    pattern = re.compile("bar", re.IGNORECASE)
    match = find_markdown_text(pattern, text)
    assert match is None
|