kash-shell 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. kash/actions/__init__.py +4 -4
  2. kash/actions/core/format_markdown_template.py +2 -5
  3. kash/actions/core/markdownify.py +7 -6
  4. kash/actions/core/readability.py +7 -6
  5. kash/actions/core/render_as_html.py +37 -0
  6. kash/actions/core/show_webpage.py +6 -11
  7. kash/actions/core/strip_html.py +2 -6
  8. kash/actions/core/tabbed_webpage_config.py +31 -0
  9. kash/actions/core/{webpage_generate.py → tabbed_webpage_generate.py} +5 -4
  10. kash/commands/__init__.py +8 -20
  11. kash/commands/base/basic_file_commands.py +15 -0
  12. kash/commands/base/debug_commands.py +13 -0
  13. kash/commands/base/files_command.py +28 -10
  14. kash/commands/base/general_commands.py +21 -16
  15. kash/commands/base/logs_commands.py +4 -2
  16. kash/commands/base/model_commands.py +8 -8
  17. kash/commands/base/search_command.py +3 -2
  18. kash/commands/base/show_command.py +5 -3
  19. kash/commands/extras/parse_uv_lock.py +186 -0
  20. kash/commands/help/doc_commands.py +2 -31
  21. kash/commands/help/welcome.py +33 -0
  22. kash/commands/workspace/selection_commands.py +11 -6
  23. kash/commands/workspace/workspace_commands.py +19 -17
  24. kash/config/colors.py +3 -1
  25. kash/config/env_settings.py +14 -1
  26. kash/config/init.py +2 -2
  27. kash/config/logger.py +59 -56
  28. kash/config/logger_basic.py +3 -3
  29. kash/config/settings.py +116 -57
  30. kash/config/setup.py +28 -12
  31. kash/config/text_styles.py +3 -13
  32. kash/docs/load_api_docs.py +2 -1
  33. kash/docs/markdown/topics/a3_getting_started.md +3 -2
  34. kash/{concepts → embeddings}/text_similarity.py +2 -2
  35. kash/exec/__init__.py +20 -3
  36. kash/exec/action_decorators.py +24 -10
  37. kash/exec/action_exec.py +41 -23
  38. kash/exec/action_registry.py +13 -48
  39. kash/exec/command_registry.py +2 -1
  40. kash/exec/fetch_url_metadata.py +4 -6
  41. kash/exec/importing.py +56 -0
  42. kash/exec/llm_transforms.py +12 -10
  43. kash/exec/precondition_registry.py +2 -1
  44. kash/exec/preconditions.py +22 -1
  45. kash/exec/resolve_args.py +4 -0
  46. kash/exec/shell_callable_action.py +33 -19
  47. kash/file_storage/file_store.py +42 -27
  48. kash/file_storage/item_file_format.py +5 -2
  49. kash/file_storage/metadata_dirs.py +11 -2
  50. kash/help/assistant.py +1 -1
  51. kash/help/assistant_instructions.py +2 -1
  52. kash/help/function_param_info.py +1 -1
  53. kash/help/help_embeddings.py +2 -2
  54. kash/help/help_printing.py +7 -11
  55. kash/llm_utils/clean_headings.py +1 -1
  56. kash/llm_utils/llm_api_keys.py +4 -4
  57. kash/llm_utils/llm_features.py +68 -0
  58. kash/llm_utils/llm_messages.py +1 -2
  59. kash/llm_utils/llm_names.py +1 -1
  60. kash/llm_utils/llms.py +8 -3
  61. kash/local_server/__init__.py +5 -2
  62. kash/local_server/local_server.py +8 -5
  63. kash/local_server/local_server_commands.py +2 -2
  64. kash/local_server/local_server_routes.py +1 -7
  65. kash/local_server/local_url_formatters.py +1 -1
  66. kash/mcp/__init__.py +5 -2
  67. kash/mcp/mcp_cli.py +5 -5
  68. kash/mcp/mcp_server_commands.py +5 -5
  69. kash/mcp/mcp_server_routes.py +5 -5
  70. kash/mcp/mcp_server_sse.py +4 -2
  71. kash/media_base/media_cache.py +8 -8
  72. kash/media_base/media_services.py +1 -1
  73. kash/media_base/media_tools.py +6 -6
  74. kash/media_base/services/local_file_media.py +2 -2
  75. kash/media_base/{speech_transcription.py → transcription_deepgram.py} +25 -110
  76. kash/media_base/transcription_format.py +73 -0
  77. kash/media_base/transcription_whisper.py +38 -0
  78. kash/model/__init__.py +73 -5
  79. kash/model/actions_model.py +38 -4
  80. kash/model/concept_model.py +30 -0
  81. kash/model/items_model.py +115 -32
  82. kash/model/params_model.py +24 -0
  83. kash/shell/completions/completion_scoring.py +37 -5
  84. kash/shell/output/kerm_codes.py +1 -2
  85. kash/shell/output/shell_formatting.py +14 -4
  86. kash/shell/shell_main.py +2 -2
  87. kash/shell/utils/exception_printing.py +6 -0
  88. kash/shell/utils/native_utils.py +26 -20
  89. kash/shell/utils/shell_function_wrapper.py +15 -15
  90. kash/text_handling/custom_sliding_transforms.py +12 -4
  91. kash/text_handling/doc_normalization.py +6 -2
  92. kash/text_handling/markdown_render.py +118 -0
  93. kash/text_handling/markdown_utils.py +226 -0
  94. kash/utils/common/function_inspect.py +360 -110
  95. kash/utils/common/import_utils.py +12 -3
  96. kash/utils/common/type_utils.py +0 -29
  97. kash/utils/common/url.py +27 -3
  98. kash/utils/errors.py +6 -0
  99. kash/utils/file_utils/file_ext.py +4 -0
  100. kash/utils/file_utils/file_formats.py +2 -2
  101. kash/utils/file_utils/file_formats_model.py +20 -1
  102. kash/web_content/dir_store.py +1 -2
  103. kash/web_content/file_cache_utils.py +37 -10
  104. kash/web_content/file_processing.py +68 -0
  105. kash/web_content/local_file_cache.py +12 -9
  106. kash/web_content/web_extract.py +8 -3
  107. kash/web_content/web_fetch.py +12 -4
  108. kash/web_gen/__init__.py +0 -4
  109. kash/web_gen/simple_webpage.py +52 -0
  110. kash/web_gen/tabbed_webpage.py +24 -14
  111. kash/web_gen/template_render.py +37 -2
  112. kash/web_gen/templates/base_styles.css.jinja +169 -43
  113. kash/web_gen/templates/base_webpage.html.jinja +110 -45
  114. kash/web_gen/templates/content_styles.css.jinja +4 -2
  115. kash/web_gen/templates/item_view.html.jinja +49 -39
  116. kash/web_gen/templates/simple_webpage.html.jinja +24 -0
  117. kash/web_gen/templates/tabbed_webpage.html.jinja +42 -33
  118. kash/workspaces/__init__.py +15 -2
  119. kash/workspaces/selections.py +18 -3
  120. kash/workspaces/source_items.py +0 -1
  121. kash/workspaces/workspaces.py +5 -11
  122. kash/xonsh_custom/command_nl_utils.py +40 -19
  123. kash/xonsh_custom/custom_shell.py +43 -11
  124. kash/xonsh_custom/customize_prompt.py +39 -21
  125. kash/xonsh_custom/load_into_xonsh.py +22 -25
  126. kash/xonsh_custom/shell_load_commands.py +2 -2
  127. kash/xonsh_custom/xonsh_completers.py +2 -249
  128. kash/xonsh_custom/xonsh_keybindings.py +282 -0
  129. kash/xonsh_custom/xonsh_modern_tools.py +3 -3
  130. kash/xontrib/kash_extension.py +5 -6
  131. {kash_shell-0.3.9.dist-info → kash_shell-0.3.11.dist-info}/METADATA +10 -8
  132. {kash_shell-0.3.9.dist-info → kash_shell-0.3.11.dist-info}/RECORD +137 -136
  133. kash/actions/core/webpage_config.py +0 -21
  134. kash/concepts/concept_formats.py +0 -23
  135. kash/shell/clideps/api_keys.py +0 -100
  136. kash/shell/clideps/dotenv_setup.py +0 -115
  137. kash/shell/clideps/dotenv_utils.py +0 -98
  138. kash/shell/clideps/pkg_deps.py +0 -257
  139. kash/shell/clideps/platforms.py +0 -11
  140. kash/shell/clideps/terminal_features.py +0 -56
  141. kash/shell/utils/osc_utils.py +0 -95
  142. kash/shell/utils/terminal_images.py +0 -133
  143. kash/text_handling/markdown_util.py +0 -167
  144. kash/utils/common/atomic_var.py +0 -171
  145. kash/utils/common/string_replace.py +0 -93
  146. kash/utils/common/string_template.py +0 -101
  147. /kash/{concepts → embeddings}/cosine.py +0 -0
  148. /kash/{concepts → embeddings}/embeddings.py +0 -0
  149. {kash_shell-0.3.9.dist-info → kash_shell-0.3.11.dist-info}/WHEEL +0 -0
  150. {kash_shell-0.3.9.dist-info → kash_shell-0.3.11.dist-info}/entry_points.txt +0 -0
  151. {kash_shell-0.3.9.dist-info → kash_shell-0.3.11.dist-info}/licenses/LICENSE +0 -0
@@ -11,15 +11,15 @@ import webbrowser
11
11
  from enum import Enum
12
12
  from pathlib import Path
13
13
 
14
+ from clideps.pkgs.pkg_check import pkg_check
15
+ from clideps.pkgs.platform_checks import Platform, get_platform
16
+ from clideps.terminal.terminal_images import terminal_show_image
14
17
  from flowmark import Wrap
15
18
  from funlog import log_calls
16
19
 
17
20
  from kash.config.logger import get_logger
18
- from kash.config.text_styles import BAT_STYLE, BAT_THEME, COLOR_ERROR
19
- from kash.shell.clideps.pkg_deps import Pkg, pkg_check
20
- from kash.shell.clideps.platforms import PLATFORM, Platform
21
+ from kash.config.text_styles import BAT_STYLE, BAT_STYLE_PLAIN, BAT_THEME, COLOR_ERROR
21
22
  from kash.shell.output.shell_output import cprint
22
- from kash.shell.utils.terminal_images import terminal_show_image
23
23
  from kash.utils.common.format_utils import fmt_loc
24
24
  from kash.utils.common.url import as_file_url, is_file_url, is_url
25
25
  from kash.utils.errors import FileNotFound, SetupError
@@ -49,11 +49,11 @@ def file_size_check(
49
49
  def native_open(filename: str | Path):
50
50
  filename = str(filename)
51
51
  log.message("Opening file: %s", filename)
52
- if PLATFORM == Platform.Darwin:
52
+ if get_platform() == Platform.Darwin:
53
53
  subprocess.run(["open", filename])
54
- elif PLATFORM == Platform.Linux:
54
+ elif get_platform() == Platform.Linux:
55
55
  subprocess.run(["xdg-open", filename])
56
- elif PLATFORM == Platform.Windows:
56
+ elif get_platform() == Platform.Windows:
57
57
  subprocess.run(["start", shlex.quote(filename)], shell=True)
58
58
  else:
59
59
  raise NotImplementedError("Unsupported platform")
@@ -110,12 +110,14 @@ def _detect_view_mode(file_or_url: str) -> ViewMode:
110
110
  def view_file_native(
111
111
  file_or_url: str | Path,
112
112
  view_mode: ViewMode = ViewMode.auto,
113
+ plain: bool = False,
113
114
  ):
114
115
  """
115
116
  Open a file or URL in the console or a native app. If `view_mode` is auto,
116
117
  automatically determine whether to use console, web browser, or the user's
117
118
  preferred native application. For images, also tries terminal-based image
118
- display.
119
+ display. The `--plain` flag will disable line numbers, grid, etc. in `bat`
120
+ and force `ViewMode.console`.
119
121
  """
120
122
  file_or_url = str(file_or_url)
121
123
  path = None
@@ -124,6 +126,9 @@ def view_file_native(
124
126
  if not path.exists():
125
127
  raise FileNotFound(fmt_loc(path))
126
128
 
129
+ if plain:
130
+ view_mode = ViewMode.console
131
+
127
132
  if view_mode == ViewMode.auto:
128
133
  view_mode = _detect_view_mode(file_or_url)
129
134
 
@@ -133,7 +138,7 @@ def view_file_native(
133
138
  webbrowser.open(url)
134
139
  elif view_mode == ViewMode.console and path:
135
140
  file_size, min_lines = file_size_check(path)
136
- view_file_console(path, use_pager=min_lines > 40 or file_size > 20 * 1024)
141
+ view_file_console(path, use_pager=min_lines > 40 or file_size > 20 * 1024, plain=plain)
137
142
  elif view_mode == ViewMode.terminal_image and path:
138
143
  try:
139
144
  terminal_show_image(path)
@@ -187,11 +192,11 @@ def tail_file(
187
192
  if follow:
188
193
  max_lines = follow_max_lines
189
194
 
190
- pkg_check().require(Pkg.tail)
191
- pkg_check().warn_if_missing(Pkg.bat)
195
+ pkg_check().require("tail")
196
+ pkg_check().warn_if_missing("bat")
192
197
 
193
198
  if follow:
194
- if pkg_check().has(Pkg.bat):
199
+ if pkg_check().is_found("bat"):
195
200
  # Follow the file in real-time.
196
201
  command = (
197
202
  f"tail -{max_lines} -f {all_paths_str} | "
@@ -202,8 +207,8 @@ def tail_file(
202
207
  command = f"tail -f {all_paths_str}"
203
208
  cprint("Following file: `%s`", command, text_wrap=Wrap.NONE)
204
209
  else:
205
- pkg_check().require(Pkg.less)
206
- if pkg_check().has(Pkg.bat, Pkg.less):
210
+ pkg_check().require("less")
211
+ if pkg_check().is_found("bat"):
207
212
  command = (
208
213
  f"tail -{max_lines} {all_paths_str} | "
209
214
  f"bat --paging=never --color=always --style=plain --theme={BAT_THEME} -l log | "
@@ -216,7 +221,7 @@ def tail_file(
216
221
  subprocess.run(command, shell=True, check=True)
217
222
 
218
223
 
219
- def view_file_console(filename: str | Path, use_pager: bool = True):
224
+ def view_file_console(filename: str | Path, use_pager: bool = True, plain: bool = False):
220
225
  """
221
226
  Displays a file in the console with pagination and syntax highlighting.
222
227
  """
@@ -226,18 +231,19 @@ def view_file_console(filename: str | Path, use_pager: bool = True):
226
231
  # TODO: Visualize YAML frontmatter with different syntax/style than Markdown content.
227
232
 
228
233
  is_text = file_format_info(filename).is_text
234
+ bat_style = BAT_STYLE_PLAIN if plain else BAT_STYLE
229
235
  if is_text:
230
- pkg_check().require(Pkg.less)
231
- if pkg_check().has(Pkg.bat):
236
+ pkg_check().require("less")
237
+ if pkg_check().is_found("bat"):
232
238
  pager_str = "--pager=always --pager=less " if use_pager else ""
233
- command = f"bat {pager_str}--color=always --style={BAT_STYLE} --theme={BAT_THEME} {quoted_filename}"
239
+ command = f"bat {pager_str}--color=always --style={bat_style} --theme={BAT_THEME} {quoted_filename}"
234
240
  else:
235
- pkg_check().require(Pkg.pygmentize)
241
+ pkg_check().require("pygmentize")
236
242
  command = f"pygmentize -g {quoted_filename}"
237
243
  if use_pager:
238
244
  command = f"{command} | less -R"
239
245
  else:
240
- pkg_check().require(Pkg.hexyl)
246
+ pkg_check().require("hexyl")
241
247
  command = f"hexyl {quoted_filename}"
242
248
  if use_pager:
243
249
  command = f"{command} | less -R"
@@ -27,7 +27,7 @@ def _map_positional(
27
27
  keywords_consumed = 0
28
28
 
29
29
  for param in pos_params:
30
- param_type = param.type or str
30
+ param_type = param.effective_type or str
31
31
  if param.is_varargs:
32
32
  pos_values.extend([param_type(arg) for arg in pos_args[i:]])
33
33
  return pos_values, 0 # All remaining args are consumed, so we can return early.
@@ -39,7 +39,7 @@ def _map_positional(
39
39
 
40
40
  # If there are remaining positional arguments, they will go toward keyword arguments.
41
41
  for param in kw_params:
42
- param_type = param.type or str
42
+ param_type = param.effective_type or str
43
43
  if not param.is_varargs and i < len(pos_args):
44
44
  pos_values.append(param_type(pos_args[i]))
45
45
  i += 1
@@ -70,30 +70,30 @@ def _map_keyword(kw_args: Mapping[str, str | bool], kw_params: list[FuncParam])
70
70
  for key, value in kw_args.items():
71
71
  matching_param = next((param for param in kw_params if param.name == key), None)
72
72
  if matching_param:
73
- matching_param_type = matching_param.type or str
73
+ param_type = matching_param.effective_type or str
74
74
 
75
75
  # Handle UnionType (str | None) specially
76
- if hasattr(types, "UnionType") and isinstance(matching_param_type, types.UnionType):
77
- args = get_args(matching_param_type)
76
+ if hasattr(types, "UnionType") and isinstance(param_type, types.UnionType):
77
+ args = get_args(param_type)
78
78
  non_none_args = [arg for arg in args if arg is not type(None)]
79
79
  if len(non_none_args) == 1 and isinstance(non_none_args[0], type):
80
- matching_param_type = non_none_args[0]
80
+ param_type = non_none_args[0]
81
81
 
82
- if isinstance(value, bool) and not issubclass(matching_param_type, bool):
82
+ if isinstance(value, bool) and not issubclass(param_type, bool):
83
83
  raise InvalidCommand(f"Option `--{key}` expects a value")
84
- if not isinstance(value, bool) and issubclass(matching_param_type, bool):
84
+ if not isinstance(value, bool) and issubclass(param_type, bool):
85
85
  raise InvalidCommand(f"Option `--{key}` is boolean and does not take a value")
86
86
 
87
87
  try:
88
- kw_values[key] = instantiate_as_type(
89
- value, matching_param_type, accept_enum_names=True
90
- )
88
+ kw_values[key] = instantiate_as_type(value, param_type, accept_enum_names=True)
91
89
  except Exception as e:
92
90
  valid_values = ""
93
- if isinstance(matching_param.type, type) and issubclass(matching_param.type, Enum):
94
- valid_values = f" (valid values are: {', '.join('`' + v.name + '`' for v in matching_param.type)})"
91
+ if isinstance(param_type, type) and issubclass(param_type, Enum):
92
+ valid_values = (
93
+ f" (valid values are: {', '.join('`' + v.name + '`' for v in param_type)})"
94
+ )
95
95
  raise InvalidCommand(
96
- f"Invalid value for parameter `{key}` of type {matching_param.type}: {value!r}{valid_values}"
96
+ f"Invalid value for parameter `{key}` of type {param_type}: {value!r}{valid_values}"
97
97
  ) from e
98
98
  elif var_kw_param:
99
99
  var_kw_values[key] = value
@@ -117,7 +117,7 @@ def wrap_for_shell_args(func: Callable[..., R]) -> Callable[[list[str]], R | Non
117
117
  from kash.commands.help import help_commands
118
118
 
119
119
  params = inspect_function_params(func)
120
- pos_params = [p for p in params if p.is_positional]
120
+ pos_params = [p for p in params if p.is_pure_positional]
121
121
  kw_params = [p for p in params if p not in pos_params]
122
122
 
123
123
  @wraps(func)
@@ -1,10 +1,17 @@
1
1
  from collections.abc import Callable
2
2
  from math import ceil
3
3
 
4
- from chopdiff.docs import DiffFilter, Paragraph, TextDoc, TextUnit, diff_docs, join_wordtoks
4
+ from chopdiff.docs import (
5
+ DIFF_FILTER_NONE,
6
+ DiffFilter,
7
+ Paragraph,
8
+ TextDoc,
9
+ TextUnit,
10
+ diff_docs,
11
+ join_wordtoks,
12
+ )
5
13
  from chopdiff.transforms import (
6
14
  WindowSettings,
7
- accept_all,
8
15
  remove_window_br,
9
16
  sliding_para_window,
10
17
  sliding_window_transform,
@@ -31,7 +38,7 @@ def filtered_transform(
31
38
  doc: TextDoc,
32
39
  transform_func: TextDocTransform,
33
40
  windowing: WindowSettings | None,
34
- diff_filter: DiffFilter = accept_all,
41
+ diff_filter: DiffFilter | None = None,
35
42
  ) -> TextDoc:
36
43
  """
37
44
  Apply a transform with sliding window across the input doc, enforcing the changes it's
@@ -39,7 +46,7 @@ def filtered_transform(
39
46
 
40
47
  If windowing is None, apply the transform to the entire document at once.
41
48
  """
42
- has_filter = diff_filter != accept_all
49
+ has_filter = bool(diff_filter and diff_filter != DIFF_FILTER_NONE)
43
50
 
44
51
  if not windowing or not windowing.size:
45
52
  transformed_doc = transform_func(doc)
@@ -52,6 +59,7 @@ def filtered_transform(
52
59
  transformed_doc = transform_func(input_doc)
53
60
 
54
61
  if has_filter:
62
+ assert diff_filter
55
63
  # Check the transform did what it should have.
56
64
  diff = diff_docs(input_doc, transformed_doc)
57
65
  accepted_diff, rejected_diff = diff.filter(diff_filter)
@@ -21,7 +21,11 @@ def normalize_formatting_ansi(text: str, format: Format | None, width=DEFAULT_WR
21
21
  text, width=width, word_splitter=simple_word_splitter, len_fn=ansi_cell_len
22
22
  )
23
23
  elif format == Format.markdown or format == Format.md_html:
24
- return fill_markdown(text, line_wrapper=line_wrap_by_sentence(len_fn=ansi_cell_len))
24
+ return fill_markdown(
25
+ text,
26
+ line_wrapper=line_wrap_by_sentence(len_fn=ansi_cell_len, is_markdown=True),
27
+ cleanups=True, # Safe cleanups like unbolding section headers.
28
+ )
25
29
  elif format == Format.html:
26
30
  # We don't currently auto-format HTML as we sometimes use HTML with specifically chosen line breaks.
27
31
  return text
@@ -52,7 +56,7 @@ def normalize_text_file(
52
56
 
53
57
 
54
58
  def test_osc8_link():
55
- from kash.shell.utils.osc_utils import osc8_link
59
+ from clideps.terminal.osc_utils import osc8_link
56
60
 
57
61
  link = osc8_link("https://example.com/" + "x" * 50, "Example")
58
62
  assert ansi_cell_len(link) == 7
@@ -0,0 +1,118 @@
1
+ from textwrap import dedent
2
+
3
+ import marko
4
+ import regex
5
+ from marko.block import HTMLBlock
6
+ from marko.ext.gfm import GFM
7
+ from marko.helpers import MarkoExtension
8
+
9
+
10
+ # When we use divs in Markdown we usually want them to be standalone paragraphs,
11
+ # so it doesn't break other wrapping with flowmark etc. This handles that.
12
+ class CustomHTMLBlockMixin:
13
+ div_pattern = regex.compile(r"^\s*<div\b", regex.IGNORECASE)
14
+
15
+ def render_html_block(self, element: HTMLBlock) -> str:
16
+ # Apply GFM filtering first via the next renderer in the MRO.
17
+ filtered_body = super().render_html_block(element) # pyright: ignore
18
+
19
+ # Check if the original block was a div.
20
+ if self.div_pattern.match(element.body.strip()):
21
+ # If it was a div, wrap the *filtered* result in newlines.
22
+ return f"\n{filtered_body.strip()}\n"
23
+ else:
24
+ # Otherwise, return the GFM-filtered body directly.
25
+ return filtered_body
26
+
27
+
28
+ # GFM first, adding our custom override as an extension to handle divs our way.
29
+ # Extensions later in this list are earlier in MRO.
30
+ MARKO_GFM = marko.Markdown(
31
+ extensions=["footnote", GFM, MarkoExtension(renderer_mixins=[CustomHTMLBlockMixin])]
32
+ )
33
+
34
+
35
+ FOOTNOTE_UP_ARROW = "&nbsp;↑&nbsp;"
36
+
37
+
38
+ def html_postprocess(html: str) -> str:
39
+ """
40
+ Final tweaks to the HTML.
41
+ """
42
+ # TODO: Improve rendering of footnote defs to put the up arrow next to the number instead?
43
+ html = html.replace(
44
+ """class="footnote">&#8617;</a>""", f"""class="footnote">{FOOTNOTE_UP_ARROW}</a>"""
45
+ )
46
+ return html
47
+
48
+
49
def markdown_to_html(markdown: str, converter: marko.Markdown = MARKO_GFM) -> str:
    """
    Convert Markdown to HTML.

    Wraps div blocks with newlines for better Markdown compatibility.

    Output passes through raw HTML! Note per GFM, unsafe script tags etc
    are [allowed in some cases](https://github.github.com/gfm/#example-140) so
    additional sanitization is needed if input isn't trusted.

    The converted HTML is passed through `html_postprocess()` before being
    returned.
    """
    html = converter.convert(markdown)
    return html_postprocess(html)
62
+
63
+
64
+ ## Tests
65
+
66
+
67
+ def test_markdown_to_html():
68
+ markdown = dedent(
69
+ """
70
+ # Heading
71
+
72
+ This is a paragraph and a [link](https://example.com).
73
+
74
+ - Item 1
75
+ - Item 2
76
+
77
+ ## Subheading
78
+
79
+ This is a paragraph with a <span>span</span> tag.
80
+ This is a paragraph with a <div>div</div> tag.
81
+ This is a paragraph with an <a href='https://example.com'>example link</a>.
82
+
83
+ <div class="div1">This is a div.</div>
84
+
85
+ <div class="div2">This is a second div.
86
+ <iframe src="https://example.com">Inline iframe, note this is sanitized</iframe>
87
+ </div>
88
+
89
+ <!-- Script tag in a block, note this isn't sanitized -->
90
+ <script>console.log("Javascript block!");</script>
91
+ """
92
+ )
93
+ print(markdown_to_html(markdown))
94
+
95
+ expected_html = dedent(
96
+ """
97
+ <h1>Heading</h1>
98
+ <p>This is a paragraph and a <a href="https://example.com">link</a>.</p>
99
+ <ul>
100
+ <li>Item 1</li>
101
+ <li>Item 2</li>
102
+ </ul>
103
+ <h2>Subheading</h2>
104
+ <p>This is a paragraph with a <span>span</span> tag.
105
+ This is a paragraph with a <div>div</div> tag.
106
+ This is a paragraph with an <a href='https://example.com'>example link</a>.</p>
107
+
108
+ <div class="div1">This is a div.</div>
109
+
110
+ <div class="div2">This is a second div.
111
+ &lt;iframe src="https://example.com">Inline iframe, note this is sanitized</iframe>
112
+ </div>
113
+ <!-- Script tag in a block, note this isn't sanitized -->
114
+ <script>console.log("Javascript block!");</script>
115
+ """
116
+ )
117
+
118
+ assert markdown_to_html(markdown).strip() == expected_html.strip()
@@ -0,0 +1,226 @@
1
+ import re
2
+ from typing import Any
3
+
4
+ import marko
5
+ import regex
6
+ from marko.block import Heading, ListItem
7
+ from marko.inline import Link
8
+
9
+ from kash.config.logger import get_logger
10
+ from kash.utils.common.url import Url
11
+
12
+ log = get_logger(__name__)
13
+
14
# Characters that commonly need escaping in Markdown inline text.
MARKDOWN_ESCAPE_CHARS = r"([\\`*_{}\[\]()#+.!-])"
MARKDOWN_ESCAPE_RE = re.compile(MARKDOWN_ESCAPE_CHARS)


def escape_markdown(text: str) -> str:
    """
    Escape characters with special meaning in Markdown.
    """
    return MARKDOWN_ESCAPE_RE.sub(r"\\\1", text)


def as_bullet_points(values: list[Any]) -> str:
    """
    Convert a list of values to a Markdown bullet-point list. If a value is a string,
    it is treated like Markdown. If it's something else it's converted to a string
    and also escaped for Markdown.

    Newlines inside a value are replaced with spaces and surrounding whitespace
    is stripped, so every value stays on a single list item. Items are separated
    by a blank line. An empty list yields an empty string.
    """
    points: list[str] = []
    for value in values:
        # Pick the text form first: only strings have `.replace()`, so non-string
        # values must be stringified (and escaped) before any cleanup is applied.
        text = value if isinstance(value, str) else escape_markdown(str(value))
        points.append(text.replace("\n", " ").strip())

    return "\n\n".join(f"- {point}" for point in points)
41
+
42
+
43
+ def markdown_link(text: str, url: str | Url) -> str:
44
+ """
45
+ Create a Markdown link.
46
+ """
47
+ text = text.replace("[", "\\[").replace("]", "\\]")
48
+ return f"[{text}]({url})"
49
+
50
+
51
+ def is_markdown_header(markdown: str) -> bool:
52
+ """
53
+ Is the start of this content a Markdown header?
54
+ """
55
+ return regex.match(r"^#+ ", markdown) is not None
56
+
57
+
58
+ def _tree_links(element, include_internal=False):
59
+ links = []
60
+
61
+ def _find_links(element):
62
+ match element:
63
+ case Link():
64
+ if include_internal or not element.dest.startswith("#"):
65
+ links.append(element.dest)
66
+ case _:
67
+ if hasattr(element, "children"):
68
+ for child in element.children:
69
+ _find_links(child)
70
+
71
+ _find_links(element)
72
+ return links
73
+
74
+
75
def extract_links(file_path: str, include_internal=False) -> list[str]:
    """
    Extract all links from a Markdown file. Future: Include textual and section context.

    The file is read as UTF-8. Links whose destination starts with `#` are
    only included when `include_internal` is true.
    """

    # Be explicit about the encoding so results don't depend on the platform locale.
    with open(file_path, encoding="utf-8") as file:
        content = file.read()
        document = marko.parse(content)
        return _tree_links(document, include_internal)
84
+
85
+
86
+ def extract_first_header(content: str) -> str | None:
87
+ """
88
+ Extract the first header from markdown content if present.
89
+ Also drops any formatting, so the result can be used as a document title.
90
+ """
91
+ document = marko.parse(content)
92
+
93
+ if document.children and isinstance(document.children[0], Heading):
94
+ return _extract_text(document.children[0]).strip()
95
+
96
+ return None
97
+
98
+
99
+ def _extract_text(element: Any) -> str:
100
+ if isinstance(element, str):
101
+ return element
102
+ elif hasattr(element, "children"):
103
+ return "".join(_extract_text(child) for child in element.children)
104
+ else:
105
+ return ""
106
+
107
+
108
+ def _tree_bullet_points(element: marko.block.Document) -> list[str]:
109
+ bullet_points: list[str] = []
110
+
111
+ def _find_bullet_points(element):
112
+ if isinstance(element, ListItem):
113
+ bullet_points.append(_extract_text(element).strip())
114
+ elif hasattr(element, "children"):
115
+ for child in element.children:
116
+ _find_bullet_points(child)
117
+
118
+ _find_bullet_points(element)
119
+ return bullet_points
120
+
121
+
122
+ def extract_bullet_points(content: str) -> list[str]:
123
+ """
124
+ Extract list item values from a Markdown file.
125
+ """
126
+
127
+ document = marko.parse(content)
128
+ return _tree_bullet_points(document)
129
+
130
+
131
+ def _type_from_heading(heading: Heading) -> str:
132
+ if heading.level in [1, 2, 3, 4, 5, 6]:
133
+ return f"h{heading.level}"
134
+ else:
135
+ raise ValueError(f"Unsupported heading: {heading}: level {heading.level}")
136
+
137
+
138
+ def _last_unescaped_bracket(text: str, index: int) -> str | None:
139
+ escaped = False
140
+ for i in range(index - 1, -1, -1):
141
+ ch = text[i]
142
+ if ch == "\\":
143
+ escaped = not escaped # Toggle escaping chain
144
+ continue
145
+ if ch in "[]":
146
+ if not escaped:
147
+ return ch
148
+ # Reset escape status after any non‑backslash char
149
+ escaped = False
150
+ return None
151
+
152
+
153
+ def find_markdown_text(
154
+ pattern: re.Pattern[str], text: str, *, start_pos: int = 0
155
+ ) -> re.Match[str] | None:
156
+ """
157
+ Return first regex `pattern` match in `text` not inside an existing link.
158
+
159
+ A match is considered inside a link when the most recent unescaped square
160
+ bracket preceding the match start is an opening bracket "[".
161
+ """
162
+
163
+ pos = start_pos
164
+ while True:
165
+ match = pattern.search(text, pos)
166
+ if match is None:
167
+ return None
168
+
169
+ last_bracket = _last_unescaped_bracket(text, match.start())
170
+ if last_bracket != "[":
171
+ return match
172
+
173
+ # Skip this match and continue searching
174
+ pos = match.end()
175
+
176
+
177
+ ## Tests
178
+
179
+
180
+ def test_escape_markdown() -> None:
181
+ assert escape_markdown("") == ""
182
+ assert escape_markdown("Hello world") == "Hello world"
183
+ assert escape_markdown("`code`") == "\\`code\\`"
184
+ assert escape_markdown("*italic*") == "\\*italic\\*"
185
+ assert escape_markdown("_bold_") == "\\_bold\\_"
186
+ assert escape_markdown("{braces}") == "\\{braces\\}"
187
+ assert escape_markdown("# header") == "\\# header"
188
+ assert escape_markdown("1. item") == "1\\. item"
189
+ assert escape_markdown("line+break") == "line\\+break"
190
+ assert escape_markdown("dash-") == "dash\\-"
191
+ assert escape_markdown("!bang") == "\\!bang"
192
+ assert escape_markdown("backslash\\") == "backslash\\\\"
193
+ assert escape_markdown("Multiple *special* chars [here](#anchor).") == (
194
+ "Multiple \\*special\\* chars \\[here\\]\\(\\#anchor\\)\\."
195
+ )
196
+
197
+
198
+ def test_extract_first_header() -> None:
199
+ assert extract_first_header("# Header 1") == "Header 1"
200
+ assert extract_first_header("Not a header\n# Header later") is None
201
+ assert extract_first_header("") is None
202
+ assert (
203
+ extract_first_header("## *Formatted* _Header_ [link](#anchor)") == "Formatted Header link"
204
+ )
205
+
206
+
207
+ def test_find_markdown_text() -> None: # pragma: no cover
208
+ # Match is returned when the term is not inside a link.
209
+ text = "Foo bar baz"
210
+ pattern = re.compile("Foo Bar", re.IGNORECASE)
211
+ match = find_markdown_text(pattern, text)
212
+ assert match is not None and match.group(0) == "Foo bar"
213
+
214
+ # Skips occurrence inside link and returns the first one outside.
215
+ text = "[Foo](http://example.com) something Foo"
216
+ pattern = re.compile("Foo", re.IGNORECASE)
217
+ match = find_markdown_text(pattern, text)
218
+ assert match is not None
219
+ assert match.start() > text.index(") ")
220
+ assert text[match.start() : match.end()] == "Foo"
221
+
222
+ # Returns None when the only occurrences are inside links.
223
+ text = "prefix [bar](http://example.com) suffix"
224
+ pattern = re.compile("bar", re.IGNORECASE)
225
+ match = find_markdown_text(pattern, text)
226
+ assert match is None