kash-shell 0.3.28__py3-none-any.whl → 0.3.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. kash/actions/core/chat.py +1 -0
  2. kash/actions/core/markdownify_html.py +4 -5
  3. kash/actions/core/minify_html.py +4 -5
  4. kash/actions/core/readability.py +1 -4
  5. kash/actions/core/render_as_html.py +10 -7
  6. kash/actions/core/save_sidematter_meta.py +47 -0
  7. kash/actions/core/show_webpage.py +2 -0
  8. kash/actions/core/zip_sidematter.py +47 -0
  9. kash/commands/base/basic_file_commands.py +7 -4
  10. kash/commands/base/diff_commands.py +6 -4
  11. kash/commands/base/files_command.py +31 -30
  12. kash/commands/base/general_commands.py +3 -2
  13. kash/commands/base/logs_commands.py +6 -4
  14. kash/commands/base/reformat_command.py +3 -2
  15. kash/commands/base/search_command.py +4 -3
  16. kash/commands/base/show_command.py +9 -7
  17. kash/commands/help/assistant_commands.py +6 -4
  18. kash/commands/help/help_commands.py +7 -4
  19. kash/commands/workspace/selection_commands.py +18 -16
  20. kash/commands/workspace/workspace_commands.py +39 -26
  21. kash/config/logger.py +1 -1
  22. kash/config/setup.py +2 -27
  23. kash/config/text_styles.py +1 -1
  24. kash/docs/markdown/topics/a1_what_is_kash.md +26 -18
  25. kash/docs/markdown/topics/a2_installation.md +3 -2
  26. kash/exec/action_decorators.py +7 -5
  27. kash/exec/action_exec.py +104 -53
  28. kash/exec/fetch_url_items.py +40 -11
  29. kash/exec/llm_transforms.py +14 -5
  30. kash/exec/preconditions.py +2 -2
  31. kash/exec/resolve_args.py +4 -1
  32. kash/exec/runtime_settings.py +3 -0
  33. kash/file_storage/file_store.py +108 -114
  34. kash/file_storage/item_file_format.py +91 -26
  35. kash/file_storage/item_id_index.py +128 -0
  36. kash/help/help_types.py +1 -1
  37. kash/llm_utils/llms.py +6 -1
  38. kash/local_server/local_server_commands.py +2 -1
  39. kash/mcp/mcp_server_commands.py +3 -2
  40. kash/mcp/mcp_server_routes.py +42 -12
  41. kash/model/actions_model.py +44 -32
  42. kash/model/compound_actions_model.py +4 -3
  43. kash/model/exec_model.py +33 -3
  44. kash/model/items_model.py +150 -60
  45. kash/model/params_model.py +4 -4
  46. kash/shell/output/shell_output.py +1 -2
  47. kash/utils/api_utils/gather_limited.py +2 -0
  48. kash/utils/api_utils/multitask_gather.py +74 -0
  49. kash/utils/common/s3_utils.py +108 -0
  50. kash/utils/common/url.py +16 -4
  51. kash/utils/file_formats/chat_format.py +7 -4
  52. kash/utils/file_utils/file_ext.py +1 -0
  53. kash/utils/file_utils/file_formats.py +4 -2
  54. kash/utils/file_utils/file_formats_model.py +12 -0
  55. kash/utils/text_handling/doc_normalization.py +1 -1
  56. kash/utils/text_handling/markdown_footnotes.py +224 -0
  57. kash/utils/text_handling/markdown_utils.py +532 -41
  58. kash/utils/text_handling/markdownify_utils.py +2 -1
  59. kash/web_content/web_fetch.py +2 -1
  60. kash/web_gen/templates/components/tooltip_scripts.js.jinja +186 -1
  61. kash/web_gen/templates/components/youtube_popover_scripts.js.jinja +223 -0
  62. kash/web_gen/templates/components/youtube_popover_styles.css.jinja +150 -0
  63. kash/web_gen/templates/content_styles.css.jinja +53 -1
  64. kash/web_gen/templates/youtube_webpage.html.jinja +47 -0
  65. kash/web_gen/webpage_render.py +103 -0
  66. kash/workspaces/workspaces.py +0 -5
  67. kash/xonsh_custom/custom_shell.py +4 -3
  68. {kash_shell-0.3.28.dist-info → kash_shell-0.3.33.dist-info}/METADATA +35 -26
  69. {kash_shell-0.3.28.dist-info → kash_shell-0.3.33.dist-info}/RECORD +72 -64
  70. kash/llm_utils/llm_features.py +0 -72
  71. kash/web_gen/simple_webpage.py +0 -55
  72. {kash_shell-0.3.28.dist-info → kash_shell-0.3.33.dist-info}/WHEEL +0 -0
  73. {kash_shell-0.3.28.dist-info → kash_shell-0.3.33.dist-info}/entry_points.txt +0 -0
  74. {kash_shell-0.3.28.dist-info → kash_shell-0.3.33.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,108 @@
1
+ from __future__ import annotations
2
+
3
+ import shutil
4
+ import subprocess
5
+ from pathlib import Path
6
+
7
+ from sidematter_format.sidematter_format import Sidematter
8
+
9
+ from kash.utils.common.url import Url, is_s3_url, parse_s3_url
10
+
11
+
12
def check_aws_cli() -> None:
    """
    Verify that the `aws` executable can be found on the PATH.

    Raises a RuntimeError if it cannot be found.
    """
    aws_executable = shutil.which("aws")
    if aws_executable is not None:
        return

    raise RuntimeError(
        "AWS CLI not found in PATH. Please install 'awscli' and ensure 'aws' is available."
    )
20
+
21
+
22
def get_s3_parent_folder(url: Url) -> Url | None:
    """
    Get the parent folder of an S3 URL, or None if not an S3 URL.

    The key is always handled as a POSIX-style path, since S3 keys use `/` as
    the separator regardless of the local platform. For a key at the top level
    of the bucket, the bucket root (`s3://bucket/`) is returned.
    """
    # Local import: only `Path` is imported from pathlib at module level.
    from pathlib import PurePosixPath

    if not is_s3_url(url):
        return None

    s3_bucket, s3_key = parse_s3_url(url)
    s3_parent_folder = str(PurePosixPath(s3_key).parent)

    # pathlib reports the parent of a top-level (or empty) key as ".", which
    # is not a meaningful S3 prefix, so map it to the bucket root instead.
    if s3_parent_folder == ".":
        s3_parent_folder = ""

    return Url(f"s3://{s3_bucket}/{s3_parent_folder}")
34
+
35
+
36
def s3_sync_to_folder(
    src_path: str | Path,
    s3_dest_parent: Url,
    *,
    include_sidematter: bool = False,
) -> list[Url]:
    """
    Upload a local file or directory into an S3 "parent" folder by running
    `aws s3 sync`. With `include_sidematter` set, the sidematter files that
    belong to a source file are synced alongside it.

    The returned list holds the S3 URLs of the top-level sync targets:
    - Single file: the URL of the file (plus sidematter file/dir URLs if included).
    - Directory: just the destination parent prefix URL (contents are not listed).

    Raises ValueError if the source path does not exist or the destination is
    not an s3:// URL, and RuntimeError if the AWS CLI is not available.
    """
    local_path = Path(src_path)
    if not local_path.exists():
        raise ValueError(f"Source path does not exist: {local_path}")
    if not is_s3_url(s3_dest_parent):
        raise ValueError(f"Destination must be an s3:// URL: {s3_dest_parent}")

    check_aws_cli()

    # Normalize so the prefix always ends in exactly one slash.
    dest_prefix = str(s3_dest_parent).rstrip("/") + "/"

    # Directory mode: sync the whole directory into the destination prefix.
    if not local_path.is_file():
        subprocess.run(["aws", "s3", "sync", str(local_path), dest_prefix], check=True)
        return [Url(dest_prefix)]

    # File mode: sync the file itself, or everything Sidematter resolves for it.
    paths_to_sync: list[Path]
    if include_sidematter:
        sidematter = Sidematter(local_path).resolve(parse_meta=False, use_frontmatter=False)
        paths_to_sync = sidematter.path_list
    else:
        paths_to_sync = [local_path]

    synced: list[Url] = []
    for path in paths_to_sync:
        if path.is_file():
            # Sync the parent directory but filter down to this one file, so we
            # still benefit from sync's skipping of unchanged files.
            target = dest_prefix + path.name
            command = [
                "aws",
                "s3",
                "sync",
                str(path.parent),
                dest_prefix,
                "--exclude",
                "*",
                "--include",
                path.name,
            ]
        elif path.is_dir():
            target = dest_prefix + path.name + "/"
            command = ["aws", "s3", "sync", str(path), target]
        else:
            # Neither a file nor a directory (e.g. missing): nothing to sync.
            continue

        subprocess.run(command, check=True)
        synced.append(Url(target))

    return synced
kash/utils/common/url.py CHANGED
@@ -26,6 +26,7 @@ A string that may not be resolved to a URL or path.
26
26
 
27
27
  HTTP_ONLY = ["http", "https"]
28
28
  HTTP_OR_FILE = HTTP_ONLY + ["file"]
29
+ HTTP_OR_FILE_OR_S3 = HTTP_OR_FILE + ["s3"]
29
30
 
30
31
 
31
32
  def check_if_url(
@@ -36,7 +37,8 @@ def check_if_url(
36
37
  the `urlparse.ParseResult`.
37
38
 
38
39
  Also returns false for Paths, so that it's easy to use local paths and URLs
39
- (`Locator`s) interchangeably. Can provide `HTTP_ONLY` or `HTTP_OR_FILE` to
40
+ (`Locator`s) interchangeably. Can provide `HTTP_ONLY` or `HTTP_OR_FILE`
41
+ or `HTTP_OR_FILE_OR_S3` to
40
42
  restrict to only certain schemes.
41
43
  """
42
44
  if isinstance(text, Path):
@@ -69,6 +71,13 @@ def is_file_url(url: str | Url) -> bool:
69
71
  return url.startswith("file://")
70
72
 
71
73
 
74
def is_s3_url(url: str | Url) -> bool:
    """
    Check whether the given URL begins with the `s3://` scheme prefix.
    """
    s3_prefix = "s3://"
    return url.startswith(s3_prefix)
79
+
80
+
72
81
  def parse_http_url(url: str | Url) -> ParseResult:
73
82
  """
74
83
  Parse an http/https URL and return the parsed result, raising ValueError if
@@ -118,7 +127,7 @@ def as_file_url(path: str | Path) -> Url:
118
127
 
119
128
  def normalize_url(
120
129
  url: Url,
121
- check_schemes: list[str] | None = HTTP_OR_FILE,
130
+ check_schemes: list[str] | None = HTTP_OR_FILE_OR_S3,
122
131
  drop_fragment: bool = True,
123
132
  resolve_local_paths: bool = True,
124
133
  ) -> Url:
@@ -238,7 +247,10 @@ def test_normalize_url():
238
247
  normalize_url(url=Url("/not/a/URL"))
239
248
  raise AssertionError()
240
249
  except ValueError as e:
241
- assert str(e) == "Scheme '' not in allowed schemes: ['http', 'https', 'file']: /not/a/URL"
250
+ assert (
251
+ str(e)
252
+ == "Scheme '' not in allowed schemes: ['http', 'https', 'file', 's3']: /not/a/URL"
253
+ )
242
254
 
243
255
  try:
244
256
  normalize_url(Url("ftp://example.com"))
@@ -246,7 +258,7 @@ def test_normalize_url():
246
258
  except ValueError as e:
247
259
  assert (
248
260
  str(e)
249
- == "Scheme 'ftp' not in allowed schemes: ['http', 'https', 'file']: ftp://example.com"
261
+ == "Scheme 'ftp' not in allowed schemes: ['http', 'https', 'file', 's3']: ftp://example.com"
250
262
  )
251
263
 
252
264
 
@@ -93,7 +93,6 @@ content: |
93
93
 
94
94
  from __future__ import annotations
95
95
 
96
- import json
97
96
  from dataclasses import field
98
97
  from enum import Enum
99
98
  from io import StringIO
@@ -104,6 +103,7 @@ from typing import Any
104
103
  from frontmatter_format import from_yaml_string, new_yaml, to_yaml_string
105
104
  from prettyfmt import abbrev_obj, custom_key_sort, fmt_size_human
106
105
  from pydantic.dataclasses import dataclass
106
+ from sidematter_format import to_json_string
107
107
 
108
108
 
109
109
  class ChatRole(str, Enum):
@@ -161,9 +161,12 @@ class ChatMessage:
161
161
  Convert to a format that can be used as a standard chat completion, with
162
162
  the content field holding JSON-serialized data if it is structured.
163
163
  """
164
+
164
165
  return {
165
166
  "role": self.role.value,
166
- "content": json.dumps(self.content) if isinstance(self.content, dict) else self.content,
167
+ "content": to_json_string(self.content)
168
+ if isinstance(self.content, dict)
169
+ else self.content,
167
170
  }
168
171
 
169
172
  @classmethod
@@ -174,7 +177,7 @@ class ChatMessage:
174
177
  return to_yaml_string(self.as_dict(), key_sort=_custom_key_sort)
175
178
 
176
179
  def to_json(self) -> str:
177
- return json.dumps(self.as_dict())
180
+ return to_json_string(self.as_dict())
178
181
 
179
182
  def as_str(self) -> str:
180
183
  return self.to_yaml()
@@ -222,7 +225,7 @@ class ChatHistory:
222
225
  return stream.getvalue()
223
226
 
224
227
  def to_json(self) -> str:
225
- return json.dumps([message.as_dict() for message in self.messages])
228
+ return to_json_string([message.as_dict() for message in self.messages], indent=None)
226
229
 
227
230
  def size_summary(self) -> str:
228
231
  role_counts = {}
@@ -37,6 +37,7 @@ class FileExt(Enum):
37
37
  mp4 = "mp4"
38
38
  pptx = "pptx"
39
39
  epub = "epub"
40
+ zip = "zip"
40
41
 
41
42
  @property
42
43
  def dot_ext(self) -> str:
@@ -16,7 +16,7 @@ def is_fullpage_html(content: str) -> bool:
16
16
  A full HTML document that is a full page (headers, footers, etc.) and
17
17
  so probably best rendered in a browser.
18
18
  """
19
- return bool(re.search(r"<!DOCTYPE html>|<html>|<body>|<head>", content, re.IGNORECASE))
19
+ return bool(re.search(r"<!DOCTYPE html>|<html.*?>|<body>|<head>", content, re.IGNORECASE))
20
20
 
21
21
 
22
22
  _yaml_header_pattern = re.compile(r"^---\n\w+:", re.MULTILINE)
@@ -35,7 +35,9 @@ def is_html(content: str) -> bool:
35
35
  """
36
36
  return bool(
37
37
  re.search(
38
- r"<!DOCTYPE html>|<html>|<body>|<head>|<div>|<p>|<img |<a href", content, re.IGNORECASE
38
+ r"<!DOCTYPE html>|<html.*?>|<body>|<head>|<div>|<p>|<img |<a href",
39
+ content,
40
+ re.IGNORECASE,
39
41
  )
40
42
  )
41
43
 
@@ -72,6 +72,9 @@ class Format(Enum):
72
72
  mp3 = "mp3"
73
73
  m4a = "m4a"
74
74
  mp4 = "mp4"
75
+
76
+ # Binary formats.
77
+ zip = "zip"
75
78
  binary = "binary"
76
79
  """Catch-all format for binary files that are unrecognized."""
77
80
 
@@ -167,6 +170,10 @@ class Format(Enum):
167
170
  def is_data(self) -> bool:
168
171
  return self in [self.csv, self.xlsx, self.npz]
169
172
 
173
@property
def is_zip(self) -> bool:
    """Whether this format is one of the zip archive formats."""
    zip_formats = (self.zip,)
    return self in zip_formats
176
+
170
177
  @property
171
178
  def is_binary(self) -> bool:
172
179
  return self.has_body and not self.is_text
@@ -257,6 +264,7 @@ class Format(Enum):
257
264
  FileExt.m4a.value: Format.m4a,
258
265
  FileExt.mp4.value: Format.mp4,
259
266
  FileExt.epub.value: Format.epub,
267
+ FileExt.zip.value: Format.zip,
260
268
  }
261
269
  return ext_to_format.get(file_ext.value, None)
262
270
 
@@ -292,6 +300,7 @@ class Format(Enum):
292
300
  Format.mp3: FileExt.mp3,
293
301
  Format.m4a: FileExt.m4a,
294
302
  Format.mp4: FileExt.mp4,
303
+ Format.zip: FileExt.zip,
295
304
  }
296
305
 
297
306
  return format_to_file_ext.get(self, None)
@@ -329,6 +338,9 @@ class Format(Enum):
329
338
  "audio/mp3": Format.mp3,
330
339
  "audio/mp4": Format.m4a,
331
340
  "video/mp4": Format.mp4,
341
+ "application/zip": Format.zip,
342
+ "application/x-zip": Format.zip,
343
+ "application/x-zip-compressed": Format.zip,
332
344
  "application/octet-stream": Format.binary,
333
345
  }
334
346
 
@@ -75,7 +75,7 @@ def normalize_text_file(
75
75
 
76
76
  def test_osc8_link():
77
77
  from clideps.terminal.osc_utils import osc8_link
78
- from flowmark.text_wrapping import wrap_paragraph
78
+ from flowmark import wrap_paragraph
79
79
 
80
80
  link = osc8_link("https://example.com/" + "x" * 50, "Example")
81
81
  assert ansi_cell_len(link) == 7
@@ -0,0 +1,224 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass, field
5
+ from typing import Any
6
+
7
+ from flowmark import flowmark_markdown, line_wrap_by_sentence
8
+ from marko import Markdown
9
+ from marko.ext import footnote
10
+
11
+ from kash.utils.text_handling.markdown_utils import comprehensive_transform_tree
12
+
13
+
14
+ def _normalize_footnotes_in_markdown(content: str) -> str:
15
+ """
16
+ Ensure blank lines between consecutive footnote definitions.
17
+
18
+ Marko has a bug where consecutive footnotes without blank lines are parsed
19
+ as a single footnote. This adds blank lines where needed.
20
+ """
21
+ lines = content.split("\n")
22
+ result = []
23
+ i = 0
24
+
25
+ while i < len(lines):
26
+ line = lines[i]
27
+ result.append(line)
28
+
29
+ # Check if this is a footnote definition
30
+ if re.match(r"^\[\^[^\]]+\]:", line):
31
+ # Look ahead to see if the next non-empty line is also a footnote
32
+ j = i + 1
33
+ while j < len(lines) and not lines[j].strip():
34
+ result.append(lines[j])
35
+ j += 1
36
+
37
+ if j < len(lines) and re.match(r"^\[\^[^\]]+\]:", lines[j]):
38
+ # Next non-empty line is also a footnote, add blank line
39
+ result.append("")
40
+
41
+ i = j
42
+ else:
43
+ i += 1
44
+
45
+ return "\n".join(result)
46
+
47
+
48
@dataclass
class FootnoteInfo:
    """
    A single footnote definition extracted from a markdown document.

    Fields:
        footnote_id: The footnote ID including its caret (e.g., "^123", "^foo").
        content: The footnote's content, rendered as markdown.
        raw_element: The marko element the footnote was originally parsed into.
    """

    footnote_id: str
    content: str
    raw_element: footnote.FootnoteDef
57
+
58
+
59
@dataclass
class MarkdownFootnotes:
    """
    Index of the footnote definitions found in a markdown document.

    Footnotes are looked up by ID. Stored IDs always carry the leading
    caret (^) to avoid collisions, but lookups accept IDs with or without it.
    """

    footnotes: dict[str, FootnoteInfo] = field(default_factory=dict)
    """Footnote IDs (including the ^) mapped to their FootnoteInfo objects."""

    @staticmethod
    def _with_caret(footnote_id: str) -> str:
        """Return the ID in its stored form, adding the leading ^ if it is missing."""
        return footnote_id if footnote_id.startswith("^") else f"^{footnote_id}"

    @staticmethod
    def from_markdown(content: str, markdown_parser: Markdown | None = None) -> MarkdownFootnotes:
        """
        Parse markdown content and collect every footnote definition in it.

        Args:
            content: The markdown text to parse
            markdown_parser: Parser to use. Falls back to the default flowmark setup if None.

        Returns:
            MarkdownFootnotes instance indexing all footnotes by ID
        """
        parser = markdown_parser
        if parser is None:
            parser = flowmark_markdown(line_wrap_by_sentence(is_markdown=True))

        # Consecutive footnotes must be blank-line separated to work around a marko bug.
        document = parser.parse(_normalize_footnotes_in_markdown(content))
        return MarkdownFootnotes.from_document(document, parser)

    @staticmethod
    def from_document(document: Any, markdown_parser: Markdown | None = None) -> MarkdownFootnotes:
        """
        Collect every footnote definition from an already-parsed document.

        Args:
            document: A parsed marko document object
            markdown_parser: The parser whose renderer is used to render footnote
                content. Falls back to the default flowmark setup if None.

        Returns:
            MarkdownFootnotes instance indexing all footnotes by ID
        """
        parser = markdown_parser
        if parser is None:
            parser = flowmark_markdown(line_wrap_by_sentence(is_markdown=True))

        found: dict[str, FootnoteInfo] = {}

        def collect_footnote(element: Any) -> None:
            if not isinstance(element, footnote.FootnoteDef):
                return

            # NOTE(review): `parser.renderer` is accessed directly here; confirm it is
            # available when a fresh default parser is used on a pre-parsed document.
            children = getattr(element, "children", None) or ()
            rendered = "".join(parser.renderer.render(child) for child in children)

            key = f"^{element.label}"
            found[key] = FootnoteInfo(
                footnote_id=key,
                content=rendered.strip(),
                raw_element=element,
            )

        comprehensive_transform_tree(document, collect_footnote)

        return MarkdownFootnotes(footnotes=found)

    def get(self, footnote_id: str, default: FootnoteInfo | None = None) -> FootnoteInfo | None:
        """
        Look up a footnote by ID, falling back to a default.

        Args:
            footnote_id: The footnote ID (leading ^ optional)
            default: Value returned when the footnote does not exist

        Returns:
            The matching FootnoteInfo, or the default if there is none
        """
        return self.footnotes.get(self._with_caret(footnote_id), default)

    def __getitem__(self, footnote_id: str) -> FootnoteInfo:
        """
        Look up a footnote by ID with dictionary-style access.

        Args:
            footnote_id: The footnote ID (leading ^ optional)

        Returns:
            The matching FootnoteInfo

        Raises:
            KeyError: If no footnote has this ID
        """
        return self.footnotes[self._with_caret(footnote_id)]

    def __contains__(self, footnote_id: str) -> bool:
        """
        Tell whether a footnote with this ID exists.

        Args:
            footnote_id: The footnote ID (leading ^ optional)
        """
        return self._with_caret(footnote_id) in self.footnotes

    def __len__(self) -> int:
        """Number of footnotes held."""
        return len(self.footnotes)

    def __iter__(self):
        """Iterate over the footnote IDs (carets included)."""
        return iter(self.footnotes)

    def items(self):
        """View of (footnote_id, FootnoteInfo) pairs."""
        return self.footnotes.items()

    def values(self):
        """View of the FootnoteInfo objects."""
        return self.footnotes.values()

    def keys(self):
        """View of the footnote IDs (carets included)."""
        return self.footnotes.keys()
192
+
193
+
194
def extract_footnote_references(content: str, markdown_parser: Markdown | None = None) -> list[str]:
    """
    Collect the IDs of all footnotes that are referenced in the content.

    Only FootnoteRef elements count (e.g., [^123] appearing in the text);
    FootnoteDef elements, which are the definitions, are ignored.

    Args:
        content: The markdown text to parse
        markdown_parser: Parser to use. Falls back to the default flowmark setup if None.

    Returns:
        The referenced footnote IDs (with the ^), each listed once, in the
        order in which they are first encountered
    """
    parser = markdown_parser
    if parser is None:
        parser = flowmark_markdown(line_wrap_by_sentence(is_markdown=True))

    document = parser.parse(_normalize_footnotes_in_markdown(content))

    # A dict keeps insertion order and drops duplicates in one structure.
    ordered_ids: dict[str, None] = {}

    def collect_references(element: Any) -> None:
        if isinstance(element, footnote.FootnoteRef):
            ordered_ids.setdefault(f"^{element.label}", None)

    comprehensive_transform_tree(document, collect_references)
    return list(ordered_ids)