kash-shell 0.3.9__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. kash/actions/__init__.py +4 -4
  2. kash/actions/core/markdownify.py +5 -2
  3. kash/actions/core/readability.py +5 -2
  4. kash/actions/core/render_as_html.py +18 -0
  5. kash/actions/core/webpage_config.py +12 -4
  6. kash/commands/__init__.py +8 -20
  7. kash/commands/base/basic_file_commands.py +15 -0
  8. kash/commands/base/debug_commands.py +13 -0
  9. kash/commands/base/general_commands.py +21 -16
  10. kash/commands/base/logs_commands.py +4 -2
  11. kash/commands/base/model_commands.py +8 -8
  12. kash/commands/base/search_command.py +3 -2
  13. kash/commands/base/show_command.py +5 -3
  14. kash/commands/extras/parse_uv_lock.py +186 -0
  15. kash/commands/help/doc_commands.py +2 -31
  16. kash/commands/help/welcome.py +33 -0
  17. kash/commands/workspace/selection_commands.py +11 -6
  18. kash/commands/workspace/workspace_commands.py +18 -15
  19. kash/config/colors.py +2 -0
  20. kash/config/env_settings.py +14 -1
  21. kash/config/init.py +2 -2
  22. kash/config/logger.py +59 -56
  23. kash/config/logger_basic.py +3 -3
  24. kash/config/settings.py +116 -57
  25. kash/config/setup.py +28 -12
  26. kash/config/text_styles.py +3 -13
  27. kash/docs/load_api_docs.py +2 -1
  28. kash/docs/markdown/topics/a3_getting_started.md +3 -2
  29. kash/{concepts → embeddings}/text_similarity.py +2 -2
  30. kash/exec/__init__.py +20 -3
  31. kash/exec/action_decorators.py +18 -4
  32. kash/exec/action_exec.py +41 -23
  33. kash/exec/action_registry.py +13 -48
  34. kash/exec/command_registry.py +2 -1
  35. kash/exec/fetch_url_metadata.py +4 -6
  36. kash/exec/importing.py +56 -0
  37. kash/exec/llm_transforms.py +6 -7
  38. kash/exec/precondition_registry.py +2 -1
  39. kash/exec/preconditions.py +16 -1
  40. kash/exec/shell_callable_action.py +33 -19
  41. kash/file_storage/file_store.py +23 -10
  42. kash/file_storage/item_file_format.py +5 -2
  43. kash/file_storage/metadata_dirs.py +11 -2
  44. kash/help/assistant.py +1 -1
  45. kash/help/assistant_instructions.py +2 -1
  46. kash/help/help_embeddings.py +2 -2
  47. kash/help/help_printing.py +7 -11
  48. kash/llm_utils/clean_headings.py +1 -1
  49. kash/llm_utils/llm_api_keys.py +4 -4
  50. kash/llm_utils/llm_features.py +68 -0
  51. kash/llm_utils/llm_messages.py +1 -2
  52. kash/llm_utils/llm_names.py +1 -1
  53. kash/llm_utils/llms.py +8 -3
  54. kash/local_server/__init__.py +5 -2
  55. kash/local_server/local_server.py +8 -5
  56. kash/local_server/local_server_commands.py +2 -2
  57. kash/local_server/local_url_formatters.py +1 -1
  58. kash/mcp/__init__.py +5 -2
  59. kash/mcp/mcp_cli.py +5 -5
  60. kash/mcp/mcp_server_commands.py +5 -5
  61. kash/mcp/mcp_server_routes.py +5 -5
  62. kash/mcp/mcp_server_sse.py +4 -2
  63. kash/media_base/media_cache.py +8 -8
  64. kash/media_base/media_services.py +1 -1
  65. kash/media_base/media_tools.py +6 -6
  66. kash/media_base/services/local_file_media.py +2 -2
  67. kash/media_base/{speech_transcription.py → transcription_deepgram.py} +25 -110
  68. kash/media_base/transcription_format.py +73 -0
  69. kash/media_base/transcription_whisper.py +38 -0
  70. kash/model/__init__.py +73 -5
  71. kash/model/actions_model.py +38 -4
  72. kash/model/concept_model.py +30 -0
  73. kash/model/items_model.py +44 -7
  74. kash/model/params_model.py +24 -0
  75. kash/shell/completions/completion_scoring.py +37 -5
  76. kash/shell/output/kerm_codes.py +1 -2
  77. kash/shell/output/shell_formatting.py +14 -4
  78. kash/shell/shell_main.py +2 -2
  79. kash/shell/utils/exception_printing.py +6 -0
  80. kash/shell/utils/native_utils.py +26 -20
  81. kash/text_handling/custom_sliding_transforms.py +12 -4
  82. kash/text_handling/doc_normalization.py +6 -2
  83. kash/text_handling/markdown_render.py +117 -0
  84. kash/text_handling/markdown_utils.py +204 -0
  85. kash/utils/common/import_utils.py +12 -3
  86. kash/utils/common/type_utils.py +0 -29
  87. kash/utils/common/url.py +27 -3
  88. kash/utils/errors.py +6 -0
  89. kash/utils/file_utils/file_formats.py +2 -2
  90. kash/utils/file_utils/file_formats_model.py +3 -0
  91. kash/web_content/dir_store.py +1 -2
  92. kash/web_content/file_cache_utils.py +37 -10
  93. kash/web_content/file_processing.py +68 -0
  94. kash/web_content/local_file_cache.py +12 -9
  95. kash/web_content/web_extract.py +8 -3
  96. kash/web_content/web_fetch.py +12 -4
  97. kash/web_gen/tabbed_webpage.py +5 -2
  98. kash/web_gen/templates/base_styles.css.jinja +120 -14
  99. kash/web_gen/templates/base_webpage.html.jinja +60 -13
  100. kash/web_gen/templates/content_styles.css.jinja +4 -2
  101. kash/web_gen/templates/item_view.html.jinja +2 -2
  102. kash/web_gen/templates/tabbed_webpage.html.jinja +1 -2
  103. kash/workspaces/__init__.py +15 -2
  104. kash/workspaces/selections.py +18 -3
  105. kash/workspaces/source_items.py +0 -1
  106. kash/workspaces/workspaces.py +5 -11
  107. kash/xonsh_custom/command_nl_utils.py +40 -19
  108. kash/xonsh_custom/custom_shell.py +43 -11
  109. kash/xonsh_custom/customize_prompt.py +39 -21
  110. kash/xonsh_custom/load_into_xonsh.py +22 -25
  111. kash/xonsh_custom/shell_load_commands.py +2 -2
  112. kash/xonsh_custom/xonsh_completers.py +2 -249
  113. kash/xonsh_custom/xonsh_keybindings.py +282 -0
  114. kash/xonsh_custom/xonsh_modern_tools.py +3 -3
  115. kash/xontrib/kash_extension.py +5 -6
  116. {kash_shell-0.3.9.dist-info → kash_shell-0.3.10.dist-info}/METADATA +8 -6
  117. {kash_shell-0.3.9.dist-info → kash_shell-0.3.10.dist-info}/RECORD +122 -123
  118. kash/concepts/concept_formats.py +0 -23
  119. kash/shell/clideps/api_keys.py +0 -100
  120. kash/shell/clideps/dotenv_setup.py +0 -115
  121. kash/shell/clideps/dotenv_utils.py +0 -98
  122. kash/shell/clideps/pkg_deps.py +0 -257
  123. kash/shell/clideps/platforms.py +0 -11
  124. kash/shell/clideps/terminal_features.py +0 -56
  125. kash/shell/utils/osc_utils.py +0 -95
  126. kash/shell/utils/terminal_images.py +0 -133
  127. kash/text_handling/markdown_util.py +0 -167
  128. kash/utils/common/atomic_var.py +0 -171
  129. kash/utils/common/string_replace.py +0 -93
  130. kash/utils/common/string_template.py +0 -101
  131. /kash/{concepts → embeddings}/cosine.py +0 -0
  132. /kash/{concepts → embeddings}/embeddings.py +0 -0
  133. {kash_shell-0.3.9.dist-info → kash_shell-0.3.10.dist-info}/WHEEL +0 -0
  134. {kash_shell-0.3.9.dist-info → kash_shell-0.3.10.dist-info}/entry_points.txt +0 -0
  135. {kash_shell-0.3.9.dist-info → kash_shell-0.3.10.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,117 @@
1
+ from textwrap import dedent
2
+
3
+ import marko
4
+ import regex
5
+ from marko.block import HTMLBlock
6
+ from marko.ext.gfm import GFM
7
+ from marko.helpers import MarkoExtension
8
+
9
+
10
+ # When we use divs in Markdown we usually want them to be standalone paragraphs,
11
+ # so it doesn't break other wrapping with flowmark etc. This handles that.
12
class CustomHTMLBlockMixin:
    """
    Renderer mixin that sets `<div>` HTML blocks apart with newlines.

    Divs written in Markdown are usually meant to be standalone paragraphs, so
    surrounding them with newlines keeps them from interfering with other
    wrapping (flowmark etc.).
    """

    div_pattern = regex.compile(r"^\s*<div\b", regex.IGNORECASE)

    def render_html_block(self, element: HTMLBlock) -> str:
        """
        Render an HTML block via the next renderer in the MRO, then pad the
        result with newlines if the original block starts with a `<div` tag.
        """
        # Let the next renderer in the MRO (GFM filtering) do its work first.
        rendered = super().render_html_block(element)  # pyright: ignore

        # The decision is made on the *original* body, not the filtered output.
        starts_with_div = self.div_pattern.match(element.body.strip())
        if not starts_with_div:
            return rendered

        # Wrap the *filtered* result in newlines.
        return f"\n{rendered.strip()}\n"
26
+
27
+
28
# Shared converter: footnotes and GFM first, then our div-handling override added
# as an extension. Extensions later in this list are earlier in MRO, so the mixin's
# `render_html_block` runs before (and delegates to) the GFM renderer.
MARKO_GFM = marko.Markdown(
    extensions=[
        "footnote",
        GFM,
        MarkoExtension(renderer_mixins=[CustomHTMLBlockMixin]),
    ]
)
33
+
34
+
35
FOOTNOTE_UP_ARROW = "&nbsp;↑&nbsp;"


def html_postprocess(html: str) -> str:
    """
    Apply final tweaks to rendered HTML.

    Currently this swaps the footnote back-reference glyph (`&#8617;`) inside
    `class="footnote"` anchors for `FOOTNOTE_UP_ARROW`.
    """
    original_backref = """class="footnote">&#8617;</a>"""
    replacement_backref = f"""class="footnote">{FOOTNOTE_UP_ARROW}</a>"""
    return html.replace(original_backref, replacement_backref)
46
+
47
+
48
def markdown_to_html(markdown: str, converter: marko.Markdown = MARKO_GFM) -> str:
    """
    Convert Markdown to HTML.

    Wraps div blocks with newlines for better Markdown compatibility.

    Output passes through raw HTML! Note per GFM, unsafe script tags etc
    are [allowed in some cases](https://github.github.com/gfm/#example-140) so
    additional sanitization is needed if input isn't trusted.

    The converted HTML is passed through `html_postprocess` before being returned.
    """
    html = converter.convert(markdown)
    return html_postprocess(html)
61
+
62
+
63
+ ## Tests
64
+
65
+
66
def test_markdown_to_html():
    """
    Check `markdown_to_html` against a fixed document covering headings, links,
    lists, inline HTML, block-level divs, an iframe inside a div, and a script block.
    """
    markdown = dedent(
        """
        # Heading

        This is a paragraph and a [link](https://example.com).

        - Item 1
        - Item 2

        ## Subheading

        This is a paragraph with a <span>span</span> tag.
        This is a paragraph with a <div>div</div> tag.
        This is a paragraph with an <a href='https://example.com'>example link</a>.

        <div class="div1">This is a div.</div>

        <div class="div2">This is a second div.
        <iframe src="https://example.com">Inline iframe, note this is sanitized</iframe>
        </div>

        <!-- Script tag in a block, note this isn't sanitized -->
        <script>console.log("Javascript block!");</script>
        """
    )
    # Printed so the rendered HTML is visible in the test output.
    print(markdown_to_html(markdown))

    # Expected output: inline HTML passes through, block-level divs are set apart
    # by blank lines, the iframe's opening bracket is escaped, and the script
    # block is left as-is.
    expected_html = dedent(
        """
        <h1>Heading</h1>
        <p>This is a paragraph and a <a href="https://example.com">link</a>.</p>
        <ul>
        <li>Item 1</li>
        <li>Item 2</li>
        </ul>
        <h2>Subheading</h2>
        <p>This is a paragraph with a <span>span</span> tag.
        This is a paragraph with a <div>div</div> tag.
        This is a paragraph with an <a href='https://example.com'>example link</a>.</p>

        <div class="div1">This is a div.</div>

        <div class="div2">This is a second div.
        &lt;iframe src="https://example.com">Inline iframe, note this is sanitized</iframe>
        </div>
        <!-- Script tag in a block, note this isn't sanitized -->
        <script>console.log("Javascript block!");</script>
        """
    )

    # Leading/trailing whitespace is not significant for this comparison.
    assert markdown_to_html(markdown).strip() == expected_html.strip()
@@ -0,0 +1,204 @@
1
+ import re
2
+ from typing import Any
3
+
4
+ import marko
5
+ import regex
6
+ from marko.block import Heading, ListItem
7
+ from marko.inline import Link
8
+
9
+ from kash.config.logger import get_logger
10
+ from kash.utils.common.url import Url
11
+
12
+ log = get_logger(__name__)
13
+
14
# Characters that commonly need escaping in Markdown inline text.
MARKDOWN_ESCAPE_CHARS = r"([\\`*_{}\[\]()#+.!-])"
MARKDOWN_ESCAPE_RE = re.compile(MARKDOWN_ESCAPE_CHARS)


def escape_markdown(text: str) -> str:
    """
    Return `text` with a backslash inserted before every character matched
    by `MARKDOWN_ESCAPE_RE` (backslash, backtick, `*`, `_`, braces, brackets,
    parentheses, `#`, `+`, `.`, `!`, and `-`).
    """

    def _with_backslash(found: re.Match[str]) -> str:
        return "\\" + found.group(1)

    return MARKDOWN_ESCAPE_RE.sub(_with_backslash, text)
24
+
25
+
26
def as_bullet_points(values: list[Any]) -> str:
    """
    Convert a list of values to a Markdown bullet-point list. If a value is a string,
    it is treated like Markdown. If it's something else it's converted to a string
    and also escaped for Markdown.

    Newlines within each value are replaced by spaces and surrounding whitespace
    is stripped, so every value yields exactly one bullet. Bullets are separated
    by a blank line. An empty list yields an empty string.
    """
    points: list[str] = []
    for value in values:
        # Convert to text *before* calling any string methods, so non-string
        # values (ints, paths, etc.) are handled instead of raising AttributeError.
        if isinstance(value, str):
            point = value
        else:
            point = escape_markdown(str(value))
        points.append(point.replace("\n", " ").strip())

    return "\n\n".join(f"- {point}" for point in points)
41
+
42
+
43
def markdown_link(text: str, url: str | Url) -> str:
    """
    Build a Markdown link `[text](url)`, backslash-escaping any square
    brackets that appear in the link text.
    """
    bracket_escapes = str.maketrans({"[": "\\[", "]": "\\]"})
    safe_text = text.translate(bracket_escapes)
    return f"[{safe_text}]({url})"
49
+
50
+
51
def is_markdown_header(markdown: str) -> bool:
    """
    True if the content begins with one or more `#` characters followed by a space.
    """
    header_prefix = regex.match(r"^#+ ", markdown)
    return header_prefix is not None
56
+
57
+
58
def _tree_links(element, include_internal=False):
    """
    Collect the destinations of all `Link` elements under `element`, in
    depth-first document order. Links whose destination starts with "#" are
    included only when `include_internal` is true. The children of a `Link`
    are not searched.
    """
    found = []

    def _walk(node):
        if isinstance(node, Link):
            is_internal = node.dest.startswith("#")
            if include_internal or not is_internal:
                found.append(node.dest)
            return

        if hasattr(node, "children"):
            for child in node.children:
                _walk(child)

    _walk(element)
    return found
73
+
74
+
75
def extract_links(file_path: str, include_internal=False) -> list[str]:
    """
    Extract all links from a Markdown file. Future: Include textual and section context.

    The file is read as UTF-8 and parsed with `marko`. Link destinations are
    returned in document order; destinations starting with "#" are included
    only when `include_internal` is true.
    """
    # Be explicit about the encoding so results don't depend on the platform locale.
    with open(file_path, encoding="utf-8") as file:
        content = file.read()

    document = marko.parse(content)
    return _tree_links(document, include_internal)
84
+
85
+
86
+ def _extract_text(element: Any) -> str:
87
+ if isinstance(element, str):
88
+ return element
89
+ elif hasattr(element, "children"):
90
+ return "".join(_extract_text(child) for child in element.children)
91
+ else:
92
+ return ""
93
+
94
+
95
def _tree_bullet_points(element: marko.block.Document) -> list[str]:
    """
    Collect the stripped text of every `ListItem` reachable from `element`,
    in document order. Once a list item is found, its whole text is taken as
    one entry and its children are not searched for further items.
    """
    collected: list[str] = []

    def _visit(node):
        if isinstance(node, ListItem):
            item_text = _extract_text(node)
            collected.append(item_text.strip())
            return

        if hasattr(node, "children"):
            for child in node.children:
                _visit(child)

    _visit(element)
    return collected
107
+
108
+
109
def extract_bullet_points(content: str) -> list[str]:
    """
    Parse Markdown `content` and return the text of each list item found.
    """
    return _tree_bullet_points(marko.parse(content))
116
+
117
+
118
+ def _type_from_heading(heading: Heading) -> str:
119
+ if heading.level in [1, 2, 3, 4, 5, 6]:
120
+ return f"h{heading.level}"
121
+ else:
122
+ raise ValueError(f"Unsupported heading: {heading}: level {heading.level}")
123
+
124
+
125
+ def _last_unescaped_bracket(text: str, index: int) -> str | None:
126
+ escaped = False
127
+ for i in range(index - 1, -1, -1):
128
+ ch = text[i]
129
+ if ch == "\\":
130
+ escaped = not escaped # Toggle escaping chain
131
+ continue
132
+ if ch in "[]":
133
+ if not escaped:
134
+ return ch
135
+ # Reset escape status after any non‑backslash char
136
+ escaped = False
137
+ return None
138
+
139
+
140
def find_markdown_text(
    pattern: re.Pattern[str], text: str, *, start_pos: int = 0
) -> re.Match[str] | None:
    """
    Return first regex `pattern` match in `text` not inside an existing link.

    A match is considered inside a link when the most recent unescaped square
    bracket preceding the match start is an opening bracket "[".

    The search starts at `start_pos`. Returns None when no match outside a link
    exists.
    """
    pos = start_pos
    while True:
        match = pattern.search(text, pos)
        if match is None:
            return None

        last_bracket = _last_unescaped_bracket(text, match.start())
        if last_bracket != "[":
            return match

        # Skip this match and continue searching. Always advance by at least one
        # character so a zero-width match inside a link can't loop forever.
        pos = max(match.end(), match.start() + 1)
        if pos > len(text):
            return None
162
+
163
+
164
+ ## Tests
165
+
166
+
167
def test_escape_markdown() -> None:
    """
    Table-driven check of `escape_markdown`: each case is (input, expected output).
    """
    cases = [
        ("", ""),
        ("Hello world", "Hello world"),
        ("`code`", "\\`code\\`"),
        ("*italic*", "\\*italic\\*"),
        ("_bold_", "\\_bold\\_"),
        ("{braces}", "\\{braces\\}"),
        ("# header", "\\# header"),
        ("1. item", "1\\. item"),
        ("line+break", "line\\+break"),
        ("dash-", "dash\\-"),
        ("!bang", "\\!bang"),
        ("backslash\\", "backslash\\\\"),
        (
            "Multiple *special* chars [here](#anchor).",
            "Multiple \\*special\\* chars \\[here\\]\\(\\#anchor\\)\\.",
        ),
    ]
    for raw, expected in cases:
        assert escape_markdown(raw) == expected
183
+
184
+
185
def test_find_markdown_text() -> None:  # pragma: no cover
    """
    Check that `find_markdown_text` returns matches outside links and skips
    matches that fall inside link text.
    """
    # Match is returned when the term is not inside a link.
    plain = "Foo bar baz"
    found = find_markdown_text(re.compile("Foo Bar", re.IGNORECASE), plain)
    assert found is not None and found.group(0) == "Foo bar"

    # Skips occurrence inside link and returns the first one outside.
    linked_then_plain = "[Foo](http://example.com) something Foo"
    found = find_markdown_text(re.compile("Foo", re.IGNORECASE), linked_then_plain)
    assert found is not None
    assert found.start() > linked_then_plain.index(") ")
    assert linked_then_plain[found.start() : found.end()] == "Foo"

    # Returns None when the only occurrences are inside links.
    only_linked = "prefix [bar](http://example.com) suffix"
    found = find_markdown_text(re.compile("bar", re.IGNORECASE), only_linked)
    assert found is None
@@ -15,21 +15,30 @@ Tallies: TypeAlias = dict[str, int]
15
15
  def import_subdirs(
16
16
  parent_package_name: str,
17
17
  parent_dir: Path,
18
- subdir_names: list[str],
18
+ subdir_names: list[str] | None = None,
19
19
  tallies: Tallies | None = None,
20
20
  ):
21
21
  """
22
22
  Import all files in the given subdirectories of a single parent directory.
23
+ Wraps `pkgutil.iter_modules` to iterate over all modules in the subdirectories.
24
+ If `subdir_names` is `None`, will import all subdirectories.
23
25
  """
24
26
  if tallies is None:
25
27
  tallies = {}
28
+ if not subdir_names:
29
+ subdir_names = ["."]
26
30
 
27
31
  for subdir_name in subdir_names:
28
- full_path = parent_dir / subdir_name
32
+ if subdir_name == ".":
33
+ full_path = parent_dir
34
+ package_name = parent_package_name
35
+ else:
36
+ full_path = parent_dir / subdir_name
37
+ package_name = f"{parent_package_name}.{subdir_name}"
38
+
29
39
  if not full_path.is_dir():
30
40
  raise FileNotFoundError(f"Subdirectory not found: {full_path}")
31
41
 
32
- package_name = f"{parent_package_name}.{subdir_name}"
33
42
  for _module_finder, module_name, _is_pkg in pkgutil.iter_modules(path=[str(full_path)]):
34
43
  importlib.import_module(f"{package_name}.{module_name}") # Propagate import errors
35
44
  tallies[package_name] = tallies.get(package_name, 0) + 1
@@ -15,35 +15,6 @@ def not_none(value: T | None, message: str | None = None) -> T:
15
15
  return value
16
16
 
17
17
 
18
- def is_truthy(value: Any, strict: bool = True) -> bool:
19
- """
20
- True for all common string and non-string values for true. Useful for parsing
21
- string values or command line arguments.
22
- """
23
- truthy_values = {"true", "1", "yes", "on", "y"}
24
- falsy_values = {"false", "0", "no", "off", "n", ""}
25
-
26
- if value is None:
27
- return False
28
- elif isinstance(value, str):
29
- value = value.strip().lower()
30
- if value in truthy_values:
31
- return True
32
- elif value in falsy_values:
33
- return False
34
- elif isinstance(value, (int, float)):
35
- return value != 0
36
- elif isinstance(value, bool):
37
- return value
38
- elif isinstance(value, (list, tuple, set, dict)):
39
- return len(value) > 0
40
-
41
- if strict:
42
- raise ValueError(f"Could not convert type {type(value)} to boolean: {repr(value)}")
43
-
44
- return bool(value)
45
-
46
-
47
18
  def as_dataclass(dict_data: dict[str, Any], dataclass_type: type[T]) -> T:
48
19
  """
49
20
  Convert a dict recursively to dataclass object, raising an error if the data does
kash/utils/common/url.py CHANGED
@@ -67,15 +67,39 @@ def is_file_url(url: str | Url) -> bool:
67
67
  return url.startswith("file://")
68
68
 
69
69
 
70
- def parse_file_url(url: str | Url) -> Path | None:
70
+ def parse_http_url(url: str | Url) -> ParseResult:
71
71
  """
72
- Parse a file URL and return the path, or None if not a file URL.
72
+ Parse an http/https URL and return the parsed result, raising ValueError if
73
+ not an http/https URL.
74
+ """
75
+ parsed_url = urlparse(url)
76
+ if parsed_url.scheme in ("http", "https"):
77
+ return parsed_url
78
+ else:
79
+ raise ValueError(f"Not an http/https URL: {url}")
80
+
81
+
82
+ def parse_file_url(url: str | Url) -> Path:
83
+ """
84
+ Parse a file URL and return the path, raising ValueError if not a file URL.
73
85
  """
74
86
  parsed_url = urlparse(url)
75
87
  if parsed_url.scheme == "file":
76
88
  return Path(parsed_url.path)
77
89
  else:
78
- return None
90
+ raise ValueError(f"Not a file URL: {url}")
91
+
92
+
93
def parse_s3_url(url: str | Url) -> tuple[str, str]:
    """
    Split an S3 URL into `(bucket, key)`.

    The bucket is the URL's network location and the key is its path with
    leading slashes removed.

    Raises:
        ValueError: If the URL scheme is not "s3".
    """
    parts = urlparse(url)
    if parts.scheme != "s3":
        raise ValueError(f"Not an S3 URL: {url}")

    bucket = parts.netloc
    key = parts.path.lstrip("/")
    return bucket, key
79
103
 
80
104
 
81
105
  def as_file_url(path: str | Path) -> Url:
kash/utils/errors.py CHANGED
@@ -139,6 +139,12 @@ class FileFormatError(ContentError):
139
139
  pass
140
140
 
141
141
 
142
class ApiError(KashRuntimeError):
    """
    Raised when an API call returns something unexpected.
    """
146
+
147
+
142
148
  def _nonfatal_exceptions() -> tuple[type[Exception], ...]:
143
149
  exceptions: list[type[Exception]] = [SelfExplanatoryError, FileNotFoundError, IOError]
144
150
  try:
@@ -4,9 +4,9 @@ from pathlib import Path
4
4
  from typing import NewType
5
5
 
6
6
  import regex
7
+ from clideps.pkgs.pkg_check import pkg_check
7
8
 
8
9
  from kash.config.logger import get_logger
9
- from kash.shell.clideps.pkg_deps import Pkg, pkg_check
10
10
 
11
11
  log = get_logger(__name__)
12
12
 
@@ -86,7 +86,7 @@ def detect_mime_type(filename: str | Path) -> MimeType | None:
86
86
  Get the mime type of a file using libmagic heuristics plus more careful
87
87
  detection of HTML, Markdown, and multipart YAML.
88
88
  """
89
- pkg_check().require(Pkg.libmagic)
89
+ pkg_check().require("libmagic")
90
90
  import magic
91
91
 
92
92
  mime = magic.Magic(mime=True)
@@ -36,6 +36,8 @@ class Format(Enum):
36
36
  it is the format of the resource (url, media, etc.).
37
37
  """
38
38
 
39
+ # TODO: Be more thorough, pulling in relevant extensions and types from the `mimetypes` module.
40
+
39
41
  # Formats with no body (content is in frontmatter).
40
42
  url = "url"
41
43
 
@@ -146,6 +148,7 @@ class Format(Enum):
146
148
  self.markdown,
147
149
  self.md_html,
148
150
  self.html,
151
+ self.json, # Not strictly true but we encourage use of comments.
149
152
  self.yaml,
150
153
  self.diff,
151
154
  self.python,
@@ -87,8 +87,7 @@ class DirStore:
87
87
  self, keys: list[str | Path], folder: str | None = None, suffix: str | None = None
88
88
  ) -> dict[str | Path, Path | None]:
89
89
  """
90
- Look up all existing cached results for the set of keys. This should work fine but could
91
- be optimized for large batches.
90
+ Look up all existing cached results for the set of keys.
92
91
  """
93
92
  return {key: self.find(key, folder=folder, suffix=suffix) for key in keys}
94
93
 
@@ -1,4 +1,7 @@
1
+ import json
2
+ from collections.abc import Callable
1
3
  from pathlib import Path
4
+ from typing import Any
2
5
 
3
6
  from prettyfmt import fmt_lines, fmt_path
4
7
 
@@ -35,18 +38,40 @@ def reset_content_cache_dir(path: Path):
35
38
  log.info("Using web cache: %s", fmt_path(path))
36
39
 
37
40
 
38
- def cache_file(source: Url | Path | Loadable, global_cache: bool = False) -> tuple[Path, bool]:
41
+ def cache_file(
42
+ source: Url | Path | Loadable, global_cache: bool = False, expiration_sec: float | None = None
43
+ ) -> tuple[Path, bool]:
39
44
  """
40
45
  Return a local cached copy of the item. If it is an URL, content is fetched.
41
- Raises requests.HTTPError if the URL is not reachable. If it is a Path or
42
- a Loadable, a cached copy is returned.
46
+ If it is a Path or a Loadable, a cached copy is returned.
47
+ LocalFileCache uses httpx so httpx.HTTPError is raised for non-2xx responses.
48
+
49
+ Uses the current content cache unless there is no current cache or `global_cache` is True,
50
+ in which case the global cache is used.
43
51
  """
44
52
  cache = _global_content_cache if global_cache else _content_cache
45
- path, was_cached = cache.cache(source)
53
+ path, was_cached = cache.cache(source, expiration_sec)
46
54
  return path, was_cached
47
55
 
48
56
 
49
- def cache_resource(item: Item) -> dict[MediaType, Path]:
57
def cache_api_response(
    url: Url,
    global_cache: bool = False,
    expiration_sec: float | None = None,
    parser: Callable[[str], Any] = json.loads,
) -> tuple[Any, bool]:
    """
    Fetch `url` through the content cache and return `(parsed, was_cached)`.

    The cached file's text is handed to `parser`, which defaults to `json.loads`.
    The global cache is used when `global_cache` is true, otherwise the current
    content cache. `expiration_sec` is passed through to the cache.
    """
    if global_cache:
        cache = _global_content_cache
    else:
        cache = _content_cache

    cached_path, was_cached = cache.cache(url, expiration_sec)
    parsed = parser(cached_path.read_text())
    return parsed, was_cached
70
+
71
+
72
+ def cache_resource(
73
+ item: Item, global_cache: bool = False, expiration_sec: float | None = None
74
+ ) -> dict[MediaType, Path]:
50
75
  """
51
76
  Cache a resource item for an external local path or a URL, fetching or
52
77
  copying as needed. For media this may yield more than one format.
@@ -64,17 +89,17 @@ def cache_resource(item: Item) -> dict[MediaType, Path]:
64
89
  if is_media_url(item.url):
65
90
  result = cache_media(item.url)
66
91
  else:
67
- path, _was_cached = cache_file(item.url)
92
+ path, _was_cached = cache_file(item.url, global_cache, expiration_sec)
68
93
  elif item.external_path:
69
94
  path = Path(item.external_path)
70
95
  if not path.is_file():
71
96
  raise FileNotFound(f"External path not found: {path}")
72
- path, _was_cached = cache_file(path)
97
+ path, _was_cached = cache_file(path, global_cache, expiration_sec)
73
98
  elif item.original_filename:
74
99
  path = Path(item.original_filename)
75
100
  if not path.is_file():
76
101
  raise FileNotFound(f"Original filename not found: {path}")
77
- path, _was_cached = cache_file(path)
102
+ path, _was_cached = cache_file(path, global_cache, expiration_sec)
78
103
  else:
79
104
  raise ValueError(f"Item has no URL or external path: {item}")
80
105
 
@@ -94,7 +119,9 @@ def cache_resource(item: Item) -> dict[MediaType, Path]:
94
119
  return result
95
120
 
96
121
 
97
- def get_url_html(item: Item) -> tuple[Url, str]:
122
+ def get_url_html(
123
+ item: Item, global_cache: bool = False, expiration_sec: float | None = None
124
+ ) -> tuple[Url, str]:
98
125
  """
99
126
  Returns the HTML content of an URL item, using the content cache,
100
127
  or the body of the item if it has a URL and HTML body.
@@ -106,7 +133,7 @@ def get_url_html(item: Item) -> tuple[Url, str]:
106
133
  url = Url(canonicalize_url(item.url))
107
134
 
108
135
  if is_url_item(item):
109
- path, _was_cached = cache_file(url)
136
+ path, _was_cached = cache_file(url, global_cache, expiration_sec)
110
137
  with open(path) as file:
111
138
  html_content = file.read()
112
139
  else:
@@ -0,0 +1,68 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable, Mapping
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ from typing import TypeAlias
7
+
8
+ from kash.web_content.local_file_cache import read_mtime
9
+
10
+
11
@dataclass(frozen=True)
class OutputType:
    """
    One kind of output file, identified by its filename suffix
    (e.g. '.mp3', '.txt').
    """

    suffix: str

    def output_path(self, src: Path) -> Path:
        """
        Return the path for this output: the source path with its suffix
        replaced by this type's suffix, so it sits next to the source file, e.g.
        some-dir/video.mp4 -> some-dir/video.mp3
        """
        sibling = src.with_suffix(self.suffix)
        return sibling
25
+
26
+
27
# Signature for processing callbacks: called as `processor(src, dests)` where
# `dests` maps each OutputType to the path its output should be written to.
# The return value is ignored.
Processor: TypeAlias = Callable[[Path, Mapping[OutputType, Path]], None]
"""
A function that takes a source file and a mapping with one or more output paths.
"""
31
+
32
+
33
@dataclass(frozen=True)
class FileProcess:
    """
    A processor paired with the output types it produces from a source file.
    """

    processor: Processor
    outputs: list[OutputType]

    def is_outdated(self, src: Path) -> bool:
        """
        True if some output path does not exist, or if `src` has a later mtime
        than the oldest existing output.
        """
        dests = {out.output_path(src) for out in self.outputs}
        for dest in dests:
            if not dest.exists():
                return True

        oldest_output = min(read_mtime(dest) for dest in dests)
        return oldest_output < read_mtime(src)

    def run(self, src: Path) -> dict[OutputType, Path]:
        """
        Invoke the processor unconditionally and return the mapping of output
        types to output paths that was passed to it.
        """
        dests = {out: out.output_path(src) for out in self.outputs}
        self.processor(src, dests)
        return dests

    def run_if_needed(self, src: Path) -> dict[OutputType, Path]:
        """
        Invoke the processor only when `is_outdated(src)` is true. Either way,
        return the mapping of output types to output paths.
        """
        if self.is_outdated(src):
            return self.run(src)
        return {out: out.output_path(src) for out in self.outputs}