kash-shell 0.3.9__py3-none-any.whl → 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/actions/__init__.py +4 -4
- kash/actions/core/markdownify.py +5 -2
- kash/actions/core/readability.py +5 -2
- kash/actions/core/render_as_html.py +18 -0
- kash/actions/core/webpage_config.py +12 -4
- kash/commands/__init__.py +8 -20
- kash/commands/base/basic_file_commands.py +15 -0
- kash/commands/base/debug_commands.py +13 -0
- kash/commands/base/general_commands.py +21 -16
- kash/commands/base/logs_commands.py +4 -2
- kash/commands/base/model_commands.py +8 -8
- kash/commands/base/search_command.py +3 -2
- kash/commands/base/show_command.py +5 -3
- kash/commands/extras/parse_uv_lock.py +186 -0
- kash/commands/help/doc_commands.py +2 -31
- kash/commands/help/welcome.py +33 -0
- kash/commands/workspace/selection_commands.py +11 -6
- kash/commands/workspace/workspace_commands.py +18 -15
- kash/config/colors.py +2 -0
- kash/config/env_settings.py +14 -1
- kash/config/init.py +2 -2
- kash/config/logger.py +59 -56
- kash/config/logger_basic.py +3 -3
- kash/config/settings.py +116 -57
- kash/config/setup.py +28 -12
- kash/config/text_styles.py +3 -13
- kash/docs/load_api_docs.py +2 -1
- kash/docs/markdown/topics/a3_getting_started.md +3 -2
- kash/{concepts → embeddings}/text_similarity.py +2 -2
- kash/exec/__init__.py +20 -3
- kash/exec/action_decorators.py +18 -4
- kash/exec/action_exec.py +41 -23
- kash/exec/action_registry.py +13 -48
- kash/exec/command_registry.py +2 -1
- kash/exec/fetch_url_metadata.py +4 -6
- kash/exec/importing.py +56 -0
- kash/exec/llm_transforms.py +6 -7
- kash/exec/precondition_registry.py +2 -1
- kash/exec/preconditions.py +16 -1
- kash/exec/shell_callable_action.py +33 -19
- kash/file_storage/file_store.py +23 -10
- kash/file_storage/item_file_format.py +5 -2
- kash/file_storage/metadata_dirs.py +11 -2
- kash/help/assistant.py +1 -1
- kash/help/assistant_instructions.py +2 -1
- kash/help/help_embeddings.py +2 -2
- kash/help/help_printing.py +7 -11
- kash/llm_utils/clean_headings.py +1 -1
- kash/llm_utils/llm_api_keys.py +4 -4
- kash/llm_utils/llm_features.py +68 -0
- kash/llm_utils/llm_messages.py +1 -2
- kash/llm_utils/llm_names.py +1 -1
- kash/llm_utils/llms.py +8 -3
- kash/local_server/__init__.py +5 -2
- kash/local_server/local_server.py +8 -5
- kash/local_server/local_server_commands.py +2 -2
- kash/local_server/local_url_formatters.py +1 -1
- kash/mcp/__init__.py +5 -2
- kash/mcp/mcp_cli.py +5 -5
- kash/mcp/mcp_server_commands.py +5 -5
- kash/mcp/mcp_server_routes.py +5 -5
- kash/mcp/mcp_server_sse.py +4 -2
- kash/media_base/media_cache.py +8 -8
- kash/media_base/media_services.py +1 -1
- kash/media_base/media_tools.py +6 -6
- kash/media_base/services/local_file_media.py +2 -2
- kash/media_base/{speech_transcription.py → transcription_deepgram.py} +25 -110
- kash/media_base/transcription_format.py +73 -0
- kash/media_base/transcription_whisper.py +38 -0
- kash/model/__init__.py +73 -5
- kash/model/actions_model.py +38 -4
- kash/model/concept_model.py +30 -0
- kash/model/items_model.py +44 -7
- kash/model/params_model.py +24 -0
- kash/shell/completions/completion_scoring.py +37 -5
- kash/shell/output/kerm_codes.py +1 -2
- kash/shell/output/shell_formatting.py +14 -4
- kash/shell/shell_main.py +2 -2
- kash/shell/utils/exception_printing.py +6 -0
- kash/shell/utils/native_utils.py +26 -20
- kash/text_handling/custom_sliding_transforms.py +12 -4
- kash/text_handling/doc_normalization.py +6 -2
- kash/text_handling/markdown_render.py +117 -0
- kash/text_handling/markdown_utils.py +204 -0
- kash/utils/common/import_utils.py +12 -3
- kash/utils/common/type_utils.py +0 -29
- kash/utils/common/url.py +27 -3
- kash/utils/errors.py +6 -0
- kash/utils/file_utils/file_formats.py +2 -2
- kash/utils/file_utils/file_formats_model.py +3 -0
- kash/web_content/dir_store.py +1 -2
- kash/web_content/file_cache_utils.py +37 -10
- kash/web_content/file_processing.py +68 -0
- kash/web_content/local_file_cache.py +12 -9
- kash/web_content/web_extract.py +8 -3
- kash/web_content/web_fetch.py +12 -4
- kash/web_gen/tabbed_webpage.py +5 -2
- kash/web_gen/templates/base_styles.css.jinja +120 -14
- kash/web_gen/templates/base_webpage.html.jinja +60 -13
- kash/web_gen/templates/content_styles.css.jinja +4 -2
- kash/web_gen/templates/item_view.html.jinja +2 -2
- kash/web_gen/templates/tabbed_webpage.html.jinja +1 -2
- kash/workspaces/__init__.py +15 -2
- kash/workspaces/selections.py +18 -3
- kash/workspaces/source_items.py +0 -1
- kash/workspaces/workspaces.py +5 -11
- kash/xonsh_custom/command_nl_utils.py +40 -19
- kash/xonsh_custom/custom_shell.py +43 -11
- kash/xonsh_custom/customize_prompt.py +39 -21
- kash/xonsh_custom/load_into_xonsh.py +22 -25
- kash/xonsh_custom/shell_load_commands.py +2 -2
- kash/xonsh_custom/xonsh_completers.py +2 -249
- kash/xonsh_custom/xonsh_keybindings.py +282 -0
- kash/xonsh_custom/xonsh_modern_tools.py +3 -3
- kash/xontrib/kash_extension.py +5 -6
- {kash_shell-0.3.9.dist-info → kash_shell-0.3.10.dist-info}/METADATA +8 -6
- {kash_shell-0.3.9.dist-info → kash_shell-0.3.10.dist-info}/RECORD +122 -123
- kash/concepts/concept_formats.py +0 -23
- kash/shell/clideps/api_keys.py +0 -100
- kash/shell/clideps/dotenv_setup.py +0 -115
- kash/shell/clideps/dotenv_utils.py +0 -98
- kash/shell/clideps/pkg_deps.py +0 -257
- kash/shell/clideps/platforms.py +0 -11
- kash/shell/clideps/terminal_features.py +0 -56
- kash/shell/utils/osc_utils.py +0 -95
- kash/shell/utils/terminal_images.py +0 -133
- kash/text_handling/markdown_util.py +0 -167
- kash/utils/common/atomic_var.py +0 -171
- kash/utils/common/string_replace.py +0 -93
- kash/utils/common/string_template.py +0 -101
- /kash/{concepts → embeddings}/cosine.py +0 -0
- /kash/{concepts → embeddings}/embeddings.py +0 -0
- {kash_shell-0.3.9.dist-info → kash_shell-0.3.10.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.9.dist-info → kash_shell-0.3.10.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.9.dist-info → kash_shell-0.3.10.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
from textwrap import dedent
|
|
2
|
+
|
|
3
|
+
import marko
|
|
4
|
+
import regex
|
|
5
|
+
from marko.block import HTMLBlock
|
|
6
|
+
from marko.ext.gfm import GFM
|
|
7
|
+
from marko.helpers import MarkoExtension
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# When we use divs in Markdown we usually want them to be standalone paragraphs,
# so it doesn't break other wrapping with flowmark etc. This handles that.
class CustomHTMLBlockMixin:
    div_pattern = regex.compile(r"^\s*<div\b", regex.IGNORECASE)

    def render_html_block(self, element: HTMLBlock) -> str:
        """
        Render an HTML block, padding top-level `<div>` blocks with newlines so
        they stand alone as paragraphs.
        """
        # Let the next renderer in the MRO (GFM) do its filtering first.
        rendered = super().render_html_block(element)  # pyright: ignore
        # Was the original source block a div?
        if self.div_pattern.match(element.body.strip()) is None:
            # Not a div: return the GFM-filtered body unchanged.
            return rendered
        # Surround the filtered div with newlines so it stands alone.
        return f"\n{rendered.strip()}\n"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# GFM first, adding our custom override as an extension to handle divs our way.
# Extensions later in this list are earlier in MRO, so CustomHTMLBlockMixin's
# render_html_block runs first and delegates to GFM's via super().
MARKO_GFM = marko.Markdown(
    extensions=["footnote", GFM, MarkoExtension(renderer_mixins=[CustomHTMLBlockMixin])]
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
FOOTNOTE_UP_ARROW = " ↑ "


def html_postprocess(html: str) -> str:
    """
    Final tweaks to the HTML: replace marko's footnote back-reference arrow (↩)
    with a padded up arrow.
    """
    needle = """class="footnote">↩</a>"""
    replacement = f"""class="footnote">{FOOTNOTE_UP_ARROW}</a>"""
    return html.replace(needle, replacement)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def markdown_to_html(markdown: str, converter: marko.Markdown = MARKO_GFM) -> str:
    """
    Convert Markdown to HTML.

    Wraps div blocks with newlines for better Markdown compatibility.

    Output passes through raw HTML! Note per GFM, unsafe script tags etc
    are [allowed in some cases](https://github.github.com/gfm/#example-140) so
    additional sanitization is needed if input isn't trusted.
    """
    html = converter.convert(markdown)
    # Fix: removed an unreachable duplicate `return html` that followed this return.
    return html_postprocess(html)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
## Tests
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_markdown_to_html():
    # Smoke test: render a representative document (headings, lists, inline HTML,
    # div blocks, comments, script) and compare the exact HTML output.
    markdown = dedent(
        """
        # Heading

        This is a paragraph and a [link](https://example.com).

        - Item 1
        - Item 2

        ## Subheading

        This is a paragraph with a <span>span</span> tag.
        This is a paragraph with a <div>div</div> tag.
        This is a paragraph with an <a href='https://example.com'>example link</a>.

        <div class="div1">This is a div.</div>

        <div class="div2">This is a second div.
        <iframe src="https://example.com">Inline iframe, note this is sanitized</iframe>
        </div>

        <!-- Script tag in a block, note this isn't sanitized -->
        <script>console.log("Javascript block!");</script>
        """
    )
    # Printed for debugging when the assertion below fails.
    print(markdown_to_html(markdown))

    expected_html = dedent(
        """
        <h1>Heading</h1>
        <p>This is a paragraph and a <a href="https://example.com">link</a>.</p>
        <ul>
        <li>Item 1</li>
        <li>Item 2</li>
        </ul>
        <h2>Subheading</h2>
        <p>This is a paragraph with a <span>span</span> tag.
        This is a paragraph with a <div>div</div> tag.
        This is a paragraph with an <a href='https://example.com'>example link</a>.</p>

        <div class="div1">This is a div.</div>

        <div class="div2">This is a second div.
        <iframe src="https://example.com">Inline iframe, note this is sanitized</iframe>
        </div>
        <!-- Script tag in a block, note this isn't sanitized -->
        <script>console.log("Javascript block!");</script>
        """
    )

    assert markdown_to_html(markdown).strip() == expected_html.strip()
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
import marko
|
|
5
|
+
import regex
|
|
6
|
+
from marko.block import Heading, ListItem
|
|
7
|
+
from marko.inline import Link
|
|
8
|
+
|
|
9
|
+
from kash.config.logger import get_logger
|
|
10
|
+
from kash.utils.common.url import Url
|
|
11
|
+
|
|
12
|
+
log = get_logger(__name__)
|
|
13
|
+
|
|
14
|
+
# Characters that commonly need escaping in Markdown inline text.
MARKDOWN_ESCAPE_CHARS = r"([\\`*_{}\[\]()#+.!-])"
MARKDOWN_ESCAPE_RE = re.compile(MARKDOWN_ESCAPE_CHARS)


def escape_markdown(text: str) -> str:
    """
    Escape characters that have special meaning in Markdown by prefixing each
    with a backslash.
    """
    return MARKDOWN_ESCAPE_RE.sub(lambda m: "\\" + m.group(1), text)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def as_bullet_points(values: list[Any]) -> str:
    """
    Convert a list of values to a Markdown bullet-point list. If a value is a string,
    it is treated like Markdown. If it's something else it's converted to a string
    and also escaped for Markdown.
    """
    points: list[str] = []
    for value in values:
        # Fix: convert/escape non-strings *before* string operations. The previous
        # code called value.replace(...) on the raw value, raising AttributeError
        # for the non-string inputs the docstring promises to handle.
        text = value if isinstance(value, str) else escape_markdown(str(value))
        # Flatten newlines so each point stays on one bullet line.
        points.append(text.replace("\n", " ").strip())

    return "\n\n".join(f"- {point}" for point in points)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def markdown_link(text: str, url: str | Url) -> str:
    """
    Create a Markdown link, escaping square brackets in the link text.
    """
    escaped_text = text.replace("[", "\\[").replace("]", "\\]")
    return f"[{escaped_text}]({url})"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def is_markdown_header(markdown: str) -> bool:
    """
    Is the start of this content a Markdown header?
    """
    return bool(regex.match(r"^#+ ", markdown))
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _tree_links(element, include_internal=False):
    """
    Collect link destinations from a marko AST, depth-first. Internal anchors
    (destinations starting with "#") are skipped unless `include_internal`.
    """
    found: list[str] = []

    def walk(node) -> None:
        if isinstance(node, Link):
            if include_internal or not node.dest.startswith("#"):
                found.append(node.dest)
        elif hasattr(node, "children"):
            for child in node.children:
                walk(child)

    walk(element)
    return found
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def extract_links(file_path: str, include_internal=False) -> list[str]:
    """
    Extract all links from a Markdown file. Future: Include textual and section context.
    """
    with open(file_path) as file:
        content = file.read()
    return _tree_links(marko.parse(content), include_internal)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _extract_text(element: Any) -> str:
|
|
87
|
+
if isinstance(element, str):
|
|
88
|
+
return element
|
|
89
|
+
elif hasattr(element, "children"):
|
|
90
|
+
return "".join(_extract_text(child) for child in element.children)
|
|
91
|
+
else:
|
|
92
|
+
return ""
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _tree_bullet_points(element: marko.block.Document) -> list[str]:
    """
    Collect the stripped text of every list item in the document tree.
    Does not recurse into a list item's own children for nested items.
    """
    found: list[str] = []

    def walk(node) -> None:
        if isinstance(node, ListItem):
            found.append(_extract_text(node).strip())
        elif hasattr(node, "children"):
            for child in node.children:
                walk(child)

    walk(element)
    return found
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def extract_bullet_points(content: str) -> list[str]:
    """
    Extract list item values from a Markdown file.
    """
    return _tree_bullet_points(marko.parse(content))
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _type_from_heading(heading: Heading) -> str:
    """
    Map a heading to its HTML tag name ("h1".."h6"); raise ValueError for any
    other level.
    """
    # Heading levels are small ints per CommonMark, so a range test is equivalent
    # to membership in [1, 2, 3, 4, 5, 6].
    if 1 <= heading.level <= 6:
        return f"h{heading.level}"
    raise ValueError(f"Unsupported heading: {heading}: level {heading.level}")
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _last_unescaped_bracket(text: str, index: int) -> str | None:
|
|
126
|
+
escaped = False
|
|
127
|
+
for i in range(index - 1, -1, -1):
|
|
128
|
+
ch = text[i]
|
|
129
|
+
if ch == "\\":
|
|
130
|
+
escaped = not escaped # Toggle escaping chain
|
|
131
|
+
continue
|
|
132
|
+
if ch in "[]":
|
|
133
|
+
if not escaped:
|
|
134
|
+
return ch
|
|
135
|
+
# Reset escape status after any non‑backslash char
|
|
136
|
+
escaped = False
|
|
137
|
+
return None
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def find_markdown_text(
    pattern: re.Pattern[str], text: str, *, start_pos: int = 0
) -> re.Match[str] | None:
    """
    Return first regex `pattern` match in `text` not inside an existing link.

    A match is considered inside a link when the most recent unescaped square
    bracket preceding the match start is an opening bracket "[".
    """
    search_from = start_pos
    while (match := pattern.search(text, search_from)) is not None:
        if _last_unescaped_bracket(text, match.start()) != "[":
            return match
        # Inside a link: resume searching after this occurrence.
        search_from = match.end()
    return None
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
## Tests
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def test_escape_markdown() -> None:
    # Plain text and the empty string pass through unchanged.
    assert escape_markdown("") == ""
    assert escape_markdown("Hello world") == "Hello world"
    # Each Markdown special character gets a single backslash escape.
    assert escape_markdown("`code`") == "\\`code\\`"
    assert escape_markdown("*italic*") == "\\*italic\\*"
    assert escape_markdown("_bold_") == "\\_bold\\_"
    assert escape_markdown("{braces}") == "\\{braces\\}"
    assert escape_markdown("# header") == "\\# header"
    assert escape_markdown("1. item") == "1\\. item"
    assert escape_markdown("line+break") == "line\\+break"
    assert escape_markdown("dash-") == "dash\\-"
    assert escape_markdown("!bang") == "\\!bang"
    assert escape_markdown("backslash\\") == "backslash\\\\"
    # Multiple specials in one string are each escaped independently.
    assert escape_markdown("Multiple *special* chars [here](#anchor).") == (
        "Multiple \\*special\\* chars \\[here\\]\\(\\#anchor\\)\\."
    )
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def test_find_markdown_text() -> None:  # pragma: no cover
    # NOTE(review): "# pragma: no cover" on a test function looks unintentional — confirm.
    # Match is returned when the term is not inside a link.
    text = "Foo bar baz"
    pattern = re.compile("Foo Bar", re.IGNORECASE)
    match = find_markdown_text(pattern, text)
    assert match is not None and match.group(0) == "Foo bar"

    # Skips occurrence inside link and returns the first one outside.
    text = "[Foo](http://example.com) something Foo"
    pattern = re.compile("Foo", re.IGNORECASE)
    match = find_markdown_text(pattern, text)
    assert match is not None
    assert match.start() > text.index(") ")
    assert text[match.start() : match.end()] == "Foo"

    # Returns None when the only occurrences are inside links.
    text = "prefix [bar](http://example.com) suffix"
    pattern = re.compile("bar", re.IGNORECASE)
    match = find_markdown_text(pattern, text)
    assert match is None
|
|
@@ -15,21 +15,30 @@ Tallies: TypeAlias = dict[str, int]
|
|
|
15
15
|
def import_subdirs(
    parent_package_name: str,
    parent_dir: Path,
    subdir_names: list[str] | None = None,
    tallies: Tallies | None = None,
):
    """
    Import all files in the given subdirectories of a single parent directory.
    Wraps `pkgutil.iter_modules` to iterate over all modules in the subdirectories.
    If `subdir_names` is `None`, will import all subdirectories.

    NOTE(review): with no subdir_names this imports the parent package directory
    itself ("."), not every subdirectory — confirm the docstring's intent.
    """
    if tallies is None:
        tallies = {}
    # Empty/None means: treat the parent directory itself as the one "subdir".
    if not subdir_names:
        subdir_names = ["."]

    for subdir_name in subdir_names:
        if subdir_name == ".":
            full_path = parent_dir
            package_name = parent_package_name
        else:
            full_path = parent_dir / subdir_name
            package_name = f"{parent_package_name}.{subdir_name}"

        if not full_path.is_dir():
            raise FileNotFoundError(f"Subdirectory not found: {full_path}")

        for _module_finder, module_name, _is_pkg in pkgutil.iter_modules(path=[str(full_path)]):
            importlib.import_module(f"{package_name}.{module_name}")  # Propagate import errors
            # NOTE(review): tally incremented once per imported module — confirm
            # this placement (per-module count, not per-package) matches the original.
            tallies[package_name] = tallies.get(package_name, 0) + 1
|
kash/utils/common/type_utils.py
CHANGED
|
@@ -15,35 +15,6 @@ def not_none(value: T | None, message: str | None = None) -> T:
|
|
|
15
15
|
return value
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
def is_truthy(value: Any, strict: bool = True) -> bool:
|
|
19
|
-
"""
|
|
20
|
-
True for all common string and non-string values for true. Useful for parsing
|
|
21
|
-
string values or command line arguments.
|
|
22
|
-
"""
|
|
23
|
-
truthy_values = {"true", "1", "yes", "on", "y"}
|
|
24
|
-
falsy_values = {"false", "0", "no", "off", "n", ""}
|
|
25
|
-
|
|
26
|
-
if value is None:
|
|
27
|
-
return False
|
|
28
|
-
elif isinstance(value, str):
|
|
29
|
-
value = value.strip().lower()
|
|
30
|
-
if value in truthy_values:
|
|
31
|
-
return True
|
|
32
|
-
elif value in falsy_values:
|
|
33
|
-
return False
|
|
34
|
-
elif isinstance(value, (int, float)):
|
|
35
|
-
return value != 0
|
|
36
|
-
elif isinstance(value, bool):
|
|
37
|
-
return value
|
|
38
|
-
elif isinstance(value, (list, tuple, set, dict)):
|
|
39
|
-
return len(value) > 0
|
|
40
|
-
|
|
41
|
-
if strict:
|
|
42
|
-
raise ValueError(f"Could not convert type {type(value)} to boolean: {repr(value)}")
|
|
43
|
-
|
|
44
|
-
return bool(value)
|
|
45
|
-
|
|
46
|
-
|
|
47
18
|
def as_dataclass(dict_data: dict[str, Any], dataclass_type: type[T]) -> T:
|
|
48
19
|
"""
|
|
49
20
|
Convert a dict recursively to dataclass object, raising an error if the data does
|
kash/utils/common/url.py
CHANGED
|
@@ -67,15 +67,39 @@ def is_file_url(url: str | Url) -> bool:
|
|
|
67
67
|
return url.startswith("file://")
|
|
68
68
|
|
|
69
69
|
|
|
70
|
-
def
|
|
70
|
+
def parse_http_url(url: str | Url) -> ParseResult:
    """
    Parse an http/https URL and return the parsed result, raising ValueError if
    not an http/https URL.
    """
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        raise ValueError(f"Not an http/https URL: {url}")
    return parsed
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def parse_file_url(url: str | Url) -> Path:
    """
    Parse a file URL and return the path, raising ValueError if not a file URL.
    """
    parsed = urlparse(url)
    if parsed.scheme != "file":
        raise ValueError(f"Not a file URL: {url}")
    return Path(parsed.path)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def parse_s3_url(url: str | Url) -> tuple[str, str]:
    """
    Parse an S3 URL and return the bucket and key, raising ValueError if not an
    S3 URL.
    """
    parsed = urlparse(url)
    if parsed.scheme != "s3":
        raise ValueError(f"Not an S3 URL: {url}")
    # Bucket is the netloc; the key is the path without its leading slash.
    return parsed.netloc, parsed.path.lstrip("/")
|
|
79
103
|
|
|
80
104
|
|
|
81
105
|
def as_file_url(path: str | Path) -> Url:
|
kash/utils/errors.py
CHANGED
|
@@ -139,6 +139,12 @@ class FileFormatError(ContentError):
|
|
|
139
139
|
pass
|
|
140
140
|
|
|
141
141
|
|
|
142
|
+
class ApiError(KashRuntimeError):
    """Raised when an API call returns something unexpected."""

    pass
|
|
146
|
+
|
|
147
|
+
|
|
142
148
|
def _nonfatal_exceptions() -> tuple[type[Exception], ...]:
|
|
143
149
|
exceptions: list[type[Exception]] = [SelfExplanatoryError, FileNotFoundError, IOError]
|
|
144
150
|
try:
|
|
@@ -4,9 +4,9 @@ from pathlib import Path
|
|
|
4
4
|
from typing import NewType
|
|
5
5
|
|
|
6
6
|
import regex
|
|
7
|
+
from clideps.pkgs.pkg_check import pkg_check
|
|
7
8
|
|
|
8
9
|
from kash.config.logger import get_logger
|
|
9
|
-
from kash.shell.clideps.pkg_deps import Pkg, pkg_check
|
|
10
10
|
|
|
11
11
|
log = get_logger(__name__)
|
|
12
12
|
|
|
@@ -86,7 +86,7 @@ def detect_mime_type(filename: str | Path) -> MimeType | None:
|
|
|
86
86
|
Get the mime type of a file using libmagic heuristics plus more careful
|
|
87
87
|
detection of HTML, Markdown, and multipart YAML.
|
|
88
88
|
"""
|
|
89
|
-
pkg_check().require(
|
|
89
|
+
pkg_check().require("libmagic")
|
|
90
90
|
import magic
|
|
91
91
|
|
|
92
92
|
mime = magic.Magic(mime=True)
|
|
@@ -36,6 +36,8 @@ class Format(Enum):
|
|
|
36
36
|
it is the format of the resource (url, media, etc.).
|
|
37
37
|
"""
|
|
38
38
|
|
|
39
|
+
# TODO: Be more thorough, pulling in relevant extensions and types from the `mimetypes` module.
|
|
40
|
+
|
|
39
41
|
# Formats with no body (content is in frontmatter).
|
|
40
42
|
url = "url"
|
|
41
43
|
|
|
@@ -146,6 +148,7 @@ class Format(Enum):
|
|
|
146
148
|
self.markdown,
|
|
147
149
|
self.md_html,
|
|
148
150
|
self.html,
|
|
151
|
+
self.json, # Not strictly true but we encourage use of comments.
|
|
149
152
|
self.yaml,
|
|
150
153
|
self.diff,
|
|
151
154
|
self.python,
|
kash/web_content/dir_store.py
CHANGED
|
@@ -87,8 +87,7 @@ class DirStore:
|
|
|
87
87
|
self, keys: list[str | Path], folder: str | None = None, suffix: str | None = None
|
|
88
88
|
) -> dict[str | Path, Path | None]:
|
|
89
89
|
"""
|
|
90
|
-
Look up all existing cached results for the set of keys.
|
|
91
|
-
be optimized for large batches.
|
|
90
|
+
Look up all existing cached results for the set of keys.
|
|
92
91
|
"""
|
|
93
92
|
return {key: self.find(key, folder=folder, suffix=suffix) for key in keys}
|
|
94
93
|
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from collections.abc import Callable
|
|
1
3
|
from pathlib import Path
|
|
4
|
+
from typing import Any
|
|
2
5
|
|
|
3
6
|
from prettyfmt import fmt_lines, fmt_path
|
|
4
7
|
|
|
@@ -35,18 +38,40 @@ def reset_content_cache_dir(path: Path):
|
|
|
35
38
|
log.info("Using web cache: %s", fmt_path(path))
|
|
36
39
|
|
|
37
40
|
|
|
38
|
-
def cache_file(
|
|
41
|
+
def cache_file(
    source: Url | Path | Loadable, global_cache: bool = False, expiration_sec: float | None = None
) -> tuple[Path, bool]:
    """
    Return a local cached copy of the item. If it is an URL, content is fetched.
    If it is a Path or a Loadable, a cached copy is returned.
    LocalFileCache uses httpx so httpx.HTTPError is raised for non-2xx responses.

    Uses the current content cache unless there is no current cache or `global_cache` is True,
    in which case the global cache is used.
    """
    cache = _content_cache if not global_cache else _global_content_cache
    # Returns (local path, whether the content was already cached).
    return cache.cache(source, expiration_sec)
|
|
47
55
|
|
|
48
56
|
|
|
49
|
-
def
|
|
57
|
+
def cache_api_response(
    url: Url,
    global_cache: bool = False,
    expiration_sec: float | None = None,
    parser: Callable[[str], Any] = json.loads,
) -> tuple[Any, bool]:
    """
    Cache an API response. By default parse the response as JSON.
    Returns the parsed value and whether the response was already cached.
    """
    cache = _global_content_cache if global_cache else _content_cache
    path, was_cached = cache.cache(url, expiration_sec)
    return parser(path.read_text()), was_cached
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def cache_resource(
|
|
73
|
+
item: Item, global_cache: bool = False, expiration_sec: float | None = None
|
|
74
|
+
) -> dict[MediaType, Path]:
|
|
50
75
|
"""
|
|
51
76
|
Cache a resource item for an external local path or a URL, fetching or
|
|
52
77
|
copying as needed. For media this may yield more than one format.
|
|
@@ -64,17 +89,17 @@ def cache_resource(item: Item) -> dict[MediaType, Path]:
|
|
|
64
89
|
if is_media_url(item.url):
|
|
65
90
|
result = cache_media(item.url)
|
|
66
91
|
else:
|
|
67
|
-
path, _was_cached = cache_file(item.url)
|
|
92
|
+
path, _was_cached = cache_file(item.url, global_cache, expiration_sec)
|
|
68
93
|
elif item.external_path:
|
|
69
94
|
path = Path(item.external_path)
|
|
70
95
|
if not path.is_file():
|
|
71
96
|
raise FileNotFound(f"External path not found: {path}")
|
|
72
|
-
path, _was_cached = cache_file(path)
|
|
97
|
+
path, _was_cached = cache_file(path, global_cache, expiration_sec)
|
|
73
98
|
elif item.original_filename:
|
|
74
99
|
path = Path(item.original_filename)
|
|
75
100
|
if not path.is_file():
|
|
76
101
|
raise FileNotFound(f"Original filename not found: {path}")
|
|
77
|
-
path, _was_cached = cache_file(path)
|
|
102
|
+
path, _was_cached = cache_file(path, global_cache, expiration_sec)
|
|
78
103
|
else:
|
|
79
104
|
raise ValueError(f"Item has no URL or external path: {item}")
|
|
80
105
|
|
|
@@ -94,7 +119,9 @@ def cache_resource(item: Item) -> dict[MediaType, Path]:
|
|
|
94
119
|
return result
|
|
95
120
|
|
|
96
121
|
|
|
97
|
-
def get_url_html(
|
|
122
|
+
def get_url_html(
|
|
123
|
+
item: Item, global_cache: bool = False, expiration_sec: float | None = None
|
|
124
|
+
) -> tuple[Url, str]:
|
|
98
125
|
"""
|
|
99
126
|
Returns the HTML content of an URL item, using the content cache,
|
|
100
127
|
or the body of the item if it has a URL and HTML body.
|
|
@@ -106,7 +133,7 @@ def get_url_html(item: Item) -> tuple[Url, str]:
|
|
|
106
133
|
url = Url(canonicalize_url(item.url))
|
|
107
134
|
|
|
108
135
|
if is_url_item(item):
|
|
109
|
-
path, _was_cached = cache_file(url)
|
|
136
|
+
path, _was_cached = cache_file(url, global_cache, expiration_sec)
|
|
110
137
|
with open(path) as file:
|
|
111
138
|
html_content = file.read()
|
|
112
139
|
else:
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable, Mapping
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TypeAlias
|
|
7
|
+
|
|
8
|
+
from kash.web_content.local_file_cache import read_mtime
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(frozen=True)
class OutputType:
    """
    A type of output file, represented by the filename suffix, e.g. '.mp3', '.txt', etc.
    """

    # Filename suffix including the leading dot, e.g. ".mp3" (Path.with_suffix
    # raises ValueError for a non-empty suffix without the dot).
    suffix: str

    def output_path(self, src: Path) -> Path:
        """
        Resolve the output path. Will be next to the source file, e.g.
        some-dir/video.mp4 -> some-dir/video.mp3
        """
        return src.with_suffix(self.suffix)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
Processor: TypeAlias = Callable[[Path, Mapping[OutputType, Path]], None]
|
|
28
|
+
"""
|
|
29
|
+
A function that takes a source file and a mapping with one or more output paths.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass(frozen=True)
|
|
34
|
+
class FileProcess:
|
|
35
|
+
"""
|
|
36
|
+
Process a file and produce one or more outputs.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
processor: Processor
|
|
40
|
+
outputs: list[OutputType]
|
|
41
|
+
|
|
42
|
+
def is_outdated(self, src: Path) -> bool:
|
|
43
|
+
"""
|
|
44
|
+
True when any output is missing or older (earliest mtime) than `src`.
|
|
45
|
+
"""
|
|
46
|
+
dests = {o.output_path(src) for o in self.outputs}
|
|
47
|
+
if any(not p.exists() for p in dests):
|
|
48
|
+
return True
|
|
49
|
+
earliest = min(read_mtime(p) for p in dests)
|
|
50
|
+
return read_mtime(src) > earliest
|
|
51
|
+
|
|
52
|
+
def run(self, src: Path) -> dict[OutputType, Path]:
|
|
53
|
+
"""
|
|
54
|
+
Run unconditionally and return a mapping of outputs to paths.
|
|
55
|
+
"""
|
|
56
|
+
dests = {o: o.output_path(src) for o in self.outputs}
|
|
57
|
+
self.processor(src, dests)
|
|
58
|
+
return dests
|
|
59
|
+
|
|
60
|
+
def run_if_needed(self, src: Path) -> dict[OutputType, Path]:
|
|
61
|
+
"""
|
|
62
|
+
Run only if any output is missing or outdated.
|
|
63
|
+
"""
|
|
64
|
+
return (
|
|
65
|
+
self.run(src)
|
|
66
|
+
if self.is_outdated(src)
|
|
67
|
+
else {o: o.output_path(src) for o in self.outputs}
|
|
68
|
+
)
|