kash-shell 0.3.13__py3-none-any.whl → 0.3.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/actions/core/markdownify.py +7 -4
- kash/actions/core/readability.py +4 -3
- kash/actions/core/render_as_html.py +2 -2
- kash/actions/core/show_webpage.py +2 -2
- kash/commands/workspace/workspace_commands.py +37 -20
- kash/exec/action_exec.py +2 -3
- kash/exec/preconditions.py +24 -4
- kash/file_storage/file_store.py +32 -11
- kash/model/items_model.py +12 -7
- kash/model/operations_model.py +14 -0
- kash/shell/utils/native_utils.py +2 -2
- kash/utils/common/url.py +80 -3
- kash/utils/file_utils/file_formats.py +3 -2
- kash/utils/file_utils/file_formats_model.py +30 -48
- kash/web_content/local_file_cache.py +28 -5
- {kash_shell-0.3.13.dist-info → kash_shell-0.3.14.dist-info}/METADATA +1 -1
- {kash_shell-0.3.13.dist-info → kash_shell-0.3.14.dist-info}/RECORD +20 -21
- kash/workspaces/workspace_importing.py +0 -56
- {kash_shell-0.3.13.dist-info → kash_shell-0.3.14.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.13.dist-info → kash_shell-0.3.14.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.13.dist-info → kash_shell-0.3.14.dist-info}/licenses/LICENSE +0 -0
kash/actions/core/markdownify.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
from kash.config.logger import get_logger
|
|
2
2
|
from kash.exec import kash_action
|
|
3
3
|
from kash.exec.preconditions import has_html_body, is_url_resource
|
|
4
|
+
from kash.exec.runtime_settings import current_runtime_settings
|
|
4
5
|
from kash.model import Format, Item
|
|
5
|
-
from kash.model.
|
|
6
|
+
from kash.model.items_model import ItemType
|
|
6
7
|
from kash.utils.text_handling.markdownify_utils import markdownify_custom
|
|
7
8
|
from kash.web_content.file_cache_utils import get_url_html
|
|
8
9
|
from kash.web_content.web_extract_readabilipy import extract_text_readabilipy
|
|
@@ -12,20 +13,22 @@ log = get_logger(__name__)
|
|
|
12
13
|
|
|
13
14
|
@kash_action(
|
|
14
15
|
precondition=is_url_resource | has_html_body,
|
|
15
|
-
params=common_params("refetch"),
|
|
16
16
|
mcp_tool=True,
|
|
17
17
|
)
|
|
18
|
-
def markdownify(item: Item
|
|
18
|
+
def markdownify(item: Item) -> Item:
|
|
19
19
|
"""
|
|
20
20
|
Converts a URL or raw HTML item to Markdown, fetching with the content
|
|
21
21
|
cache if needed. Also uses readability to clean up the HTML.
|
|
22
22
|
"""
|
|
23
23
|
|
|
24
|
+
refetch = current_runtime_settings().refetch
|
|
24
25
|
expiration_sec = 0 if refetch else None
|
|
25
26
|
url, html_content = get_url_html(item, expiration_sec=expiration_sec)
|
|
26
27
|
page_data = extract_text_readabilipy(url, html_content)
|
|
27
28
|
assert page_data.clean_html
|
|
28
29
|
markdown_content = markdownify_custom(page_data.clean_html)
|
|
29
30
|
|
|
30
|
-
output_item = item.derived_copy(
|
|
31
|
+
output_item = item.derived_copy(
|
|
32
|
+
type=ItemType.doc, format=Format.markdown, body=markdown_content
|
|
33
|
+
)
|
|
31
34
|
return output_item
|
kash/actions/core/readability.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from kash.config.logger import get_logger
|
|
2
2
|
from kash.exec import kash_action
|
|
3
3
|
from kash.exec.preconditions import has_html_body, is_url_resource
|
|
4
|
+
from kash.exec.runtime_settings import current_runtime_settings
|
|
4
5
|
from kash.model import Format, Item
|
|
5
|
-
from kash.model.params_model import common_params
|
|
6
6
|
from kash.web_content.file_cache_utils import get_url_html
|
|
7
7
|
from kash.web_content.web_extract_readabilipy import extract_text_readabilipy
|
|
8
8
|
|
|
@@ -11,14 +11,15 @@ log = get_logger(__name__)
|
|
|
11
11
|
|
|
12
12
|
@kash_action(
|
|
13
13
|
precondition=is_url_resource | has_html_body,
|
|
14
|
-
params=common_params("refetch"),
|
|
15
14
|
mcp_tool=True,
|
|
16
15
|
)
|
|
17
|
-
def readability(item: Item
|
|
16
|
+
def readability(item: Item) -> Item:
|
|
18
17
|
"""
|
|
19
18
|
Extracts clean HTML from a raw HTML item.
|
|
20
19
|
See `markdownify` to also convert to Markdown.
|
|
21
20
|
"""
|
|
21
|
+
|
|
22
|
+
refetch = current_runtime_settings().refetch
|
|
22
23
|
expiration_sec = 0 if refetch else None
|
|
23
24
|
locator, html_content = get_url_html(item, expiration_sec=expiration_sec)
|
|
24
25
|
page_data = extract_text_readabilipy(locator, html_content)
|
|
kash/actions/core/render_as_html.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from kash.actions.core.tabbed_webpage_config import tabbed_webpage_config
|
|
2
2
|
from kash.actions.core.tabbed_webpage_generate import tabbed_webpage_generate
|
|
3
3
|
from kash.exec import kash_action
|
|
4
|
-
from kash.exec.preconditions import
|
|
4
|
+
from kash.exec.preconditions import has_fullpage_html_body, has_html_body, has_simple_text_body
|
|
5
5
|
from kash.exec_model.args_model import ONE_OR_MORE_ARGS
|
|
6
6
|
from kash.model import ActionInput, ActionResult, Param
|
|
7
7
|
from kash.model.items_model import ItemType
|
|
@@ -11,7 +11,7 @@ from kash.web_gen.simple_webpage import simple_webpage_render
|
|
|
11
11
|
|
|
12
12
|
@kash_action(
|
|
13
13
|
expected_args=ONE_OR_MORE_ARGS,
|
|
14
|
-
precondition=(has_html_body | has_simple_text_body) & ~
|
|
14
|
+
precondition=(has_html_body | has_simple_text_body) & ~has_fullpage_html_body,
|
|
15
15
|
params=(Param("no_title", "Don't add a title to the page body.", type=bool),),
|
|
16
16
|
)
|
|
17
17
|
def render_as_html(input: ActionInput, no_title: bool = False) -> ActionResult:
|
|
kash/actions/core/show_webpage.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from kash.actions.core.render_as_html import render_as_html
|
|
2
2
|
from kash.commands.base.show_command import show
|
|
3
3
|
from kash.exec import kash_action
|
|
4
|
-
from kash.exec.preconditions import
|
|
4
|
+
from kash.exec.preconditions import has_fullpage_html_body, has_html_body, has_simple_text_body
|
|
5
5
|
from kash.exec_model.args_model import ONE_OR_MORE_ARGS
|
|
6
6
|
from kash.exec_model.commands_model import Command
|
|
7
7
|
from kash.exec_model.shell_model import ShellResult
|
|
@@ -10,7 +10,7 @@ from kash.model import ActionInput, ActionResult
|
|
|
10
10
|
|
|
11
11
|
@kash_action(
|
|
12
12
|
expected_args=ONE_OR_MORE_ARGS,
|
|
13
|
-
precondition=(has_html_body | has_simple_text_body) & ~
|
|
13
|
+
precondition=(has_html_body | has_simple_text_body) & ~has_fullpage_html_body,
|
|
14
14
|
)
|
|
15
15
|
def show_webpage(input: ActionInput) -> ActionResult:
|
|
16
16
|
"""
|
|
kash/commands/workspace/workspace_commands.py
CHANGED
|
@@ -193,49 +193,68 @@ def download(*urls_or_paths: str, refetch: bool = False) -> ShellResult:
|
|
|
193
193
|
"""
|
|
194
194
|
Download a URL or resource. Uses cached content if available, unless `refetch` is true.
|
|
195
195
|
Inputs can be URLs or paths to URL resources.
|
|
196
|
+
Creates both resource and document versions for text content.
|
|
196
197
|
"""
|
|
197
|
-
expiration_sec = 0 if refetch else None
|
|
198
|
-
|
|
199
|
-
# TODO: Add option to include frontmatter metadata for text files.
|
|
200
198
|
ws = current_ws()
|
|
201
199
|
saved_paths = []
|
|
200
|
+
|
|
202
201
|
for url_or_path in urls_or_paths:
|
|
203
202
|
locator = resolve_locator_arg(url_or_path)
|
|
204
203
|
url: Url | None = None
|
|
204
|
+
|
|
205
|
+
# Get the URL from the locator
|
|
205
206
|
if not isinstance(locator, Path) and is_url(locator):
|
|
206
207
|
url = Url(locator)
|
|
207
|
-
|
|
208
|
+
elif isinstance(locator, StorePath):
|
|
208
209
|
url_item = ws.load(locator)
|
|
209
210
|
if is_url_resource(url_item):
|
|
210
211
|
url = url_item.url
|
|
212
|
+
|
|
211
213
|
if not url:
|
|
212
214
|
raise InvalidInput(f"Not a URL or URL resource: {fmt_loc(locator)}")
|
|
213
215
|
|
|
216
|
+
# Handle media URLs differently
|
|
214
217
|
if is_media_url(url):
|
|
215
218
|
log.message(
|
|
216
219
|
"URL is a media URL, so adding as a resource and will cache media: %s", fmt_loc(url)
|
|
217
220
|
)
|
|
218
|
-
store_path = ws.import_item(
|
|
221
|
+
store_path = ws.import_item(url, as_type=ItemType.resource, reimport=refetch)
|
|
222
|
+
saved_paths.append(store_path)
|
|
219
223
|
media_tools.cache_media(url)
|
|
220
224
|
else:
|
|
221
|
-
|
|
222
|
-
|
|
225
|
+
# Cache the content first
|
|
226
|
+
expiration_sec = 0 if refetch else None
|
|
223
227
|
cache_result = cache_file(url, expiration_sec=expiration_sec)
|
|
224
|
-
|
|
228
|
+
original_filename = Path(parse_http_url(url).path).name
|
|
225
229
|
mime_type = cache_result.content.headers and cache_result.content.headers.mime_type
|
|
226
|
-
|
|
230
|
+
|
|
231
|
+
# Create a resource item
|
|
232
|
+
resource_item = Item.from_external_path(
|
|
227
233
|
cache_result.content.path,
|
|
228
234
|
ItemType.resource,
|
|
235
|
+
url=url,
|
|
229
236
|
mime_type=mime_type,
|
|
230
237
|
original_filename=original_filename,
|
|
231
238
|
)
|
|
232
|
-
store_path = ws.save(
|
|
239
|
+
store_path = ws.save(resource_item, no_frontmatter=True, no_format=True)
|
|
233
240
|
saved_paths.append(store_path)
|
|
234
241
|
|
|
242
|
+
# Also create a doc version for text content
|
|
243
|
+
if resource_item.format and resource_item.format.supports_frontmatter:
|
|
244
|
+
doc_item = Item.from_external_path(
|
|
245
|
+
cache_result.content.path,
|
|
246
|
+
ItemType.doc,
|
|
247
|
+
url=url,
|
|
248
|
+
mime_type=mime_type,
|
|
249
|
+
original_filename=original_filename,
|
|
250
|
+
)
|
|
251
|
+
doc_store_path = ws.save(doc_item, no_frontmatter=False, no_format=False)
|
|
252
|
+
saved_paths.append(doc_store_path)
|
|
253
|
+
|
|
235
254
|
print_status(
|
|
236
255
|
"Downloaded %s %s:\n%s",
|
|
237
|
-
len(
|
|
238
|
-
plural("item", len(
|
|
256
|
+
len(saved_paths),
|
|
257
|
+
plural("item", len(saved_paths)),
|
|
239
258
|
fmt_lines(saved_paths),
|
|
240
259
|
)
|
|
241
260
|
select(*saved_paths)
|
|
@@ -483,7 +502,7 @@ def import_item(
|
|
|
483
502
|
|
|
484
503
|
|
|
485
504
|
@kash_command
|
|
486
|
-
def
|
|
505
|
+
def save_clipboard(
|
|
487
506
|
title: str | None = "pasted_text",
|
|
488
507
|
type: ItemType = ItemType.resource,
|
|
489
508
|
format: Format = Format.plaintext,
|
|
@@ -518,8 +537,6 @@ def fetch_metadata(*files_or_urls: str, refetch: bool = False) -> ShellResult:
|
|
|
518
537
|
|
|
519
538
|
Skips items that already have a title and description, unless `refetch` is true.
|
|
520
539
|
Skips (with a warning) items that are not URL resources.
|
|
521
|
-
|
|
522
|
-
:param use_cache: If true, also save page in content cache.
|
|
523
540
|
"""
|
|
524
541
|
if not files_or_urls:
|
|
525
542
|
locators = assemble_store_path_args()
|
|
@@ -529,12 +546,12 @@ def fetch_metadata(*files_or_urls: str, refetch: bool = False) -> ShellResult:
|
|
|
529
546
|
store_paths = []
|
|
530
547
|
for locator in locators:
|
|
531
548
|
try:
|
|
532
|
-
if isinstance(locator, Path):
|
|
533
|
-
raise InvalidInput()
|
|
534
549
|
fetched_item = fetch_url_metadata(locator, refetch=refetch)
|
|
535
550
|
store_paths.append(fetched_item.store_path)
|
|
536
|
-
except InvalidInput:
|
|
537
|
-
log.warning(
|
|
551
|
+
except InvalidInput as e:
|
|
552
|
+
log.warning(
|
|
553
|
+
"Not a URL or URL resource, will not fetch metadata: %s: %s", fmt_loc(locator), e
|
|
554
|
+
)
|
|
538
555
|
|
|
539
556
|
if store_paths:
|
|
540
557
|
select(*store_paths)
|
|
@@ -716,7 +733,7 @@ def reset_ignore_file(append: bool = False) -> None:
|
|
|
716
733
|
ignore_path = ws.base_dir / ws.dirs.ignore_file
|
|
717
734
|
write_ignore(ignore_path, append=append)
|
|
718
735
|
|
|
719
|
-
log.message("
|
|
736
|
+
log.message("Rewritten kash ignore file: %s", fmt_loc(ignore_path))
|
|
720
737
|
|
|
721
738
|
|
|
722
739
|
@kash_command
|
kash/exec/action_exec.py
CHANGED
|
@@ -32,7 +32,6 @@ from kash.utils.common.task_stack import task_stack
|
|
|
32
32
|
from kash.utils.common.type_utils import not_none
|
|
33
33
|
from kash.utils.errors import ContentError, InvalidOutput, get_nonfatal_exceptions
|
|
34
34
|
from kash.workspaces import Selection, current_ws
|
|
35
|
-
from kash.workspaces.workspace_importing import import_and_load
|
|
36
35
|
|
|
37
36
|
log = get_logger(__name__)
|
|
38
37
|
|
|
@@ -49,7 +48,7 @@ def prepare_action_input(*input_args: CommandArg, refetch: bool = False) -> Acti
|
|
|
49
48
|
|
|
50
49
|
# Ensure input items are already saved in the workspace and load the corresponding items.
|
|
51
50
|
# This also imports any URLs.
|
|
52
|
-
input_items = [import_and_load(
|
|
51
|
+
input_items = [ws.import_and_load(arg) for arg in input_args]
|
|
53
52
|
|
|
54
53
|
# URLs should have metadata like a title and be valid, so we fetch them.
|
|
55
54
|
if input_items:
|
|
@@ -383,7 +382,7 @@ def run_action_with_caching(
|
|
|
383
382
|
|
|
384
383
|
PrintHooks.before_done_message()
|
|
385
384
|
log.message(
|
|
386
|
-
"%s
|
|
385
|
+
"%s Action: `%s` completed with %s %s",
|
|
387
386
|
EMOJI_SUCCESS,
|
|
388
387
|
action.name,
|
|
389
388
|
len(result.items),
|
kash/exec/preconditions.py
CHANGED
|
@@ -7,7 +7,7 @@ from chopdiff.html import has_timestamp
|
|
|
7
7
|
|
|
8
8
|
from kash.exec.precondition_registry import kash_precondition
|
|
9
9
|
from kash.model.items_model import Item, ItemType
|
|
10
|
-
from kash.utils.file_utils.file_formats import
|
|
10
|
+
from kash.utils.file_utils.file_formats import is_fullpage_html
|
|
11
11
|
from kash.utils.file_utils.file_formats_model import Format
|
|
12
12
|
from kash.utils.text_handling.markdown_utils import extract_bullet_points
|
|
13
13
|
|
|
@@ -22,9 +22,14 @@ def is_doc_resource(item: Item) -> bool:
|
|
|
22
22
|
return bool(is_resource(item) and item.format and item.format.is_doc)
|
|
23
23
|
|
|
24
24
|
|
|
25
|
+
@kash_precondition
|
|
26
|
+
def is_markdown_resource(item: Item) -> bool:
|
|
27
|
+
return bool(is_resource(item) and item.format and item.format.is_markdown)
|
|
28
|
+
|
|
29
|
+
|
|
25
30
|
@kash_precondition
|
|
26
31
|
def is_html_resource(item: Item) -> bool:
|
|
27
|
-
return bool(is_resource(item) and item.format and item.format
|
|
32
|
+
return bool(is_resource(item) and item.format and item.format.is_html)
|
|
28
33
|
|
|
29
34
|
|
|
30
35
|
@kash_precondition
|
|
@@ -100,8 +105,18 @@ def has_html_body(item: Item) -> bool:
|
|
|
100
105
|
|
|
101
106
|
|
|
102
107
|
@kash_precondition
|
|
103
|
-
def
|
|
104
|
-
return bool(
|
|
108
|
+
def has_markdown_body(item: Item) -> bool:
|
|
109
|
+
return bool(has_body(item) and item.format and item.format.is_markdown)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@kash_precondition
|
|
113
|
+
def has_markdown_with_html_body(item: Item) -> bool:
|
|
114
|
+
return bool(has_body(item) and item.format and item.format.is_markdown_with_html)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@kash_precondition
|
|
118
|
+
def has_fullpage_html_body(item: Item) -> bool:
|
|
119
|
+
return bool(has_html_body(item) and item.body and is_fullpage_html(item.body))
|
|
105
120
|
|
|
106
121
|
|
|
107
122
|
@kash_precondition
|
|
@@ -114,6 +129,11 @@ def is_markdown(item: Item) -> bool:
|
|
|
114
129
|
return bool(has_body(item) and item.format and item.format.is_markdown)
|
|
115
130
|
|
|
116
131
|
|
|
132
|
+
@kash_precondition
|
|
133
|
+
def is_markdown_with_html(item: Item) -> bool:
|
|
134
|
+
return bool(has_body(item) and item.format and item.format.is_markdown_with_html)
|
|
135
|
+
|
|
136
|
+
|
|
117
137
|
@kash_precondition
|
|
118
138
|
def is_markdown_template(item: Item) -> bool:
|
|
119
139
|
return is_markdown(item) and contains_curly_vars(item)
|
kash/file_storage/file_store.py
CHANGED
|
@@ -22,7 +22,7 @@ from kash.model.paths_model import StorePath
|
|
|
22
22
|
from kash.shell.output.shell_output import PrintHooks
|
|
23
23
|
from kash.utils.common.format_utils import fmt_loc
|
|
24
24
|
from kash.utils.common.uniquifier import Uniquifier
|
|
25
|
-
from kash.utils.common.url import Locator, Url, is_url
|
|
25
|
+
from kash.utils.common.url import Locator, UnresolvedLocator, Url, is_url
|
|
26
26
|
from kash.utils.errors import FileExists, FileNotFound, InvalidFilename, SkippableError
|
|
27
27
|
from kash.utils.file_utils.file_formats_model import Format
|
|
28
28
|
from kash.utils.file_utils.file_walk import walk_by_dir
|
|
@@ -290,7 +290,7 @@ class FileStore(Workspace):
|
|
|
290
290
|
elif item_id in self.id_map and self.exists(self.id_map[item_id]):
|
|
291
291
|
# If this item has an identity and we've saved under that id before, use the same store path.
|
|
292
292
|
store_path = self.id_map[item_id]
|
|
293
|
-
log.
|
|
293
|
+
log.info(
|
|
294
294
|
"Found existing item with same id:\n%s",
|
|
295
295
|
fmt_lines([fmt_loc(store_path), item_id]),
|
|
296
296
|
)
|
|
@@ -334,6 +334,7 @@ class FileStore(Workspace):
|
|
|
334
334
|
skip_dup_names: bool = False,
|
|
335
335
|
as_tmp: bool = False,
|
|
336
336
|
no_format: bool = False,
|
|
337
|
+
no_frontmatter: bool = False,
|
|
337
338
|
) -> StorePath:
|
|
338
339
|
"""
|
|
339
340
|
Save the item. Uses the `store_path` if it's already set or generates a new one.
|
|
@@ -342,6 +343,8 @@ class FileStore(Workspace):
|
|
|
342
343
|
Unless `no_format` is true, also normalizes body text formatting (for Markdown)
|
|
343
344
|
and updates the item's body to match.
|
|
344
345
|
|
|
346
|
+
If `no_frontmatter` is true, will not add frontmatter metadata to the item.
|
|
347
|
+
|
|
345
348
|
If `overwrite` is true, will overwrite a file that has the same path.
|
|
346
349
|
|
|
347
350
|
If `as_tmp` is true, will save the item to a temporary file.
|
|
@@ -390,9 +393,14 @@ class FileStore(Workspace):
|
|
|
390
393
|
|
|
391
394
|
# Now save the new item.
|
|
392
395
|
try:
|
|
393
|
-
|
|
396
|
+
supports_frontmatter = item.format and item.format.supports_frontmatter
|
|
397
|
+
# For binary or unknown formats or if we're not adding frontmatter, copy the file exactly.
|
|
398
|
+
if item.external_path and (no_frontmatter or not supports_frontmatter):
|
|
394
399
|
copyfile_atomic(item.external_path, full_path, make_parents=True)
|
|
395
400
|
else:
|
|
401
|
+
# Save as a text item with frontmatter.
|
|
402
|
+
if item.external_path:
|
|
403
|
+
item.body = Path(item.external_path).read_text()
|
|
396
404
|
if overwrite and full_path.exists():
|
|
397
405
|
log.info(
|
|
398
406
|
"Overwrite is enabled and a previous file exists so will archive it: %s",
|
|
@@ -448,7 +456,7 @@ class FileStore(Workspace):
|
|
|
448
456
|
|
|
449
457
|
def import_item(
|
|
450
458
|
self,
|
|
451
|
-
locator:
|
|
459
|
+
locator: UnresolvedLocator,
|
|
452
460
|
*,
|
|
453
461
|
as_type: ItemType | None = None,
|
|
454
462
|
reimport: bool = False,
|
|
@@ -462,7 +470,10 @@ class FileStore(Workspace):
|
|
|
462
470
|
"""
|
|
463
471
|
from kash.web_content.canon_url import canonicalize_url
|
|
464
472
|
|
|
465
|
-
if
|
|
473
|
+
if isinstance(locator, StorePath) and not reimport:
|
|
474
|
+
log.info("Store path already imported: %s", fmt_loc(locator))
|
|
475
|
+
return locator
|
|
476
|
+
elif is_url(locator):
|
|
466
477
|
# Import a URL as a resource.
|
|
467
478
|
orig_url = Url(str(locator))
|
|
468
479
|
url = canonicalize_url(orig_url)
|
|
@@ -480,9 +491,6 @@ class FileStore(Workspace):
|
|
|
480
491
|
else:
|
|
481
492
|
store_path = self.save(item)
|
|
482
493
|
return store_path
|
|
483
|
-
elif isinstance(locator, StorePath) and not reimport:
|
|
484
|
-
log.info("Store path already imported: %s", fmt_loc(locator))
|
|
485
|
-
return locator
|
|
486
494
|
else:
|
|
487
495
|
# We have a path, possibly outside of or inside of the store.
|
|
488
496
|
path = Path(locator).resolve()
|
|
@@ -553,6 +561,13 @@ class FileStore(Workspace):
|
|
|
553
561
|
self.import_item(locator, as_type=as_type, reimport=reimport) for locator in locators
|
|
554
562
|
]
|
|
555
563
|
|
|
564
|
+
def import_and_load(self, locator: UnresolvedLocator) -> Item:
|
|
565
|
+
"""
|
|
566
|
+
Import a locator and return the item.
|
|
567
|
+
"""
|
|
568
|
+
store_path = self.import_item(locator)
|
|
569
|
+
return self.load(store_path)
|
|
570
|
+
|
|
556
571
|
def _filter_selection_paths(self):
|
|
557
572
|
"""
|
|
558
573
|
Filter out any paths that don't exist from all selections.
|
|
@@ -695,14 +710,20 @@ class FileStore(Workspace):
|
|
|
695
710
|
dirs_ignored,
|
|
696
711
|
)
|
|
697
712
|
|
|
698
|
-
def normalize(
|
|
713
|
+
def normalize(
|
|
714
|
+
self,
|
|
715
|
+
store_path: StorePath,
|
|
716
|
+
*,
|
|
717
|
+
no_format: bool = False,
|
|
718
|
+
no_frontmatter: bool = False,
|
|
719
|
+
) -> StorePath:
|
|
699
720
|
"""
|
|
700
721
|
Normalize an item or all items in a folder to make sure contents are in current
|
|
701
|
-
format.
|
|
722
|
+
format. This is the same as loading and saving the item.
|
|
702
723
|
"""
|
|
703
724
|
log.info("Normalizing item: %s", fmt_path(store_path))
|
|
704
725
|
|
|
705
726
|
item = self.load(store_path)
|
|
706
|
-
new_store_path = self.save(item)
|
|
727
|
+
new_store_path = self.save(item, no_format=no_format, no_frontmatter=no_frontmatter)
|
|
707
728
|
|
|
708
729
|
return new_store_path
|
kash/model/items_model.py
CHANGED
|
@@ -181,7 +181,7 @@ class ItemId:
|
|
|
181
181
|
item_id = ItemId(item.type, IdType.url, canonicalize_url(item.url))
|
|
182
182
|
elif item.type == ItemType.concept and item.title:
|
|
183
183
|
item_id = ItemId(item.type, IdType.concept, canonicalize_concept(item.title))
|
|
184
|
-
elif item.source and item.source.cacheable:
|
|
184
|
+
elif item.source and item.source.cacheable and item.source.operation.has_known_inputs:
|
|
185
185
|
# We know the source of this and if the action was cacheable, we can create
|
|
186
186
|
# an identity based on the source.
|
|
187
187
|
item_id = ItemId(item.type, IdType.source, item.source.as_str())
|
|
@@ -363,21 +363,24 @@ class Item:
|
|
|
363
363
|
*,
|
|
364
364
|
title: str | None = None,
|
|
365
365
|
original_filename: str | None = None,
|
|
366
|
+
url: Url | None = None,
|
|
366
367
|
mime_type: MimeType | None = None,
|
|
367
368
|
) -> Item:
|
|
368
369
|
"""
|
|
369
370
|
Create a resource Item for a file with a format inferred from the file extension
|
|
370
371
|
or the content. Only sets basic metadata. Does not read the content. Will set
|
|
371
372
|
`format` and `file_ext` if possible but will leave them as None if unrecognized.
|
|
372
|
-
If `mime_type` is provided, it can help determine the file extension
|
|
373
|
+
If `mime_type` is provided, it can help determine the file extension if the
|
|
374
|
+
extension isn't recognized from the filename or URL.
|
|
373
375
|
"""
|
|
374
376
|
from kash.file_storage.store_filenames import parse_item_filename
|
|
375
|
-
from kash.utils.file_utils.file_formats_model import
|
|
377
|
+
from kash.utils.file_utils.file_formats_model import file_format_info
|
|
376
378
|
|
|
377
379
|
# Will raise error for unrecognized file ext.
|
|
378
380
|
_name, filename_item_type, format, file_ext = parse_item_filename(path)
|
|
381
|
+
format_info = file_format_info(path, suggested_mime_type=mime_type)
|
|
379
382
|
if not format:
|
|
380
|
-
format =
|
|
383
|
+
format = format_info.format
|
|
381
384
|
if not item_type and filename_item_type:
|
|
382
385
|
item_type = filename_item_type
|
|
383
386
|
if not item_type:
|
|
@@ -385,9 +388,10 @@ class Item:
|
|
|
385
388
|
item_type = (
|
|
386
389
|
ItemType.doc if format and format.supports_frontmatter else ItemType.resource
|
|
387
390
|
)
|
|
388
|
-
|
|
389
|
-
if not
|
|
390
|
-
|
|
391
|
+
|
|
392
|
+
# Try to determine a good file extension if it's not already on the filename.
|
|
393
|
+
if not file_ext:
|
|
394
|
+
file_ext = format_info.suggested_file_ext
|
|
391
395
|
|
|
392
396
|
item = cls(
|
|
393
397
|
type=item_type,
|
|
@@ -396,6 +400,7 @@ class Item:
|
|
|
396
400
|
format=format,
|
|
397
401
|
external_path=str(path),
|
|
398
402
|
original_filename=original_filename,
|
|
403
|
+
url=url,
|
|
399
404
|
)
|
|
400
405
|
|
|
401
406
|
# Update modified time from the file system.
|
kash/model/operations_model.py
CHANGED
|
@@ -66,6 +66,13 @@ class Input:
|
|
|
66
66
|
else:
|
|
67
67
|
return "[input info missing]"
|
|
68
68
|
|
|
69
|
+
@property
|
|
70
|
+
def is_known(self) -> bool:
|
|
71
|
+
"""
|
|
72
|
+
Whether the input is known, i.e. we had saved inputs with hashes.
|
|
73
|
+
"""
|
|
74
|
+
return bool(self.path and self.hash)
|
|
75
|
+
|
|
69
76
|
# Inputs are equal if the hashes match (even if the paths have changed).
|
|
70
77
|
|
|
71
78
|
def __hash__(self):
|
|
@@ -117,6 +124,13 @@ class Operation:
|
|
|
117
124
|
|
|
118
125
|
return d
|
|
119
126
|
|
|
127
|
+
@property
|
|
128
|
+
def has_known_inputs(self) -> bool:
|
|
129
|
+
"""
|
|
130
|
+
Whether the operation has known inputs, i.e. all inputs have hashes.
|
|
131
|
+
"""
|
|
132
|
+
return all(arg.is_known for arg in self.arguments)
|
|
133
|
+
|
|
120
134
|
def summary(self) -> OperationSummary:
|
|
121
135
|
return OperationSummary(self.action_name)
|
|
122
136
|
|
kash/shell/utils/native_utils.py
CHANGED
|
@@ -23,7 +23,7 @@ from kash.shell.output.shell_output import cprint
|
|
|
23
23
|
from kash.utils.common.format_utils import fmt_loc
|
|
24
24
|
from kash.utils.common.url import as_file_url, is_file_url, is_url
|
|
25
25
|
from kash.utils.errors import FileNotFound, SetupError
|
|
26
|
-
from kash.utils.file_utils.file_formats import
|
|
26
|
+
from kash.utils.file_utils.file_formats import is_fullpage_html, read_partial_text
|
|
27
27
|
from kash.utils.file_utils.file_formats_model import file_format_info
|
|
28
28
|
|
|
29
29
|
log = get_logger(__name__)
|
|
@@ -88,7 +88,7 @@ def _detect_view_mode(file_or_url: str) -> ViewMode:
|
|
|
88
88
|
path = Path(file_or_url)
|
|
89
89
|
if path.is_file(): # File or symlink.
|
|
90
90
|
content = read_partial_text(path)
|
|
91
|
-
if content and
|
|
91
|
+
if content and is_fullpage_html(content):
|
|
92
92
|
return ViewMode.browser
|
|
93
93
|
|
|
94
94
|
info = file_format_info(path)
|
kash/utils/common/url.py
CHANGED
|
@@ -47,7 +47,9 @@ def check_if_url(
|
|
|
47
47
|
if only_schemes:
|
|
48
48
|
return result if result.scheme in only_schemes else None
|
|
49
49
|
else:
|
|
50
|
-
|
|
50
|
+
# Consider it a URL if the scheme is present and longer than a single character.
|
|
51
|
+
# This helps avoid misinterpreting Windows drive letters (e.g., "C:\foo") as schemes.
|
|
52
|
+
return result if result.scheme and len(result.scheme) > 1 else None
|
|
51
53
|
except ValueError:
|
|
52
54
|
return None
|
|
53
55
|
|
|
@@ -145,6 +147,41 @@ def normalize_url(
|
|
|
145
147
|
return Url(normalized_url)
|
|
146
148
|
|
|
147
149
|
|
|
150
|
+
def is_valid_path(text: UnresolvedLocator) -> bool:
|
|
151
|
+
"""
|
|
152
|
+
Sanity check if the input is plausibly a file path, i.e. not a URL or malformed in
|
|
153
|
+
an obvious way. Does not check for existence or OS-specific naming restrictions.
|
|
154
|
+
For a more thorough check there are other more complex options like:
|
|
155
|
+
https://github.com/thombashi/pathvalidate
|
|
156
|
+
"""
|
|
157
|
+
if isinstance(text, Path):
|
|
158
|
+
return True
|
|
159
|
+
elif isinstance(text, str):
|
|
160
|
+
path_str = text
|
|
161
|
+
else:
|
|
162
|
+
return False
|
|
163
|
+
|
|
164
|
+
# Check for empty or whitespace-only strings or null characters
|
|
165
|
+
# (never acceptable paths).
|
|
166
|
+
if not path_str or path_str.isspace():
|
|
167
|
+
return False
|
|
168
|
+
if "\0" in path_str:
|
|
169
|
+
return False
|
|
170
|
+
|
|
171
|
+
# Explicitly disallow URLs.
|
|
172
|
+
if is_url(path_str):
|
|
173
|
+
return False
|
|
174
|
+
|
|
175
|
+
# As a final lightweight check, ensure it can be instantiated as a Path object
|
|
176
|
+
# This doesn't validate existence or character restrictions.
|
|
177
|
+
try:
|
|
178
|
+
_ = Path(path_str)
|
|
179
|
+
except (TypeError, ValueError):
|
|
180
|
+
return False
|
|
181
|
+
|
|
182
|
+
return True
|
|
183
|
+
|
|
184
|
+
|
|
148
185
|
## Tests
|
|
149
186
|
|
|
150
187
|
|
|
@@ -155,13 +192,19 @@ def test_is_url():
|
|
|
155
192
|
assert is_url("ftp://example.com") == True
|
|
156
193
|
assert is_url("file:///path/to/file") == True
|
|
157
194
|
assert is_url("file://hostname/path/to/file") == True
|
|
158
|
-
assert is_url("invalid-url") == False
|
|
159
|
-
assert is_url("www.example.com") == False
|
|
160
195
|
assert is_url("http://example.com", only_schemes=HTTP_ONLY) == True
|
|
161
196
|
assert is_url("https://example.com", only_schemes=HTTP_ONLY) == True
|
|
197
|
+
|
|
198
|
+
assert is_url("invalid-url") == False
|
|
199
|
+
assert is_url("www.example.com") == False
|
|
162
200
|
assert is_url("ftp://example.com", only_schemes=HTTP_ONLY) == False
|
|
163
201
|
assert is_url("file:///path/to/file", only_schemes=HTTP_ONLY) == False
|
|
164
202
|
|
|
203
|
+
assert is_url("www.example.com") is False
|
|
204
|
+
assert is_url("c:\\path\\to\\file") is False
|
|
205
|
+
assert is_url("/foo/bar") is False
|
|
206
|
+
assert is_url("//foo") is False
|
|
207
|
+
|
|
165
208
|
|
|
166
209
|
def test_as_file_url():
|
|
167
210
|
assert as_file_url("file:///path/to/file") == "file:///path/to/file"
|
|
@@ -205,3 +248,37 @@ def test_normalize_url():
|
|
|
205
248
|
str(e)
|
|
206
249
|
== "Scheme 'ftp' not in allowed schemes: ['http', 'https', 'file']: ftp://example.com"
|
|
207
250
|
)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def test_is_path():
|
|
254
|
+
assert is_valid_path("foo/bar") is True
|
|
255
|
+
assert is_valid_path("/foo/bar") is True
|
|
256
|
+
assert is_valid_path("./foo/bar") is True
|
|
257
|
+
assert is_valid_path("../foo/bar") is True
|
|
258
|
+
assert is_valid_path("foo.txt") is True
|
|
259
|
+
assert is_valid_path(Path("foo/bar")) is True
|
|
260
|
+
assert is_valid_path(Path()) is True
|
|
261
|
+
assert is_valid_path(".") is True
|
|
262
|
+
assert is_valid_path("..") is True
|
|
263
|
+
assert is_valid_path("C:\\Users\\name") is True # Windows-style
|
|
264
|
+
assert is_valid_path("file_with:colon.txt") is True # Valid on POSIX
|
|
265
|
+
assert is_valid_path(Url("relative/path")) is True # Url type with relative content
|
|
266
|
+
|
|
267
|
+
assert is_valid_path("http://example.com") is False
|
|
268
|
+
assert is_valid_path("https://example.com/path") is False
|
|
269
|
+
assert is_valid_path("file:///path/to/file") is False
|
|
270
|
+
assert is_valid_path(Url("http://example.com")) is False
|
|
271
|
+
assert is_valid_path("") is False
|
|
272
|
+
assert is_valid_path(" ") is False
|
|
273
|
+
assert is_valid_path("foo\0bar.txt") is False
|
|
274
|
+
assert is_valid_path(None) is False # pyright: ignore
|
|
275
|
+
assert is_valid_path(123) is False # pyright: ignore
|
|
276
|
+
|
|
277
|
+
# Edge cases
|
|
278
|
+
assert is_valid_path("www.example.com") is True # No scheme
|
|
279
|
+
assert str(Path("")) == "."
|
|
280
|
+
assert str(Path(" ")) == " "
|
|
281
|
+
assert is_valid_path(Path(" ")) is True # A bad idea but allowed
|
|
282
|
+
assert is_valid_path(Path("")) is True
|
|
283
|
+
assert is_valid_path(" ") is False
|
|
284
|
+
assert is_valid_path("") is False
|
|
kash/utils/file_utils/file_formats.py
CHANGED
|
@@ -11,9 +11,10 @@ from kash.config.logger import get_logger
|
|
|
11
11
|
log = get_logger(__name__)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def
|
|
14
|
+
def is_fullpage_html(content: str) -> bool:
|
|
15
15
|
"""
|
|
16
|
-
A full HTML document that is
|
|
16
|
+
A full HTML document that is a full page (headers, footers, etc.) and
|
|
17
|
+
so probably best rendered in a browser.
|
|
17
18
|
"""
|
|
18
19
|
return bool(re.search(r"<!DOCTYPE html>|<html>|<body>|<head>", content[:2048], re.IGNORECASE))
|
|
19
20
|
|
|
kash/utils/file_utils/file_formats_model.py
CHANGED
|
@@ -4,7 +4,7 @@ from dataclasses import dataclass
|
|
|
4
4
|
from enum import Enum
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
|
|
7
|
-
from kash.utils.common.url import
|
|
7
|
+
from kash.utils.common.url import is_valid_path
|
|
8
8
|
from kash.utils.file_utils.file_ext import FileExt
|
|
9
9
|
from kash.utils.file_utils.file_formats import (
|
|
10
10
|
MIME_EMPTY,
|
|
@@ -143,7 +143,13 @@ class Format(Enum):
|
|
|
143
143
|
|
|
144
144
|
@property
|
|
145
145
|
def is_markdown(self) -> bool:
|
|
146
|
-
|
|
146
|
+
"""Is in pure Markdown (no HTML)."""
|
|
147
|
+
return self in [self.markdown]
|
|
148
|
+
|
|
149
|
+
@property
|
|
150
|
+
def is_markdown_with_html(self) -> bool:
|
|
151
|
+
"""Is in Markdown with HTML."""
|
|
152
|
+
return self in [self.md_html]
|
|
147
153
|
|
|
148
154
|
@property
|
|
149
155
|
def is_html(self) -> bool:
|
|
@@ -406,15 +412,6 @@ class FileFormatInfo:
|
|
|
406
412
|
return self.as_str()
|
|
407
413
|
|
|
408
414
|
|
|
409
|
-
def _guess_format(file_ext: FileExt | None, mime_type: MimeType | None) -> Format | None:
|
|
410
|
-
format = None
|
|
411
|
-
if file_ext:
|
|
412
|
-
format = Format.guess_by_file_ext(file_ext)
|
|
413
|
-
if not format and mime_type:
|
|
414
|
-
format = Format.from_mime_type(mime_type)
|
|
415
|
-
return format
|
|
416
|
-
|
|
417
|
-
|
|
418
415
|
def guess_format_by_name(path: str | Path) -> Format | None:
|
|
419
416
|
"""
|
|
420
417
|
Fast guess of file format by the file name only.
|
|
@@ -423,22 +420,39 @@ def guess_format_by_name(path: str | Path) -> Format | None:
|
|
|
423
420
|
return Format.guess_by_file_ext(file_ext) if file_ext else None
|
|
424
421
|
|
|
425
422
|
|
|
426
|
-
def file_format_info(
|
|
423
|
+
def file_format_info(
|
|
424
|
+
path: str | Path,
|
|
425
|
+
suggested_mime_type: MimeType | None = None,
|
|
426
|
+
) -> FileFormatInfo:
|
|
427
427
|
"""
|
|
428
428
|
Get info on the file format path and content (file extension and file content).
|
|
429
429
|
Looks at the file extension first and then the file content if needed.
|
|
430
|
-
If `
|
|
431
|
-
|
|
430
|
+
If `suggested_mime_type` is provided, it will be used as the detected mime type
|
|
431
|
+
instead of detecting it from the file content.
|
|
432
432
|
"""
|
|
433
|
+
if not is_valid_path(path):
|
|
434
|
+
raise ValueError(f"Expected a file path but got: {path!r}")
|
|
435
|
+
|
|
433
436
|
path = Path(path)
|
|
434
437
|
file_ext = parse_file_ext(path)
|
|
435
|
-
if
|
|
438
|
+
if not suggested_mime_type and not file_ext:
|
|
436
439
|
# Look at the file content.
|
|
437
440
|
detected_mime_type = detect_mime_type(path)
|
|
441
|
+
elif suggested_mime_type:
|
|
442
|
+
detected_mime_type = suggested_mime_type
|
|
438
443
|
else:
|
|
439
444
|
detected_mime_type = None
|
|
440
|
-
|
|
445
|
+
|
|
446
|
+
# Pick format first by file extension, then by detected mime type.
|
|
447
|
+
format = None
|
|
448
|
+
if file_ext:
|
|
449
|
+
format = Format.guess_by_file_ext(file_ext)
|
|
450
|
+
if not format and detected_mime_type:
|
|
451
|
+
format = Format.from_mime_type(detected_mime_type)
|
|
452
|
+
|
|
453
|
+
# Attempt to canonicalize the mime type to match the format.
|
|
441
454
|
final_mime_type = format.mime_type if format else detected_mime_type
|
|
455
|
+
|
|
442
456
|
return FileFormatInfo(file_ext, format, final_mime_type)
|
|
443
457
|
|
|
444
458
|
|
|
@@ -456,35 +470,3 @@ def detect_media_type(filename: str | Path) -> MediaType:
|
|
|
456
470
|
fmt = detect_file_format(filename)
|
|
457
471
|
media_type = fmt.media_type if fmt else MediaType.binary
|
|
458
472
|
return media_type
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
def choose_file_ext(
|
|
462
|
-
url_or_path: Url | Path | str, mime_type: MimeType | None = None
|
|
463
|
-
) -> FileExt | None:
|
|
464
|
-
"""
|
|
465
|
-
Pick a file extension to reflect the type of the content. First tries from any
|
|
466
|
-
provided content type (e.g. if this item was just downloaded). Then
|
|
467
|
-
recognizes known file extensions on the filename or URL, then tries looking
|
|
468
|
-
at the content with libmagic and heuristics, then gives up.
|
|
469
|
-
"""
|
|
470
|
-
if mime_type:
|
|
471
|
-
fmt = Format.from_mime_type(mime_type)
|
|
472
|
-
if fmt:
|
|
473
|
-
return fmt.file_ext
|
|
474
|
-
|
|
475
|
-
# First check if it's a known standard extension.
|
|
476
|
-
filename_ext = parse_file_ext(url_or_path)
|
|
477
|
-
if filename_ext:
|
|
478
|
-
return filename_ext
|
|
479
|
-
|
|
480
|
-
local_path = None
|
|
481
|
-
if isinstance(url_or_path, str) and is_file_url(url_or_path):
|
|
482
|
-
local_path = parse_file_url(url_or_path)
|
|
483
|
-
elif not is_url(url_or_path):
|
|
484
|
-
local_path = Path(url_or_path)
|
|
485
|
-
|
|
486
|
-
# If it's local based the extension on the file content.
|
|
487
|
-
if local_path:
|
|
488
|
-
return file_format_info(local_path).suggested_file_ext
|
|
489
|
-
|
|
490
|
-
return None
|
|
@@ -10,10 +10,17 @@ from funlog import log_if_modifies
|
|
|
10
10
|
from prettyfmt import fmt_path
|
|
11
11
|
from strif import atomic_output_file, copyfile_atomic
|
|
12
12
|
|
|
13
|
-
from kash.utils.common.url import
|
|
13
|
+
from kash.utils.common.url import (
|
|
14
|
+
Url,
|
|
15
|
+
is_file_url,
|
|
16
|
+
is_url,
|
|
17
|
+
is_valid_path,
|
|
18
|
+
normalize_url,
|
|
19
|
+
parse_file_url,
|
|
20
|
+
)
|
|
14
21
|
from kash.utils.errors import FileNotFound
|
|
15
|
-
from kash.utils.file_utils.
|
|
16
|
-
from kash.utils.file_utils.
|
|
22
|
+
from kash.utils.file_utils.file_formats_model import file_format_info
|
|
23
|
+
from kash.utils.file_utils.filename_parsing import parse_file_ext
|
|
17
24
|
from kash.web_content.dir_store import DirStore
|
|
18
25
|
from kash.web_content.web_fetch import HttpHeaders, download_url
|
|
19
26
|
|
|
@@ -91,9 +98,25 @@ class CacheResult:
|
|
|
91
98
|
was_cached: bool
|
|
92
99
|
|
|
93
100
|
|
|
94
|
-
def _suffix_for(cacheable: Cacheable
|
|
101
|
+
def _suffix_for(cacheable: Cacheable) -> str | None:
|
|
95
102
|
key = cacheable.key if isinstance(cacheable, Loadable) else cacheable
|
|
96
|
-
|
|
103
|
+
|
|
104
|
+
# Check for recognized file extensions on URLs and Paths.
|
|
105
|
+
filename_ext = parse_file_ext(str(key))
|
|
106
|
+
if filename_ext:
|
|
107
|
+
return filename_ext.dot_ext
|
|
108
|
+
|
|
109
|
+
# Handle local paths
|
|
110
|
+
if is_file_url(str(key)):
|
|
111
|
+
path = parse_file_url(str(key))
|
|
112
|
+
elif is_valid_path(str(key)):
|
|
113
|
+
path = Path(str(key))
|
|
114
|
+
else:
|
|
115
|
+
# A non-local path with no recognized extension.
|
|
116
|
+
return None
|
|
117
|
+
|
|
118
|
+
# If it's a local file, check the file content too.
|
|
119
|
+
file_ext = file_format_info(path).suggested_file_ext
|
|
97
120
|
return file_ext.dot_ext if file_ext else None
|
|
98
121
|
|
|
99
122
|
|
|
@@ -4,10 +4,10 @@ kash/actions/__init__.py,sha256=a4pQw8O-Y3q5N4Qg2jUV0xEZLX6d164FQhZ6zizY9fE,1357
|
|
|
4
4
|
kash/actions/core/assistant_chat.py,sha256=28G20cSr7Z94cltouTPve5TXY3km0lACrRvpLE27fK8,1837
|
|
5
5
|
kash/actions/core/chat.py,sha256=yCannBFa0cSpR_in-XSSuMm1x2ZZQUCKmlqzhsUfpOo,2696
|
|
6
6
|
kash/actions/core/format_markdown_template.py,sha256=ZJbtyTSypPo2ewLiGRSyIpVf711vQMhI_-Ng-FgCs80,2991
|
|
7
|
-
kash/actions/core/markdownify.py,sha256=
|
|
8
|
-
kash/actions/core/readability.py,sha256=
|
|
9
|
-
kash/actions/core/render_as_html.py,sha256=
|
|
10
|
-
kash/actions/core/show_webpage.py,sha256=
|
|
7
|
+
kash/actions/core/markdownify.py,sha256=KjdUeY4c9EhZ5geQrn22IoBv0P_p62q4zyyOYE0NRHM,1270
|
|
8
|
+
kash/actions/core/readability.py,sha256=ljdB2rOpzfKU2FpEJ2UELIzcdOAWvdUjFsxoHRTE3xo,989
|
|
9
|
+
kash/actions/core/render_as_html.py,sha256=bSyZdX9nZnP33QBdGSzWhInRREWXWayMG2oyiKn4rxw,1824
|
|
10
|
+
kash/actions/core/show_webpage.py,sha256=Ggba9jkx9U-FZOcuL0lkS-SwtPNUyxVsGdeQrqwWs1s,887
|
|
11
11
|
kash/actions/core/strip_html.py,sha256=FDLN_4CKB11q5cU4NixTf7PGrAq92AjQNbKAdvQDwCY,849
|
|
12
12
|
kash/actions/core/summarize_as_bullets.py,sha256=Zwr8lNzL77pwpnW_289LQjNBijNDpTPANfFdOJA-PZ4,2070
|
|
13
13
|
kash/actions/core/tabbed_webpage_config.py,sha256=rIbzEhBTmnkbSiRZC-Rj46T1J6c0jOztiKE9Usa4nsc,980
|
|
@@ -34,7 +34,7 @@ kash/commands/help/help_commands.py,sha256=eJTpIhXck123PAUq2k-D3Q6UL6IQ8atOVYurL
|
|
|
34
34
|
kash/commands/help/logo.py,sha256=W8SUach9FjoTqpHZwTGS582ry4ZluxbBp86ZCiAtDkY,3505
|
|
35
35
|
kash/commands/help/welcome.py,sha256=F4QBgj3e1dM9Pf0H4TSzCrkVfXQVKUIl0b6Qmofbdo4,905
|
|
36
36
|
kash/commands/workspace/selection_commands.py,sha256=yr0fFPlFIJUPHyFni1byXz8UDvYstIw4oRpOMa8iOBo,7428
|
|
37
|
-
kash/commands/workspace/workspace_commands.py,sha256=
|
|
37
|
+
kash/commands/workspace/workspace_commands.py,sha256=smPNGmY8y7gcmh0hAFOf4GYYMuNAoqkf3kRMJamYcMQ,24768
|
|
38
38
|
kash/config/__init__.py,sha256=ytly9Typ1mWV4CXfV9G3CIPtPQ02u2rpZ304L3GlFro,148
|
|
39
39
|
kash/config/capture_output.py,sha256=ud3uUVNuDicHj3mI_nBUBO-VmOrxtBdA3z-I3D1lSCU,2398
|
|
40
40
|
kash/config/colors.py,sha256=6lqrB2RQYF2OLw-njfOqVHO9Bwiq7bW6K1ROCOAd1EM,9949
|
|
@@ -82,7 +82,7 @@ kash/embeddings/embeddings.py,sha256=v6RmrEHsx5PuE3fPrY15RK4fgW0K_VlNWDTjCVr11zY
|
|
|
82
82
|
kash/embeddings/text_similarity.py,sha256=BOo9Vcs5oi2Zs5La56uTkPMHo65XSd4qz_yr6GTfUA4,1924
|
|
83
83
|
kash/exec/__init__.py,sha256=rdSsKzTaXfSZmD5JvmUSSwmpfvl-moNv9PUgtE_WUpQ,1148
|
|
84
84
|
kash/exec/action_decorators.py,sha256=VOSCnFiev2_DuFoSk0i_moejwM4wJ1j6QfsQd93uetI,16480
|
|
85
|
-
kash/exec/action_exec.py,sha256=
|
|
85
|
+
kash/exec/action_exec.py,sha256=wndn9WsH9dGIzRjbiNCLfHHKZPlTzlFp-eogDvqkfbI,18500
|
|
86
86
|
kash/exec/action_registry.py,sha256=numU9pH_W5RgIrYmfi0iYMYy_kLJl6vup8PMrhxAfdc,2627
|
|
87
87
|
kash/exec/combiners.py,sha256=AJ6wgPUHsmwanObsUw64B83XzU26yuh5t4l7igLn82I,4291
|
|
88
88
|
kash/exec/command_exec.py,sha256=zc-gWm7kyB5J5Kp8xhULQ9Jj9AL927KkDPXXk-Yr1Bw,1292
|
|
@@ -93,7 +93,7 @@ kash/exec/importing.py,sha256=xunmBapeUMNc6Zox7y6e_DZkidyWeouiFZpphajwSzc,1843
|
|
|
93
93
|
kash/exec/llm_transforms.py,sha256=p_aLp70VoIgheW4v8uoweeuEVWj06AzQekvn_jM3B-g,4378
|
|
94
94
|
kash/exec/precondition_checks.py,sha256=HymxL7qm4Yz8V76Um5pKdIRnQ2N-p9rpQQi1fI38bNA,2139
|
|
95
95
|
kash/exec/precondition_registry.py,sha256=cmp0mUfLS42AbAByDhwGx8GWz9PuZNR7z5rPZW9WQE4,1244
|
|
96
|
-
kash/exec/preconditions.py,sha256=
|
|
96
|
+
kash/exec/preconditions.py,sha256=kJXJQwqwsGBmzbrYy8s-soJeY8-gXx5ahbBPSqo7UvY,4965
|
|
97
97
|
kash/exec/resolve_args.py,sha256=yGU6Jjzn5yyAN9pNZx8Qfc9oBrosFEdazIs5g9pjWTs,4410
|
|
98
98
|
kash/exec/runtime_settings.py,sha256=aK6nGbZhKSIDVmV6AqV68hQkiaIGWnCiNzHtwwZ5V0w,3960
|
|
99
99
|
kash/exec/shell_callable_action.py,sha256=x-Hs4EqpsZfKEcwhWkhc27HCIfoI91b-DrbG40BLxRY,4350
|
|
@@ -103,7 +103,7 @@ kash/exec_model/commands_model.py,sha256=iM8QhzA0tAas5OwF5liUfHtm45XIH1LcvCviuh3
|
|
|
103
103
|
kash/exec_model/script_model.py,sha256=1VG3LhkTmlKzHOYouZ92ZpOSKSCcsz3-tHNcFMQF788,5031
|
|
104
104
|
kash/exec_model/shell_model.py,sha256=LUhQivbpXlerM-DUzNY7BtctNBbn08Wto8CSSxQDxRU,568
|
|
105
105
|
kash/file_storage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
106
|
-
kash/file_storage/file_store.py,sha256=
|
|
106
|
+
kash/file_storage/file_store.py,sha256=sqhNFZtWDgaJbR8ah-yCYuaVbLKzs7XpAcYRfU9rtss,29425
|
|
107
107
|
kash/file_storage/item_file_format.py,sha256=YAz7VqyfIoiSLQOoFdWsp-FI_2tTLXAPi8V8QXbo5ag,5475
|
|
108
108
|
kash/file_storage/metadata_dirs.py,sha256=9AqO3S3SSY1dtvP2iLX--E4ui0VIzXttG8R040otfyg,3820
|
|
109
109
|
kash/file_storage/persisted_yaml.py,sha256=4-4RkFqdlBUkTOwkdA4vRKUywEE9TaDo13OGaDUyU9M,1309
|
|
@@ -164,11 +164,11 @@ kash/model/compound_actions_model.py,sha256=HiDK5wwCu3WwZYHATZoLEguiqwR9V6V296wi
|
|
|
164
164
|
kash/model/concept_model.py,sha256=we2qOcy9Mv1q7XPfkDLp_CyO_-8DwAUfUYlpgy_jrFs,1011
|
|
165
165
|
kash/model/exec_model.py,sha256=IlfvtQyoFRRWhWju7vdXp9J-w_NGcGtL5DhDLy9gRd8,2250
|
|
166
166
|
kash/model/graph_model.py,sha256=jnctrPiBZ0xwAR8D54JMAJPanA1yZdaxSFQoIpe8anA,2662
|
|
167
|
-
kash/model/items_model.py,sha256=
|
|
167
|
+
kash/model/items_model.py,sha256=429FXlEsKxUFCqT_Z5t2zAFcfVEpjOGMdvz7q4hMEtw,34891
|
|
168
168
|
kash/model/language_list.py,sha256=I3RIbxTseVmPdhExQimimEv18Gmy2ImMbpXe0-_t1Qw,450
|
|
169
169
|
kash/model/llm_actions_model.py,sha256=a29uXVNfS2CiqvM7HPdC6H9A23rSQQihAideuBLMH8g,2110
|
|
170
170
|
kash/model/media_model.py,sha256=64Zic4cRjQpgf_-tOuZlZZe59mz_qu0s6OQSU0YlDUI,3357
|
|
171
|
-
kash/model/operations_model.py,sha256=
|
|
171
|
+
kash/model/operations_model.py,sha256=WmU-xeWGsqMLVN369dQEyVGU8T7G_KyLLsj6YFc5sVw,6517
|
|
172
172
|
kash/model/params_model.py,sha256=qGhsGvtDQoSqWkrKk9QZZfEh-jO1q2V-s-p6X-F37_M,14939
|
|
173
173
|
kash/model/paths_model.py,sha256=KDFm7wan7hjObHbnV2rR8-jsyLTVqbKcwFdKeLFRtdM,15889
|
|
174
174
|
kash/model/preconditions_model.py,sha256=-IfsVR0NkQhq_3hUTXzK2bFYAd--3YjSwUiDKHVQQqk,2887
|
|
@@ -195,7 +195,7 @@ kash/shell/ui/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
195
195
|
kash/shell/ui/shell_results.py,sha256=mvFHxK_oz3bNfF5_Twt6VqDO44TA1b256Bjf5oco804,4130
|
|
196
196
|
kash/shell/ui/shell_syntax.py,sha256=1fuDqcCV16AAWwWS4w4iT-tlSnl-Ywdrf68Ge8XIfmQ,751
|
|
197
197
|
kash/shell/utils/exception_printing.py,sha256=UizjOkBPhW6YbkiFP965BE5FrCwn04MXGDbxyTuyvOk,1908
|
|
198
|
-
kash/shell/utils/native_utils.py,sha256=
|
|
198
|
+
kash/shell/utils/native_utils.py,sha256=pAiuqqrjfNTesdArSya6CVavKVsuAXOcX3_XAIQrWtE,9151
|
|
199
199
|
kash/shell/utils/shell_function_wrapper.py,sha256=fgUuVhocYMKLkGJJQJOER5nFMAvM0ZVpfGu7iJPJI9s,7385
|
|
200
200
|
kash/utils/__init__.py,sha256=4Jl_AtgRADdGORimWhYZwbSfQSpQ6SiexNIZzmbcngI,111
|
|
201
201
|
kash/utils/errors.py,sha256=2lPL0fxI8pPOiDvjl0j-rvwY8uhmWetsrYYIc2-x1WY,3906
|
|
@@ -211,13 +211,13 @@ kash/utils/common/stack_traces.py,sha256=a2NwlK_0xxnjMCDC4LrQu7ueFylF-OImFG3bAAH
|
|
|
211
211
|
kash/utils/common/task_stack.py,sha256=XkeBz3BwYY1HxxTqd3f7CulV0s61PePAKw1Irrtvf5o,4536
|
|
212
212
|
kash/utils/common/type_utils.py,sha256=SJirXhPilQom_-OKkFToDLm_82ZwpjcNjRy8U1HaQ0Q,3829
|
|
213
213
|
kash/utils/common/uniquifier.py,sha256=75OY4KIVF8u1eoO0FCPbEGTyVpPOtM-0ctoG_s_jahM,3082
|
|
214
|
-
kash/utils/common/url.py,sha256=
|
|
214
|
+
kash/utils/common/url.py,sha256=R_P-CkOUiFxVdo9COcaL7YFvFIoAULj5-XxvmlFLvzo,9416
|
|
215
215
|
kash/utils/file_formats/chat_format.py,sha256=Onby7Zany1UQSUo_JzLs6MIfmoXViZeOAacRTMVe92M,11818
|
|
216
216
|
kash/utils/file_utils/__init__.py,sha256=loL_iW0oOZs0mJ5GelBPptBcqzYKSWdsGcHrpRyxitQ,43
|
|
217
217
|
kash/utils/file_utils/dir_info.py,sha256=HamMr58k_DanTLifj7A2JDxTGWXEZZx2pQuE6Hjcm8g,1856
|
|
218
218
|
kash/utils/file_utils/file_ext.py,sha256=-H63vlrVI3pfE2Cn_9qF7-QLDaUIu_njc4TieNgAHSY,1860
|
|
219
|
-
kash/utils/file_utils/file_formats.py,sha256=
|
|
220
|
-
kash/utils/file_utils/file_formats_model.py,sha256=
|
|
219
|
+
kash/utils/file_utils/file_formats.py,sha256=vnihRFLl85G1uzpqDc_uiGH9SIvbFTYVszz3srdSSz0,4949
|
|
220
|
+
kash/utils/file_utils/file_formats_model.py,sha256=0rwWlkgMaZu5Ap7IFPt3poA1owdwFCfgGGtvu__15GY,15099
|
|
221
221
|
kash/utils/file_utils/file_sort_filter.py,sha256=_k1chT3dJl5lSmKA2PW90KaoG4k4zftGdtwWoNEljP4,7136
|
|
222
222
|
kash/utils/file_utils/file_walk.py,sha256=cpwVDPuaVm95_ZwFJiAdIuZAGhASI3gJ3ZUsCGP75b8,5527
|
|
223
223
|
kash/utils/file_utils/filename_parsing.py,sha256=drHrH2B9W_5yAbXURNGJxNqj9GmTe8FayH6Gjw9e4-U,4194
|
|
@@ -241,7 +241,7 @@ kash/web_content/canon_url.py,sha256=Zv2q7xQdIHBFkxxwyJn3_ME-qqMFRi_fKxE_IgV2Z50
|
|
|
241
241
|
kash/web_content/dir_store.py,sha256=BJc-s-RL5CC-GwhFTC_lhLXSMWluPPnLVmVBx-66DiM,3425
|
|
242
242
|
kash/web_content/file_cache_utils.py,sha256=JRXUCAmrc83iAgdiICU2EYGWcoORflWNl6GAVq-O80I,5529
|
|
243
243
|
kash/web_content/file_processing.py,sha256=cQC-MnJMM5qG9-y0S4yobkmRi6A75qhHjV6xTwbtYDY,1904
|
|
244
|
-
kash/web_content/local_file_cache.py,sha256=
|
|
244
|
+
kash/web_content/local_file_cache.py,sha256=PEDKU5VIwhCnSC-HXG4EkO2OzrOUDuuDBMuo3lP2EN0,9466
|
|
245
245
|
kash/web_content/web_extract.py,sha256=LbuG4AFEeIiXyUrN9CAxX0ret41Fqu_iTJSjIWyk3Bg,2296
|
|
246
246
|
kash/web_content/web_extract_justext.py,sha256=74HLJBKDGKatwxyRDX6za70bZG9LrVmtj9jLX7UJzg4,2540
|
|
247
247
|
kash/web_content/web_extract_readabilipy.py,sha256=IT7ET5IoU2-Nf37-Neh6CkKMvLL3WTNVJjq7ZMOx6OM,808
|
|
@@ -263,7 +263,6 @@ kash/workspaces/param_state.py,sha256=vT_eGWqg2SRviIM5jqEAauznX2B5Xt2nHHu2oRxTcI
|
|
|
263
263
|
kash/workspaces/selections.py,sha256=rEUuQlrQ3C_54bzBSKDTTptgX8oZPqN0Ao4uaXSWA-Q,12003
|
|
264
264
|
kash/workspaces/source_items.py,sha256=Pwnw3OhjR2IJEMEeHf6hpKloj-ellM5vsY7LgkGevRY,2861
|
|
265
265
|
kash/workspaces/workspace_dirs.py,sha256=kjuY4t7mSSXq00fZmln7p9TWq4kAZoPTCDM0DG7uEaI,1545
|
|
266
|
-
kash/workspaces/workspace_importing.py,sha256=4IJo713Kuoynhd_lcZF9M_DZ0rrMK_IDfhTVgwKmVyQ,1934
|
|
267
266
|
kash/workspaces/workspace_output.py,sha256=MMg_KumkHKFGc0DOUFaW5ImpgqIfdlsLtvXbLEt1hwI,5692
|
|
268
267
|
kash/workspaces/workspace_registry.py,sha256=SQt2DZgBEu95Zj9fpy67XdJPgJyKFDCU2laSuiZswNo,2200
|
|
269
268
|
kash/workspaces/workspaces.py,sha256=kQyS3F57Y9A9xVT_Ss7HzJhDGlI-UXHKvRDnEVkBnik,6764
|
|
@@ -280,8 +279,8 @@ kash/xonsh_custom/xonsh_modern_tools.py,sha256=mj_b34LZXfE8MJe9EpDmp5JZ0tDM1biYN
|
|
|
280
279
|
kash/xonsh_custom/xonsh_ranking_completer.py,sha256=ZRGiAfoEgqgnlq2-ReUVEaX5oOgW1DQ9WxIv2OJLuTo,5620
|
|
281
280
|
kash/xontrib/fnm.py,sha256=V2tsOdmIDgbFbZSfMLpsvDIwwJJqiYnOkOySD1cXNXw,3700
|
|
282
281
|
kash/xontrib/kash_extension.py,sha256=JRRJC3cZSMOl4sSWEdKAQ_dVRMubWaOltKr8G0dWt6Y,1876
|
|
283
|
-
kash_shell-0.3.
|
|
284
|
-
kash_shell-0.3.
|
|
285
|
-
kash_shell-0.3.
|
|
286
|
-
kash_shell-0.3.
|
|
287
|
-
kash_shell-0.3.
|
|
282
|
+
kash_shell-0.3.14.dist-info/METADATA,sha256=w_L4jxifwPdsDvYqMRJZSbhc3u9bV5mYORRcbXHDj9k,31258
|
|
283
|
+
kash_shell-0.3.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
284
|
+
kash_shell-0.3.14.dist-info/entry_points.txt,sha256=SQraWDAo8SqYpthLXThei0mf_hGGyhYBUO-Er_0HcwI,85
|
|
285
|
+
kash_shell-0.3.14.dist-info/licenses/LICENSE,sha256=rCh2PsfYeiU6FK_0wb58kHGm_Fj5c43fdcHEexiVzIo,34562
|
|
286
|
+
kash_shell-0.3.14.dist-info/RECORD,,
|
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
from kash.config.logger import get_logger
|
|
4
|
-
from kash.file_storage.file_store import FileStore
|
|
5
|
-
from kash.model.items_model import Item, ItemType
|
|
6
|
-
from kash.model.paths_model import StorePath
|
|
7
|
-
from kash.utils.common.url import Locator, Url, is_url
|
|
8
|
-
from kash.utils.errors import InvalidInput
|
|
9
|
-
from kash.utils.file_utils.file_formats_model import Format
|
|
10
|
-
from kash.web_content.canon_url import canonicalize_url
|
|
11
|
-
|
|
12
|
-
# TODO: Clean this up, move into FileStore.
|
|
13
|
-
|
|
14
|
-
log = get_logger(__name__)
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def import_url(ws: FileStore, url: Url) -> Item:
|
|
18
|
-
"""
|
|
19
|
-
Import a URL as a resource. Does not fetch metadata.
|
|
20
|
-
"""
|
|
21
|
-
canon_url = canonicalize_url(url)
|
|
22
|
-
log.message(
|
|
23
|
-
"Importing URL: %s%s", canon_url, f" canonicalized from {url}" if url != canon_url else ""
|
|
24
|
-
)
|
|
25
|
-
item = Item(ItemType.resource, url=canon_url, format=Format.url)
|
|
26
|
-
# No need to overwrite any resource we already have for the identical URL.
|
|
27
|
-
store_path = ws.save(item, skip_dup_names=True)
|
|
28
|
-
# Load to fill in any metadata we may already have.
|
|
29
|
-
item = ws.load(store_path)
|
|
30
|
-
return item
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def import_and_load(ws: FileStore, locator: Locator | str) -> Item:
|
|
34
|
-
"""
|
|
35
|
-
Ensure that a URL or file path is imported into the workspace and
|
|
36
|
-
return the Item.
|
|
37
|
-
"""
|
|
38
|
-
|
|
39
|
-
if isinstance(locator, str) and is_url(locator):
|
|
40
|
-
log.message("Importing locator as URL: %r", locator)
|
|
41
|
-
item = import_url(ws, Url(locator))
|
|
42
|
-
else:
|
|
43
|
-
if isinstance(locator, StorePath):
|
|
44
|
-
log.info("Locator is in the file store: %r", locator)
|
|
45
|
-
# It's already a StorePath.
|
|
46
|
-
item = ws.load(locator)
|
|
47
|
-
else:
|
|
48
|
-
log.info("Importing locator as local path: %r", locator)
|
|
49
|
-
path = Path(locator)
|
|
50
|
-
if not path.exists():
|
|
51
|
-
raise InvalidInput(f"File not found: {path}")
|
|
52
|
-
|
|
53
|
-
store_path = ws.import_item(path)
|
|
54
|
-
item = ws.load(store_path)
|
|
55
|
-
|
|
56
|
-
return item
|
|
File without changes
|
|
File without changes
|
|
File without changes
|