kash-shell 0.3.28__py3-none-any.whl → 0.3.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/actions/core/chat.py +1 -0
- kash/actions/core/markdownify_html.py +4 -5
- kash/actions/core/minify_html.py +4 -5
- kash/actions/core/readability.py +1 -4
- kash/actions/core/render_as_html.py +10 -7
- kash/actions/core/save_sidematter_meta.py +47 -0
- kash/actions/core/show_webpage.py +2 -0
- kash/actions/core/zip_sidematter.py +47 -0
- kash/commands/base/basic_file_commands.py +7 -4
- kash/commands/base/diff_commands.py +6 -4
- kash/commands/base/files_command.py +31 -30
- kash/commands/base/general_commands.py +3 -2
- kash/commands/base/logs_commands.py +6 -4
- kash/commands/base/reformat_command.py +3 -2
- kash/commands/base/search_command.py +4 -3
- kash/commands/base/show_command.py +9 -7
- kash/commands/help/assistant_commands.py +6 -4
- kash/commands/help/help_commands.py +7 -4
- kash/commands/workspace/selection_commands.py +18 -16
- kash/commands/workspace/workspace_commands.py +39 -26
- kash/config/logger.py +1 -1
- kash/config/setup.py +2 -27
- kash/config/text_styles.py +1 -1
- kash/docs/markdown/topics/a1_what_is_kash.md +26 -18
- kash/docs/markdown/topics/a2_installation.md +3 -2
- kash/exec/action_decorators.py +7 -5
- kash/exec/action_exec.py +104 -53
- kash/exec/fetch_url_items.py +40 -11
- kash/exec/llm_transforms.py +14 -5
- kash/exec/preconditions.py +2 -2
- kash/exec/resolve_args.py +4 -1
- kash/exec/runtime_settings.py +3 -0
- kash/file_storage/file_store.py +108 -114
- kash/file_storage/item_file_format.py +91 -26
- kash/file_storage/item_id_index.py +128 -0
- kash/help/help_types.py +1 -1
- kash/llm_utils/llms.py +6 -1
- kash/local_server/local_server_commands.py +2 -1
- kash/mcp/mcp_server_commands.py +3 -2
- kash/mcp/mcp_server_routes.py +42 -12
- kash/model/actions_model.py +44 -32
- kash/model/compound_actions_model.py +4 -3
- kash/model/exec_model.py +33 -3
- kash/model/items_model.py +150 -60
- kash/model/params_model.py +4 -4
- kash/shell/output/shell_output.py +1 -2
- kash/utils/api_utils/gather_limited.py +2 -0
- kash/utils/api_utils/multitask_gather.py +74 -0
- kash/utils/common/s3_utils.py +108 -0
- kash/utils/common/url.py +16 -4
- kash/utils/file_formats/chat_format.py +7 -4
- kash/utils/file_utils/file_ext.py +1 -0
- kash/utils/file_utils/file_formats.py +4 -2
- kash/utils/file_utils/file_formats_model.py +12 -0
- kash/utils/text_handling/doc_normalization.py +1 -1
- kash/utils/text_handling/markdown_footnotes.py +224 -0
- kash/utils/text_handling/markdown_utils.py +532 -41
- kash/utils/text_handling/markdownify_utils.py +2 -1
- kash/web_content/web_fetch.py +2 -1
- kash/web_gen/templates/components/tooltip_scripts.js.jinja +186 -1
- kash/web_gen/templates/components/youtube_popover_scripts.js.jinja +223 -0
- kash/web_gen/templates/components/youtube_popover_styles.css.jinja +150 -0
- kash/web_gen/templates/content_styles.css.jinja +53 -1
- kash/web_gen/templates/youtube_webpage.html.jinja +47 -0
- kash/web_gen/webpage_render.py +103 -0
- kash/workspaces/workspaces.py +0 -5
- kash/xonsh_custom/custom_shell.py +4 -3
- {kash_shell-0.3.28.dist-info → kash_shell-0.3.33.dist-info}/METADATA +35 -26
- {kash_shell-0.3.28.dist-info → kash_shell-0.3.33.dist-info}/RECORD +72 -64
- kash/llm_utils/llm_features.py +0 -72
- kash/web_gen/simple_webpage.py +0 -55
- {kash_shell-0.3.28.dist-info → kash_shell-0.3.33.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.28.dist-info → kash_shell-0.3.33.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.28.dist-info → kash_shell-0.3.33.dist-info}/licenses/LICENSE +0 -0
kash/exec/fetch_url_items.py
CHANGED
@@ -2,7 +2,7 @@ from dataclasses import dataclass
 
 from kash.config.logger import get_logger
 from kash.exec.preconditions import is_url_resource
-from kash.model.items_model import Item, ItemType
+from kash.model.items_model import Format, Item, ItemType
 from kash.model.paths_model import StorePath
 from kash.utils.common.format_utils import fmt_loc
 from kash.utils.common.url import Url, is_url
@@ -36,7 +36,15 @@ def fetch_url_item(
     save_content: bool = True,
     refetch: bool = False,
     cache: bool = True,
+    overwrite: bool = True,
 ) -> FetchItemResult:
+    """
+    Fetch or load an URL or path. For a URL, will fetch the content and metadata and save
+    as an item in the workspace.
+
+    Returns:
+        The fetched or loaded item, already saved to the workspace.
+    """
     from kash.workspaces import current_ws
 
     ws = current_ws()
@@ -51,11 +59,22 @@ def fetch_url_item(
     else:
         raise InvalidInput(f"Not a URL or URL resource: {fmt_loc(locator)}")
 
-    return fetch_url_item_content(
+    return fetch_url_item_content(
+        item,
+        save_content=save_content,
+        refetch=refetch,
+        cache=cache,
+        overwrite=overwrite,
+    )
 
 
 def fetch_url_item_content(
-    item: Item,
+    item: Item,
+    *,
+    save_content: bool = True,
+    refetch: bool = False,
+    cache: bool = True,
+    overwrite: bool = True,
 ) -> FetchItemResult:
     """
     Fetch content and metadata for a URL using a media service if we
@@ -67,8 +86,11 @@ def fetch_url_item_content(
 
     If `cache` is true, the content is also cached in the local file cache.
 
-
-
+    If `overwrite` is true, the item is saved at the same location every time.
+    This is useful to keep resource filenames consistent.
+
+    Returns:
+        The fetched or loaded item, already saved to the workspace.
     """
     from kash.media_base.media_services import get_media_metadata
     from kash.web_content.canon_url import canonicalize_url
@@ -76,9 +98,11 @@ def fetch_url_item_content(
     from kash.workspaces import current_ws
 
     ws = current_ws()
-
+    # We could check for description too, but many pages don't have one.
+    has_key_content = item.title and (not item.has_body or item.body)
+    if not refetch and has_key_content:
         log.info(
-            "Already have title
+            "Already have title so assuming metadata is up to date, will not fetch: %s",
             item.fmt_loc(),
         )
         return FetchItemResult(item, was_cached=True)
@@ -109,7 +133,10 @@ def fetch_url_item_content(
         url_item = item.merged_copy(url_item)
     else:
         page_data = fetch_page_content(url, refetch=refetch, cache=cache)
-        url_item =
+        url_item = Item(
+            type=ItemType.resource,
+            format=Format.url,
+            url=url,
             title=page_data.title or item.title,
             description=page_data.description or item.description,
             thumbnail_url=page_data.thumbnail_url or item.thumbnail_url,
@@ -128,10 +155,10 @@ def fetch_url_item_content(
         log.warning("Failed to fetch page data: title is missing: %s", item.url)
 
     # Now save the updated URL item and also the content item if we have one.
-    ws.save(url_item)
+    ws.save(url_item, overwrite=overwrite)
     assert url_item.store_path
     if content_item:
-        ws.save(content_item)
+        ws.save(content_item, overwrite=overwrite)
         assert content_item.store_path
         log.info(
            "Saved both URL and content item: %s, %s",
@@ -144,4 +171,6 @@ def fetch_url_item_content(
     was_cached = bool(
         not page_data or (page_data.cache_result and page_data.cache_result.was_cached)
     )
-    return FetchItemResult(
+    return FetchItemResult(
+        item=content_item or url_item, was_cached=was_cached, page_data=page_data
+    )
kash/exec/llm_transforms.py
CHANGED
@@ -1,4 +1,5 @@
 from dataclasses import replace
+from typing import Unpack
 
 from chopdiff.docs import DiffFilter, TextDoc
 from chopdiff.transforms import WindowSettings, filtered_transform
@@ -12,7 +13,7 @@ from kash.llm_utils.fuzzy_parsing import strip_markdown_fence
 from kash.llm_utils.llm_completion import llm_template_completion
 from kash.llm_utils.llm_messages import Message, MessageTemplate
 from kash.model.actions_model import LLMOptions
-from kash.model.items_model import Item
+from kash.model.items_model import Item, ItemType, ItemUpdateOptions
 from kash.utils.errors import InvalidInput
 from kash.utils.file_utils.file_formats_model import Format
 from kash.utils.text_handling.doc_normalization import normalize_formatting
@@ -88,10 +89,11 @@ def llm_transform_str(options: LLMOptions, input_str: str, check_no_results: boo
 def llm_transform_item(
     item: Item,
     model: LLMName | None = None,
+    *,
     normalize: bool = True,
     strip_fence: bool = True,
     check_no_results: bool = True,
-
+    **updates: Unpack[ItemUpdateOptions],
 ) -> Item:
     """
     Main function for running an LLM action on an item.
@@ -99,6 +101,14 @@ def llm_transform_item(
     Model may be overridden by an explicit model parameter.
     Also by default cleans up and normalizes output as Markdown.
     """
+    # Default to Markdown docs.
+    if "format" not in updates:
+        updates["format"] = Format.markdown
+    if "type" not in updates:
+        updates["type"] = ItemType.doc
+    if "body" not in updates:
+        updates["body"] = None
+
     if not item.context:
         raise InvalidInput(f"LLM actions expect a context on input item: {item}")
     action = item.context.action
@@ -112,13 +122,12 @@ def llm_transform_item(
     log.message("LLM transform from action `%s` on item: %s", action.name, item)
     log.message("LLM options: %s", action.llm_options)
 
-
-    result_item = item.derived_copy(body=None, format=format)
+    result_item = item.derived_copy(**updates)
     result_str = llm_transform_str(llm_options, item.body, check_no_results=check_no_results)
     if strip_fence:
         result_str = strip_markdown_fence(result_str)
     if normalize:
-        result_str = normalize_formatting(result_str, format=format)
+        result_str = normalize_formatting(result_str, format=updates["format"])
 
     result_item.body = result_str
     return result_item
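A sketch of the new `**updates` pass-through: any `ItemUpdateOptions` keys given by the caller are forwarded to `item.derived_copy(**updates)`, and anything omitted falls back to the Markdown-doc defaults set at the top of the function.

```python
# Sketch only: `item` is assumed to be an Item whose `context` was set by the
# running action, as the function requires.
from kash.exec.llm_transforms import llm_transform_item

# With no overrides, the defaults above apply: format=Format.markdown,
# type=ItemType.doc, body=None, all forwarded to item.derived_copy(**updates).
result = llm_transform_item(item, normalize=True, strip_fence=True)
```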
kash/exec/preconditions.py
CHANGED
@@ -69,7 +69,7 @@ def is_instructions(item: Item) -> bool:
 
 @kash_precondition
 def is_url_resource(item: Item) -> bool:
-    return bool(item.type == ItemType.resource and item.url)
+    return bool(item.type == ItemType.resource and item.format == Format.url and item.url)
 
 
 @kash_precondition
@@ -126,7 +126,7 @@ def has_markdown_with_html_body(item: Item) -> bool:
 
 @kash_precondition
 def has_fullpage_html_body(item: Item) -> bool:
-    return bool(
+    return bool(has_html_compatible_body(item) and item.body and is_fullpage_html(item.body))
 
 
 @kash_precondition
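The tightened `is_url_resource` check now also requires `format == Format.url`, so an item that merely carries a `url` field no longer qualifies. A small illustrative sketch (the keyword `Item(...)` construction mirrors the one in the fetch_url_items diff above; `Format.html` is an assumption here):

```python
from kash.exec.preconditions import is_url_resource
from kash.model.items_model import Format, Item, ItemType

url_res = Item(type=ItemType.resource, format=Format.url, url="https://example.com")
html_res = Item(type=ItemType.resource, format=Format.html, url="https://example.com")

assert is_url_resource(url_res)
assert not is_url_resource(html_res)  # has a url, but format is not Format.url
```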
kash/exec/resolve_args.py
CHANGED
@@ -118,10 +118,13 @@ def import_locator_args(
     *locators_or_strs: UnresolvedLocator,
     as_type: ItemType = ItemType.resource,
     reimport: bool = False,
+    with_sidematter: bool = False,
 ) -> list[StorePath]:
     """
     Import locators into the current workspace.
     """
     locators = [resolve_locator_arg(loc) for loc in locators_or_strs]
     ws = current_ws()
-    return ws.import_items(
+    return ws.import_items(
+        *locators, as_type=as_type, reimport=reimport, with_sidematter=with_sidematter
+    )
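A short usage sketch of the new flag (the path is a placeholder): sidematter metadata and assets next to the imported file are carried into the workspace.

```python
# Sketch only: import a file and copy its sidematter (metadata/assets) along.
from kash.exec.resolve_args import import_locator_args

store_paths = import_locator_args("notes/draft.md", with_sidematter=True)
```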
kash/exec/runtime_settings.py
CHANGED
@@ -102,9 +102,11 @@ def kash_runtime(
     override_state: State | None = None,
     tmp_output: bool = False,
     no_format: bool = False,
+    sync_to_s3: bool = False,
 ) -> RuntimeSettingsManager:
     """
     Set a specific kash execution context for a with block.
+
     This allows defining a workspace and other execution settings as the ambient
     context within the block.
 
@@ -130,5 +132,6 @@ def kash_runtime(
         override_state=override_state,
         tmp_output=tmp_output,
         no_format=no_format,
+        sync_to_s3=sync_to_s3,
     )
     return RuntimeSettingsManager(settings=settings)
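A sketch of the with-block usage the docstring describes; the positional workspace argument is hypothetical, since the earlier part of the signature is not shown in this hunk:

```python
# Hypothetical usage: the workspace argument is assumed, not shown in the diff.
from kash.exec.runtime_settings import kash_runtime

with kash_runtime("path/to/workspace", sync_to_s3=True):
    # Actions run inside this block see sync_to_s3=True in their runtime settings.
    ...
```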
kash/file_storage/file_store.py
CHANGED
@@ -1,5 +1,6 @@
 import functools
 import os
+import shutil
 import threading
 import time
 from collections.abc import Callable, Generator
@@ -9,25 +10,21 @@ from typing import Concatenate, ParamSpec, TypeVar
 
 from funlog import format_duration, log_calls
 from prettyfmt import fmt_lines, fmt_path
-from
+from sidematter_format import copy_sidematter, move_sidematter, remove_sidematter
+from strif import copyfile_atomic, hash_file
 from typing_extensions import override
 
 from kash.config.logger import get_log_settings, get_logger
 from kash.config.text_styles import EMOJI_SAVED
-from kash.file_storage.
+from kash.file_storage.item_id_index import ItemIdIndex
 from kash.file_storage.metadata_dirs import MetadataDirs
-from kash.file_storage.store_filenames import
-
-    join_suffix,
-    parse_item_filename,
-)
-from kash.model.items_model import Item, ItemId, ItemType
+from kash.file_storage.store_filenames import folder_for_type, join_suffix
+from kash.model.items_model import Item, ItemType
 from kash.model.paths_model import StorePath
 from kash.shell.output.shell_output import PrintHooks
 from kash.utils.common.format_utils import fmt_loc
-from kash.utils.common.uniquifier import Uniquifier
 from kash.utils.common.url import Locator, UnresolvedLocator, Url, is_url
-from kash.utils.errors import FileExists, FileNotFound
+from kash.utils.errors import FileExists, FileNotFound
 from kash.utils.file_utils.file_formats_model import Format
 from kash.utils.file_utils.file_walk import walk_by_dir
 from kash.utils.file_utils.ignore_files import IgnoreChecker, add_to_ignore
@@ -83,11 +80,6 @@ class FileStore(Workspace):
     def base_dir(self) -> Path:
         return self.base_dir_path
 
-    @property
-    @override
-    def assets_dir(self) -> Path:
-        return self.base_dir / "assets"
-
     @synchronized
     @log_calls(level="warning", if_slower_than=2.0)
     def reload(self, auto_init: bool = True):
@@ -98,9 +90,8 @@ class FileStore(Workspace):
         self.info_logged = False
         self.warnings: list[str] = []
 
-        #
-        self.
-        self.id_map: dict[ItemId, StorePath] = {}
+        # Index of item identifiers and unique slug history
+        self.id_index = ItemIdIndex()
 
         self.dirs = MetadataDirs(base_dir=self.base_dir, is_global_ws=self.is_global_ws)
         if not auto_init and not self.dirs.is_initialized():
@@ -137,7 +128,7 @@ class FileStore(Workspace):
     def _id_index_init(self):
         num_dups = 0
         for store_path in self.walk_items():
-            dup_path = self.
+            dup_path = self.id_index.index_item(store_path, self.load)
             if dup_path:
                 num_dups += 1
 
@@ -146,62 +137,6 @@ class FileStore(Workspace):
                 f"Found {num_dups} duplicate items in store. See `logs` for details."
             )
 
-    @synchronized
-    def _id_index_item(self, store_path: StorePath) -> StorePath | None:
-        """
-        Update metadata index with a new item.
-        """
-        name, item_type, _format, file_ext = parse_item_filename(store_path)
-        if not file_ext:
-            log.debug(
-                "Skipping file with unrecognized name or extension: %s",
-                fmt_path(store_path),
-            )
-            return None
-
-        full_suffix = join_suffix(item_type.name, file_ext.name) if item_type else file_ext.name
-        self.uniquifier.add(name, full_suffix)
-
-        dup_path = None
-
-        try:
-            item = self.load(store_path)
-            item_id = item.item_id()
-            if item_id:
-                old_path = self.id_map.get(item_id)
-                if old_path and old_path != store_path:
-                    dup_path = old_path
-                    log.info(
-                        "Duplicate items (%s):\n%s",
-                        item_id,
-                        fmt_lines([old_path, store_path]),
-                    )
-                self.id_map[item_id] = store_path
-        except (ValueError, SkippableError) as e:
-            log.warning(
-                "Could not load file, skipping from store index: %s: %s",
-                fmt_path(store_path),
-                e,
-            )
-
-        return dup_path
-
-    @synchronized
-    def _id_unindex_item(self, store_path: StorePath):
-        """
-        Remove an item from the metadata index.
-        """
-        try:
-            item = self.load(store_path)
-            item_id = item.item_id()
-            if item_id:
-                try:
-                    self.id_map.pop(item_id, None)
-                except KeyError:
-                    pass  # If we happen to reload a store it might no longer be in memory.
-        except (FileNotFoundError, InvalidFilename):
-            pass
-
     def resolve_to_store_path(self, path: Path | StorePath) -> StorePath | None:
         """
         Return a StorePath if the given path is within the store, otherwise None.
@@ -236,7 +171,6 @@ class FileStore(Workspace):
         """
         return (self.base_dir / store_path).exists()
 
-    @synchronized
     def _pick_filename_for(self, item: Item, *, overwrite: bool = False) -> tuple[str, str | None]:
         """
         Get a suitable filename for this item. If `overwrite` is true, use the the slugified
@@ -255,7 +189,7 @@ class FileStore(Workspace):
         slug = item.slug_name()
         full_suffix = item.get_full_suffix()
         # Get a unique name per item type.
-        unique_slug, old_slugs = self.
+        unique_slug, old_slugs = self.id_index.uniquify_slug(slug, full_suffix)
 
         # Suffix files with both item type and a suitable file extension.
         new_unique_filename = join_suffix(unique_slug, full_suffix)
@@ -278,24 +212,34 @@ class FileStore(Workspace):
         Best effort to see if an item with the same identity is already in the store.
         """
         item_id = item.item_id()
-        log.info("Looking for item by id
+        log.info("Looking for item by id:\n%s", fmt_lines([item, item_id]))
         if not item_id:
             return None
         else:
-            store_path = self.
+            store_path = self.id_index.find_store_path_by_id(item_id)
             if not store_path:
-                # Just in case the
-
-
-
-
-
-
-
-
-
-
-
+                # Just in case the index is not complete, check the other paths too
+                possible_paths = [
+                    p
+                    for p in [
+                        item.store_path,
+                        self.store_path_for(item)[0],
+                        self.default_path_for(item),
+                    ]
+                    if p
+                ]
+                for p in possible_paths:
+                    if self.exists(p):
+                        old_item = self.load(p)
+                        if old_item.item_id() == item_id:
+                            log.info(
+                                "Item with the same id already saved (disk check):\n%s",
+                                fmt_lines([fmt_loc(p), item_id]),
+                            )
+                            # Ensure index is updated consistently and with logging
+                            self.id_index.index_item(p, self.load)
+                            return p
+                log.info("Also checked paths but no id match:\n%s", fmt_lines(possible_paths))
             if store_path and self.exists(store_path):
                 log.info(
                     "Item with the same id already saved (disk check):\n%s",
@@ -325,9 +269,13 @@ class FileStore(Workspace):
             return self._tmp_path_for(item), None
         elif item.store_path:
             return StorePath(item.store_path), None
-        elif
+        elif (
+            item_id
+            and (existing := self.id_index.find_store_path_by_id(item_id))
+            and self.exists(existing)
+        ):
             # If this item has an identity and we've saved under that id before, use the same store path.
-            store_path =
+            store_path = existing
             log.info(
                 "When picking a store path, found an existing item with same id:\n%s",
                 fmt_lines([fmt_loc(store_path), item_id]),
@@ -345,16 +293,23 @@ class FileStore(Workspace):
 
         return StorePath(store_path), old_store_path
 
-
+    @synchronized
+    def assign_store_path(self, item: Item) -> Path:
         """
-
-
-        the
+        Pick a new store path for the item and mutate `item.store_path`.
+
+        This is useful if you need to write to the store yourself, at the location
+        the item usually would be saved, and also want the path to be fixed.
 
-        If you write to
-
+        This is idempotent. If you also write to the file, call `mark_as_saved()`
+        to indicate that the file is now saved. Otherwise the item should be saved
+        with `save()`.
+
+        Returns the absolute path, for convenience if you wish to write to the file
+        directly.
         """
         store_path, _old_store_path = self.store_path_for(item)
+        item.store_path = str(store_path)
         return self.base_dir / store_path
 
     def _tmp_path_for(self, item: Item) -> StorePath:
@@ -412,6 +367,8 @@ class FileStore(Workspace):
             # Indicate this is an item with a store path, not an external path.
             # Keep external_path set so we know body is in that file.
             item.store_path = str(rel_path)
+            # Ensure index is updated for items written directly into the store.
+            self.id_index.index_item(StorePath(rel_path), self.load)
             return StorePath(rel_path)
         else:
             # Otherwise it's still in memory or in a file outside the workspace and we need to save it.
@@ -455,6 +412,8 @@ class FileStore(Workspace):
             # Save as a text item with frontmatter.
             if item.external_path:
                 item.body = Path(item.external_path).read_text()
+            from kash.file_storage.item_file_format import write_item
+
             write_item(item, full_path, normalize=not no_format)
         except OSError as e:
             log.error("Error saving item: %s", e)
@@ -485,7 +444,7 @@ class FileStore(Workspace):
 
         # Update in-memory store_path only after successful save.
         item.store_path = str(store_path)
-        self.
+        self.id_index.index_item(store_path, self.load)
 
         if not skipped_save:
             log.message("%s Saved item: %s", EMOJI_SAVED, fmt_loc(store_path))
@@ -499,6 +458,8 @@ class FileStore(Workspace):
         """
         Load item at the given path.
        """
+        from kash.file_storage.item_file_format import read_item
+
         return read_item(self.base_dir / store_path, self.base_dir)
 
     def hash(self, store_path: StorePath) -> str:
@@ -513,6 +474,7 @@ class FileStore(Workspace):
         *,
         as_type: ItemType | None = None,
         reimport: bool = False,
+        with_sidematter: bool = False,
     ) -> StorePath:
         """
         Add resources from files or URLs. If a locator is a path, copy it into the store.
@@ -520,11 +482,15 @@ class FileStore(Workspace):
         are not imported again and the existing store path is returned.
         If `as_type` is specified, it will be used to override the item type, otherwise
         we go with our best guess.
+        If `with_sidematter` is true, will copy any sidematter files (metadata/assets) to
+        the destination.
         """
+        from kash.file_storage.item_file_format import read_item
         from kash.web_content.canon_url import canonicalize_url
 
         if isinstance(locator, StorePath) and not reimport:
             log.info("Store path already imported: %s", fmt_loc(locator))
+            self.id_index.index_item(locator, self.load)
             return locator
         elif is_url(locator):
             # Import a URL as a resource.
@@ -580,6 +546,9 @@ class FileStore(Workspace):
             # we'll pick a new store path.
             store_path = self.save(item)
             log.info("Imported text file: %s", item.as_str())
+            # If requested, also copy any sidematter files (metadata/assets) to match destination.
+            if with_sidematter:
+                copy_sidematter(path, self.base_dir / store_path, copy_original=False)
         else:
             # Binary or other files we just copy over as-is, preserving the name.
             # We know the extension is recognized.
@@ -588,7 +557,10 @@ class FileStore(Workspace):
                 raise FileExists(f"Resource already in store: {fmt_loc(store_path)}")
 
             log.message("Importing resource: %s", fmt_loc(path))
-
+            if with_sidematter:
+                copy_sidematter(path, self.base_dir / store_path)
+            else:
+                copyfile_atomic(path, self.base_dir / store_path, make_parents=True)
 
             # Optimization: Don't import an identical file twice.
             if old_store_path:
@@ -599,7 +571,10 @@ class FileStore(Workspace):
                         "Imported resource is identical to the previous import: %s",
                         fmt_loc(old_store_path),
                     )
-
+                    if with_sidematter:
+                        remove_sidematter(self.base_dir / store_path)
+                    else:
+                        os.unlink(self.base_dir / store_path)
                     store_path = old_store_path
         log.message("Imported resource: %s", fmt_loc(store_path))
         return store_path
@@ -609,16 +584,20 @@ class FileStore(Workspace):
         *locators: Locator,
         as_type: ItemType | None = None,
         reimport: bool = False,
+        with_sidematter: bool = False,
     ) -> list[StorePath]:
         return [
-            self.import_item(
+            self.import_item(
+                locator, as_type=as_type, reimport=reimport, with_sidematter=with_sidematter
+            )
+            for locator in locators
         ]
 
-    def import_and_load(self, locator: UnresolvedLocator) -> Item:
+    def import_and_load(self, locator: UnresolvedLocator, with_sidematter: bool = False) -> Item:
        """
         Import a locator and return the item.
         """
-        store_path = self.import_item(locator)
+        store_path = self.import_item(locator, with_sidematter=with_sidematter)
         return self.load(store_path)
 
     def _filter_selection_paths(self):
@@ -645,7 +624,7 @@ class FileStore(Workspace):
         """
         self.selections.remove_values(store_paths)
         for store_path in store_paths:
-            self.
+            self.id_index.unindex_item(store_path, self.load)
         # TODO: Update metadata of all relations that point to this path too.
 
     @synchronized
@@ -655,12 +634,17 @@ class FileStore(Workspace):
         """
         self.selections.replace_values(replacements)
         for store_path, new_store_path in replacements:
-            self.
-            self.
+            self.id_index.unindex_item(store_path, self.load)
+            self.id_index.index_item(new_store_path, self.load)
         # TODO: Update metadata of all relations that point to this path too.
 
     def archive(
-        self,
+        self,
+        store_path: StorePath,
+        *,
+        missing_ok: bool = False,
+        quiet: bool = False,
+        with_sidematter: bool = False,
     ) -> StorePath:
         """
         Archive the item by moving it into the archive directory.
@@ -672,20 +656,25 @@ class FileStore(Workspace):
             fmt_loc(self.dirs.archive_dir),
         )
         orig_path = self.base_dir / store_path
-
+        full_archive_path = self.base_dir / self.dirs.archive_dir / store_path
         if missing_ok and not orig_path.exists():
             log.message("Item to archive not found so moving on: %s", fmt_loc(orig_path))
             return store_path
         if not orig_path.exists():
             log.warning("Item to archive not found: %s", fmt_loc(orig_path))
             return store_path
-
+        # Remove references (including id_map) before moving so we can load the item to compute id.
         self._remove_references([store_path])
+        if with_sidematter:
+            move_sidematter(orig_path, full_archive_path)
+        else:
+            os.makedirs(full_archive_path.parent, exist_ok=True)
+            shutil.move(orig_path, full_archive_path)
 
         archive_path = StorePath(self.dirs.archive_dir / store_path)
         return archive_path
 
-    def unarchive(self, store_path: StorePath) -> StorePath:
+    def unarchive(self, store_path: StorePath, with_sidematter: bool = False) -> StorePath:
         """
         Unarchive the item by moving back out of the archive directory.
         Path may be with or without the archive dir prefix.
@@ -695,7 +684,12 @@ class FileStore(Workspace):
         if full_input_path.is_relative_to(full_archive_path):
             store_path = StorePath(relpath(full_input_path, full_archive_path))
         original_path = self.base_dir / store_path
-
+        if with_sidematter:
+            move_sidematter(full_input_path, original_path)
+        else:
+            shutil.move(full_input_path, original_path)
+        # Re-index after restoring from archive.
+        self.id_index.index_item(store_path, self.load)
         return StorePath(store_path)
 
     @synchronized
@@ -712,7 +706,7 @@ class FileStore(Workspace):
         log.message(
             "Using workspace: %s (%s items)",
             fmt_path(self.base_dir, rel_to_cwd=False),
-            len(self.
+            len(self.id_index),
         )
         log.message(
            "Logging to: %s",