kash-shell 0.3.28__py3-none-any.whl → 0.3.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. kash/actions/core/chat.py +1 -0
  2. kash/actions/core/markdownify_html.py +4 -5
  3. kash/actions/core/minify_html.py +4 -5
  4. kash/actions/core/readability.py +1 -4
  5. kash/actions/core/render_as_html.py +10 -7
  6. kash/actions/core/save_sidematter_meta.py +47 -0
  7. kash/actions/core/show_webpage.py +2 -0
  8. kash/actions/core/zip_sidematter.py +47 -0
  9. kash/commands/base/basic_file_commands.py +7 -4
  10. kash/commands/base/diff_commands.py +6 -4
  11. kash/commands/base/files_command.py +31 -30
  12. kash/commands/base/general_commands.py +3 -2
  13. kash/commands/base/logs_commands.py +6 -4
  14. kash/commands/base/reformat_command.py +3 -2
  15. kash/commands/base/search_command.py +4 -3
  16. kash/commands/base/show_command.py +9 -7
  17. kash/commands/help/assistant_commands.py +6 -4
  18. kash/commands/help/help_commands.py +7 -4
  19. kash/commands/workspace/selection_commands.py +18 -16
  20. kash/commands/workspace/workspace_commands.py +39 -26
  21. kash/config/logger.py +1 -1
  22. kash/config/setup.py +2 -27
  23. kash/config/text_styles.py +1 -1
  24. kash/docs/markdown/topics/a1_what_is_kash.md +26 -18
  25. kash/docs/markdown/topics/a2_installation.md +3 -2
  26. kash/exec/action_decorators.py +7 -5
  27. kash/exec/action_exec.py +104 -53
  28. kash/exec/fetch_url_items.py +40 -11
  29. kash/exec/llm_transforms.py +14 -5
  30. kash/exec/preconditions.py +2 -2
  31. kash/exec/resolve_args.py +4 -1
  32. kash/exec/runtime_settings.py +3 -0
  33. kash/file_storage/file_store.py +108 -114
  34. kash/file_storage/item_file_format.py +91 -26
  35. kash/file_storage/item_id_index.py +128 -0
  36. kash/help/help_types.py +1 -1
  37. kash/llm_utils/llms.py +6 -1
  38. kash/local_server/local_server_commands.py +2 -1
  39. kash/mcp/mcp_server_commands.py +3 -2
  40. kash/mcp/mcp_server_routes.py +42 -12
  41. kash/model/actions_model.py +44 -32
  42. kash/model/compound_actions_model.py +4 -3
  43. kash/model/exec_model.py +33 -3
  44. kash/model/items_model.py +150 -60
  45. kash/model/params_model.py +4 -4
  46. kash/shell/output/shell_output.py +1 -2
  47. kash/utils/api_utils/gather_limited.py +2 -0
  48. kash/utils/api_utils/multitask_gather.py +74 -0
  49. kash/utils/common/s3_utils.py +108 -0
  50. kash/utils/common/url.py +16 -4
  51. kash/utils/file_formats/chat_format.py +7 -4
  52. kash/utils/file_utils/file_ext.py +1 -0
  53. kash/utils/file_utils/file_formats.py +4 -2
  54. kash/utils/file_utils/file_formats_model.py +12 -0
  55. kash/utils/text_handling/doc_normalization.py +1 -1
  56. kash/utils/text_handling/markdown_footnotes.py +224 -0
  57. kash/utils/text_handling/markdown_utils.py +532 -41
  58. kash/utils/text_handling/markdownify_utils.py +2 -1
  59. kash/web_content/web_fetch.py +2 -1
  60. kash/web_gen/templates/components/tooltip_scripts.js.jinja +186 -1
  61. kash/web_gen/templates/components/youtube_popover_scripts.js.jinja +223 -0
  62. kash/web_gen/templates/components/youtube_popover_styles.css.jinja +150 -0
  63. kash/web_gen/templates/content_styles.css.jinja +53 -1
  64. kash/web_gen/templates/youtube_webpage.html.jinja +47 -0
  65. kash/web_gen/webpage_render.py +103 -0
  66. kash/workspaces/workspaces.py +0 -5
  67. kash/xonsh_custom/custom_shell.py +4 -3
  68. {kash_shell-0.3.28.dist-info → kash_shell-0.3.33.dist-info}/METADATA +35 -26
  69. {kash_shell-0.3.28.dist-info → kash_shell-0.3.33.dist-info}/RECORD +72 -64
  70. kash/llm_utils/llm_features.py +0 -72
  71. kash/web_gen/simple_webpage.py +0 -55
  72. {kash_shell-0.3.28.dist-info → kash_shell-0.3.33.dist-info}/WHEEL +0 -0
  73. {kash_shell-0.3.28.dist-info → kash_shell-0.3.33.dist-info}/entry_points.txt +0 -0
  74. {kash_shell-0.3.28.dist-info → kash_shell-0.3.33.dist-info}/licenses/LICENSE +0 -0
@@ -2,7 +2,7 @@ from dataclasses import dataclass
2
2
 
3
3
  from kash.config.logger import get_logger
4
4
  from kash.exec.preconditions import is_url_resource
5
- from kash.model.items_model import Item, ItemType
5
+ from kash.model.items_model import Format, Item, ItemType
6
6
  from kash.model.paths_model import StorePath
7
7
  from kash.utils.common.format_utils import fmt_loc
8
8
  from kash.utils.common.url import Url, is_url
@@ -36,7 +36,15 @@ def fetch_url_item(
36
36
  save_content: bool = True,
37
37
  refetch: bool = False,
38
38
  cache: bool = True,
39
+ overwrite: bool = True,
39
40
  ) -> FetchItemResult:
41
+ """
42
+ Fetch or load a URL or path. For a URL, will fetch the content and metadata and save
43
+ as an item in the workspace.
44
+
45
+ Returns:
46
+ The fetched or loaded item, already saved to the workspace.
47
+ """
40
48
  from kash.workspaces import current_ws
41
49
 
42
50
  ws = current_ws()
@@ -51,11 +59,22 @@ def fetch_url_item(
51
59
  else:
52
60
  raise InvalidInput(f"Not a URL or URL resource: {fmt_loc(locator)}")
53
61
 
54
- return fetch_url_item_content(item, save_content=save_content, refetch=refetch, cache=cache)
62
+ return fetch_url_item_content(
63
+ item,
64
+ save_content=save_content,
65
+ refetch=refetch,
66
+ cache=cache,
67
+ overwrite=overwrite,
68
+ )
55
69
 
56
70
 
57
71
  def fetch_url_item_content(
58
- item: Item, *, save_content: bool = True, refetch: bool = False, cache: bool = True
72
+ item: Item,
73
+ *,
74
+ save_content: bool = True,
75
+ refetch: bool = False,
76
+ cache: bool = True,
77
+ overwrite: bool = True,
59
78
  ) -> FetchItemResult:
60
79
  """
61
80
  Fetch content and metadata for a URL using a media service if we
@@ -67,8 +86,11 @@ def fetch_url_item_content(
67
86
 
68
87
  If `cache` is true, the content is also cached in the local file cache.
69
88
 
70
- The content item is returned if content was saved. Otherwise, the updated
71
- URL item is returned.
89
+ If `overwrite` is true, the item is saved at the same location every time.
90
+ This is useful to keep resource filenames consistent.
91
+
92
+ Returns:
93
+ The fetched or loaded item, already saved to the workspace.
72
94
  """
73
95
  from kash.media_base.media_services import get_media_metadata
74
96
  from kash.web_content.canon_url import canonicalize_url
@@ -76,9 +98,11 @@ def fetch_url_item_content(
76
98
  from kash.workspaces import current_ws
77
99
 
78
100
  ws = current_ws()
79
- if not refetch and item.title and item.description and item.body:
101
+ # We could check for description too, but many pages don't have one.
102
+ has_key_content = item.title and (not item.has_body or item.body)
103
+ if not refetch and has_key_content:
80
104
  log.info(
81
- "Already have title, description, and body, will not fetch: %s",
105
+ "Already have title so assuming metadata is up to date, will not fetch: %s",
82
106
  item.fmt_loc(),
83
107
  )
84
108
  return FetchItemResult(item, was_cached=True)
@@ -109,7 +133,10 @@ def fetch_url_item_content(
109
133
  url_item = item.merged_copy(url_item)
110
134
  else:
111
135
  page_data = fetch_page_content(url, refetch=refetch, cache=cache)
112
- url_item = item.new_copy_with(
136
+ url_item = Item(
137
+ type=ItemType.resource,
138
+ format=Format.url,
139
+ url=url,
113
140
  title=page_data.title or item.title,
114
141
  description=page_data.description or item.description,
115
142
  thumbnail_url=page_data.thumbnail_url or item.thumbnail_url,
@@ -128,10 +155,10 @@ def fetch_url_item_content(
128
155
  log.warning("Failed to fetch page data: title is missing: %s", item.url)
129
156
 
130
157
  # Now save the updated URL item and also the content item if we have one.
131
- ws.save(url_item)
158
+ ws.save(url_item, overwrite=overwrite)
132
159
  assert url_item.store_path
133
160
  if content_item:
134
- ws.save(content_item)
161
+ ws.save(content_item, overwrite=overwrite)
135
162
  assert content_item.store_path
136
163
  log.info(
137
164
  "Saved both URL and content item: %s, %s",
@@ -144,4 +171,6 @@ def fetch_url_item_content(
144
171
  was_cached = bool(
145
172
  not page_data or (page_data.cache_result and page_data.cache_result.was_cached)
146
173
  )
147
- return FetchItemResult(content_item or url_item, was_cached=was_cached, page_data=page_data)
174
+ return FetchItemResult(
175
+ item=content_item or url_item, was_cached=was_cached, page_data=page_data
176
+ )
@@ -1,4 +1,5 @@
1
1
  from dataclasses import replace
2
+ from typing import Unpack
2
3
 
3
4
  from chopdiff.docs import DiffFilter, TextDoc
4
5
  from chopdiff.transforms import WindowSettings, filtered_transform
@@ -12,7 +13,7 @@ from kash.llm_utils.fuzzy_parsing import strip_markdown_fence
12
13
  from kash.llm_utils.llm_completion import llm_template_completion
13
14
  from kash.llm_utils.llm_messages import Message, MessageTemplate
14
15
  from kash.model.actions_model import LLMOptions
15
- from kash.model.items_model import Item
16
+ from kash.model.items_model import Item, ItemType, ItemUpdateOptions
16
17
  from kash.utils.errors import InvalidInput
17
18
  from kash.utils.file_utils.file_formats_model import Format
18
19
  from kash.utils.text_handling.doc_normalization import normalize_formatting
@@ -88,10 +89,11 @@ def llm_transform_str(options: LLMOptions, input_str: str, check_no_results: boo
88
89
  def llm_transform_item(
89
90
  item: Item,
90
91
  model: LLMName | None = None,
92
+ *,
91
93
  normalize: bool = True,
92
94
  strip_fence: bool = True,
93
95
  check_no_results: bool = True,
94
- format: Format | None = None,
96
+ **updates: Unpack[ItemUpdateOptions],
95
97
  ) -> Item:
96
98
  """
97
99
  Main function for running an LLM action on an item.
@@ -99,6 +101,14 @@ def llm_transform_item(
99
101
  Model may be overridden by an explicit model parameter.
100
102
  Also by default cleans up and normalizes output as Markdown.
101
103
  """
104
+ # Default to Markdown docs.
105
+ if "format" not in updates:
106
+ updates["format"] = Format.markdown
107
+ if "type" not in updates:
108
+ updates["type"] = ItemType.doc
109
+ if "body" not in updates:
110
+ updates["body"] = None
111
+
102
112
  if not item.context:
103
113
  raise InvalidInput(f"LLM actions expect a context on input item: {item}")
104
114
  action = item.context.action
@@ -112,13 +122,12 @@ def llm_transform_item(
112
122
  log.message("LLM transform from action `%s` on item: %s", action.name, item)
113
123
  log.message("LLM options: %s", action.llm_options)
114
124
 
115
- format = format or item.format or Format.markdown
116
- result_item = item.derived_copy(body=None, format=format)
125
+ result_item = item.derived_copy(**updates)
117
126
  result_str = llm_transform_str(llm_options, item.body, check_no_results=check_no_results)
118
127
  if strip_fence:
119
128
  result_str = strip_markdown_fence(result_str)
120
129
  if normalize:
121
- result_str = normalize_formatting(result_str, format=format)
130
+ result_str = normalize_formatting(result_str, format=updates["format"])
122
131
 
123
132
  result_item.body = result_str
124
133
  return result_item
@@ -69,7 +69,7 @@ def is_instructions(item: Item) -> bool:
69
69
 
70
70
  @kash_precondition
71
71
  def is_url_resource(item: Item) -> bool:
72
- return bool(item.type == ItemType.resource and item.url)
72
+ return bool(item.type == ItemType.resource and item.format == Format.url and item.url)
73
73
 
74
74
 
75
75
  @kash_precondition
@@ -126,7 +126,7 @@ def has_markdown_with_html_body(item: Item) -> bool:
126
126
 
127
127
  @kash_precondition
128
128
  def has_fullpage_html_body(item: Item) -> bool:
129
- return bool(has_html_body(item) and item.body and is_fullpage_html(item.body))
129
+ return bool(has_html_compatible_body(item) and item.body and is_fullpage_html(item.body))
130
130
 
131
131
 
132
132
  @kash_precondition
kash/exec/resolve_args.py CHANGED
@@ -118,10 +118,13 @@ def import_locator_args(
118
118
  *locators_or_strs: UnresolvedLocator,
119
119
  as_type: ItemType = ItemType.resource,
120
120
  reimport: bool = False,
121
+ with_sidematter: bool = False,
121
122
  ) -> list[StorePath]:
122
123
  """
123
124
  Import locators into the current workspace.
124
125
  """
125
126
  locators = [resolve_locator_arg(loc) for loc in locators_or_strs]
126
127
  ws = current_ws()
127
- return ws.import_items(*locators, as_type=as_type, reimport=reimport)
128
+ return ws.import_items(
129
+ *locators, as_type=as_type, reimport=reimport, with_sidematter=with_sidematter
130
+ )
@@ -102,9 +102,11 @@ def kash_runtime(
102
102
  override_state: State | None = None,
103
103
  tmp_output: bool = False,
104
104
  no_format: bool = False,
105
+ sync_to_s3: bool = False,
105
106
  ) -> RuntimeSettingsManager:
106
107
  """
107
108
  Set a specific kash execution context for a with block.
109
+
108
110
  This allows defining a workspace and other execution settings as the ambient
109
111
  context within the block.
110
112
 
@@ -130,5 +132,6 @@ def kash_runtime(
130
132
  override_state=override_state,
131
133
  tmp_output=tmp_output,
132
134
  no_format=no_format,
135
+ sync_to_s3=sync_to_s3,
133
136
  )
134
137
  return RuntimeSettingsManager(settings=settings)
@@ -1,5 +1,6 @@
1
1
  import functools
2
2
  import os
3
+ import shutil
3
4
  import threading
4
5
  import time
5
6
  from collections.abc import Callable, Generator
@@ -9,25 +10,21 @@ from typing import Concatenate, ParamSpec, TypeVar
9
10
 
10
11
  from funlog import format_duration, log_calls
11
12
  from prettyfmt import fmt_lines, fmt_path
12
- from strif import copyfile_atomic, hash_file, move_file
13
+ from sidematter_format import copy_sidematter, move_sidematter, remove_sidematter
14
+ from strif import copyfile_atomic, hash_file
13
15
  from typing_extensions import override
14
16
 
15
17
  from kash.config.logger import get_log_settings, get_logger
16
18
  from kash.config.text_styles import EMOJI_SAVED
17
- from kash.file_storage.item_file_format import read_item, write_item
19
+ from kash.file_storage.item_id_index import ItemIdIndex
18
20
  from kash.file_storage.metadata_dirs import MetadataDirs
19
- from kash.file_storage.store_filenames import (
20
- folder_for_type,
21
- join_suffix,
22
- parse_item_filename,
23
- )
24
- from kash.model.items_model import Item, ItemId, ItemType
21
+ from kash.file_storage.store_filenames import folder_for_type, join_suffix
22
+ from kash.model.items_model import Item, ItemType
25
23
  from kash.model.paths_model import StorePath
26
24
  from kash.shell.output.shell_output import PrintHooks
27
25
  from kash.utils.common.format_utils import fmt_loc
28
- from kash.utils.common.uniquifier import Uniquifier
29
26
  from kash.utils.common.url import Locator, UnresolvedLocator, Url, is_url
30
- from kash.utils.errors import FileExists, FileNotFound, InvalidFilename, SkippableError
27
+ from kash.utils.errors import FileExists, FileNotFound
31
28
  from kash.utils.file_utils.file_formats_model import Format
32
29
  from kash.utils.file_utils.file_walk import walk_by_dir
33
30
  from kash.utils.file_utils.ignore_files import IgnoreChecker, add_to_ignore
@@ -83,11 +80,6 @@ class FileStore(Workspace):
83
80
  def base_dir(self) -> Path:
84
81
  return self.base_dir_path
85
82
 
86
- @property
87
- @override
88
- def assets_dir(self) -> Path:
89
- return self.base_dir / "assets"
90
-
91
83
  @synchronized
92
84
  @log_calls(level="warning", if_slower_than=2.0)
93
85
  def reload(self, auto_init: bool = True):
@@ -98,9 +90,8 @@ class FileStore(Workspace):
98
90
  self.info_logged = False
99
91
  self.warnings: list[str] = []
100
92
 
101
- # TODO: Move this to its own IdentifierIndex class, and make it exactly mirror disk state.
102
- self.uniquifier = Uniquifier()
103
- self.id_map: dict[ItemId, StorePath] = {}
93
+ # Index of item identifiers and unique slug history
94
+ self.id_index = ItemIdIndex()
104
95
 
105
96
  self.dirs = MetadataDirs(base_dir=self.base_dir, is_global_ws=self.is_global_ws)
106
97
  if not auto_init and not self.dirs.is_initialized():
@@ -137,7 +128,7 @@ class FileStore(Workspace):
137
128
  def _id_index_init(self):
138
129
  num_dups = 0
139
130
  for store_path in self.walk_items():
140
- dup_path = self._id_index_item(store_path)
131
+ dup_path = self.id_index.index_item(store_path, self.load)
141
132
  if dup_path:
142
133
  num_dups += 1
143
134
 
@@ -146,62 +137,6 @@ class FileStore(Workspace):
146
137
  f"Found {num_dups} duplicate items in store. See `logs` for details."
147
138
  )
148
139
 
149
- @synchronized
150
- def _id_index_item(self, store_path: StorePath) -> StorePath | None:
151
- """
152
- Update metadata index with a new item.
153
- """
154
- name, item_type, _format, file_ext = parse_item_filename(store_path)
155
- if not file_ext:
156
- log.debug(
157
- "Skipping file with unrecognized name or extension: %s",
158
- fmt_path(store_path),
159
- )
160
- return None
161
-
162
- full_suffix = join_suffix(item_type.name, file_ext.name) if item_type else file_ext.name
163
- self.uniquifier.add(name, full_suffix)
164
-
165
- dup_path = None
166
-
167
- try:
168
- item = self.load(store_path)
169
- item_id = item.item_id()
170
- if item_id:
171
- old_path = self.id_map.get(item_id)
172
- if old_path and old_path != store_path:
173
- dup_path = old_path
174
- log.info(
175
- "Duplicate items (%s):\n%s",
176
- item_id,
177
- fmt_lines([old_path, store_path]),
178
- )
179
- self.id_map[item_id] = store_path
180
- except (ValueError, SkippableError) as e:
181
- log.warning(
182
- "Could not load file, skipping from store index: %s: %s",
183
- fmt_path(store_path),
184
- e,
185
- )
186
-
187
- return dup_path
188
-
189
- @synchronized
190
- def _id_unindex_item(self, store_path: StorePath):
191
- """
192
- Remove an item from the metadata index.
193
- """
194
- try:
195
- item = self.load(store_path)
196
- item_id = item.item_id()
197
- if item_id:
198
- try:
199
- self.id_map.pop(item_id, None)
200
- except KeyError:
201
- pass # If we happen to reload a store it might no longer be in memory.
202
- except (FileNotFoundError, InvalidFilename):
203
- pass
204
-
205
140
  def resolve_to_store_path(self, path: Path | StorePath) -> StorePath | None:
206
141
  """
207
142
  Return a StorePath if the given path is within the store, otherwise None.
@@ -236,7 +171,6 @@ class FileStore(Workspace):
236
171
  """
237
172
  return (self.base_dir / store_path).exists()
238
173
 
239
- @synchronized
240
174
  def _pick_filename_for(self, item: Item, *, overwrite: bool = False) -> tuple[str, str | None]:
241
175
  """
242
176
  Get a suitable filename for this item. If `overwrite` is true, use the slugified
@@ -255,7 +189,7 @@ class FileStore(Workspace):
255
189
  slug = item.slug_name()
256
190
  full_suffix = item.get_full_suffix()
257
191
  # Get a unique name per item type.
258
- unique_slug, old_slugs = self.uniquifier.uniquify_historic(slug, full_suffix)
192
+ unique_slug, old_slugs = self.id_index.uniquify_slug(slug, full_suffix)
259
193
 
260
194
  # Suffix files with both item type and a suitable file extension.
261
195
  new_unique_filename = join_suffix(unique_slug, full_suffix)
@@ -278,24 +212,34 @@ class FileStore(Workspace):
278
212
  Best effort to see if an item with the same identity is already in the store.
279
213
  """
280
214
  item_id = item.item_id()
281
- log.info("Looking for item by id: %s", item_id)
215
+ log.info("Looking for item by id:\n%s", fmt_lines([item, item_id]))
282
216
  if not item_id:
283
217
  return None
284
218
  else:
285
- store_path = self.id_map.get(item_id)
219
+ store_path = self.id_index.find_store_path_by_id(item_id)
286
220
  if not store_path:
287
- # Just in case the id_map is not complete, check the default path too.
288
- default_path = self.default_path_for(item)
289
- if self.exists(default_path):
290
- old_item = self.load(default_path)
291
- if old_item.item_id() == item_id:
292
- log.info(
293
- "Item with the same id already saved (disk check):\n%s",
294
- fmt_lines([fmt_loc(default_path), item_id]),
295
- )
296
- store_path = default_path
297
- self.id_map[item_id] = default_path
298
- return default_path
221
+ # Just in case the index is not complete, check the other paths too
222
+ possible_paths = [
223
+ p
224
+ for p in [
225
+ item.store_path,
226
+ self.store_path_for(item)[0],
227
+ self.default_path_for(item),
228
+ ]
229
+ if p
230
+ ]
231
+ for p in possible_paths:
232
+ if self.exists(p):
233
+ old_item = self.load(p)
234
+ if old_item.item_id() == item_id:
235
+ log.info(
236
+ "Item with the same id already saved (disk check):\n%s",
237
+ fmt_lines([fmt_loc(p), item_id]),
238
+ )
239
+ # Ensure index is updated consistently and with logging
240
+ self.id_index.index_item(p, self.load)
241
+ return p
242
+ log.info("Also checked paths but no id match:\n%s", fmt_lines(possible_paths))
299
243
  if store_path and self.exists(store_path):
300
244
  log.info(
301
245
  "Item with the same id already saved (disk check):\n%s",
@@ -325,9 +269,13 @@ class FileStore(Workspace):
325
269
  return self._tmp_path_for(item), None
326
270
  elif item.store_path:
327
271
  return StorePath(item.store_path), None
328
- elif item_id in self.id_map and self.exists(self.id_map[item_id]):
272
+ elif (
273
+ item_id
274
+ and (existing := self.id_index.find_store_path_by_id(item_id))
275
+ and self.exists(existing)
276
+ ):
329
277
  # If this item has an identity and we've saved under that id before, use the same store path.
330
- store_path = self.id_map[item_id]
278
+ store_path = existing
331
279
  log.info(
332
280
  "When picking a store path, found an existing item with same id:\n%s",
333
281
  fmt_lines([fmt_loc(store_path), item_id]),
@@ -345,16 +293,23 @@ class FileStore(Workspace):
345
293
 
346
294
  return StorePath(store_path), old_store_path
347
295
 
348
- def target_path_for(self, item: Item) -> Path:
296
+ @synchronized
297
+ def assign_store_path(self, item: Item) -> Path:
349
298
  """
350
- Get an the absolute path for an item. Use this if you need to work around the
351
- usual save mechanism and write directly to the store yourself, at the location
352
- the item usually would be saved.
299
+ Pick a new store path for the item and mutate `item.store_path`.
300
+
301
+ This is useful if you need to write to the store yourself, at the location
302
+ the item usually would be saved, and also want the path to be fixed.
353
303
 
354
- If you write to this path, then set the item's `external_path` to indicate it's
355
- already saved.
304
+ This is idempotent. If you also write to the file, call `mark_as_saved()`
305
+ to indicate that the file is now saved. Otherwise the item should be saved
306
+ with `save()`.
307
+
308
+ Returns the absolute path, for convenience if you wish to write to the file
309
+ directly.
356
310
  """
357
311
  store_path, _old_store_path = self.store_path_for(item)
312
+ item.store_path = str(store_path)
358
313
  return self.base_dir / store_path
359
314
 
360
315
  def _tmp_path_for(self, item: Item) -> StorePath:
@@ -412,6 +367,8 @@ class FileStore(Workspace):
412
367
  # Indicate this is an item with a store path, not an external path.
413
368
  # Keep external_path set so we know body is in that file.
414
369
  item.store_path = str(rel_path)
370
+ # Ensure index is updated for items written directly into the store.
371
+ self.id_index.index_item(StorePath(rel_path), self.load)
415
372
  return StorePath(rel_path)
416
373
  else:
417
374
  # Otherwise it's still in memory or in a file outside the workspace and we need to save it.
@@ -455,6 +412,8 @@ class FileStore(Workspace):
455
412
  # Save as a text item with frontmatter.
456
413
  if item.external_path:
457
414
  item.body = Path(item.external_path).read_text()
415
+ from kash.file_storage.item_file_format import write_item
416
+
458
417
  write_item(item, full_path, normalize=not no_format)
459
418
  except OSError as e:
460
419
  log.error("Error saving item: %s", e)
@@ -485,7 +444,7 @@ class FileStore(Workspace):
485
444
 
486
445
  # Update in-memory store_path only after successful save.
487
446
  item.store_path = str(store_path)
488
- self._id_index_item(store_path)
447
+ self.id_index.index_item(store_path, self.load)
489
448
 
490
449
  if not skipped_save:
491
450
  log.message("%s Saved item: %s", EMOJI_SAVED, fmt_loc(store_path))
@@ -499,6 +458,8 @@ class FileStore(Workspace):
499
458
  """
500
459
  Load item at the given path.
501
460
  """
461
+ from kash.file_storage.item_file_format import read_item
462
+
502
463
  return read_item(self.base_dir / store_path, self.base_dir)
503
464
 
504
465
  def hash(self, store_path: StorePath) -> str:
@@ -513,6 +474,7 @@ class FileStore(Workspace):
513
474
  *,
514
475
  as_type: ItemType | None = None,
515
476
  reimport: bool = False,
477
+ with_sidematter: bool = False,
516
478
  ) -> StorePath:
517
479
  """
518
480
  Add resources from files or URLs. If a locator is a path, copy it into the store.
@@ -520,11 +482,15 @@ class FileStore(Workspace):
520
482
  are not imported again and the existing store path is returned.
521
483
  If `as_type` is specified, it will be used to override the item type, otherwise
522
484
  we go with our best guess.
485
+ If `with_sidematter` is true, will copy any sidematter files (metadata/assets) to
486
+ the destination.
523
487
  """
488
+ from kash.file_storage.item_file_format import read_item
524
489
  from kash.web_content.canon_url import canonicalize_url
525
490
 
526
491
  if isinstance(locator, StorePath) and not reimport:
527
492
  log.info("Store path already imported: %s", fmt_loc(locator))
493
+ self.id_index.index_item(locator, self.load)
528
494
  return locator
529
495
  elif is_url(locator):
530
496
  # Import a URL as a resource.
@@ -580,6 +546,9 @@ class FileStore(Workspace):
580
546
  # we'll pick a new store path.
581
547
  store_path = self.save(item)
582
548
  log.info("Imported text file: %s", item.as_str())
549
+ # If requested, also copy any sidematter files (metadata/assets) to match destination.
550
+ if with_sidematter:
551
+ copy_sidematter(path, self.base_dir / store_path, copy_original=False)
583
552
  else:
584
553
  # Binary or other files we just copy over as-is, preserving the name.
585
554
  # We know the extension is recognized.
@@ -588,7 +557,10 @@ class FileStore(Workspace):
588
557
  raise FileExists(f"Resource already in store: {fmt_loc(store_path)}")
589
558
 
590
559
  log.message("Importing resource: %s", fmt_loc(path))
591
- copyfile_atomic(path, self.base_dir / store_path, make_parents=True)
560
+ if with_sidematter:
561
+ copy_sidematter(path, self.base_dir / store_path)
562
+ else:
563
+ copyfile_atomic(path, self.base_dir / store_path, make_parents=True)
592
564
 
593
565
  # Optimization: Don't import an identical file twice.
594
566
  if old_store_path:
@@ -599,7 +571,10 @@ class FileStore(Workspace):
599
571
  "Imported resource is identical to the previous import: %s",
600
572
  fmt_loc(old_store_path),
601
573
  )
602
- os.unlink(self.base_dir / store_path)
574
+ if with_sidematter:
575
+ remove_sidematter(self.base_dir / store_path)
576
+ else:
577
+ os.unlink(self.base_dir / store_path)
603
578
  store_path = old_store_path
604
579
  log.message("Imported resource: %s", fmt_loc(store_path))
605
580
  return store_path
@@ -609,16 +584,20 @@ class FileStore(Workspace):
609
584
  *locators: Locator,
610
585
  as_type: ItemType | None = None,
611
586
  reimport: bool = False,
587
+ with_sidematter: bool = False,
612
588
  ) -> list[StorePath]:
613
589
  return [
614
- self.import_item(locator, as_type=as_type, reimport=reimport) for locator in locators
590
+ self.import_item(
591
+ locator, as_type=as_type, reimport=reimport, with_sidematter=with_sidematter
592
+ )
593
+ for locator in locators
615
594
  ]
616
595
 
617
- def import_and_load(self, locator: UnresolvedLocator) -> Item:
596
+ def import_and_load(self, locator: UnresolvedLocator, with_sidematter: bool = False) -> Item:
618
597
  """
619
598
  Import a locator and return the item.
620
599
  """
621
- store_path = self.import_item(locator)
600
+ store_path = self.import_item(locator, with_sidematter=with_sidematter)
622
601
  return self.load(store_path)
623
602
 
624
603
  def _filter_selection_paths(self):
@@ -645,7 +624,7 @@ class FileStore(Workspace):
645
624
  """
646
625
  self.selections.remove_values(store_paths)
647
626
  for store_path in store_paths:
648
- self._id_unindex_item(store_path)
627
+ self.id_index.unindex_item(store_path, self.load)
649
628
  # TODO: Update metadata of all relations that point to this path too.
650
629
 
651
630
  @synchronized
@@ -655,12 +634,17 @@ class FileStore(Workspace):
655
634
  """
656
635
  self.selections.replace_values(replacements)
657
636
  for store_path, new_store_path in replacements:
658
- self._id_unindex_item(store_path)
659
- self._id_index_item(new_store_path)
637
+ self.id_index.unindex_item(store_path, self.load)
638
+ self.id_index.index_item(new_store_path, self.load)
660
639
  # TODO: Update metadata of all relations that point to this path too.
661
640
 
662
641
  def archive(
663
- self, store_path: StorePath, *, missing_ok: bool = False, quiet: bool = False
642
+ self,
643
+ store_path: StorePath,
644
+ *,
645
+ missing_ok: bool = False,
646
+ quiet: bool = False,
647
+ with_sidematter: bool = False,
664
648
  ) -> StorePath:
665
649
  """
666
650
  Archive the item by moving it into the archive directory.
@@ -672,20 +656,25 @@ class FileStore(Workspace):
672
656
  fmt_loc(self.dirs.archive_dir),
673
657
  )
674
658
  orig_path = self.base_dir / store_path
675
- archive_path = self.dirs.archive_dir / store_path
659
+ full_archive_path = self.base_dir / self.dirs.archive_dir / store_path
676
660
  if missing_ok and not orig_path.exists():
677
661
  log.message("Item to archive not found so moving on: %s", fmt_loc(orig_path))
678
662
  return store_path
679
663
  if not orig_path.exists():
680
664
  log.warning("Item to archive not found: %s", fmt_loc(orig_path))
681
665
  return store_path
682
- move_file(orig_path, archive_path)
666
+ # Remove references (including the id index) before moving so we can load the item to compute id.
683
667
  self._remove_references([store_path])
668
+ if with_sidematter:
669
+ move_sidematter(orig_path, full_archive_path)
670
+ else:
671
+ os.makedirs(full_archive_path.parent, exist_ok=True)
672
+ shutil.move(orig_path, full_archive_path)
684
673
 
685
674
  archive_path = StorePath(self.dirs.archive_dir / store_path)
686
675
  return archive_path
687
676
 
688
- def unarchive(self, store_path: StorePath) -> StorePath:
677
+ def unarchive(self, store_path: StorePath, with_sidematter: bool = False) -> StorePath:
689
678
  """
690
679
  Unarchive the item by moving back out of the archive directory.
691
680
  Path may be with or without the archive dir prefix.
@@ -695,7 +684,12 @@ class FileStore(Workspace):
695
684
  if full_input_path.is_relative_to(full_archive_path):
696
685
  store_path = StorePath(relpath(full_input_path, full_archive_path))
697
686
  original_path = self.base_dir / store_path
698
- move_file(full_input_path, original_path)
687
+ if with_sidematter:
688
+ move_sidematter(full_input_path, original_path)
689
+ else:
690
+ shutil.move(full_input_path, original_path)
691
+ # Re-index after restoring from archive.
692
+ self.id_index.index_item(store_path, self.load)
699
693
  return StorePath(store_path)
700
694
 
701
695
  @synchronized
@@ -712,7 +706,7 @@ class FileStore(Workspace):
712
706
  log.message(
713
707
  "Using workspace: %s (%s items)",
714
708
  fmt_path(self.base_dir, rel_to_cwd=False),
715
- len(self.uniquifier),
709
+ len(self.id_index),
716
710
  )
717
711
  log.message(
718
712
  "Logging to: %s",