kash-shell 0.3.30__py3-none-any.whl → 0.3.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kash/actions/core/chat.py +1 -0
- kash/actions/core/markdownify_html.py +1 -1
- kash/actions/core/readability.py +1 -4
- kash/actions/core/render_as_html.py +1 -0
- kash/actions/core/show_webpage.py +2 -0
- kash/actions/core/summarize_as_bullets.py +1 -1
- kash/config/logger.py +1 -1
- kash/config/text_styles.py +1 -1
- kash/docs/markdown/topics/a2_installation.md +3 -2
- kash/exec/action_decorators.py +5 -3
- kash/exec/action_exec.py +50 -5
- kash/exec/fetch_url_items.py +4 -2
- kash/exec/llm_transforms.py +14 -5
- kash/exec/runtime_settings.py +2 -0
- kash/file_storage/file_store.py +50 -92
- kash/file_storage/item_id_index.py +128 -0
- kash/mcp/mcp_server_routes.py +42 -12
- kash/model/actions_model.py +18 -7
- kash/model/exec_model.py +3 -0
- kash/model/items_model.py +54 -12
- kash/utils/api_utils/gather_limited.py +2 -0
- kash/utils/api_utils/multitask_gather.py +134 -0
- kash/utils/common/s3_utils.py +108 -0
- kash/utils/common/url.py +16 -4
- kash/utils/rich_custom/multitask_status.py +84 -10
- kash/utils/text_handling/markdown_footnotes.py +16 -43
- kash/utils/text_handling/markdown_utils.py +108 -28
- kash/web_content/web_fetch.py +2 -1
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/METADATA +5 -5
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/RECORD +33 -30
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.30.dist-info → kash_shell-0.3.34.dist-info}/licenses/LICENSE +0 -0
kash/actions/core/markdownify_html.py
CHANGED

```diff
@@ -13,7 +13,7 @@ from kash.web_content.web_extract_readabilipy import extract_text_readabilipy
 log = get_logger(__name__)
 
 
-@kash_action(precondition=is_url_resource | has_html_body,
+@kash_action(precondition=is_url_resource | has_html_body, output_format=Format.markdown)
 def markdownify_html(item: Item) -> Item:
     """
     Converts raw HTML or the URL of an HTML page to Markdown, fetching with the content
```
kash/actions/core/readability.py
CHANGED

```diff
@@ -9,10 +9,7 @@ from kash.web_content.web_extract_readabilipy import extract_text_readabilipy
 log = get_logger(__name__)
 
 
-@kash_action(
-    precondition=is_url_resource | has_html_body,
-    mcp_tool=True,
-)
+@kash_action(precondition=is_url_resource | has_html_body, output_format=Format.html)
 def readability(item: Item) -> Item:
     """
     Extracts clean HTML from a raw HTML item.
```
kash/actions/core/render_as_html.py
CHANGED

```diff
@@ -15,6 +15,7 @@ log = get_logger(__name__)
 @kash_action(
     expected_args=ONE_OR_MORE_ARGS,
     precondition=(has_html_body | has_simple_text_body) & ~has_fullpage_html_body,
+    output_format=Format.html,
     params=(Param("no_title", "Don't add a title to the page body.", type=bool),),
 )
 def render_as_html(input: ActionInput, no_title: bool = False) -> ActionResult:
```
kash/actions/core/show_webpage.py
CHANGED

```diff
@@ -6,11 +6,13 @@ from kash.exec_model.args_model import ONE_OR_MORE_ARGS
 from kash.exec_model.commands_model import Command
 from kash.exec_model.shell_model import ShellResult
 from kash.model import ActionInput, ActionResult
+from kash.utils.file_utils.file_formats_model import Format
 
 
 @kash_action(
     expected_args=ONE_OR_MORE_ARGS,
     precondition=(has_html_body | has_simple_text_body) & ~has_fullpage_html_body,
+    output_format=Format.html,
 )
 def show_webpage(input: ActionInput) -> ActionResult:
     """
```
kash/actions/core/summarize_as_bullets.py
CHANGED

```diff
@@ -47,7 +47,7 @@ llm_options = LLMOptions(
 )
 
 
-@kash_action(llm_options=llm_options, params=common_params("model")
+@kash_action(llm_options=llm_options, params=common_params("model"))
 def summarize_as_bullets(item: Item, model: LLMName = LLM.default_standard) -> Item:
     """
     Summarize text as bullet points.
```
kash/config/logger.py
CHANGED

```diff
@@ -281,7 +281,7 @@ def _do_logging_setup(log_settings: LogSettings):
 def prefix(line: str, emoji: str = "", warn_emoji: str = "") -> str:
     prefix = task_stack_prefix_str()
     emojis = f"{warn_emoji}{emoji}".strip()
-    return "
+    return "".join(filter(None, [prefix, emojis, line]))
 
 
 def prefix_args(
```
kash/docs/markdown/topics/a2_installation.md
CHANGED

````diff
@@ -60,8 +60,9 @@ These are for `kash-media` but you can use a `kash-shell` for a more basic setup
 
 ```shell
 sudo apt-get update
-sudo apt-get install -y libgl1 ffmpeg libmagic-dev
-
+sudo apt-get install -y libgl1 ffmpeg libmagic-dev imagemagick bat ripgrep hexyl
+
+# Or for additional command-line tools, pixi is better on Ubuntu:
 curl -fsSL https://pixi.sh/install.sh | sh
 . ~/.bashrc
 pixi global install ripgrep bat eza hexyl imagemagick zoxide
````
kash/exec/action_decorators.py
CHANGED

```diff
@@ -37,6 +37,7 @@ from kash.model.params_model import Param, ParamDeclarations, TypedParamValues
 from kash.model.preconditions_model import Precondition
 from kash.utils.common.function_inspect import FuncParam, inspect_function_params
 from kash.utils.errors import InvalidDefinition
+from kash.utils.file_utils.file_formats_model import Format
 
 log = get_logger(__name__)
 
@@ -204,6 +205,7 @@ def kash_action(
     arg_type: ArgType = ArgType.Locator,
     expected_args: ArgCount = ONE_ARG,
     output_type: ItemType = ItemType.doc,
+    output_format: Format | None = None,
     expected_outputs: ArgCount = ONE_ARG,
     params: ParamDeclarations = (),
     run_per_item: bool | None = None,
@@ -318,6 +320,7 @@ def kash_action(
         self.arg_type = arg_type
         self.uses_selection = uses_selection
         self.output_type = output_type
+        self.output_format = output_format
         self.interactive_input = interactive_input
         self.live_output = live_output
         self.mcp_tool = mcp_tool
@@ -397,9 +400,8 @@ def kash_action(
             context = ExecContext(action, current_runtime_settings())
 
             # Run the action.
-
-
-            return result
+            result_with_paths = run_action_with_caching(context, action_input)
+            return result_with_paths.result
 
     if is_simple_func:
         # Need to convert back to a SimpleActionFunction.
```
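For context, here is how an action can declare the new `output_format` parameter. This is a minimal sketch, assuming `kash_action` is imported from `kash.exec.action_decorators` (real actions may use a different import path); it mirrors the decorator usage in the action files above:

```python
# Sketch of the new output_format parameter (names per this diff; import
# path for kash_action is an assumption).
from kash.exec.action_decorators import kash_action
from kash.model import Item
from kash.utils.file_utils.file_formats_model import Format


@kash_action(output_format=Format.markdown)
def my_action(item: Item) -> Item:
    # The decorator stores output_format on the action (self.output_format),
    # declaring the format of the action's output items.
    return item.derived_copy(body=item.body)
```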
kash/exec/action_exec.py
CHANGED

```diff
@@ -1,5 +1,5 @@
 import time
-from dataclasses import replace
+from dataclasses import dataclass, replace
 from pathlib import Path
 
 from prettyfmt import fmt_lines, fmt_path, plural
@@ -29,8 +29,10 @@ from kash.model.operations_model import Input, Operation, Source
 from kash.model.params_model import ALL_COMMON_PARAMS, GLOBAL_PARAMS, RawParamValues
 from kash.model.paths_model import StorePath
 from kash.shell.output.shell_output import PrintHooks
+from kash.utils.common.s3_utils import get_s3_parent_folder, s3_sync_to_folder
 from kash.utils.common.task_stack import task_stack
 from kash.utils.common.type_utils import not_none
+from kash.utils.common.url import Url, is_s3_url
 from kash.utils.errors import ContentError, InvalidOutput, get_nonfatal_exceptions
 from kash.workspaces import Selection, current_ws
 
@@ -182,7 +184,7 @@ def run_action_operation(
             this_op = replace(operation, arguments=[operation.arguments[i]])
         else:
             this_op = operation
-        item.
+        item.update_source(Source(operation=this_op, output_num=i, cacheable=action.cacheable))
 
         # Override the state if appropriate (this handles marking items as transient).
         if settings.override_state:
@@ -339,20 +341,41 @@ def save_action_result(
     return result_store_paths, archived_store_paths
 
 
+@dataclass(frozen=True)
+class ResultWithPaths:
+    """
+    Result of an action, including the store paths of any S3 items created.
+    """
+
+    result: ActionResult
+    result_paths: list[StorePath]
+    archived_paths: list[StorePath]
+    s3_paths: list[Url]
+
+
 def run_action_with_caching(
     exec_context: ExecContext, action_input: ActionInput
-) ->
+) -> ResultWithPaths:
     """
     Run an action, including validation, only rerunning if `rerun` requested or
     result is not already present. Returns the result, the store paths of the
     result items, and the store paths of any archived items.
 
+    Also handles optional S3 syncing if the input was from S3.
+
     Note: Mutates the input but only to add `context` to each item.
     """
     action = exec_context.action
     settings = exec_context.settings
     ws = settings.workspace
 
+    # If the input is from S3, we note the parent folder to copy the output back to.
+    s3_parent_folder = None
+    if exec_context.settings.sync_to_s3 and action_input.items and action_input.items[0].url:
+        url = action_input.items[0].url
+        if url and is_s3_url(url):
+            s3_parent_folder = get_s3_parent_folder(url)
+
     # Assemble the operation and validate the action input.
     operation = validate_action_input(exec_context, ws, action, action_input)
 
@@ -408,7 +431,26 @@
     finally:
         action_input.clear_context()
 
-
+    # If the action created an S3 item, we copy it back to the same S3 parent folder.
+    # Only do this for the first result, for simplicity.
+    s3_urls: list[Url] = []
+    if s3_parent_folder and len(result_store_paths) > 0:
+        log.warning(
+            "Source was an S3 path so syncing result S3: %s -> %s",
+            result_store_paths[0],
+            s3_parent_folder,
+        )
+        s3_urls = s3_sync_to_folder(
+            result_store_paths[0], s3_parent_folder, include_sidematter=True
+        )
+        log.message("Synced result to S3:\n%s", fmt_lines(s3_urls))
+
+    return ResultWithPaths(
+        result=result,
+        result_paths=result_store_paths,
+        archived_paths=archived_store_paths,
+        s3_paths=s3_urls,
+    )
 
 
 def run_action_with_shell_context(
@@ -486,7 +528,10 @@
     input = prepare_action_input(*args, refetch=refetch)
 
     # Finally, run the action.
-
+    result_with_paths = run_action_with_caching(context, input)
+    result = result_with_paths.result
+    result_store_paths = result_with_paths.result_paths
+    archived_store_paths = result_with_paths.archived_paths
 
     # Implement any path operations from the output and/or select the final output
     if not internal_call:
```
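Since `run_action_with_caching` now returns a dataclass instead of a bare tuple, call sites unpack its fields by name, as the `run_action_with_shell_context` hunk above shows. A sketch of the new call shape, with `context` and `action_input` assumed already constructed:

```python
# Sketch: consuming the new ResultWithPaths (field names taken from this diff).
result_with_paths = run_action_with_caching(context, action_input)

result = result_with_paths.result                        # the ActionResult itself
result_store_paths = result_with_paths.result_paths      # StorePaths of result items
archived_store_paths = result_with_paths.archived_paths  # StorePaths of archived items
s3_urls = result_with_paths.s3_paths                     # non-empty only when synced back to S3
```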
kash/exec/fetch_url_items.py
CHANGED

```diff
@@ -98,9 +98,11 @@ def fetch_url_item_content(
     from kash.workspaces import current_ws
 
     ws = current_ws()
-
+    # We could check for description too, but many pages don't have one.
+    has_key_content = item.title and (not item.has_body or item.body)
+    if not refetch and has_key_content:
         log.info(
-            "Already have title
+            "Already have title so assuming metadata is up to date, will not fetch: %s",
             item.fmt_loc(),
         )
         return FetchItemResult(item, was_cached=True)
```
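The new `has_key_content` expression treats an item as complete when it has a title and, if its type carries a body, the body is present. A standalone illustration of that truthiness logic (plain values, not the kash `Item` API):

```python
# Standalone illustration of the new skip condition; not the kash Item API.
def has_key_content(title: str | None, has_body: bool, body: str | None) -> bool:
    return bool(title and (not has_body or body))

assert has_key_content("A title", has_body=False, body=None)     # metadata-only: title suffices
assert has_key_content("A title", has_body=True, body="text")    # body expected and present
assert not has_key_content("A title", has_body=True, body=None)  # body expected but missing: refetch
assert not has_key_content(None, has_body=False, body=None)      # no title at all: refetch
```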
kash/exec/llm_transforms.py
CHANGED

```diff
@@ -1,4 +1,5 @@
 from dataclasses import replace
+from typing import Unpack
 
 from chopdiff.docs import DiffFilter, TextDoc
 from chopdiff.transforms import WindowSettings, filtered_transform
@@ -12,7 +13,7 @@ from kash.llm_utils.fuzzy_parsing import strip_markdown_fence
 from kash.llm_utils.llm_completion import llm_template_completion
 from kash.llm_utils.llm_messages import Message, MessageTemplate
 from kash.model.actions_model import LLMOptions
-from kash.model.items_model import Item
+from kash.model.items_model import Item, ItemType, ItemUpdateOptions
 from kash.utils.errors import InvalidInput
 from kash.utils.file_utils.file_formats_model import Format
 from kash.utils.text_handling.doc_normalization import normalize_formatting
@@ -88,10 +89,11 @@ def llm_transform_str(options: LLMOptions, input_str: str, check_no_results: boo
 def llm_transform_item(
     item: Item,
     model: LLMName | None = None,
+    *,
     normalize: bool = True,
     strip_fence: bool = True,
     check_no_results: bool = True,
-
+    **updates: Unpack[ItemUpdateOptions],
 ) -> Item:
     """
     Main function for running an LLM action on an item.
@@ -99,6 +101,14 @@
     Model may be overridden by an explicit model parameter.
     Also by default cleans up and normalizes output as Markdown.
     """
+    # Default to Markdown docs.
+    if "format" not in updates:
+        updates["format"] = Format.markdown
+    if "type" not in updates:
+        updates["type"] = ItemType.doc
+    if "body" not in updates:
+        updates["body"] = None
+
     if not item.context:
         raise InvalidInput(f"LLM actions expect a context on input item: {item}")
     action = item.context.action
@@ -112,13 +122,12 @@
     log.message("LLM transform from action `%s` on item: %s", action.name, item)
     log.message("LLM options: %s", action.llm_options)
 
-
-    result_item = item.derived_copy(body=None, format=format)
+    result_item = item.derived_copy(**updates)
     result_str = llm_transform_str(llm_options, item.body, check_no_results=check_no_results)
    if strip_fence:
         result_str = strip_markdown_fence(result_str)
     if normalize:
-        result_str = normalize_formatting(result_str, format=format)
+        result_str = normalize_formatting(result_str, format=updates["format"])
 
     result_item.body = result_str
     return result_item
```
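`ItemUpdateOptions` itself is not shown in this diff, but `**updates: Unpack[ItemUpdateOptions]` is the standard typed-kwargs pattern: a `total=False` TypedDict whose keys type-check as optional keyword arguments. A self-contained sketch of the pattern (the field set here is an illustrative guess, not the real TypedDict in `kash.model.items_model`):

```python
from typing import TypedDict, Unpack


class UpdateOptions(TypedDict, total=False):
    # Illustrative stand-in for kash's ItemUpdateOptions.
    format: str
    type: str
    body: str | None


def derived_copy(**updates: Unpack[UpdateOptions]) -> dict[str, object]:
    # Callers may pass any subset of the declared keys; type checkers verify them.
    updates.setdefault("format", "markdown")
    return dict(updates)


print(derived_copy(body=None))  # {'body': None, 'format': 'markdown'}
```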
kash/exec/runtime_settings.py
CHANGED

```diff
@@ -102,6 +102,7 @@ def kash_runtime(
     override_state: State | None = None,
     tmp_output: bool = False,
     no_format: bool = False,
+    sync_to_s3: bool = False,
 ) -> RuntimeSettingsManager:
     """
     Set a specific kash execution context for a with block.
@@ -131,5 +132,6 @@
         override_state=override_state,
         tmp_output=tmp_output,
         no_format=no_format,
+        sync_to_s3=sync_to_s3,
     )
     return RuntimeSettingsManager(settings=settings)
```
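A sketch of enabling the new flag at a call site; `kash_runtime`'s leading parameters are not shown in this diff, so `workspace` below is a placeholder:

```python
# Hypothetical call site for the new sync_to_s3 flag; treat as a shape sketch,
# since the full kash_runtime signature is not shown in this diff.
from kash.exec.runtime_settings import kash_runtime

with kash_runtime(workspace, sync_to_s3=True):
    ...  # actions run in this block; S3-sourced inputs sync results back (see action_exec.py above)
```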
kash/file_storage/file_store.py
CHANGED

```diff
@@ -16,19 +16,15 @@ from typing_extensions import override
 
 from kash.config.logger import get_log_settings, get_logger
 from kash.config.text_styles import EMOJI_SAVED
+from kash.file_storage.item_id_index import ItemIdIndex
 from kash.file_storage.metadata_dirs import MetadataDirs
-from kash.file_storage.store_filenames import
-
-    join_suffix,
-    parse_item_filename,
-)
-from kash.model.items_model import Item, ItemId, ItemType
+from kash.file_storage.store_filenames import folder_for_type, join_suffix
+from kash.model.items_model import Item, ItemType
 from kash.model.paths_model import StorePath
 from kash.shell.output.shell_output import PrintHooks
 from kash.utils.common.format_utils import fmt_loc
-from kash.utils.common.uniquifier import Uniquifier
 from kash.utils.common.url import Locator, UnresolvedLocator, Url, is_url
-from kash.utils.errors import FileExists, FileNotFound
+from kash.utils.errors import FileExists, FileNotFound
 from kash.utils.file_utils.file_formats_model import Format
 from kash.utils.file_utils.file_walk import walk_by_dir
 from kash.utils.file_utils.ignore_files import IgnoreChecker, add_to_ignore
@@ -94,9 +90,8 @@ class FileStore(Workspace):
         self.info_logged = False
         self.warnings: list[str] = []
 
-        #
-        self.
-        self.id_map: dict[ItemId, StorePath] = {}
+        # Index of item identifiers and unique slug history
+        self.id_index = ItemIdIndex()
 
         self.dirs = MetadataDirs(base_dir=self.base_dir, is_global_ws=self.is_global_ws)
         if not auto_init and not self.dirs.is_initialized():
@@ -133,7 +128,7 @@ class FileStore(Workspace):
     def _id_index_init(self):
         num_dups = 0
         for store_path in self.walk_items():
-            dup_path = self.
+            dup_path = self.id_index.index_item(store_path, self.load)
             if dup_path:
                 num_dups += 1
 
@@ -142,62 +137,6 @@ class FileStore(Workspace):
                 f"Found {num_dups} duplicate items in store. See `logs` for details."
             )
 
-    @synchronized
-    def _id_index_item(self, store_path: StorePath) -> StorePath | None:
-        """
-        Update metadata index with a new item.
-        """
-        name, item_type, _format, file_ext = parse_item_filename(store_path)
-        if not file_ext:
-            log.debug(
-                "Skipping file with unrecognized name or extension: %s",
-                fmt_path(store_path),
-            )
-            return None
-
-        full_suffix = join_suffix(item_type.name, file_ext.name) if item_type else file_ext.name
-        self.uniquifier.add(name, full_suffix)
-
-        dup_path = None
-
-        try:
-            item = self.load(store_path)
-            item_id = item.item_id()
-            if item_id:
-                old_path = self.id_map.get(item_id)
-                if old_path and old_path != store_path:
-                    dup_path = old_path
-                    log.info(
-                        "Duplicate items (%s):\n%s",
-                        item_id,
-                        fmt_lines([old_path, store_path]),
-                    )
-                self.id_map[item_id] = store_path
-        except (ValueError, SkippableError) as e:
-            log.warning(
-                "Could not load file, skipping from store index: %s: %s",
-                fmt_path(store_path),
-                e,
-            )
-
-        return dup_path
-
-    @synchronized
-    def _id_unindex_item(self, store_path: StorePath):
-        """
-        Remove an item from the metadata index.
-        """
-        try:
-            item = self.load(store_path)
-            item_id = item.item_id()
-            if item_id:
-                try:
-                    self.id_map.pop(item_id, None)
-                except KeyError:
-                    pass  # If we happen to reload a store it might no longer be in memory.
-        except (FileNotFoundError, InvalidFilename):
-            pass
-
     def resolve_to_store_path(self, path: Path | StorePath) -> StorePath | None:
         """
         Return a StorePath if the given path is within the store, otherwise None.
@@ -232,7 +171,6 @@ class FileStore(Workspace):
         """
         return (self.base_dir / store_path).exists()
 
-    @synchronized
     def _pick_filename_for(self, item: Item, *, overwrite: bool = False) -> tuple[str, str | None]:
         """
         Get a suitable filename for this item. If `overwrite` is true, use the the slugified
@@ -251,7 +189,7 @@ class FileStore(Workspace):
         slug = item.slug_name()
         full_suffix = item.get_full_suffix()
         # Get a unique name per item type.
-        unique_slug, old_slugs = self.
+        unique_slug, old_slugs = self.id_index.uniquify_slug(slug, full_suffix)
 
         # Suffix files with both item type and a suitable file extension.
         new_unique_filename = join_suffix(unique_slug, full_suffix)
@@ -274,24 +212,34 @@ class FileStore(Workspace):
         Best effort to see if an item with the same identity is already in the store.
         """
         item_id = item.item_id()
-        log.info("Looking for item by id
+        log.info("Looking for item by id:\n%s", fmt_lines([item, item_id]))
         if not item_id:
             return None
         else:
-            store_path = self.
+            store_path = self.id_index.find_store_path_by_id(item_id)
             if not store_path:
-                # Just in case the
-
-
-
-
-
-
-
-
-
-
+                # Just in case the index is not complete, check the other paths too
+                possible_paths = [
+                    p
+                    for p in [
+                        item.store_path,
+                        self.store_path_for(item)[0],
+                        self.default_path_for(item),
+                    ]
+                    if p
+                ]
+                for p in possible_paths:
+                    if self.exists(p):
+                        old_item = self.load(p)
+                        if old_item.item_id() == item_id:
+                            log.info(
+                                "Item with the same id already saved (disk check):\n%s",
+                                fmt_lines([fmt_loc(p), item_id]),
+                            )
+                            # Ensure index is updated consistently and with logging
+                            self.id_index.index_item(p, self.load)
+                            return p
+                log.info("Also checked paths but no id match:\n%s", fmt_lines(possible_paths))
             if store_path and self.exists(store_path):
                 log.info(
                     "Item with the same id already saved (disk check):\n%s",
@@ -321,9 +269,13 @@ class FileStore(Workspace):
             return self._tmp_path_for(item), None
         elif item.store_path:
             return StorePath(item.store_path), None
-        elif
+        elif (
+            item_id
+            and (existing := self.id_index.find_store_path_by_id(item_id))
+            and self.exists(existing)
+        ):
             # If this item has an identity and we've saved under that id before, use the same store path.
-            store_path =
+            store_path = existing
             log.info(
                 "When picking a store path, found an existing item with same id:\n%s",
                 fmt_lines([fmt_loc(store_path), item_id]),
@@ -415,6 +367,8 @@ class FileStore(Workspace):
             # Indicate this is an item with a store path, not an external path.
             # Keep external_path set so we know body is in that file.
             item.store_path = str(rel_path)
+            # Ensure index is updated for items written directly into the store.
+            self.id_index.index_item(StorePath(rel_path), self.load)
             return StorePath(rel_path)
         else:
             # Otherwise it's still in memory or in a file outside the workspace and we need to save it.
@@ -490,7 +444,7 @@ class FileStore(Workspace):
 
         # Update in-memory store_path only after successful save.
         item.store_path = str(store_path)
-        self.
+        self.id_index.index_item(store_path, self.load)
 
         if not skipped_save:
             log.message("%s Saved item: %s", EMOJI_SAVED, fmt_loc(store_path))
@@ -536,6 +490,7 @@ class FileStore(Workspace):
 
         if isinstance(locator, StorePath) and not reimport:
             log.info("Store path already imported: %s", fmt_loc(locator))
+            self.id_index.index_item(locator, self.load)
             return locator
         elif is_url(locator):
             # Import a URL as a resource.
@@ -669,7 +624,7 @@ class FileStore(Workspace):
         """
         self.selections.remove_values(store_paths)
         for store_path in store_paths:
-            self.
+            self.id_index.unindex_item(store_path, self.load)
         # TODO: Update metadata of all relations that point to this path too.
 
     @synchronized
@@ -679,8 +634,8 @@ class FileStore(Workspace):
         """
         self.selections.replace_values(replacements)
         for store_path, new_store_path in replacements:
-            self.
-            self.
+            self.id_index.unindex_item(store_path, self.load)
+            self.id_index.index_item(new_store_path, self.load)
         # TODO: Update metadata of all relations that point to this path too.
 
     def archive(
@@ -708,12 +663,13 @@ class FileStore(Workspace):
         if not orig_path.exists():
             log.warning("Item to archive not found: %s", fmt_loc(orig_path))
             return store_path
+        # Remove references (including id_map) before moving so we can load the item to compute id.
+        self._remove_references([store_path])
         if with_sidematter:
             move_sidematter(orig_path, full_archive_path)
         else:
             os.makedirs(full_archive_path.parent, exist_ok=True)
             shutil.move(orig_path, full_archive_path)
-        self._remove_references([store_path])
 
         archive_path = StorePath(self.dirs.archive_dir / store_path)
         return archive_path
@@ -732,6 +688,8 @@ class FileStore(Workspace):
             move_sidematter(full_input_path, original_path)
         else:
             shutil.move(full_input_path, original_path)
+        # Re-index after restoring from archive.
+        self.id_index.index_item(store_path, self.load)
         return StorePath(store_path)
 
     @synchronized
@@ -748,7 +706,7 @@ class FileStore(Workspace):
         log.message(
             "Using workspace: %s (%s items)",
             fmt_path(self.base_dir, rel_to_cwd=False),
-            len(self.
+            len(self.id_index),
         )
         log.message(
             "Logging to: %s",
```