kash-shell 0.3.12-py3-none-any.whl → 0.3.13-py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their public registries, and is provided for informational purposes only.
- kash/actions/core/markdownify.py +5 -4
- kash/actions/core/readability.py +4 -4
- kash/actions/core/render_as_html.py +6 -4
- kash/commands/base/basic_file_commands.py +3 -0
- kash/commands/base/diff_commands.py +38 -3
- kash/commands/base/reformat_command.py +1 -1
- kash/commands/base/show_command.py +1 -1
- kash/commands/workspace/selection_commands.py +1 -1
- kash/commands/workspace/workspace_commands.py +62 -16
- kash/docs/load_source_code.py +1 -1
- kash/exec/action_exec.py +4 -5
- kash/exec/fetch_url_metadata.py +8 -5
- kash/exec/importing.py +4 -4
- kash/exec/llm_transforms.py +1 -1
- kash/exec/preconditions.py +7 -7
- kash/file_storage/file_store.py +73 -32
- kash/file_storage/item_file_format.py +1 -1
- kash/file_storage/store_filenames.py +2 -1
- kash/help/help_embeddings.py +2 -2
- kash/llm_utils/clean_headings.py +1 -1
- kash/{text_handling → llm_utils}/custom_sliding_transforms.py +0 -3
- kash/llm_utils/llm_completion.py +1 -1
- kash/local_server/__init__.py +1 -1
- kash/local_server/local_server_commands.py +2 -1
- kash/mcp/__init__.py +1 -1
- kash/mcp/mcp_server_commands.py +8 -2
- kash/media_base/media_cache.py +10 -3
- kash/model/actions_model.py +3 -0
- kash/model/items_model.py +71 -42
- kash/shell/ui/shell_results.py +2 -1
- kash/utils/common/format_utils.py +0 -8
- kash/utils/common/import_utils.py +46 -18
- kash/utils/file_utils/file_formats_model.py +46 -26
- kash/utils/file_utils/filename_parsing.py +41 -16
- kash/{text_handling → utils/text_handling}/doc_normalization.py +10 -8
- kash/utils/text_handling/escape_html_tags.py +156 -0
- kash/{text_handling → utils/text_handling}/markdown_utils.py +0 -3
- kash/utils/text_handling/markdownify_utils.py +87 -0
- kash/{text_handling → utils/text_handling}/unified_diffs.py +1 -44
- kash/web_content/file_cache_utils.py +42 -34
- kash/web_content/local_file_cache.py +29 -12
- kash/web_content/web_extract.py +1 -1
- kash/web_content/web_extract_readabilipy.py +4 -2
- kash/web_content/web_fetch.py +42 -7
- kash/web_content/web_page_model.py +2 -1
- kash/web_gen/simple_webpage.py +1 -1
- kash/web_gen/templates/base_styles.css.jinja +134 -16
- kash/web_gen/templates/simple_webpage.html.jinja +1 -1
- kash/workspaces/selections.py +2 -2
- kash/workspaces/workspace_importing.py +1 -1
- kash/workspaces/workspace_output.py +2 -2
- kash/xonsh_custom/load_into_xonsh.py +4 -2
- {kash_shell-0.3.12.dist-info → kash_shell-0.3.13.dist-info}/METADATA +1 -1
- {kash_shell-0.3.12.dist-info → kash_shell-0.3.13.dist-info}/RECORD +58 -57
- kash/utils/common/inflection.py +0 -22
- kash/{text_handling → utils/text_handling}/markdown_render.py +0 -0
- {kash_shell-0.3.12.dist-info → kash_shell-0.3.13.dist-info}/WHEEL +0 -0
- {kash_shell-0.3.12.dist-info → kash_shell-0.3.13.dist-info}/entry_points.txt +0 -0
- {kash_shell-0.3.12.dist-info → kash_shell-0.3.13.dist-info}/licenses/LICENSE +0 -0
kash/web_content/file_cache_utils.py
CHANGED

@@ -9,11 +9,12 @@ from kash.config.logger import get_logger
 from kash.config.settings import atomic_global_settings, global_settings
 from kash.model.items_model import Item
 from kash.model.media_model import MediaType
+from kash.model.paths_model import StorePath
 from kash.utils.common.url import Url
 from kash.utils.errors import FileNotFound, InvalidInput
 from kash.utils.file_utils.file_formats_model import detect_media_type
 from kash.web_content.canon_url import canonicalize_url
-from kash.web_content.local_file_cache import Loadable, LocalFileCache
+from kash.web_content.local_file_cache import CacheResult, Loadable, LocalFileCache
 
 log = get_logger(__name__)
 
@@ -40,7 +41,7 @@ def reset_content_cache_dir(path: Path):
 
 def cache_file(
     source: Url | Path | Loadable, global_cache: bool = False, expiration_sec: float | None = None
-) -> tuple[Path, bool]:
+) -> CacheResult:
     """
     Return a local cached copy of the item. If it is an URL, content is fetched.
     If it is a Path or a Loadable, a cached copy is returned.
@@ -50,8 +51,7 @@ def cache_file(
     in which case the global cache is used.
     """
     cache = _global_content_cache if global_cache else _content_cache
-    path, was_cached = cache.cache(source, expiration_sec)
-    return path, was_cached
+    return cache.cache(source, expiration_sec)
 
 
 def cache_api_response(
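For callers, the change to cache_file() is a switch from tuple unpacking to attribute access on the new CacheResult. A minimal before/after sketch (the URL and call site are illustrative, not from the package):

    from kash.utils.common.url import Url
    from kash.web_content.file_cache_utils import cache_file

    # 0.3.12 returned a (path, was_cached) tuple:
    # path, was_cached = cache_file(Url("https://example.com/page.html"))

    # 0.3.13 returns a CacheResult:
    result = cache_file(Url("https://example.com/page.html"))
    path = result.content.path        # local cached copy
    headers = result.content.headers  # HTTP headers on a fresh web fetch, else None
    was_cached = result.was_cached
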
@@ -64,9 +64,9 @@ def cache_api_response(
     Cache an API response. By default parse the response as JSON.
     """
     cache = _global_content_cache if global_cache else _content_cache
-    path, was_cached = cache.cache(url, expiration_sec)
-    …
-    return …
+    result = cache.cache(url, expiration_sec)
+    parsed_result = parser(result.content.path.read_text())
+    return parsed_result, result.was_cached
 
 
 def cache_resource(
@@ -74,7 +74,8 @@
 ) -> dict[MediaType, Path]:
     """
     Cache a resource item for an external local path or a URL, fetching or
-    copying as needed
+    copying as needed and returning direct paths to the cached content.
+    For media this may yield more than one format.
     """
     from kash.exec.preconditions import is_resource
     from kash.media_base.media_services import is_media_url
@@ -83,62 +84,69 @@
     if not is_resource(item):
         raise ValueError(f"Item is not a resource: {item}")
 
-    path = None
-    …
+    path: Path | None = None
+    results: dict[MediaType, Path] = {}
+    cache_result: CacheResult | None = None
+
+    # Cache the content using media or content cache.
     if item.url:
         if is_media_url(item.url):
-            …
+            results = cache_media(item.url)
         else:
-            …
+            cache_result = cache_file(item.url, global_cache, expiration_sec)
     elif item.external_path:
-        …
-        if not …
-            raise FileNotFound(f"External path not found: {…
-        …
+        ext_path = Path(item.external_path)
+        if not ext_path.is_file():
+            raise FileNotFound(f"External path not found: {ext_path}")
+        cache_result = cache_file(ext_path, global_cache, expiration_sec)
     elif item.original_filename:
-        …
-        if not …
-            raise FileNotFound(f"Original filename not found: {…
-        …
+        orig_path = Path(item.original_filename)
+        if not orig_path.is_file():
+            raise FileNotFound(f"Original filename not found: {orig_path}")
+        cache_result = cache_file(orig_path, global_cache, expiration_sec)
     else:
         raise ValueError(f"Item has no URL or external path: {item}")
 
+    if cache_result:
+        path = cache_result.content.path
+
     # If we just have the local file path, determine its format.
-    if not …
-        …
+    if not results and path:
+        results = {detect_media_type(path): path}
 
     log.message(
         "Cached resource %s:\n%s",
         item.as_str_brief(),
         fmt_lines(
             f"{media_type.value}: {fmt_path(media_path)}"
-            for media_type, media_path in …
+            for media_type, media_path in results.items()
         ),
     )
 
-    return …
+    return results
 
 
 def get_url_html(
     item: Item, global_cache: bool = False, expiration_sec: float | None = None
-) -> tuple[Url, str]:
+) -> tuple[Url | StorePath, str]:
     """
     Returns the HTML content of an URL item, using the content cache,
     or the body of the item if it has a URL and HTML body.
     """
-    from kash.exec.preconditions import has_html_body, …
-
-    if not item.url:
-        raise InvalidInput("Item must have a URL or an HTML body")
-    url = Url(canonicalize_url(item.url))
+    from kash.exec.preconditions import has_html_body, is_url_resource
 
-    if …
-        …
+    if is_url_resource(item) and item.url and not item.has_body:
+        # Need to fetch the content.
+        locator = Url(canonicalize_url(item.url))
+        path = cache_file(locator, global_cache, expiration_sec).content.path
         with open(path) as file:
             html_content = file.read()
     else:
         if not item.body or not has_html_body(item):
-            raise InvalidInput("Item must …
+            raise InvalidInput("Item must be a URL resource or have an HTML body")
+        if not item.store_path:
+            raise InvalidInput("Item missing store path")
         html_content = item.body
+        locator = StorePath(item.store_path)
 
-    return url, html_content
+    return locator, html_content
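The locator returned by get_url_html() now tells the caller where the HTML actually came from: a canonicalized Url when content was fetched through the cache, or the item's own StorePath when the body was already present. A hypothetical caller (not from the diff):

    locator, html = get_url_html(item)
    if isinstance(locator, StorePath):
        log.info("Read HTML from stored item: %s", locator)
    else:
        log.info("Fetched HTML from: %s", locator)
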
kash/web_content/local_file_cache.py
CHANGED

@@ -12,9 +12,10 @@ from strif import atomic_output_file, copyfile_atomic
 
 from kash.utils.common.url import Url, is_file_url, is_url, normalize_url, parse_file_url
 from kash.utils.errors import FileNotFound
+from kash.utils.file_utils.file_formats import MimeType
 from kash.utils.file_utils.file_formats_model import choose_file_ext
 from kash.web_content.dir_store import DirStore
-from kash.web_content.web_fetch import download_url
+from kash.web_content.web_fetch import HttpHeaders, download_url
 
 log = logging.getLogger(__name__)
 
@@ -73,9 +74,26 @@ An item that can be cached as a file.
     """
 
 
-def _suffix_for(cacheable: Cacheable) -> str | None:
+@dataclass(frozen=True)
+class CacheContent:
+    """
+    An item in the local file cache. If it was a cache miss for a web-fetched URL,
+    also has HTTP headers.
+    """
+
+    path: Path
+    headers: HttpHeaders | None
+
+
+@dataclass(frozen=True)
+class CacheResult:
+    content: CacheContent
+    was_cached: bool
+
+
+def _suffix_for(cacheable: Cacheable, mime_type: MimeType | None = None) -> str | None:
     key = cacheable.key if isinstance(cacheable, Loadable) else cacheable
-    file_ext = choose_file_ext(key)
+    file_ext = choose_file_ext(key, mime_type)
     return file_ext.dot_ext if file_ext else None
 
 
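The split into two frozen dataclasses encodes when headers are available: a cache hit carries no headers (nothing was fetched), while a miss that went over HTTP can preserve them. A sketch with illustrative values:

    # Cache hit: content served from disk, no HTTP exchange, so no headers.
    hit = CacheResult(CacheContent(Path("web_cache/ab12cd.html"), None), was_cached=True)

    # Cache miss over HTTP: headers from the fetch are preserved.
    miss = CacheResult(
        CacheContent(Path("web_cache/ab12cd.html"), HttpHeaders({"content-type": "text/html"})),
        was_cached=False,
    )
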
@@ -135,7 +153,7 @@ class LocalFileCache(DirStore):
         if backup_url and mode in (WebCacheMode.TEST, WebCacheMode.UPDATE):
             self._restore(backup_url)
 
-    def _load_source(self, source: Cacheable) -> Path:
+    def _load_source(self, source: Cacheable) -> CacheContent:
         """
         Load or compute the given source and save it to the cache.
         """
@@ -147,6 +165,7 @@ class LocalFileCache(DirStore):
         suffix = _suffix_for(source)
         cache_path = self.path_for(key, folder=self.folder, suffix=_suffix_for(source))
 
+        headers = None
         if isinstance(source, Path) or (isinstance(source, str) and is_file_url(source)):
             # Local file or file:// URL.
             url_or_path = source
@@ -165,7 +184,8 @@ class LocalFileCache(DirStore):
             # URL.
             url = _normalize_url(source)
             log.info("Downloading to cache: %s -> %s", url, fmt_path(cache_path))
-            download_url(url, cache_path)
+            headers = download_url(url, cache_path)
+            log.debug("Response headers: %s", headers)
         elif isinstance(source, Loadable):
             # Arbitrary loadable. Load and save (atomically).
             with atomic_output_file(
@@ -180,7 +200,7 @@ class LocalFileCache(DirStore):
         else:
             raise ValueError(f"Invalid source: {source}")
 
-        return cache_path
+        return CacheContent(cache_path, headers)
 
     def _age_in_sec(self, cache_path: Path) -> float:
         now = time.time()
@@ -210,7 +230,7 @@ class LocalFileCache(DirStore):
 
         return cache_path is not None and not self._is_expired(cache_path, expiration_sec)
 
-    def cache(self, source: Cacheable, expiration_sec: float | None = None) -> tuple[Path, bool]:
+    def cache(self, source: Cacheable, expiration_sec: float | None = None) -> CacheResult:
         """
         Returns cached download path of given URL and whether it was previously cached.
         For file:// URLs does a copy.
@@ -221,13 +241,10 @@ class LocalFileCache(DirStore):
 
         if cache_path and not self._is_expired(cache_path, expiration_sec):
             log.info("URL in cache, not fetching: %s: %s", key, fmt_path(cache_path))
-            return cache_path, True
+            return CacheResult(CacheContent(cache_path, None), True)
         else:
             log.info("Caching new copy: %s", key)
-            return (
-                self._load_source(source),
-                False,
-            )
+            return CacheResult(self._load_source(source), False)
 
     def backup(self) -> None:
         if not self.backup_url:
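Expiration composes with the new result type: expiration_sec=0 makes any existing copy count as expired, forcing a refetch (the convention fetch_extract() below uses for its refetch flag), while the default None means a cached copy never expires. A sketch against the cache_file() wrapper; the feed URL is made up:

    # expiration_sec=0: any cached copy is stale, so this refetches.
    fresh = cache_file(Url("https://example.com/feed.xml"), expiration_sec=0)
    assert fresh.was_cached is False

    # Default expiration_sec=None: the copy just written never expires.
    again = cache_file(Url("https://example.com/feed.xml"))
    assert again.was_cached is True
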
kash/web_content/web_extract.py
CHANGED

@@ -22,7 +22,7 @@ def fetch_extract(
     """
     expiration_sec = 0 if refetch else None
     if use_cache:
-        path, _was_cached = cache_file(url, expiration_sec=expiration_sec)
+        path = cache_file(url, expiration_sec=expiration_sec).content.path
         with open(path, "rb") as file:
             content = file.read()
     page_data = extractor(url, content)
kash/web_content/web_extract_readabilipy.py
CHANGED

@@ -1,9 +1,11 @@
+from pathlib import Path
+
 from kash.utils.common.url import Url
 from kash.utils.errors import InvalidInput
 from kash.web_content.web_page_model import WebPageData
 
 
-def extract_text_readabilipy(url: Url, html: str) -> WebPageData:
+def extract_text_readabilipy(locator: Url | Path, html: str) -> WebPageData:
     """
     Extracts text from HTML using readability.
     This requires Node readability. Justext is an alternative and seems good for
@@ -16,7 +18,7 @@ def extract_text_readabilipy(url: Url, html: str) -> WebPageData:
         raise InvalidInput("No clean HTML found")
 
     return WebPageData(
-        url=url,
+        locator=locator,
         title=result["title"],
         byline=result["byline"],
         clean_html=result["content"],
kash/web_content/web_fetch.py
CHANGED

@@ -1,14 +1,20 @@
+from __future__ import annotations
+
 import logging
+from dataclasses import dataclass
+from functools import cached_property
 from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING, Any
 from urllib.parse import urlparse
 
-import httpx
 from strif import atomic_output_file, copyfile_atomic
-from tqdm import tqdm
 
 from kash.config.env_settings import KashEnv
 from kash.utils.common.url import Url
+from kash.utils.file_utils.file_formats import MimeType
+
+if TYPE_CHECKING:
+    from httpx import Client, Response
 
 log = logging.getLogger(__name__)
 
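This import shuffle is the standard deferred-import pattern: httpx and tqdm move out of module scope so importing web_fetch stays cheap, while TYPE_CHECKING plus the __future__ import keep annotations like Client and Response checkable with no runtime import cost. In miniature (a generic sketch, not kash code):

    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from httpx import Response

    def fetch(url: str) -> Response:
        import httpx  # deferred: the module loads on first call, not at import time
        return httpx.get(url)
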
@@ -30,11 +36,13 @@ def fetch_url(
     timeout: int = DEFAULT_TIMEOUT,
     auth: Any | None = None,
     headers: dict[str, str] | None = None,
-) -> httpx.Response:
+) -> Response:
     """
     Fetch a URL using httpx with logging and reasonable defaults.
     Raise httpx.HTTPError for non-2xx responses.
     """
+    import httpx
+
     with httpx.Client(
         follow_redirects=True,
         timeout=timeout,
@@ -48,36 +56,60 @@
     return response
 
 
+@dataclass(frozen=True)
+class HttpHeaders:
+    """
+    HTTP response headers.
+    """
+
+    headers: dict[str, str]
+
+    @cached_property
+    def mime_type(self) -> MimeType | None:
+        """Get content type header, if available."""
+        for key, value in self.headers.items():
+            if key.lower() == "content-type":
+                return MimeType(value)
+        return None
+
+
 def download_url(
     url: Url,
     target_filename: str | Path,
-    session: httpx.Client | None = None,
+    session: Client | None = None,
     show_progress: bool = False,
     timeout: int = DEFAULT_TIMEOUT,
     auth: Any | None = None,
     headers: dict[str, str] | None = None,
-) -> None:
+) -> HttpHeaders | None:
     """
     Download given file, optionally with progress bar, streaming to a target file.
     Also handles file:// and s3:// URLs. Output file is created atomically.
     Raise httpx.HTTPError for non-2xx responses.
+    Returns response headers for HTTP/HTTPS requests, None for other URL types.
     """
+    import httpx
+    from tqdm import tqdm
+
     target_filename = str(target_filename)
     parsed_url = urlparse(url)
     if show_progress:
         log.info("%s", url)
 
     if parsed_url.scheme == "file" or parsed_url.scheme == "":
-        copyfile_atomic(parsed_url.netloc + parsed_url.path, target_filename)
+        copyfile_atomic(parsed_url.netloc + parsed_url.path, target_filename, make_parents=True)
+        return None
     elif parsed_url.scheme == "s3":
         import boto3  # pyright: ignore
 
         s3 = boto3.resource("s3")
         s3_path = parsed_url.path.lstrip("/")
         s3.Bucket(parsed_url.netloc).download_file(s3_path, target_filename)
+        return None
     else:
         client = session or httpx.Client(follow_redirects=True, timeout=timeout)
         response: httpx.Response | None = None
+        response_headers: dict[str, str] | None = None
         try:
             headers = headers or default_headers()
             log.debug("download_url: using headers: %s", headers)
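With these changes download_url() doubles as a header probe: HTTP/HTTPS fetches return an HttpHeaders whose mime_type property surfaces the raw Content-Type value, while file:// and s3:// downloads return None. A hypothetical call; the URL, target path, and header value are illustrative:

    headers = download_url(Url("https://example.com/report"), "dl/report.bin")
    if headers:  # None for file:// and s3:// URLs
        print(headers.mime_type)  # e.g. MimeType("text/html; charset=utf-8")
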
@@ -90,6 +122,7 @@
                 headers=headers,
             ) as response:
                 response.raise_for_status()
+                response_headers = dict(response.headers)
                 total_size = int(response.headers.get("content-length", "0"))
 
                 with atomic_output_file(target_filename, make_parents=True) as temp_filename:
@@ -107,3 +140,5 @@
             client.close()
         if response:
             response.raise_for_status()  # In case of errors during streaming
+
+    return HttpHeaders(response_headers) if response_headers else None
kash/web_content/web_page_model.py
CHANGED

@@ -1,3 +1,4 @@
+from pathlib import Path
 from typing import Protocol
 
 from prettyfmt import abbrev_obj
@@ -12,7 +13,7 @@ class WebPageData:
     Data about a web page, including URL, title and optionally description and extracted content.
     """
 
-    url: Url
+    locator: Url | Path
     title: str | None = None
     byline: str | None = None
     description: str | None = None
kash/web_gen/simple_webpage.py
CHANGED

@@ -15,7 +15,7 @@ def simple_webpage_render(
     return render_web_template(
         template_filename=page_template,
         data={
-            "title": item.title,
+            "title": item.abbrev_title(),
             "add_title_h1": add_title_h1,
             "content_html": item.body_as_html(),
             "thumbnail_url": item.thumbnail_url,