pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import dataclasses
|
|
2
|
-
from typing import Optional, Union
|
|
3
2
|
|
|
4
3
|
import pandas as pd
|
|
5
4
|
from pandas.io.formats.style import Styler
|
|
@@ -7,11 +6,11 @@ from pandas.io.formats.style import Styler
|
|
|
7
6
|
|
|
8
7
|
@dataclasses.dataclass
|
|
9
8
|
class _Descriptor:
|
|
10
|
-
body:
|
|
9
|
+
body: str | pd.DataFrame
|
|
11
10
|
# The remaining fields only affect the behavior if `body` is a pd.DataFrame.
|
|
12
11
|
show_index: bool
|
|
13
12
|
show_header: bool
|
|
14
|
-
styler:
|
|
13
|
+
styler: Styler | None = None
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
class DescriptionHelper:
|
|
@@ -25,6 +24,7 @@ class DescriptionHelper:
|
|
|
25
24
|
DescriptionHelper can convert a list of descriptors into either HTML or plaintext and do something reasonable
|
|
26
25
|
in each case.
|
|
27
26
|
"""
|
|
27
|
+
|
|
28
28
|
__descriptors: list[_Descriptor]
|
|
29
29
|
|
|
30
30
|
def __init__(self) -> None:
|
|
@@ -32,10 +32,10 @@ class DescriptionHelper:
|
|
|
32
32
|
|
|
33
33
|
def append(
|
|
34
34
|
self,
|
|
35
|
-
descriptor:
|
|
35
|
+
descriptor: str | pd.DataFrame,
|
|
36
36
|
show_index: bool = False,
|
|
37
37
|
show_header: bool = True,
|
|
38
|
-
styler:
|
|
38
|
+
styler: Styler | None = None,
|
|
39
39
|
) -> None:
|
|
40
40
|
self.__descriptors.append(_Descriptor(descriptor, show_index, show_header, styler))
|
|
41
41
|
|
|
@@ -69,18 +69,17 @@ class DescriptionHelper:
|
|
|
69
69
|
return (
|
|
70
70
|
# Render the string as a single-cell DataFrame. This will ensure a consistent style of output in
|
|
71
71
|
# cases where strings appear alongside DataFrames in the same DescriptionHelper.
|
|
72
|
-
pd.DataFrame([descriptor.body])
|
|
73
|
-
.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left', 'font-weight': 'bold'})
|
|
74
|
-
.hide(axis='index')
|
|
72
|
+
pd.DataFrame([descriptor.body])
|
|
73
|
+
.style.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left', 'font-weight': 'bold'})
|
|
74
|
+
.hide(axis='index')
|
|
75
|
+
.hide(axis='columns')
|
|
75
76
|
)
|
|
76
77
|
else:
|
|
77
78
|
styler = descriptor.styler
|
|
78
79
|
if styler is None:
|
|
79
80
|
styler = descriptor.body.style
|
|
80
|
-
styler = (
|
|
81
|
-
|
|
82
|
-
.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'})
|
|
83
|
-
.set_table_styles([dict(selector='th', props=[('text-align', 'left')])])
|
|
81
|
+
styler = styler.set_properties(None, **{'white-space': 'pre-wrap', 'text-align': 'left'}).set_table_styles(
|
|
82
|
+
[{'selector': 'th', 'props': [('text-align', 'left')]}]
|
|
84
83
|
)
|
|
85
84
|
if not descriptor.show_header:
|
|
86
85
|
styler = styler.hide(axis='columns')
|
pixeltable/utils/documents.py
CHANGED
|
@@ -1,86 +1,85 @@
|
|
|
1
1
|
import dataclasses
|
|
2
|
-
|
|
2
|
+
import os
|
|
3
3
|
|
|
4
4
|
import bs4
|
|
5
|
-
import fitz # type: ignore[import-untyped]
|
|
6
5
|
import puremagic
|
|
6
|
+
from pypdfium2 import PdfDocument # type: ignore[import-untyped]
|
|
7
7
|
|
|
8
|
-
import
|
|
8
|
+
from pixeltable import exceptions as excs, type_system as ts
|
|
9
9
|
from pixeltable.env import Env
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
@dataclasses.dataclass
|
|
13
13
|
class DocumentHandle:
|
|
14
14
|
format: ts.DocumentType.DocumentFormat
|
|
15
|
-
bs_doc:
|
|
16
|
-
md_ast:
|
|
17
|
-
pdf_doc:
|
|
15
|
+
bs_doc: bs4.BeautifulSoup | None = None
|
|
16
|
+
md_ast: dict | None = None
|
|
17
|
+
pdf_doc: PdfDocument | None = None
|
|
18
|
+
txt_doc: str | None = None
|
|
18
19
|
|
|
19
20
|
|
|
20
|
-
def get_document_handle(path: str) ->
|
|
21
|
-
|
|
21
|
+
def get_document_handle(path: str) -> DocumentHandle:
|
|
22
|
+
_, extension = os.path.splitext(path)
|
|
23
|
+
handle = get_handle_by_extension(path, extension)
|
|
24
|
+
if handle is not None:
|
|
25
|
+
return handle
|
|
22
26
|
|
|
23
|
-
if
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
+
# if no extension, use puremagic to determine the type
|
|
28
|
+
extension = puremagic.from_file(path)
|
|
29
|
+
handle = get_handle_by_extension(path, extension)
|
|
30
|
+
if handle is not None:
|
|
31
|
+
return handle
|
|
27
32
|
|
|
28
|
-
|
|
29
|
-
bs_doc = get_html_handle(path)
|
|
30
|
-
if bs_doc is not None:
|
|
31
|
-
return DocumentHandle(format=ts.DocumentType.DocumentFormat.HTML, bs_doc=bs_doc)
|
|
33
|
+
raise excs.Error(f'Unrecognized document format: {path}')
|
|
32
34
|
|
|
33
|
-
if doc_format == '.md':
|
|
34
|
-
md_ast = get_markdown_handle(path)
|
|
35
|
-
if md_ast is not None:
|
|
36
|
-
return DocumentHandle(format=ts.DocumentType.DocumentFormat.MD, md_ast=md_ast)
|
|
37
35
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
if bs_doc is not None:
|
|
41
|
-
return DocumentHandle(format=ts.DocumentType.DocumentFormat.XML, bs_doc=bs_doc)
|
|
36
|
+
def get_handle_by_extension(path: str, extension: str) -> DocumentHandle | None:
|
|
37
|
+
doc_format = ts.DocumentType.DocumentFormat.from_extension(extension)
|
|
42
38
|
|
|
43
|
-
return None
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def get_pdf_handle(path: str) -> Optional[fitz.Document]:
|
|
47
|
-
try:
|
|
48
|
-
doc = fitz.open(path)
|
|
49
|
-
# check pdf (bc it will work for images)
|
|
50
|
-
if not doc.is_pdf:
|
|
51
|
-
return None
|
|
52
|
-
# try to read one page
|
|
53
|
-
next(page for page in doc)
|
|
54
|
-
return doc
|
|
55
|
-
except Exception:
|
|
56
|
-
return None
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def get_html_handle(path: str) -> Optional[bs4.BeautifulSoup]:
|
|
60
39
|
try:
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
40
|
+
if doc_format == ts.DocumentType.DocumentFormat.HTML:
|
|
41
|
+
return DocumentHandle(doc_format, bs_doc=get_html_handle(path))
|
|
42
|
+
if doc_format == ts.DocumentType.DocumentFormat.MD:
|
|
43
|
+
return DocumentHandle(doc_format, md_ast=get_markdown_handle(path))
|
|
44
|
+
if doc_format == ts.DocumentType.DocumentFormat.PDF:
|
|
45
|
+
return DocumentHandle(doc_format, pdf_doc=PdfDocument(path))
|
|
46
|
+
if doc_format == ts.DocumentType.DocumentFormat.XML:
|
|
47
|
+
return DocumentHandle(doc_format, bs_doc=get_xml_handle(path))
|
|
48
|
+
if doc_format == ts.DocumentType.DocumentFormat.TXT:
|
|
49
|
+
return DocumentHandle(doc_format, txt_doc=get_txt(path))
|
|
50
|
+
except Exception as exc:
|
|
51
|
+
raise excs.Error(f'An error occurred processing a {doc_format} document: {path}') from exc
|
|
66
52
|
|
|
53
|
+
return None
|
|
67
54
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
55
|
+
|
|
56
|
+
def get_html_handle(path: str) -> bs4.BeautifulSoup:
|
|
57
|
+
with open(path, 'r', encoding='utf8') as fp:
|
|
58
|
+
doc = bs4.BeautifulSoup(fp, 'lxml')
|
|
59
|
+
if doc.find() is None:
|
|
60
|
+
raise excs.Error(f'Not a valid HTML document: {path}')
|
|
61
|
+
return doc
|
|
75
62
|
|
|
76
63
|
|
|
77
|
-
def get_markdown_handle(path: str) ->
|
|
64
|
+
def get_markdown_handle(path: str) -> dict:
|
|
78
65
|
Env.get().require_package('mistune', [3, 0])
|
|
79
66
|
import mistune
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
67
|
+
|
|
68
|
+
with open(path, encoding='utf8') as file:
|
|
69
|
+
text = file.read()
|
|
70
|
+
md_ast = mistune.create_markdown(renderer=None)
|
|
71
|
+
return md_ast(text)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_xml_handle(path: str) -> bs4.BeautifulSoup:
|
|
75
|
+
with open(path, 'r', encoding='utf8') as fp:
|
|
76
|
+
doc = bs4.BeautifulSoup(fp, 'xml')
|
|
77
|
+
if doc.find() is None:
|
|
78
|
+
raise excs.Error(f'Not a valid XML document: {path}')
|
|
79
|
+
return doc
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def get_txt(path: str) -> str:
|
|
83
|
+
with open(path, 'r', encoding='utf-8') as fp:
|
|
84
|
+
doc = fp.read()
|
|
85
|
+
return doc
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any, Callable, TypeVar
|
|
3
|
+
|
|
4
|
+
R = TypeVar('R')
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger('pixeltable')
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def run_cleanup(cleanup_func: Callable[..., R], *args: Any, raise_error: bool = True, **kwargs: Any) -> R | None:
|
|
10
|
+
"""
|
|
11
|
+
Runs a cleanup function. If interrupted, retry cleanup.
|
|
12
|
+
The `run_cleanup()` function ensures that the `cleanup_func()` function executes at least once.
|
|
13
|
+
If the `cleanup_func()` is interrupted during execution, it will be retried.
|
|
14
|
+
|
|
15
|
+
Args:
|
|
16
|
+
cleanup_func: an idempotent function
|
|
17
|
+
raise_error: raise an exception if an error occurs during cleanup.
|
|
18
|
+
"""
|
|
19
|
+
try:
|
|
20
|
+
logger.debug(f'Running cleanup function: {cleanup_func.__name__!r}')
|
|
21
|
+
return cleanup_func(*args, **kwargs)
|
|
22
|
+
except KeyboardInterrupt as interrupt:
|
|
23
|
+
# Save original exception and re-attempt cleanup
|
|
24
|
+
original_exception = interrupt
|
|
25
|
+
logger.debug(f'Cleanup {cleanup_func.__name__!r} interrupted, retrying')
|
|
26
|
+
try:
|
|
27
|
+
return cleanup_func(*args, **kwargs)
|
|
28
|
+
except Exception as e:
|
|
29
|
+
# Suppress this exception
|
|
30
|
+
logger.error(f'Cleanup {cleanup_func.__name__!r} failed with exception {e.__class__}: {e}')
|
|
31
|
+
raise KeyboardInterrupt from original_exception
|
|
32
|
+
except Exception as e:
|
|
33
|
+
logger.error(f'Cleanup {cleanup_func.__name__!r} failed with exception {e.__class__}: {e}')
|
|
34
|
+
if raise_error:
|
|
35
|
+
raise e
|
|
36
|
+
return None
|
pixeltable/utils/filecache.py
CHANGED
|
@@ -5,21 +5,22 @@ import hashlib
|
|
|
5
5
|
import logging
|
|
6
6
|
import os
|
|
7
7
|
import warnings
|
|
8
|
-
from collections import OrderedDict, defaultdict
|
|
8
|
+
from collections import OrderedDict, defaultdict
|
|
9
9
|
from dataclasses import dataclass
|
|
10
10
|
from datetime import datetime, timezone
|
|
11
11
|
from pathlib import Path
|
|
12
|
-
from typing import
|
|
12
|
+
from typing import NamedTuple
|
|
13
13
|
from uuid import UUID
|
|
14
14
|
|
|
15
15
|
import pixeltable.exceptions as excs
|
|
16
|
+
from pixeltable.config import Config
|
|
16
17
|
from pixeltable.env import Env
|
|
17
18
|
|
|
18
19
|
_logger = logging.getLogger('pixeltable')
|
|
19
20
|
|
|
21
|
+
|
|
20
22
|
@dataclass
|
|
21
23
|
class CacheEntry:
|
|
22
|
-
|
|
23
24
|
key: str
|
|
24
25
|
tbl_id: UUID
|
|
25
26
|
col_id: int
|
|
@@ -56,7 +57,8 @@ class FileCache:
|
|
|
56
57
|
TODO:
|
|
57
58
|
- implement MRU eviction for queries that exceed the capacity
|
|
58
59
|
"""
|
|
59
|
-
|
|
60
|
+
|
|
61
|
+
__instance: FileCache | None = None
|
|
60
62
|
|
|
61
63
|
cache: OrderedDict[str, CacheEntry]
|
|
62
64
|
total_size: int
|
|
@@ -77,11 +79,18 @@ class FileCache:
|
|
|
77
79
|
evicted_working_set_keys: set[str]
|
|
78
80
|
new_redownload_witnessed: bool # whether a new re-download has occurred since the last time a warning was issued
|
|
79
81
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
82
|
+
class FileCacheColumnStats(NamedTuple):
|
|
83
|
+
tbl_id: UUID
|
|
84
|
+
col_id: int
|
|
85
|
+
num_files: int
|
|
86
|
+
total_size: int
|
|
87
|
+
|
|
88
|
+
class FileCacheStats(NamedTuple):
|
|
89
|
+
total_size: int
|
|
90
|
+
num_requests: int
|
|
91
|
+
num_hits: int
|
|
92
|
+
num_evictions: int
|
|
93
|
+
column_stats: list[FileCache.FileCacheColumnStats]
|
|
85
94
|
|
|
86
95
|
@classmethod
|
|
87
96
|
def get(cls) -> FileCache:
|
|
@@ -93,7 +102,7 @@ class FileCache:
|
|
|
93
102
|
def init(cls) -> None:
|
|
94
103
|
cls.__instance = cls()
|
|
95
104
|
|
|
96
|
-
def __init__(self):
|
|
105
|
+
def __init__(self) -> None:
|
|
97
106
|
self.cache = OrderedDict()
|
|
98
107
|
self.total_size = 0
|
|
99
108
|
self.capacity_bytes = int(Env.get()._file_cache_size_g * (1 << 30))
|
|
@@ -117,17 +126,18 @@ class FileCache:
|
|
|
117
126
|
return 0
|
|
118
127
|
return int(self.total_size / len(self.cache))
|
|
119
128
|
|
|
120
|
-
def num_files(self, tbl_id:
|
|
129
|
+
def num_files(self, tbl_id: UUID | None = None) -> int:
|
|
121
130
|
if tbl_id is None:
|
|
122
131
|
return len(self.cache)
|
|
123
132
|
return sum(e.tbl_id == tbl_id for e in self.cache.values())
|
|
124
133
|
|
|
125
|
-
def clear(self, tbl_id:
|
|
134
|
+
def clear(self, tbl_id: UUID | None = None) -> None:
|
|
126
135
|
"""
|
|
127
136
|
For testing purposes: allow resetting capacity and stats.
|
|
128
137
|
"""
|
|
129
138
|
if tbl_id is None:
|
|
130
|
-
# We need to store the entries to remove in a list, because we can't remove items from a dict
|
|
139
|
+
# We need to store the entries to remove in a list, because we can't remove items from a dict
|
|
140
|
+
# while iterating
|
|
131
141
|
entries_to_remove = list(self.cache.values())
|
|
132
142
|
_logger.debug(f'clearing {self.num_files()} entries from file cache')
|
|
133
143
|
self.num_requests, self.num_hits, self.num_evictions = 0, 0, 0
|
|
@@ -153,8 +163,9 @@ class FileCache:
|
|
|
153
163
|
f'of the evicted file(s) is {round(extra_capacity_needed / (1 << 30), 1)} GiB.\n'
|
|
154
164
|
f'Consider increasing the cache size to at least {round(suggested_cache_size / (1 << 30), 1)} GiB '
|
|
155
165
|
f'(it is currently {round(self.capacity_bytes / (1 << 30), 1)} GiB).\n'
|
|
156
|
-
f'You can do this by setting the value of `file_cache_size_g` in: {
|
|
157
|
-
excs.PixeltableWarning
|
|
166
|
+
f'You can do this by setting the value of `file_cache_size_g` in: {Config.get().config_file}',
|
|
167
|
+
excs.PixeltableWarning,
|
|
168
|
+
stacklevel=2,
|
|
158
169
|
)
|
|
159
170
|
self.new_redownload_witnessed = False
|
|
160
171
|
|
|
@@ -163,7 +174,7 @@ class FileCache:
|
|
|
163
174
|
h.update(url.encode())
|
|
164
175
|
return h.hexdigest()
|
|
165
176
|
|
|
166
|
-
def lookup(self, url: str) ->
|
|
177
|
+
def lookup(self, url: str) -> Path | None:
|
|
167
178
|
self.num_requests += 1
|
|
168
179
|
key = self._url_hash(url)
|
|
169
180
|
entry = self.cache.get(key, None)
|
|
@@ -195,13 +206,15 @@ class FileCache:
|
|
|
195
206
|
self.evicted_working_set_keys.add(key)
|
|
196
207
|
self.new_redownload_witnessed = True
|
|
197
208
|
self.keys_retrieved.add(key)
|
|
198
|
-
entry = CacheEntry(
|
|
209
|
+
entry = CacheEntry(
|
|
210
|
+
key, tbl_id, col_id, file_info.st_size, datetime.fromtimestamp(file_info.st_mtime), path.suffix
|
|
211
|
+
)
|
|
199
212
|
self.cache[key] = entry
|
|
200
213
|
self.total_size += entry.size
|
|
201
214
|
new_path = entry.path
|
|
202
215
|
os.rename(str(path), str(new_path))
|
|
203
216
|
new_path.touch(exist_ok=True)
|
|
204
|
-
_logger.debug(f'
|
|
217
|
+
_logger.debug(f'FileCache: cached url {url} with file name {new_path}')
|
|
205
218
|
return new_path
|
|
206
219
|
|
|
207
220
|
def ensure_capacity(self, size: int) -> None:
|
|
@@ -217,7 +230,9 @@ class FileCache:
|
|
|
217
230
|
# Make a record of the eviction, so that we can generate a warning later if the key is retrieved again.
|
|
218
231
|
self.keys_evicted_after_retrieval.add(lru_entry.key)
|
|
219
232
|
os.remove(str(lru_entry.path))
|
|
220
|
-
_logger.debug(
|
|
233
|
+
_logger.debug(
|
|
234
|
+
f'evicted entry for cell {lru_entry.key} from file cache (of size {lru_entry.size // (1 << 20)} MiB)'
|
|
235
|
+
)
|
|
221
236
|
|
|
222
237
|
def set_capacity(self, capacity_bytes: int) -> None:
|
|
223
238
|
self.capacity_bytes = capacity_bytes
|
|
@@ -228,15 +243,16 @@ class FileCache:
|
|
|
228
243
|
# (tbl_id, col_id) -> (num_files, total_size)
|
|
229
244
|
d: dict[tuple[UUID, int], list[int]] = defaultdict(lambda: [0, 0])
|
|
230
245
|
for entry in self.cache.values():
|
|
231
|
-
t = d[
|
|
246
|
+
t = d[entry.tbl_id, entry.col_id]
|
|
232
247
|
t[0] += 1
|
|
233
248
|
t[1] += entry.size
|
|
234
249
|
col_stats = [
|
|
235
|
-
self.FileCacheColumnStats(tbl_id, col_id, num_files, size)
|
|
250
|
+
self.FileCacheColumnStats(tbl_id, col_id, num_files, size)
|
|
251
|
+
for (tbl_id, col_id), (num_files, size) in d.items()
|
|
236
252
|
]
|
|
237
253
|
col_stats.sort(key=lambda e: e[3], reverse=True)
|
|
238
254
|
return self.FileCacheStats(self.total_size, self.num_requests, self.num_hits, self.num_evictions, col_stats)
|
|
239
255
|
|
|
240
256
|
def debug_print(self) -> None:
|
|
241
257
|
for entry in self.cache.values():
|
|
242
|
-
|
|
258
|
+
_logger.debug(f'CacheEntry: tbl_id={entry.tbl_id}, col_id={entry.col_id}, size={entry.size}')
|
pixeltable/utils/formatter.py
CHANGED
|
@@ -4,12 +4,13 @@ import io
|
|
|
4
4
|
import json
|
|
5
5
|
import logging
|
|
6
6
|
import mimetypes
|
|
7
|
-
|
|
7
|
+
import uuid
|
|
8
|
+
from typing import Any, Callable
|
|
8
9
|
|
|
9
|
-
import av
|
|
10
|
+
import av
|
|
10
11
|
import numpy as np
|
|
11
|
-
import
|
|
12
|
-
import
|
|
12
|
+
from PIL import Image
|
|
13
|
+
from pypdfium2 import PdfDocument # type: ignore[import-untyped]
|
|
13
14
|
|
|
14
15
|
import pixeltable.type_system as ts
|
|
15
16
|
from pixeltable.utils.http_server import get_file_uri
|
|
@@ -20,11 +21,11 @@ _logger = logging.getLogger('pixeltable')
|
|
|
20
21
|
class Formatter:
|
|
21
22
|
"""
|
|
22
23
|
A factory for constructing HTML formatters for Pixeltable data. The formatters are used to customize
|
|
23
|
-
the rendering of `
|
|
24
|
+
the rendering of `ResultSet`s in notebooks.
|
|
24
25
|
|
|
25
26
|
Args:
|
|
26
|
-
num_rows: Number of rows in the
|
|
27
|
-
num_cols: Number of columns in the
|
|
27
|
+
num_rows: Number of rows in the `ResultSet` being rendered.
|
|
28
|
+
num_cols: Number of columns in the `ResultSet` being rendered.
|
|
28
29
|
http_address: Root address of the Pixeltable HTTP server (used to construct URLs for media references).
|
|
29
30
|
"""
|
|
30
31
|
|
|
@@ -40,9 +41,13 @@ class Formatter:
|
|
|
40
41
|
self.__num_cols = num_cols
|
|
41
42
|
self.__http_address = http_address
|
|
42
43
|
|
|
43
|
-
def get_pandas_formatter(self, col_type: ts.ColumnType) ->
|
|
44
|
+
def get_pandas_formatter(self, col_type: ts.ColumnType) -> Callable | None:
|
|
44
45
|
if col_type.is_string_type():
|
|
45
46
|
return self.format_string
|
|
47
|
+
if col_type.is_uuid_type():
|
|
48
|
+
return self.format_uuid
|
|
49
|
+
if col_type.is_binary_type():
|
|
50
|
+
return self.format_binary
|
|
46
51
|
if col_type.is_float_type():
|
|
47
52
|
return self.format_float
|
|
48
53
|
if col_type.is_json_type():
|
|
@@ -64,10 +69,24 @@ class Formatter:
|
|
|
64
69
|
"""
|
|
65
70
|
Escapes special characters in `val`, and abbreviates `val` if its length exceeds `_STRING_MAX_LEN`.
|
|
66
71
|
"""
|
|
67
|
-
return cls.__escape(cls.
|
|
72
|
+
return cls.__escape(cls.abbreviate(val))
|
|
68
73
|
|
|
69
74
|
@classmethod
|
|
70
|
-
def
|
|
75
|
+
def format_uuid(cls, val: uuid.UUID | None) -> str:
|
|
76
|
+
"""
|
|
77
|
+
Formats a UUID by converting it to a string and applying string formatting.
|
|
78
|
+
"""
|
|
79
|
+
return '' if val is None else cls.format_string(str(val))
|
|
80
|
+
|
|
81
|
+
@classmethod
|
|
82
|
+
def format_binary(cls, val: bytes) -> str:
|
|
83
|
+
"""
|
|
84
|
+
Formats binary data by converting it to an encoded string and applying string formatting.
|
|
85
|
+
"""
|
|
86
|
+
return cls.format_string(str(val))
|
|
87
|
+
|
|
88
|
+
@classmethod
|
|
89
|
+
def abbreviate(cls, val: str, max_len: int = __STRING_MAX_LEN) -> str:
|
|
71
90
|
if len(val) > max_len:
|
|
72
91
|
edgeitems = (max_len - len(cls.__STRING_SEP)) // 2
|
|
73
92
|
return f'{val[:edgeitems]}{cls.__STRING_SEP}{val[-edgeitems:]}'
|
|
@@ -95,41 +114,45 @@ class Formatter:
|
|
|
95
114
|
)
|
|
96
115
|
|
|
97
116
|
@classmethod
|
|
98
|
-
def format_json(cls, val: Any) -> str:
|
|
117
|
+
def format_json(cls, val: Any, escape_strings: bool = True) -> str:
|
|
99
118
|
if isinstance(val, str):
|
|
100
119
|
# JSON-like formatting will be applied to strings that appear nested within a list or dict
|
|
101
120
|
# (quote the string; escape any quotes inside the string; shorter abbreviations).
|
|
102
121
|
# However, if the string appears in top-level position (i.e., the entire JSON value is a
|
|
103
122
|
# string), then we format it like an ordinary string.
|
|
104
|
-
return cls.format_string(val)
|
|
123
|
+
return cls.format_string(val) if escape_strings else cls.abbreviate(val)
|
|
105
124
|
# In all other cases, dump the JSON struct recursively.
|
|
106
|
-
return cls.__format_json_rec(val)
|
|
125
|
+
return cls.__format_json_rec(val, escape_strings)
|
|
107
126
|
|
|
108
127
|
@classmethod
|
|
109
|
-
def __format_json_rec(cls, val: Any) -> str:
|
|
128
|
+
def __format_json_rec(cls, val: Any, escape_strings: bool) -> str:
|
|
110
129
|
if isinstance(val, str):
|
|
111
|
-
|
|
130
|
+
formatted = json.dumps(cls.abbreviate(val, cls.__NESTED_STRING_MAX_LEN))
|
|
131
|
+
return cls.__escape(formatted) if escape_strings else formatted
|
|
112
132
|
if isinstance(val, float):
|
|
113
133
|
return cls.format_float(val)
|
|
114
134
|
if isinstance(val, np.ndarray):
|
|
115
135
|
return cls.format_array(val)
|
|
116
136
|
if isinstance(val, list):
|
|
117
137
|
if len(val) < cls.__LIST_THRESHOLD:
|
|
118
|
-
components = [cls.__format_json_rec(x) for x in val]
|
|
138
|
+
components = [cls.__format_json_rec(x, escape_strings) for x in val]
|
|
119
139
|
else:
|
|
120
|
-
components = [cls.__format_json_rec(x) for x in val[: cls.__LIST_EDGEITEMS]]
|
|
140
|
+
components = [cls.__format_json_rec(x, escape_strings) for x in val[: cls.__LIST_EDGEITEMS]]
|
|
121
141
|
components.append('...')
|
|
122
|
-
components.extend(cls.__format_json_rec(x) for x in val[-cls.__LIST_EDGEITEMS :])
|
|
142
|
+
components.extend(cls.__format_json_rec(x, escape_strings) for x in val[-cls.__LIST_EDGEITEMS :])
|
|
123
143
|
return '[' + ', '.join(components) + ']'
|
|
124
144
|
if isinstance(val, dict):
|
|
125
|
-
kv_pairs = (
|
|
145
|
+
kv_pairs = (
|
|
146
|
+
f'{cls.__format_json_rec(k, escape_strings)}: {cls.__format_json_rec(v, escape_strings)}'
|
|
147
|
+
for k, v in val.items()
|
|
148
|
+
)
|
|
126
149
|
return '{' + ', '.join(kv_pairs) + '}'
|
|
127
150
|
|
|
128
151
|
# Everything else
|
|
129
152
|
try:
|
|
130
153
|
return json.dumps(val)
|
|
131
154
|
except TypeError: # Not JSON serializable
|
|
132
|
-
return str(val)
|
|
155
|
+
return cls.__escape(str(val))
|
|
133
156
|
|
|
134
157
|
def format_img(self, img: Image.Image) -> str:
|
|
135
158
|
"""
|
|
@@ -153,22 +176,19 @@ class Formatter:
|
|
|
153
176
|
"""
|
|
154
177
|
|
|
155
178
|
def format_video(self, file_path: str) -> str:
|
|
156
|
-
thumb_tag = ''
|
|
157
179
|
# Attempt to extract the first frame of the video to use as a thumbnail,
|
|
158
180
|
# so that the notebook can be exported as HTML and viewed in contexts where
|
|
159
181
|
# the video itself is not accessible.
|
|
160
182
|
# TODO(aaron-siegel): If the video is backed by a concrete external URL,
|
|
161
183
|
# should we link to that instead?
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
except Exception:
|
|
171
|
-
pass
|
|
184
|
+
thumb = self.extract_first_video_frame(file_path)
|
|
185
|
+
if thumb is None:
|
|
186
|
+
thumb_tag = ''
|
|
187
|
+
else:
|
|
188
|
+
with io.BytesIO() as buffer:
|
|
189
|
+
thumb.save(buffer, 'jpeg')
|
|
190
|
+
thumb_base64 = base64.b64encode(buffer.getvalue()).decode()
|
|
191
|
+
thumb_tag = f'poster="data:image/jpeg;base64,{thumb_base64}"'
|
|
172
192
|
if self.__num_rows > 1:
|
|
173
193
|
width = 320
|
|
174
194
|
elif self.__num_cols > 1:
|
|
@@ -183,6 +203,16 @@ class Formatter:
|
|
|
183
203
|
</div>
|
|
184
204
|
"""
|
|
185
205
|
|
|
206
|
+
@classmethod
|
|
207
|
+
def extract_first_video_frame(cls, file_path: str) -> Image.Image | None:
|
|
208
|
+
with av.open(file_path) as container:
|
|
209
|
+
try:
|
|
210
|
+
img = next(container.decode(video=0)).to_image()
|
|
211
|
+
assert isinstance(img, Image.Image)
|
|
212
|
+
return img
|
|
213
|
+
except Exception:
|
|
214
|
+
return None
|
|
215
|
+
|
|
186
216
|
def format_audio(self, file_path: str) -> str:
|
|
187
217
|
return f"""
|
|
188
218
|
<div class="pxt_audio">
|
|
@@ -192,29 +222,18 @@ class Formatter:
|
|
|
192
222
|
</div>
|
|
193
223
|
"""
|
|
194
224
|
|
|
195
|
-
def format_document(self, file_path: str) -> str:
|
|
196
|
-
max_width = max_height = 320
|
|
225
|
+
def format_document(self, file_path: str, max_width: int = 320, max_height: int = 320) -> str:
|
|
197
226
|
# by default, file path will be shown as a link
|
|
198
227
|
inner_element = file_path
|
|
199
228
|
inner_element = html.escape(inner_element)
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
# shrink(1) will halve each dimension
|
|
209
|
-
p.shrink(1)
|
|
210
|
-
data = p.tobytes(output='jpeg')
|
|
211
|
-
thumb_base64 = base64.b64encode(data).decode()
|
|
212
|
-
img_src = f'data:image/jpeg;base64,{thumb_base64}'
|
|
213
|
-
inner_element = f"""
|
|
214
|
-
<img style="object-fit: contain; border: 1px solid black;" src="{img_src}" />
|
|
215
|
-
"""
|
|
216
|
-
except:
|
|
217
|
-
logging.warning(f'Failed to produce PDF thumbnail {file_path}. Make sure you have PyMuPDF installed.')
|
|
229
|
+
|
|
230
|
+
thumb = self.make_document_thumbnail(file_path, max_width, max_height)
|
|
231
|
+
if thumb is not None:
|
|
232
|
+
with io.BytesIO() as buffer:
|
|
233
|
+
thumb.save(buffer, 'webp')
|
|
234
|
+
thumb_base64 = base64.b64encode(buffer.getvalue()).decode()
|
|
235
|
+
thumb_tag = f'data:image/webp;base64,{thumb_base64}'
|
|
236
|
+
inner_element = f'<img style="object-fit: contain; border: 1px solid black;" src="{thumb_tag}" />'
|
|
218
237
|
|
|
219
238
|
return f"""
|
|
220
239
|
<div class="pxt_document" style="width:{max_width}px;">
|
|
@@ -224,6 +243,24 @@ class Formatter:
|
|
|
224
243
|
</div>
|
|
225
244
|
"""
|
|
226
245
|
|
|
246
|
+
@classmethod
|
|
247
|
+
def make_document_thumbnail(cls, file_path: str, max_width: int = 320, max_height: int = 320) -> Image.Image | None:
|
|
248
|
+
"""
|
|
249
|
+
Returns a thumbnail image of a document.
|
|
250
|
+
"""
|
|
251
|
+
if file_path.lower().endswith('.pdf'):
|
|
252
|
+
try:
|
|
253
|
+
doc = PdfDocument(file_path)
|
|
254
|
+
if len(doc) == 0:
|
|
255
|
+
return None
|
|
256
|
+
img = doc[0].render().to_pil()
|
|
257
|
+
img.thumbnail((max_width, max_height), Image.LANCZOS)
|
|
258
|
+
return img
|
|
259
|
+
except Exception:
|
|
260
|
+
logging.warning(f'Failed to produce PDF thumbnail {file_path}. Make sure you have pypdfium2 installed.')
|
|
261
|
+
|
|
262
|
+
return None
|
|
263
|
+
|
|
227
264
|
@classmethod
|
|
228
265
|
def __create_source_tag(cls, http_address: str, file_path: str) -> str:
|
|
229
266
|
src_url = get_file_uri(http_address, file_path)
|