scitex 2.16.0__py3-none-any.whl → 2.16.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/_mcp_tools/audio.py +11 -65
- scitex/audio/README.md +40 -12
- scitex/audio/__init__.py +27 -235
- scitex/audio/_audio_check.py +93 -0
- scitex/audio/_mcp/speak_handlers.py +56 -8
- scitex/audio/_speak.py +295 -0
- scitex/audio/mcp_server.py +98 -73
- scitex/scholar/url_finder/.tmp/open_url/KNOWN_RESOLVERS.py +462 -0
- scitex/scholar/url_finder/.tmp/open_url/README.md +223 -0
- scitex/scholar/url_finder/.tmp/open_url/_DOIToURLResolver.py +694 -0
- scitex/scholar/url_finder/.tmp/open_url/_OpenURLResolver.py +1160 -0
- scitex/scholar/url_finder/.tmp/open_url/_ResolverLinkFinder.py +344 -0
- scitex/scholar/url_finder/.tmp/open_url/__init__.py +24 -0
- scitex/social/__init__.py +1 -24
- scitex/writer/README.md +25 -409
- scitex/writer/__init__.py +98 -13
- {scitex-2.16.0.dist-info → scitex-2.16.2.dist-info}/METADATA +6 -1
- {scitex-2.16.0.dist-info → scitex-2.16.2.dist-info}/RECORD +21 -93
- scitex/dev/plt/data/mpl/PLOTTING_FUNCTIONS.yaml +0 -90
- scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES.yaml +0 -1571
- scitex/dev/plt/data/mpl/PLOTTING_SIGNATURES_DETAILED.yaml +0 -6262
- scitex/dev/plt/data/mpl/SIGNATURES_FLATTENED.yaml +0 -1274
- scitex/dev/plt/data/mpl/dir_ax.txt +0 -459
- scitex/scholar/data/.gitkeep +0 -0
- scitex/scholar/data/README.md +0 -44
- scitex/scholar/data/bib_files/bibliography.bib +0 -1952
- scitex/scholar/data/bib_files/neurovista.bib +0 -277
- scitex/scholar/data/bib_files/neurovista_enriched.bib +0 -441
- scitex/scholar/data/bib_files/neurovista_enriched_enriched.bib +0 -441
- scitex/scholar/data/bib_files/neurovista_processed.bib +0 -338
- scitex/scholar/data/bib_files/openaccess.bib +0 -89
- scitex/scholar/data/bib_files/pac-seizure_prediction_enriched.bib +0 -2178
- scitex/scholar/data/bib_files/pac.bib +0 -698
- scitex/scholar/data/bib_files/pac_enriched.bib +0 -1061
- scitex/scholar/data/bib_files/pac_processed.bib +0 -0
- scitex/scholar/data/bib_files/pac_titles.txt +0 -75
- scitex/scholar/data/bib_files/paywalled.bib +0 -98
- scitex/scholar/data/bib_files/related-papers-by-coauthors.bib +0 -58
- scitex/scholar/data/bib_files/related-papers-by-coauthors_enriched.bib +0 -87
- scitex/scholar/data/bib_files/seizure_prediction.bib +0 -694
- scitex/scholar/data/bib_files/seizure_prediction_processed.bib +0 -0
- scitex/scholar/data/bib_files/test_complete_enriched.bib +0 -437
- scitex/scholar/data/bib_files/test_final_enriched.bib +0 -437
- scitex/scholar/data/bib_files/test_seizure.bib +0 -46
- scitex/scholar/data/impact_factor/JCR_IF_2022.xlsx +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024.db +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024.xlsx +0 -0
- scitex/scholar/data/impact_factor/JCR_IF_2024_v01.db +0 -0
- scitex/scholar/data/impact_factor.db +0 -0
- scitex/writer/Writer.py +0 -487
- scitex/writer/_clone_writer_project.py +0 -160
- scitex/writer/_compile/__init__.py +0 -41
- scitex/writer/_compile/_compile_async.py +0 -130
- scitex/writer/_compile/_compile_unified.py +0 -148
- scitex/writer/_compile/_parser.py +0 -63
- scitex/writer/_compile/_runner.py +0 -457
- scitex/writer/_compile/_validator.py +0 -46
- scitex/writer/_compile/manuscript.py +0 -110
- scitex/writer/_compile/revision.py +0 -82
- scitex/writer/_compile/supplementary.py +0 -100
- scitex/writer/_dataclasses/__init__.py +0 -44
- scitex/writer/_dataclasses/config/_CONSTANTS.py +0 -46
- scitex/writer/_dataclasses/config/_WriterConfig.py +0 -175
- scitex/writer/_dataclasses/config/__init__.py +0 -9
- scitex/writer/_dataclasses/contents/_ManuscriptContents.py +0 -236
- scitex/writer/_dataclasses/contents/_RevisionContents.py +0 -136
- scitex/writer/_dataclasses/contents/_SupplementaryContents.py +0 -114
- scitex/writer/_dataclasses/contents/__init__.py +0 -9
- scitex/writer/_dataclasses/core/_Document.py +0 -146
- scitex/writer/_dataclasses/core/_DocumentSection.py +0 -546
- scitex/writer/_dataclasses/core/__init__.py +0 -7
- scitex/writer/_dataclasses/results/_CompilationResult.py +0 -165
- scitex/writer/_dataclasses/results/_LaTeXIssue.py +0 -102
- scitex/writer/_dataclasses/results/_SaveSectionsResponse.py +0 -118
- scitex/writer/_dataclasses/results/_SectionReadResponse.py +0 -131
- scitex/writer/_dataclasses/results/__init__.py +0 -11
- scitex/writer/_dataclasses/tree/MINIMUM_FILES.md +0 -121
- scitex/writer/_dataclasses/tree/_ConfigTree.py +0 -86
- scitex/writer/_dataclasses/tree/_ManuscriptTree.py +0 -84
- scitex/writer/_dataclasses/tree/_RevisionTree.py +0 -97
- scitex/writer/_dataclasses/tree/_ScriptsTree.py +0 -118
- scitex/writer/_dataclasses/tree/_SharedTree.py +0 -100
- scitex/writer/_dataclasses/tree/_SupplementaryTree.py +0 -101
- scitex/writer/_dataclasses/tree/__init__.py +0 -23
- scitex/writer/_mcp/__init__.py +0 -4
- scitex/writer/_mcp/handlers.py +0 -32
- scitex/writer/_mcp/tool_schemas.py +0 -33
- scitex/writer/_project/__init__.py +0 -29
- scitex/writer/_project/_create.py +0 -89
- scitex/writer/_project/_trees.py +0 -63
- scitex/writer/_project/_validate.py +0 -61
- scitex/writer/utils/.legacy_git_retry.py +0 -164
- scitex/writer/utils/__init__.py +0 -24
- scitex/writer/utils/_converters.py +0 -635
- scitex/writer/utils/_parse_latex_logs.py +0 -138
- scitex/writer/utils/_parse_script_args.py +0 -156
- scitex/writer/utils/_verify_tree_structure.py +0 -205
- scitex/writer/utils/_watch.py +0 -96
- {scitex-2.16.0.dist-info → scitex-2.16.2.dist-info}/WHEEL +0 -0
- {scitex-2.16.0.dist-info → scitex-2.16.2.dist-info}/entry_points.txt +0 -0
- {scitex-2.16.0.dist-info → scitex-2.16.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,635 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# Timestamp: 2026-01-09
|
|
3
|
-
# File: src/scitex/writer/utils/_converters.py
|
|
4
|
-
# ----------------------------------------
|
|
5
|
-
|
|
6
|
-
"""
|
|
7
|
-
Conversion utilities for writer module.
|
|
8
|
-
|
|
9
|
-
Provides:
|
|
10
|
-
- CSV <-> LaTeX table conversion
|
|
11
|
-
- PDF page to image rendering
|
|
12
|
-
- Figure format conversion
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
from __future__ import annotations
|
|
16
|
-
|
|
17
|
-
import re
|
|
18
|
-
import tempfile
|
|
19
|
-
from pathlib import Path
|
|
20
|
-
from typing import Any, Dict, List, Optional, Union
|
|
21
|
-
|
|
22
|
-
from scitex import logging
|
|
23
|
-
|
|
24
|
-
# Module-level logger named after this module, obtained from scitex's logging package.
logger = logging.getLogger(__name__)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
# =============================================================================
|
|
28
|
-
# CSV <-> LaTeX Table Converters
|
|
29
|
-
# =============================================================================
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def csv2latex(
|
|
33
|
-
csv_path: Union[str, Path],
|
|
34
|
-
output_path: Optional[Union[str, Path]] = None,
|
|
35
|
-
caption: Optional[str] = None,
|
|
36
|
-
label: Optional[str] = None,
|
|
37
|
-
escape: bool = True,
|
|
38
|
-
longtable: bool = False,
|
|
39
|
-
index: bool = False,
|
|
40
|
-
column_format: Optional[str] = None,
|
|
41
|
-
**kwargs,
|
|
42
|
-
) -> str:
|
|
43
|
-
"""
|
|
44
|
-
Convert CSV file to LaTeX table.
|
|
45
|
-
|
|
46
|
-
Parameters
|
|
47
|
-
----------
|
|
48
|
-
csv_path : str or Path
|
|
49
|
-
Path to CSV file
|
|
50
|
-
output_path : str or Path, optional
|
|
51
|
-
If provided, save LaTeX to this file
|
|
52
|
-
caption : str, optional
|
|
53
|
-
Table caption
|
|
54
|
-
label : str, optional
|
|
55
|
-
Table label for referencing
|
|
56
|
-
escape : bool, default True
|
|
57
|
-
Escape special LaTeX characters
|
|
58
|
-
longtable : bool, default False
|
|
59
|
-
Use longtable environment for multi-page tables
|
|
60
|
-
index : bool, default False
|
|
61
|
-
Include DataFrame index in output
|
|
62
|
-
column_format : str, optional
|
|
63
|
-
LaTeX column format (e.g., 'lcr', 'l|cc|r')
|
|
64
|
-
**kwargs
|
|
65
|
-
Additional arguments passed to pandas.DataFrame.to_latex()
|
|
66
|
-
|
|
67
|
-
Returns
|
|
68
|
-
-------
|
|
69
|
-
str
|
|
70
|
-
LaTeX table string
|
|
71
|
-
|
|
72
|
-
Examples
|
|
73
|
-
--------
|
|
74
|
-
>>> latex = csv2latex("data.csv", caption="Results", label="tab:results")
|
|
75
|
-
>>> csv2latex("data.csv", "table.tex") # Save to file
|
|
76
|
-
"""
|
|
77
|
-
import pandas as pd
|
|
78
|
-
|
|
79
|
-
csv_path = Path(csv_path)
|
|
80
|
-
if not csv_path.exists():
|
|
81
|
-
raise FileNotFoundError(f"CSV file not found: {csv_path}")
|
|
82
|
-
|
|
83
|
-
# Load CSV
|
|
84
|
-
df = pd.read_csv(csv_path)
|
|
85
|
-
|
|
86
|
-
# Build to_latex arguments
|
|
87
|
-
latex_kwargs = {
|
|
88
|
-
"index": index,
|
|
89
|
-
"escape": escape,
|
|
90
|
-
"caption": caption,
|
|
91
|
-
"label": label,
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
if longtable:
|
|
95
|
-
latex_kwargs["longtable"] = True
|
|
96
|
-
|
|
97
|
-
if column_format:
|
|
98
|
-
latex_kwargs["column_format"] = column_format
|
|
99
|
-
|
|
100
|
-
# Merge with user kwargs
|
|
101
|
-
latex_kwargs.update(kwargs)
|
|
102
|
-
|
|
103
|
-
# Convert to LaTeX
|
|
104
|
-
latex_content = df.to_latex(**latex_kwargs)
|
|
105
|
-
|
|
106
|
-
# Save if output path provided
|
|
107
|
-
if output_path:
|
|
108
|
-
output_path = Path(output_path)
|
|
109
|
-
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
110
|
-
with open(output_path, "w") as f:
|
|
111
|
-
f.write(latex_content)
|
|
112
|
-
logger.info(f"Saved LaTeX table to {output_path}")
|
|
113
|
-
|
|
114
|
-
return latex_content
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def latex2csv(
    latex_path: Union[str, Path],
    output_path: Optional[Union[str, Path]] = None,
    table_index: int = 0,
) -> pd.DataFrame:
    """
    Convert a LaTeX table to a CSV file / DataFrame.

    Both ``tabular`` and ``longtable`` environments are recognised. Tables
    are numbered in the order they appear in the file. A line is treated as
    a data row when it contains at least one unescaped ``&``; leading rule
    commands (``\\hline``, ``\\toprule``, ``\\midrule``, ``\\bottomrule``) are
    stripped and ``%`` comment lines are ignored. Cell text is returned as
    raw LaTeX (escapes such as ``\\&`` are kept verbatim).

    Parameters
    ----------
    latex_path : str or Path
        Path to the LaTeX file containing the table (read as UTF-8).
    output_path : str or Path, optional
        If provided, save the table as CSV to this file. Missing parent
        directories are created.
    table_index : int, default 0
        Which table to extract if multiple tables exist.

    Returns
    -------
    pd.DataFrame
        Extracted table. The first parsed row becomes the header when more
        than one row is found.

    Raises
    ------
    FileNotFoundError
        If ``latex_path`` does not exist.
    ValueError
        If no table, or no parsable row, is found.
    IndexError
        If ``table_index`` is beyond the number of tables found.

    Examples
    --------
    >>> df = latex2csv("table.tex")
    >>> df = latex2csv("table.tex", "output.csv")
    """
    import pandas as pd

    latex_path = Path(latex_path)
    if not latex_path.exists():
        raise FileNotFoundError(f"LaTeX file not found: {latex_path}")

    content = latex_path.read_text(encoding="utf-8")

    # Extract table bodies (between \begin{...} and \end{...}) for both
    # environments, remembering where each starts so that table_index
    # follows document order rather than "all tabulars, then longtables".
    patterns = [
        r"\\begin\{tabular\}.*?\n(.*?)\\end\{tabular\}",
        r"\\begin\{longtable\}.*?\n(.*?)\\end\{longtable\}",
    ]
    found = []
    for pattern in patterns:
        for match in re.finditer(pattern, content, re.DOTALL):
            found.append((match.start(), match.group(1)))
    found.sort(key=lambda item: item[0])
    tables = [body for _, body in found]

    if not tables:
        raise ValueError("No table found in LaTeX file")

    if table_index >= len(tables):
        raise IndexError(
            f"Table index {table_index} out of range. Found {len(tables)} tables."
        )

    table_content = tables[table_index]

    # Rule commands may precede a row on the same line; strip them instead of
    # discarding every line that happens to start with a backslash (which
    # would also drop rows such as "\textbf{Name} & Value \\").
    rule_prefix = re.compile(r"^(?:\\(?:hline|toprule|midrule|bottomrule)\b\s*)+")
    # A column separator is an ampersand that is not escaped as "\&".
    cell_separator = re.compile(r"(?<!\\)&")

    rows = []
    for line in table_content.split("\n"):
        line = rule_prefix.sub("", line.strip())
        if not line or line.startswith("%"):
            continue
        if not cell_separator.search(line):
            continue
        # Remove the row terminator "\\" and anything after it.
        line = re.sub(r"\\\\.*$", "", line)
        rows.append([cell.strip() for cell in cell_separator.split(line)])

    if not rows:
        raise ValueError("Could not parse table rows")

    # First row is used as the header whenever there is more than one row.
    if len(rows) > 1:
        df = pd.DataFrame(rows[1:], columns=rows[0])
    else:
        df = pd.DataFrame(rows)

    if output_path:
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        df.to_csv(output_path, index=False)
        logger.info(f"Saved CSV to {output_path}")

    return df
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
# =============================================================================
|
|
207
|
-
# PDF to Image Rendering
|
|
208
|
-
# =============================================================================
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
def pdf_to_images(
    pdf_path: Union[str, Path],
    output_dir: Optional[Union[str, Path]] = None,
    pages: Optional[Union[int, List[int]]] = None,
    dpi: int = 150,
    format: str = "png",
    prefix: str = "page",
) -> List[Dict[str, Any]]:
    """
    Render PDF pages as images.

    Parameters
    ----------
    pdf_path : str or Path
        Path to PDF file.
    output_dir : str or Path, optional
        Directory to save images. If None, a fresh temp directory is created.
    pages : int or list of int, optional
        Page(s) to render (0-indexed). If None, renders all pages.
        Out-of-range pages are skipped with a warning.
    dpi : int, default 150
        Resolution in DPI.
    format : str, default 'png'
        Output format ('png', 'jpg', 'jpeg'; case-insensitive). JPEG output
        needs Pillow; when Pillow is not installed the pages are saved as
        PNG instead and a warning is logged.
    prefix : str, default 'page'
        Filename prefix. Files are named ``{prefix}_{page + 1:03d}.{ext}``.

    Returns
    -------
    list of dict
        One dict per rendered page with:
        - page: Page number (0-indexed)
        - path: Path to saved image
        - width: Image width in pixels
        - height: Image height in pixels
        - dpi: Requested resolution
        - format: Format actually written ('png' or 'jpg')

    Raises
    ------
    ValueError
        If ``format`` is not one of the supported formats.
    ImportError
        If PyMuPDF is not installed.
    FileNotFoundError
        If ``pdf_path`` does not exist.

    Examples
    --------
    >>> # Render first page as thumbnail
    >>> images = pdf_to_images("paper.pdf", pages=0, dpi=72)
    >>> print(images[0]['path'])

    >>> # Render all pages at high resolution
    >>> images = pdf_to_images("paper.pdf", "output/", dpi=300)
    """
    # Validate the requested format up front: anything that is not PNG would
    # otherwise be written as JPEG data under a misleading file extension.
    image_format = format.lower()
    if image_format == "jpeg":
        image_format = "jpg"
    if image_format not in ("png", "jpg"):
        raise ValueError(
            f"Unsupported image format: {format!r}. Use 'png', 'jpg' or 'jpeg'."
        )

    try:
        import fitz  # PyMuPDF
    except ImportError:
        raise ImportError(
            "PyMuPDF required for PDF to image conversion. "
            "Install with: pip install PyMuPDF"
        )

    pdf_path = Path(pdf_path)
    if not pdf_path.exists():
        raise FileNotFoundError(f"PDF file not found: {pdf_path}")

    # Setup output directory
    if output_dir is None:
        output_dir = Path(tempfile.mkdtemp(prefix="pdf_images_"))
    else:
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

    # JPEG encoding goes through Pillow; resolve it once instead of per page.
    pil_image = None
    if image_format == "jpg":
        try:
            import io

            from PIL import Image as pil_image
        except ImportError:
            logger.warning("Pillow not available; saving pages as PNG instead of JPEG")
            image_format = "png"

    # Calculate zoom factor for DPI (default PDF DPI is 72)
    zoom = dpi / 72.0
    matrix = fitz.Matrix(zoom, zoom)

    results = []
    doc = fitz.open(pdf_path)

    try:
        page_count = len(doc)
        if pages is None:
            pages_to_render = range(page_count)
        elif isinstance(pages, int):
            pages_to_render = [pages]
        else:
            pages_to_render = pages

        for page_num in pages_to_render:
            if page_num < 0 or page_num >= page_count:
                logger.warning(f"Page {page_num} out of range, skipping")
                continue

            pix = doc[page_num].get_pixmap(matrix=matrix)

            # Filenames are 1-based even though page numbers are 0-based.
            filepath = output_dir / f"{prefix}_{page_num + 1:03d}.{image_format}"

            if image_format == "png":
                pix.save(str(filepath))
            else:
                # Round-trip through PNG bytes, flatten onto white if the
                # image carries transparency, then encode as JPEG.
                img = pil_image.open(io.BytesIO(pix.tobytes("png")))
                if img.mode in ("RGBA", "LA", "P"):
                    background = pil_image.new("RGB", img.size, (255, 255, 255))
                    if img.mode == "P":
                        img = img.convert("RGBA")
                    if img.mode == "RGBA":
                        background.paste(img, mask=img.split()[-1])
                    else:
                        background.paste(img)
                    img = background
                elif img.mode != "RGB":
                    img = img.convert("RGB")
                img.save(str(filepath), "JPEG", quality=95)

            results.append(
                {
                    "page": page_num,
                    "path": str(filepath),
                    "width": pix.width,
                    "height": pix.height,
                    "dpi": dpi,
                    "format": image_format,
                }
            )

            logger.debug(f"Rendered page {page_num + 1} to {filepath}")

    finally:
        doc.close()

    logger.info(f"Rendered {len(results)} pages from {pdf_path}")
    return results
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
def pdf_thumbnail(
    pdf_path: Union[str, Path],
    output_path: Optional[Union[str, Path]] = None,
    page: int = 0,
    width: int = 200,
    format: str = "png",
) -> Dict[str, Any]:
    """
    Generate a thumbnail from a PDF page.

    Parameters
    ----------
    pdf_path : str or Path
        Path to PDF file.
    output_path : str or Path, optional
        Path to save thumbnail. If None, the file is created in a fresh
        temp directory as ``{pdf stem}_thumb.{format}``.
    page : int, default 0
        Page to use for thumbnail (0-indexed).
    width : int, default 200
        Thumbnail width in pixels (height auto-calculated). Must be positive.
    format : str, default 'png'
        Output format ('png', 'jpg'). Only used to name the auto-generated
        file; when ``output_path`` is given, its extension decides the format.

    Returns
    -------
    dict
        Thumbnail info: path, width, height, source_page, source_pdf, format.
        ``format`` reflects the extension of the file actually written.

    Raises
    ------
    ValueError
        If ``width`` is not positive.
    ImportError
        If PyMuPDF is not installed.
    FileNotFoundError
        If ``pdf_path`` does not exist.
    IndexError
        If ``page`` is outside the document.

    Examples
    --------
    >>> thumb = pdf_thumbnail("paper.pdf")
    >>> print(thumb['path'])
    """
    # A zero or negative width would yield a degenerate zoom matrix.
    if width <= 0:
        raise ValueError(f"Thumbnail width must be positive, got {width}")

    try:
        import fitz
    except ImportError:
        raise ImportError(
            "PyMuPDF required for PDF thumbnails. Install with: pip install PyMuPDF"
        )

    pdf_path = Path(pdf_path)
    if not pdf_path.exists():
        raise FileNotFoundError(f"PDF file not found: {pdf_path}")

    doc = fitz.open(pdf_path)

    try:
        if page < 0 or page >= len(doc):
            raise IndexError(f"Page {page} out of range. PDF has {len(doc)} pages.")

        pdf_page = doc[page]

        # Calculate zoom to achieve desired width
        page_rect = pdf_page.rect
        zoom = width / page_rect.width
        matrix = fitz.Matrix(zoom, zoom)

        pix = pdf_page.get_pixmap(matrix=matrix)

        # Determine output path and the format that will really be written:
        # the pixmap is saved by filename, so a caller-supplied path's
        # extension is what counts, not the ``format`` argument.
        if output_path is None:
            output_dir = Path(tempfile.mkdtemp(prefix="pdf_thumb_"))
            output_path = output_dir / f"{pdf_path.stem}_thumb.{format}"
            saved_format = format
        else:
            output_path = Path(output_path)
            output_path.parent.mkdir(parents=True, exist_ok=True)
            saved_format = output_path.suffix.lstrip(".").lower() or format

        pix.save(str(output_path))

        return {
            "path": str(output_path),
            "width": pix.width,
            "height": pix.height,
            "source_page": page,
            "source_pdf": str(pdf_path),
            "format": saved_format,
        }

    finally:
        doc.close()
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
# =============================================================================
|
|
440
|
-
# Figure Handlers
|
|
441
|
-
# =============================================================================
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
def list_figures(
    project_dir: Union[str, Path],
    extensions: Optional[List[str]] = None,
) -> List[Dict[str, Any]]:
    """
    List all figures in a writer project.

    Looks in the ``figures`` and ``figs`` sub-directories of ``00_shared``,
    ``01_manuscript`` and ``02_supplementary`` (non-recursively).

    Parameters
    ----------
    project_dir : str or Path
        Path to writer project directory.
    extensions : list of str, optional
        Figure extensions to include, matched case-insensitively against the
        end of each file name. Default: common image formats.

    Returns
    -------
    list of dict
        One dict per figure, sorted by file name, with keys: path, name,
        stem, extension, size_bytes, size_kb, directory, relative_path.

    Raises
    ------
    FileNotFoundError
        If ``project_dir`` does not exist.

    Examples
    --------
    >>> figures = list_figures("my_paper")
    >>> for fig in figures:
    ...     print(fig['name'], fig['size_kb'])
    """
    project_dir = Path(project_dir)
    if not project_dir.exists():
        raise FileNotFoundError(f"Project directory not found: {project_dir}")

    if extensions is None:
        extensions = [
            ".png",
            ".jpg",
            ".jpeg",
            ".pdf",
            ".eps",
            ".svg",
            ".tif",
            ".tiff",
            ".ppt",
            ".pptx",
        ]

    # Lower-cased tuple so that "FIG1.PNG" matches ".png" and a file is
    # reported once even if the extension list contains duplicates.
    suffixes = tuple(ext.lower() for ext in extensions)

    # Search in common figure locations
    figure_dirs = [
        project_dir / "00_shared" / "figures",
        project_dir / "00_shared" / "figs",
        project_dir / "01_manuscript" / "figures",
        project_dir / "01_manuscript" / "figs",
        project_dir / "02_supplementary" / "figures",
        project_dir / "02_supplementary" / "figs",
    ]

    figures = []
    for fig_dir in figure_dirs:
        if not fig_dir.is_dir():
            continue
        # Single pass per directory instead of one glob per extension.
        for filepath in sorted(fig_dir.iterdir()):
            # Skip sub-directories whose name merely looks like a figure.
            if not filepath.is_file():
                continue
            if not filepath.name.lower().endswith(suffixes):
                continue
            stat = filepath.stat()
            figures.append(
                {
                    "path": str(filepath),
                    "name": filepath.name,
                    "stem": filepath.stem,
                    "extension": filepath.suffix,
                    "size_bytes": stat.st_size,
                    "size_kb": round(stat.st_size / 1024, 2),
                    "directory": str(fig_dir),
                    "relative_path": str(filepath.relative_to(project_dir)),
                }
            )

    # Sort by name (stable, so equal names keep the directory order above)
    figures.sort(key=lambda x: x["name"])

    logger.info(f"Found {len(figures)} figures in {project_dir}")
    return figures
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
def convert_figure(
    input_path: Union[str, Path],
    output_path: Union[str, Path],
    dpi: int = 300,
    quality: int = 95,
) -> Dict[str, Any]:
    """
    Convert a figure between formats.

    PDF input is rasterized from its first page with PyMuPDF; every other
    input is opened with Pillow. Pillow is only imported when it is actually
    needed (non-PDF input, or JPEG output).

    Parameters
    ----------
    input_path : str or Path
        Input figure path.
    output_path : str or Path
        Output figure path (format determined by extension). Missing parent
        directories are created.
    dpi : int, default 300
        Resolution for rasterizing PDF input.
    quality : int, default 95
        JPEG quality (1-100); only used for ``.jpg``/``.jpeg`` output.

    Returns
    -------
    dict
        Conversion result with keys: input_path, output_path, input_size_kb,
        output_size_kb, dpi, quality (None unless the output is JPEG).

    Raises
    ------
    FileNotFoundError
        If ``input_path`` does not exist.
    ImportError
        If PyMuPDF (PDF input) or Pillow is required but not installed.

    Examples
    --------
    >>> convert_figure("fig1.pdf", "fig1.png", dpi=300)
    >>> convert_figure("fig1.png", "fig1.jpg", quality=90)
    """
    input_path = Path(input_path)
    output_path = Path(output_path)

    if not input_path.exists():
        raise FileNotFoundError(f"Input file not found: {input_path}")

    output_path.parent.mkdir(parents=True, exist_ok=True)

    input_ext = input_path.suffix.lower()
    output_ext = output_path.suffix.lower()
    wants_jpeg = output_ext in [".jpg", ".jpeg"]

    if input_ext == ".pdf":
        # Keep the try body to the import alone so that a missing Pillow is
        # not misreported as a missing PyMuPDF.
        try:
            import fitz
        except ImportError:
            raise ImportError("PyMuPDF required for PDF conversion")

        doc = fitz.open(input_path)
        try:
            zoom = dpi / 72.0  # PDF user space is 72 units per inch
            pix = doc[0].get_pixmap(matrix=fitz.Matrix(zoom, zoom))

            if wants_jpeg:
                # Round-trip through PNG bytes so Pillow can encode the JPEG.
                import io

                from PIL import Image

                with Image.open(io.BytesIO(pix.tobytes("png"))) as img:
                    _flatten_to_rgb(img).save(
                        str(output_path), "JPEG", quality=quality
                    )
            else:
                pix.save(str(output_path))
        finally:
            # Release the document even when rendering or saving fails.
            doc.close()
    else:
        # Standard image conversion with PIL
        from PIL import Image

        with Image.open(input_path) as img:
            if wants_jpeg:
                _flatten_to_rgb(img).save(str(output_path), "JPEG", quality=quality)
            else:
                img.save(str(output_path))

    output_stat = output_path.stat()

    return {
        "input_path": str(input_path),
        "output_path": str(output_path),
        "input_size_kb": round(input_path.stat().st_size / 1024, 2),
        "output_size_kb": round(output_stat.st_size / 1024, 2),
        "dpi": dpi,
        "quality": quality if wants_jpeg else None,
    }


def _flatten_to_rgb(img):
    """Return *img* as an RGB image, compositing transparency onto white."""
    from PIL import Image

    if img.mode in ("RGBA", "LA", "P"):
        # Go through RGBA so that palette and grey+alpha images keep their
        # transparency information for the paste mask.
        rgba = img.convert("RGBA")
        background = Image.new("RGB", rgba.size, (255, 255, 255))
        background.paste(rgba, mask=rgba.split()[-1])
        return background
    if img.mode != "RGB":
        return img.convert("RGB")
    return img
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
# Public names exported by ``from <module> import *``.
__all__ = [
    # CSV <-> LaTeX tables
    "csv2latex",
    "latex2csv",
    # PDF rendering
    "pdf_to_images",
    "pdf_thumbnail",
    # Figure handling
    "list_figures",
    "convert_figure",
]
|
|
634
|
-
|
|
635
|
-
# EOF
|