pdf-auto-outline 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdf_auto_outline/__init__.py +1 -1
- pdf_auto_outline/main.py +57 -48
- pdf_auto_outline/test.py +4 -0
- {pdf_auto_outline-0.1.4.dist-info → pdf_auto_outline-0.1.6.dist-info}/METADATA +26 -2
- pdf_auto_outline-0.1.6.dist-info/RECORD +8 -0
- pdf_auto_outline-0.1.4.dist-info/RECORD +0 -7
- {pdf_auto_outline-0.1.4.dist-info → pdf_auto_outline-0.1.6.dist-info}/WHEEL +0 -0
- {pdf_auto_outline-0.1.4.dist-info → pdf_auto_outline-0.1.6.dist-info}/entry_points.txt +0 -0
pdf_auto_outline/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = '0.1.
|
|
1
|
+
__version__ = '0.1.6'
|
pdf_auto_outline/main.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import pymupdf.layout
|
|
2
|
-
from pymupdf import Point
|
|
3
2
|
from time import perf_counter
|
|
4
3
|
from multiprocessing import Pool
|
|
5
4
|
import os
|
|
6
5
|
import subprocess
|
|
7
6
|
import argparse
|
|
7
|
+
import tempfile
|
|
8
8
|
|
|
9
9
|
SIOYEK = None
|
|
10
10
|
|
|
@@ -148,7 +148,12 @@ def align_toc_lvls(toc_entries: list) -> list:
|
|
|
148
148
|
|
|
149
149
|
return toc_entries
|
|
150
150
|
|
|
151
|
-
def
|
|
151
|
+
def get_tmpfile():
|
|
152
|
+
return tempfile.NamedTemporaryFile(
|
|
153
|
+
mode='w+', encoding='utf-8', delete=False, suffix='.txt'
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
def generate_txtfile(toc_entries, txtfile=get_tmpfile()):
|
|
152
157
|
import textwrap
|
|
153
158
|
txt = textwrap.dedent("""\
|
|
154
159
|
============================================================
|
|
@@ -159,41 +164,46 @@ def generate_txtfile(toc_entries, txtfile='outline.txt') -> str:
|
|
|
159
164
|
============================================================
|
|
160
165
|
|
|
161
166
|
""")
|
|
162
|
-
if
|
|
167
|
+
if not toc_entries:
|
|
168
|
+
pass
|
|
169
|
+
elif len(toc_entries[0]) > 3:
|
|
163
170
|
txt += '\n'.join(f"{' '*4 * (i[0] - 1)}{i[1]} | {i[2]} | {i[3]}"
|
|
164
171
|
for i in toc_entries)
|
|
165
172
|
else:
|
|
166
173
|
txt += '\n'.join(f"{' '*4 * (i[0] - 1)}{i[1]} | {i[2]}"
|
|
167
174
|
for i in toc_entries)
|
|
168
175
|
|
|
169
|
-
|
|
170
|
-
|
|
176
|
+
txtfile.write(txt)
|
|
177
|
+
txtfile.flush()
|
|
178
|
+
txtfile.seek(0)
|
|
171
179
|
|
|
172
180
|
return txtfile
|
|
173
181
|
|
|
174
182
|
|
|
175
|
-
|
|
183
|
+
|
|
184
|
+
def parse_txtfile(f, tablevel=2) -> list:
|
|
176
185
|
toc_entries = []
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
186
|
+
if (c := f.read(1)) == 'C':
|
|
187
|
+
log('Outline not written')
|
|
188
|
+
exit()
|
|
189
|
+
elif c == '=':
|
|
190
|
+
lines = f.readlines()[7:]
|
|
191
|
+
else: lines = f.read()
|
|
192
|
+
|
|
193
|
+
for i in lines:
|
|
194
|
+
i = i.replace('\t', ' '*tablevel)
|
|
195
|
+
lvl = (len(i) - len(i.lstrip())) // 4 + 1
|
|
196
|
+
a = i.lstrip().split(' | ')
|
|
197
|
+
if len(a) < 3:
|
|
198
|
+
toc_entries.append(
|
|
199
|
+
[lvl, a[0], int(a[1])]
|
|
200
|
+
)
|
|
201
|
+
else:
|
|
202
|
+
toc_entries.append(
|
|
203
|
+
[lvl, a[0], int(a[1]), eval(a[2])]
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
f.close()
|
|
197
207
|
|
|
198
208
|
return toc_entries
|
|
199
209
|
|
|
@@ -208,18 +218,15 @@ def embed_toc(pdfpath, toc_entries, newfile=''):
|
|
|
208
218
|
log(f"toc saved to '{pdfpath}'")
|
|
209
219
|
|
|
210
220
|
def get_toc_custom(doc) -> list:
|
|
211
|
-
toc_entries = [[*i[:3], i[3].get('to')] for i in doc.get_toc(False)]
|
|
221
|
+
toc_entries = [[*i[:3], i[3].get('to')[1]] for i in doc.get_toc(False)]
|
|
212
222
|
return toc_entries
|
|
213
223
|
|
|
214
|
-
def edit_txtfile(
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
name = os.name
|
|
218
|
-
if name == 'nt':
|
|
219
|
-
subprocess.run(['start', '/WAIT', txtfile], shell=True)
|
|
224
|
+
def edit_txtfile(f):
|
|
225
|
+
if os.name == 'nt':
|
|
226
|
+
subprocess.run(['start', '/WAIT', f.name], shell=True)
|
|
220
227
|
else: # name == 'posix':
|
|
221
228
|
editor = os.environ.get('EDITOR', 'vi')
|
|
222
|
-
subprocess.run([editor,
|
|
229
|
+
subprocess.run([editor, f.name])
|
|
223
230
|
|
|
224
231
|
def main():
|
|
225
232
|
parser = argparse.ArgumentParser(prog='pdfao')
|
|
@@ -232,19 +239,19 @@ def main():
|
|
|
232
239
|
parser.add_argument('-i', '--infile', type=str, metavar='<file>', help='write toc from file to pdf')
|
|
233
240
|
parser.add_argument('-t', '--tablevel', type=int, metavar='<n>', help='tab = n toc nesting levels (default 2)', default=2)
|
|
234
241
|
parser.add_argument('--sioyek', type=str, metavar='<path>', help='for users of the Sioyek pdf viewer')
|
|
235
|
-
parser.add_argument('--version', action='version', version='%(prog)s 0.1.
|
|
242
|
+
parser.add_argument('--version', action='version', version='%(prog)s 0.1.6')
|
|
236
243
|
|
|
237
244
|
args = parser.parse_args()
|
|
238
245
|
|
|
246
|
+
if args.out:
|
|
247
|
+
args.out = os.path.join(
|
|
248
|
+
os.path.dirname(args.filename),
|
|
249
|
+
args.out)
|
|
250
|
+
|
|
239
251
|
if args.sioyek:
|
|
240
252
|
from sioyek.sioyek import Sioyek
|
|
241
253
|
global SIOYEK
|
|
242
254
|
SIOYEK = Sioyek(args.sioyek)
|
|
243
|
-
if args.out:
|
|
244
|
-
args.out = os.path.join(
|
|
245
|
-
os.path.dirname(args.filename),
|
|
246
|
-
args.out
|
|
247
|
-
)
|
|
248
255
|
# local_db = args.sioyek[1]
|
|
249
256
|
# shared_db = args.sioyek[2]
|
|
250
257
|
# pdf_path = args.sioyek[3]
|
|
@@ -253,14 +260,15 @@ def main():
|
|
|
253
260
|
if args.edit or args.superedit:
|
|
254
261
|
doc = pymupdf.Document(args.filename)
|
|
255
262
|
if args.superedit:
|
|
256
|
-
generate_txtfile(doc.get_toc(False))
|
|
263
|
+
f = generate_txtfile(doc.get_toc(False))
|
|
257
264
|
else:
|
|
258
|
-
generate_txtfile(get_toc_custom(doc))
|
|
259
|
-
edit_txtfile()
|
|
260
|
-
toc_entries = parse_txtfile(
|
|
265
|
+
f = generate_txtfile(get_toc_custom(doc))
|
|
266
|
+
edit_txtfile(f)
|
|
267
|
+
toc_entries = parse_txtfile(f, args.tablevel)
|
|
261
268
|
embed_toc(args.filename, toc_entries, args.out)
|
|
269
|
+
os.remove(f.name)
|
|
262
270
|
elif args.infile:
|
|
263
|
-
toc_entries = parse_txtfile(args.infile, args.tablevel)
|
|
271
|
+
toc_entries = parse_txtfile(open(args.infile, encoding='utf-8'), args.tablevel)
|
|
264
272
|
embed_toc(args.filename, toc_entries, args.out)
|
|
265
273
|
else: # generate toc
|
|
266
274
|
start = perf_counter()
|
|
@@ -272,10 +280,11 @@ def main():
|
|
|
272
280
|
if args.straight:
|
|
273
281
|
embed_toc(args.filename, toc_entries, args.out)
|
|
274
282
|
else:
|
|
275
|
-
generate_txtfile(toc_entries)
|
|
276
|
-
edit_txtfile()
|
|
277
|
-
toc_entries = parse_txtfile(
|
|
283
|
+
f = generate_txtfile(toc_entries)
|
|
284
|
+
edit_txtfile(f)
|
|
285
|
+
toc_entries = parse_txtfile(f, args.tablevel)
|
|
278
286
|
embed_toc(args.filename, toc_entries, args.out)
|
|
287
|
+
os.remove(f.name)
|
|
279
288
|
|
|
280
289
|
# if args.sioyek and not args.out:
|
|
281
290
|
# to_hash = get_md5_hash(args.filename)
|
pdf_auto_outline/test.py
ADDED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: pdf-auto-outline
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: Automatically generate and edit PDF table of contents / outline
|
|
5
5
|
Author: Rossikos
|
|
6
6
|
Author-email: Rossikos <216631970+rossikos@users.noreply.github.com>
|
|
@@ -62,6 +62,28 @@ Edit exiting pdf toc:
|
|
|
62
62
|
Save toc to a new pdf from file:
|
|
63
63
|
`pdfao paper.pdf -o new.pdf -i outline.txt`
|
|
64
64
|
|
|
65
|
+
### Editing
|
|
66
|
+
|
|
67
|
+
The edit command opens the TOC in the OS default editor (whatever the 'start' command launches on Windows, or the program named by the 'EDITOR' environment variable on macOS and Linux, falling back to 'vi' if it is unset). The file format looks like this:
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
Title 1 | 1
|
|
71
|
+
Title 2 | 2 | *
|
|
72
|
+
^^^^^^
|
|
73
|
+
optional
|
|
74
|
+
```
|
|
75
|
+
The essential parts of each line are:
|
|
76
|
+
- Indentation - 4 space characters per nesting level (or use tabs with the -t flag).
|
|
77
|
+
- Title text
|
|
78
|
+
- Delimiter - ' | ' (vertical bar with 2 spaces padding on each side)
|
|
79
|
+
- Page number
|
|
80
|
+
|
|
81
|
+
The optional part can be one of:
|
|
82
|
+
```
|
|
83
|
+
| None same as not including it
|
|
84
|
+
| 241.2 y-ordinate
|
|
85
|
+
| {<dictionary>} dictionary with more attributes for the ToC entry
|
|
86
|
+
```
|
|
65
87
|
|
|
66
88
|
## For Sioyek Users
|
|
67
89
|
|
|
@@ -69,8 +91,10 @@ Example commands; add to `prefs_user.config`.
|
|
|
69
91
|
|
|
70
92
|
```
|
|
71
93
|
new_command _gen_toc pdfao "%{file_path}" --sioyek path/to/sioyek -mp 4
|
|
72
|
-
new_command _edit_toc pdfao %{file_path}"
|
|
94
|
+
new_command _edit_toc pdfao "%{file_path}"
|
|
73
95
|
```
|
|
74
96
|
|
|
97
|
+
The sioyek library and flag are optional; they allow logging to the status bar. This is more useful for ToC generation where you may want a progress bar.
|
|
98
|
+
|
|
75
99
|
If you don't wish to install from PyPI, download source and use `python3 -m path/to/src/pdf_auto_outline` in place of `pdfao`.
|
|
76
100
|
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
pdf_auto_outline/__init__.py,sha256=gW5NUxwGdPsiQjn0cOuuQT11pfthByI5DITDg_HMhLQ,22
|
|
2
|
+
pdf_auto_outline/__main__.py,sha256=mRKsAFeG5R17vTYubIKregAve4vnKc-nk7jY3tcK4wI,78
|
|
3
|
+
pdf_auto_outline/main.py,sha256=AWbXZNVw5e34v005qtoCGyTzNbT3T1mOo6xeLl-Q82k,9883
|
|
4
|
+
pdf_auto_outline/test.py,sha256=2wIjM8xqnDjHiokFF0WUNw0o_oTGW91V-MifHoLw4Rw,47
|
|
5
|
+
pdf_auto_outline-0.1.6.dist-info/WHEEL,sha256=5w2T7AS2mz1-rW9CNagNYWRCaB0iQqBMYLwKdlgiR4Q,78
|
|
6
|
+
pdf_auto_outline-0.1.6.dist-info/entry_points.txt,sha256=HBvhmxJs8hHqbbpJmVTbBH3xy19Hk655O_ySwFC_53w,100
|
|
7
|
+
pdf_auto_outline-0.1.6.dist-info/METADATA,sha256=Rw9x1kQxghKSGVu31qbnLASX87QMjAeUzg2JMAecupU,3287
|
|
8
|
+
pdf_auto_outline-0.1.6.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
pdf_auto_outline/__init__.py,sha256=aBEbDvx4LMg8A1TJJR6dEHu8rQODVin528hLS_EDvuA,22
|
|
2
|
-
pdf_auto_outline/__main__.py,sha256=mRKsAFeG5R17vTYubIKregAve4vnKc-nk7jY3tcK4wI,78
|
|
3
|
-
pdf_auto_outline/main.py,sha256=s9rVJJjzqavhAfCAmCOadcOn_C_hbaTmNGSK-WX79JY,9978
|
|
4
|
-
pdf_auto_outline-0.1.4.dist-info/WHEEL,sha256=5w2T7AS2mz1-rW9CNagNYWRCaB0iQqBMYLwKdlgiR4Q,78
|
|
5
|
-
pdf_auto_outline-0.1.4.dist-info/entry_points.txt,sha256=HBvhmxJs8hHqbbpJmVTbBH3xy19Hk655O_ySwFC_53w,100
|
|
6
|
-
pdf_auto_outline-0.1.4.dist-info/METADATA,sha256=ALWzcv1m6sCGDJ6dJyLu9i0RO8IIgNNh086z_Wx873E,2449
|
|
7
|
-
pdf_auto_outline-0.1.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|