pdf-auto-outline 0.1.5__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pdf_auto_outline-0.1.5 → pdf_auto_outline-0.1.7}/PKG-INFO +4 -3
- {pdf_auto_outline-0.1.5 → pdf_auto_outline-0.1.7}/README.md +3 -2
- {pdf_auto_outline-0.1.5 → pdf_auto_outline-0.1.7}/pyproject.toml +1 -1
- pdf_auto_outline-0.1.7/src/pdf_auto_outline/__init__.py +1 -0
- {pdf_auto_outline-0.1.5 → pdf_auto_outline-0.1.7}/src/pdf_auto_outline/main.py +77 -52
- pdf_auto_outline-0.1.7/src/pdf_auto_outline/test.py +10 -0
- pdf_auto_outline-0.1.7/src/pdf_auto_outline/test.txt +13 -0
- pdf_auto_outline-0.1.5/src/pdf_auto_outline/__init__.py +0 -1
- {pdf_auto_outline-0.1.5 → pdf_auto_outline-0.1.7}/src/pdf_auto_outline/__main__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: pdf-auto-outline
|
|
3
|
-
Version: 0.1.5
|
|
3
|
+
Version: 0.1.7
|
|
4
4
|
Summary: Automatically generate and edit PDF table of contents / outline
|
|
5
5
|
Author: Rossikos
|
|
6
6
|
Author-email: Rossikos <216631970+rossikos@users.noreply.github.com>
|
|
@@ -82,7 +82,6 @@ The optional part can be one of:
|
|
|
82
82
|
```
|
|
83
83
|
| None same as not including it
|
|
84
84
|
| 241.2 y-ordinate
|
|
85
|
-
| Point(72.0, 363.9) x and y coords
|
|
86
85
|
| {<dictionary>} dictionary with more attributes for the ToC entry
|
|
87
86
|
```
|
|
88
87
|
|
|
@@ -92,8 +91,10 @@ Example commands; add to `prefs_user.config`.
|
|
|
92
91
|
|
|
93
92
|
```
|
|
94
93
|
new_command _gen_toc pdfao "%{file_path}" --sioyek path/to/sioyek -mp 4
|
|
95
|
-
new_command _edit_toc pdfao "%{file_path}"
|
|
94
|
+
new_command _edit_toc pdfao "%{file_path}" -e
|
|
96
95
|
```
|
|
97
96
|
|
|
97
|
+
The sioyek library and flag are optional; they allow logging to the status bar. This is more useful for ToC generation where you may want a progress bar.
|
|
98
|
+
|
|
98
99
|
If you don't wish to install from PyPI, download source and use `python3 -m path/to/src/pdf_auto_outline` in place of `pdfao`.
|
|
99
100
|
|
|
@@ -65,7 +65,6 @@ The optional part can be one of:
|
|
|
65
65
|
```
|
|
66
66
|
| None same as not including it
|
|
67
67
|
| 241.2 y-ordinate
|
|
68
|
-
| Point(72.0, 363.9) x and y coords
|
|
69
68
|
| {<dictionary>} dictionary with more attributes for the ToC entry
|
|
70
69
|
```
|
|
71
70
|
|
|
@@ -75,8 +74,10 @@ Example commands; add to `prefs_user.config`.
|
|
|
75
74
|
|
|
76
75
|
```
|
|
77
76
|
new_command _gen_toc pdfao "%{file_path}" --sioyek path/to/sioyek -mp 4
|
|
78
|
-
new_command _edit_toc pdfao "%{file_path}"
|
|
77
|
+
new_command _edit_toc pdfao "%{file_path}" -e
|
|
79
78
|
```
|
|
80
79
|
|
|
80
|
+
The sioyek library and flag are optional; they allow logging to the status bar. This is more useful for ToC generation where you may want a progress bar.
|
|
81
|
+
|
|
81
82
|
If you don't wish to install from PyPI, download source and use `python3 -m path/to/src/pdf_auto_outline` in place of `pdfao`.
|
|
82
83
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = '0.1.7'
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import pymupdf.layout
|
|
2
|
-
from pymupdf import Point
|
|
3
2
|
from time import perf_counter
|
|
4
3
|
from multiprocessing import Pool
|
|
5
4
|
import os
|
|
6
5
|
import subprocess
|
|
7
6
|
import argparse
|
|
7
|
+
import tempfile
|
|
8
8
|
|
|
9
9
|
SIOYEK = None
|
|
10
10
|
|
|
@@ -148,14 +148,19 @@ def align_toc_lvls(toc_entries: list) -> list:
|
|
|
148
148
|
|
|
149
149
|
return toc_entries
|
|
150
150
|
|
|
151
|
-
def generate_txtfile(toc_entries, txtfile='outline.txt') -> str:
|
|
151
|
+
def get_tmpfile():
|
|
152
|
+
return tempfile.NamedTemporaryFile(
|
|
153
|
+
mode='w+', encoding='utf-8', delete=False, suffix='.txt'
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
def generate_txtfile(toc_entries, txtfile=get_tmpfile()):
|
|
152
157
|
import textwrap
|
|
153
158
|
txt = textwrap.dedent("""\
|
|
154
159
|
============================================================
|
|
155
160
|
TABLE OF CONTENTS OUTLINE
|
|
156
161
|
4spaces/lvl text | pg# | {details dictionary} OR y-coord
|
|
157
162
|
|
|
158
|
-
Type '
|
|
163
|
+
Type '\\' as the first character of this file to cancel
|
|
159
164
|
============================================================
|
|
160
165
|
|
|
161
166
|
""")
|
|
@@ -168,60 +173,77 @@ def generate_txtfile(toc_entries, txtfile='outline.txt') -> str:
|
|
|
168
173
|
txt += '\n'.join(f"{' '*4 * (i[0] - 1)}{i[1]} | {i[2]}"
|
|
169
174
|
for i in toc_entries)
|
|
170
175
|
|
|
171
|
-
|
|
172
|
-
|
|
176
|
+
txtfile.write(txt)
|
|
177
|
+
txtfile.flush()
|
|
178
|
+
txtfile.seek(0)
|
|
173
179
|
|
|
174
180
|
return txtfile
|
|
175
181
|
|
|
176
182
|
|
|
177
|
-
|
|
183
|
+
|
|
184
|
+
def parse_txtfile(f, tablevel=2) -> list:
|
|
178
185
|
toc_entries = []
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
186
|
+
if (c := f.read(1)) == '\\':
|
|
187
|
+
log('Outline not written')
|
|
188
|
+
exit()
|
|
189
|
+
elif c == '=':
|
|
190
|
+
for _ in range(7):
|
|
191
|
+
f.readline()
|
|
192
|
+
else:
|
|
193
|
+
f.seek(0)
|
|
194
|
+
|
|
195
|
+
for ln, i in enumerate(f):
|
|
196
|
+
i = i.replace('\t', ' '*tablevel)
|
|
197
|
+
lvl = (len(i) - len(i.lstrip())) // 4 + 1
|
|
198
|
+
a = i.lstrip().split(' | ')
|
|
199
|
+
# print(i)
|
|
200
|
+
if (l := len(a)) == 2:
|
|
201
|
+
toc_entries.append(
|
|
202
|
+
[lvl, a[0], int(a[1])]
|
|
203
|
+
)
|
|
204
|
+
elif l == 3:
|
|
205
|
+
toc_entries.append(
|
|
206
|
+
[lvl, a[0], int(a[1]), eval(a[2])]
|
|
207
|
+
)
|
|
208
|
+
else:
|
|
209
|
+
log(f'Error parsing line {ln+1}: {i}')
|
|
182
210
|
exit()
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
else: lines = f.read()
|
|
186
|
-
|
|
187
|
-
for i in lines:
|
|
188
|
-
i = i.replace('\t', ' '*tablevel)
|
|
189
|
-
lvl = (len(i) - len(i.lstrip())) // 4 + 1
|
|
190
|
-
a = i.lstrip().split(' | ')
|
|
191
|
-
if len(a) < 3:
|
|
192
|
-
toc_entries.append(
|
|
193
|
-
[lvl, a[0], int(a[1])]
|
|
194
|
-
)
|
|
195
|
-
else:
|
|
196
|
-
toc_entries.append(
|
|
197
|
-
[lvl, a[0], int(a[1]), eval(a[2])]
|
|
198
|
-
)
|
|
211
|
+
|
|
212
|
+
f.close()
|
|
199
213
|
|
|
200
214
|
return toc_entries
|
|
201
215
|
|
|
202
|
-
def embed_toc(pdfpath, toc_entries, newfile=''):
|
|
216
|
+
def embed_toc(pdfpath, toc_entries, newfile='', offset=0):
|
|
217
|
+
if offset != 0:
|
|
218
|
+
toc_entries = [[a, b, c + offset, *d] for a, b, c, *d in toc_entries]
|
|
203
219
|
doc = pymupdf.open(pdfpath)
|
|
204
220
|
doc.set_toc(toc_entries, collapse=2)
|
|
205
221
|
if newfile:
|
|
206
222
|
doc.save(newfile)
|
|
207
223
|
log(f"toc written to '{newfile}'")
|
|
208
|
-
|
|
224
|
+
elif doc.can_save_incrementally():
|
|
209
225
|
doc.saveIncr()
|
|
210
226
|
log(f"toc saved to '{pdfpath}'")
|
|
227
|
+
else:
|
|
228
|
+
log('cannot save to original; saving to new file...')
|
|
229
|
+
new_path = '_new.'.join(pdfpath.split('.'))
|
|
230
|
+
try:
|
|
231
|
+
doc.save(new_path, garbage=4, deflate=True, use_objstms=True)
|
|
232
|
+
except KeyboardInterrupt as e:
|
|
233
|
+
log('Cancelled')
|
|
234
|
+
exit()
|
|
235
|
+
log(f"toc written to '{new_path}'")
|
|
211
236
|
|
|
212
237
|
def get_toc_custom(doc) -> list:
|
|
213
238
|
toc_entries = [[*i[:3], i[3].get('to')[1]] for i in doc.get_toc(False)]
|
|
214
239
|
return toc_entries
|
|
215
240
|
|
|
216
|
-
def edit_txtfile(
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
name = os.name
|
|
220
|
-
if name == 'nt':
|
|
221
|
-
subprocess.run(['start', '/WAIT', txtfile], shell=True)
|
|
241
|
+
def edit_txtfile(f):
|
|
242
|
+
if os.name == 'nt':
|
|
243
|
+
subprocess.run(['start', '/WAIT', f.name], shell=True)
|
|
222
244
|
else: # name == 'posix':
|
|
223
245
|
editor = os.environ.get('EDITOR', 'vi')
|
|
224
|
-
subprocess.run([editor, txtfile])
|
|
246
|
+
subprocess.run([editor, f.name])
|
|
225
247
|
|
|
226
248
|
def main():
|
|
227
249
|
parser = argparse.ArgumentParser(prog='pdfao')
|
|
@@ -233,20 +255,21 @@ def main():
|
|
|
233
255
|
parser.add_argument('-se', '--superedit', action='store_true', help='edit pdf toc (more attibutes available)')
|
|
234
256
|
parser.add_argument('-i', '--infile', type=str, metavar='<file>', help='write toc from file to pdf')
|
|
235
257
|
parser.add_argument('-t', '--tablevel', type=int, metavar='<n>', help='tab = n toc nesting levels (default 2)', default=2)
|
|
258
|
+
parser.add_argument('-os', '--offset', type=int, metavar='<n>', help='toc page offset; use with infile', default=0)
|
|
236
259
|
parser.add_argument('--sioyek', type=str, metavar='<path>', help='for users of the Sioyek pdf viewer')
|
|
237
|
-
parser.add_argument('--version', action='version', version='%(prog)s 0.1.5')
|
|
260
|
+
parser.add_argument('--version', action='version', version='%(prog)s 0.1.7')
|
|
238
261
|
|
|
239
262
|
args = parser.parse_args()
|
|
240
263
|
|
|
264
|
+
if args.out:
|
|
265
|
+
args.out = os.path.join(
|
|
266
|
+
os.path.dirname(args.filename),
|
|
267
|
+
args.out)
|
|
268
|
+
|
|
241
269
|
if args.sioyek:
|
|
242
270
|
from sioyek.sioyek import Sioyek
|
|
243
271
|
global SIOYEK
|
|
244
272
|
SIOYEK = Sioyek(args.sioyek)
|
|
245
|
-
if args.out:
|
|
246
|
-
args.out = os.path.join(
|
|
247
|
-
os.path.dirname(args.filename),
|
|
248
|
-
args.out
|
|
249
|
-
)
|
|
250
273
|
# local_db = args.sioyek[1]
|
|
251
274
|
# shared_db = args.sioyek[2]
|
|
252
275
|
# pdf_path = args.sioyek[3]
|
|
@@ -255,15 +278,16 @@ def main():
|
|
|
255
278
|
if args.edit or args.superedit:
|
|
256
279
|
doc = pymupdf.Document(args.filename)
|
|
257
280
|
if args.superedit:
|
|
258
|
-
generate_txtfile(doc.get_toc(False))
|
|
281
|
+
f = generate_txtfile(doc.get_toc(False))
|
|
259
282
|
else:
|
|
260
|
-
generate_txtfile(get_toc_custom(doc))
|
|
261
|
-
edit_txtfile()
|
|
262
|
-
toc_entries = parse_txtfile(
|
|
263
|
-
embed_toc(args.filename, toc_entries, args.out)
|
|
283
|
+
f = generate_txtfile(get_toc_custom(doc))
|
|
284
|
+
edit_txtfile(f)
|
|
285
|
+
toc_entries = parse_txtfile(f, args.tablevel)
|
|
286
|
+
embed_toc(args.filename, toc_entries, args.out, args.offset)
|
|
287
|
+
os.remove(f.name)
|
|
264
288
|
elif args.infile:
|
|
265
|
-
toc_entries = parse_txtfile(args.infile, args.tablevel)
|
|
266
|
-
embed_toc(args.filename, toc_entries, args.out)
|
|
289
|
+
toc_entries = parse_txtfile(open(args.infile, encoding='utf-8'), args.tablevel)
|
|
290
|
+
embed_toc(args.filename, toc_entries, args.out, args.offset)
|
|
267
291
|
else: # generate toc
|
|
268
292
|
start = perf_counter()
|
|
269
293
|
toc_entries = generate_toc_nnet(args.filename, args.multiprocess)
|
|
@@ -272,12 +296,13 @@ def main():
|
|
|
272
296
|
log(f"finished in {end - start:<4.1f} s")
|
|
273
297
|
toc_entries = align_toc_lvls(toc_entries)
|
|
274
298
|
if args.straight:
|
|
275
|
-
embed_toc(args.filename, toc_entries, args.out)
|
|
299
|
+
embed_toc(args.filename, toc_entries, args.out, args.offset)
|
|
276
300
|
else:
|
|
277
|
-
generate_txtfile(toc_entries)
|
|
278
|
-
edit_txtfile()
|
|
279
|
-
toc_entries = parse_txtfile(
|
|
280
|
-
embed_toc(args.filename, toc_entries, args.out)
|
|
301
|
+
f = generate_txtfile(toc_entries)
|
|
302
|
+
edit_txtfile(f)
|
|
303
|
+
toc_entries = parse_txtfile(f, args.tablevel)
|
|
304
|
+
embed_toc(args.filename, toc_entries, args.out, args.offset)
|
|
305
|
+
os.remove(f.name)
|
|
281
306
|
|
|
282
307
|
# if args.sioyek and not args.out:
|
|
283
308
|
# to_hash = get_md5_hash(args.filename)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
============================================================
|
|
2
|
+
TABLE OF CONTENTS OUTLINE
|
|
3
|
+
4spaces/lvl text | pg# | {details dictionary} OR y-coord
|
|
4
|
+
|
|
5
|
+
Type '\\' as the first character of this file to cancel
|
|
6
|
+
============================================================
|
|
7
|
+
|
|
8
|
+
1
|
|
9
|
+
2
|
|
10
|
+
3
|
|
11
|
+
4
|
|
12
|
+
5
|
|
13
|
+
6
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = '0.1.5'
|
|
File without changes
|