pdf-auto-outline 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdf_auto_outline/__init__.py +1 -1
- pdf_auto_outline/__main__.py +1 -1
- pdf_auto_outline/main.py +54 -64
- pdf_auto_outline-0.1.2.dist-info/METADATA +73 -0
- pdf_auto_outline-0.1.2.dist-info/RECORD +7 -0
- pdf_auto_outline-0.1.0.dist-info/METADATA +0 -46
- pdf_auto_outline-0.1.0.dist-info/RECORD +0 -7
- {pdf_auto_outline-0.1.0.dist-info → pdf_auto_outline-0.1.2.dist-info}/WHEEL +0 -0
- {pdf_auto_outline-0.1.0.dist-info → pdf_auto_outline-0.1.2.dist-info}/entry_points.txt +0 -0
pdf_auto_outline/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = '0.1.
|
|
1
|
+
__version__ = '0.1.2'
|
pdf_auto_outline/__main__.py
CHANGED
pdf_auto_outline/main.py
CHANGED
|
@@ -96,77 +96,56 @@ def generate_toc_nnet(pdfpath, worker_cnt=3) -> list:
|
|
|
96
96
|
log('\nCancelled')
|
|
97
97
|
exit()
|
|
98
98
|
|
|
99
|
-
log('')
|
|
100
99
|
|
|
101
100
|
return [j for i in pg_nums for j in results[i]]
|
|
102
101
|
|
|
103
102
|
def align_toc_lvls(toc_entries: list) -> list:
|
|
104
|
-
# TODO: fix this spaghetti
|
|
105
103
|
import re
|
|
106
|
-
def act(
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
return e[current]
|
|
114
|
-
# return max(1, lvl - 1)
|
|
104
|
+
def act(current): # cur prev expected lvl
|
|
105
|
+
if current == d['prev_name']: # current is sibling
|
|
106
|
+
pass
|
|
107
|
+
elif current == 'p5': # current is figure/table type
|
|
108
|
+
d['lvl'] += 1
|
|
109
|
+
elif e[current] < d['prev_lvl']: # current is parent
|
|
110
|
+
d['lvl'] = e[current]
|
|
115
111
|
else: # e[current] > prev[1]: # current is child
|
|
116
|
-
e[current] = min(lvl + 1, e[current])
|
|
117
|
-
|
|
118
|
-
# else: #e[current] == prev: # current is sibling
|
|
119
|
-
# return lvl
|
|
112
|
+
e[current] = min(d['lvl'] + 1, e[current])
|
|
113
|
+
d['lvl'] = min(d['lvl'] + 1, e[current])
|
|
120
114
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
p4 = re.compile(r'^([AIVXC\d]+\.){2}[IVXC\d]\.? \w')
|
|
125
|
-
p5 = re.compile(r'^(Fig(ure)?\.?)|(Table\.? [\dIVXC]+)')
|
|
126
|
-
p6 = re.compile(r'''\d?\s?(Introduction)|((Materials and )?Methods)|(Results)|
|
|
127
|
-
(Discussion)|(References)|(Summary)|(Conclusion)|(Acknowledgements)
|
|
128
|
-
''', re.IGNORECASE)
|
|
129
|
-
p7 = re.compile(r'^\d?\s?[A-Z ]{2,}')
|
|
115
|
+
d['prev_name'] = current
|
|
116
|
+
d['prev_lvl'] = e[current]
|
|
117
|
+
toc_entries[i-d['removed']][0] = d['lvl']
|
|
130
118
|
|
|
119
|
+
p1 = re.compile(r'^[A-Z\d]')
|
|
120
|
+
patterns = (
|
|
121
|
+
re.compile(r'^(Contents)|(Chapter)|(Appendix)|(Index)|(Bibliography)|(Preface)'),
|
|
122
|
+
re.compile(r'^([IVXC\d])+\.[IVXC\d]\.? \w'),
|
|
123
|
+
re.compile(r'^([AIVXC\d]+\.){2}[IVXC\d]\.? \w'),
|
|
124
|
+
re.compile(r'^(Fig(ure)?\.?)|(Table\.? [\dIVXC]+)', re.IGNORECASE),
|
|
125
|
+
re.compile(r'''\d?\s?(Introduction)|((Materials? and )?Methods)|(Results)|
|
|
126
|
+
(Discussion)|(References)|(Summary)|(Conclusion)|(Acknowledgements)
|
|
127
|
+
''', re.IGNORECASE),
|
|
128
|
+
re.compile(r'^\d?\s?[A-Z ]{2,}'),
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
# expected nesting levels
|
|
131
132
|
e = {'p1': 1, 'p2': 1, 'p3': 2, 'p4': 3, 'p5': 5, 'p6': 1, 'p7': 1, 'l': 2,}
|
|
133
|
+
# line status
|
|
134
|
+
d = {'lvl': 1, 'prev_name': 'p1', 'prev_lvl': 1, 'titles': set(), 'removed': 0}
|
|
132
135
|
|
|
133
136
|
log('aligning levels..')
|
|
134
|
-
lvl, prev, titles, removed = 1, ('p1', 1), set(), 0
|
|
135
137
|
|
|
136
138
|
for i in range(1, len(toc_entries)):
|
|
137
|
-
title = toc_entries[i-removed][1]
|
|
138
|
-
if (not p1.match(title)) or len(title) < 4 or title in titles: #skip
|
|
139
|
-
toc_entries.pop(i-removed)
|
|
140
|
-
removed += 1
|
|
141
|
-
elif
|
|
142
|
-
|
|
143
|
-
toc_entries[i-removed][0] = lvl
|
|
144
|
-
prev = ('p2', e['p2'])
|
|
145
|
-
elif p7.match(title):
|
|
146
|
-
lvl = act(lvl, 'p7', prev)
|
|
147
|
-
toc_entries[i-removed][0] = lvl
|
|
148
|
-
prev = ('p7', e['p7'])
|
|
149
|
-
elif p6.match(title):
|
|
150
|
-
lvl = act(lvl, 'p6', prev)
|
|
151
|
-
toc_entries[i-removed][0] = lvl
|
|
152
|
-
prev = ('p6', e['p6'])
|
|
153
|
-
elif p3.match(title):
|
|
154
|
-
lvl = act(lvl, 'p3', prev)
|
|
155
|
-
toc_entries[i-removed][0] = lvl
|
|
156
|
-
prev = ('p3', e['p3'])
|
|
157
|
-
elif p4.match(title):
|
|
158
|
-
lvl = act(lvl, 'p4', prev)
|
|
159
|
-
toc_entries[i-removed][0] = lvl
|
|
160
|
-
prev = ('p4', e['p4'])
|
|
161
|
-
elif p5.match(title):
|
|
162
|
-
lvl = act(lvl, 'p5', prev)
|
|
163
|
-
toc_entries[i-removed][0] = lvl
|
|
164
|
-
prev = ('p5', e['p5'])
|
|
139
|
+
title = toc_entries[i-d['removed']][1]
|
|
140
|
+
if (not p1.match(title)) or len(title) < 4 or title in d['titles']: #skip
|
|
141
|
+
toc_entries.pop(i-d['removed'])
|
|
142
|
+
d['removed'] += 1
|
|
143
|
+
elif (name := next((idi for idi, i in enumerate(patterns) if i.match(title)), None)):
|
|
144
|
+
act(f'p{name+2}')
|
|
165
145
|
else:
|
|
166
|
-
titles.add(title)
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
prev = ('l', e['l'])
|
|
146
|
+
d['titles'].add(title)
|
|
147
|
+
act('l')
|
|
148
|
+
|
|
170
149
|
return toc_entries
|
|
171
150
|
|
|
172
151
|
def generate_txtfile(toc_entries, txtfile='outline.txt') -> str:
|
|
@@ -214,7 +193,6 @@ def parse_txtfile(txtfile='outline.txt', tablevel=2) -> list:
|
|
|
214
193
|
return toc_entries
|
|
215
194
|
|
|
216
195
|
def embed_toc(pdfpath, toc_entries, newfile=''):
|
|
217
|
-
print(len(toc_entries))
|
|
218
196
|
doc = pymupdf.open(pdfpath)
|
|
219
197
|
doc.set_toc(toc_entries, collapse=2)
|
|
220
198
|
if newfile:
|
|
@@ -224,10 +202,13 @@ def embed_toc(pdfpath, toc_entries, newfile=''):
|
|
|
224
202
|
doc.saveIncr()
|
|
225
203
|
log(f"toc saved to '{pdfpath}'")
|
|
226
204
|
|
|
227
|
-
|
|
205
|
+
def get_toc_custom(doc) -> list:
|
|
206
|
+
toc_entries = [[*i[:3], i[3].get('to')] for i in doc.get_toc(False)]
|
|
207
|
+
return toc_entries
|
|
228
208
|
|
|
229
209
|
def edit_txtfile(txtfile='outline.txt'):
|
|
230
|
-
editor = os.environ.get('EDITOR', 'notepad' if os.name == 'nt' else 'vi')
|
|
210
|
+
# editor = os.environ.get('EDITOR', 'notepad' if os.name == 'nt' else 'vi')
|
|
211
|
+
editor = os.environ.get('EDITOR', 'start' if os.name == 'nt' else 'xdg-open')
|
|
231
212
|
subprocess.run([editor, txtfile])
|
|
232
213
|
|
|
233
214
|
def main():
|
|
@@ -241,14 +222,19 @@ def main():
|
|
|
241
222
|
parser.add_argument('-i', '--infile', type=str, metavar='<file>', help='write toc from file to pdf')
|
|
242
223
|
parser.add_argument('-t', '--tablevel', type=int, metavar='<n>', help='tab = n toc nesting levels (default 2)', default=2)
|
|
243
224
|
parser.add_argument('--sioyek', type=str, metavar='<path>', help='for users of the Sioyek pdf viewer')
|
|
244
|
-
parser.add_argument('--version', action='version', version='%(prog)s 0.1.
|
|
225
|
+
parser.add_argument('--version', action='version', version='%(prog)s 0.1.2')
|
|
245
226
|
|
|
246
227
|
args = parser.parse_args()
|
|
247
228
|
|
|
248
229
|
if args.sioyek:
|
|
249
230
|
from sioyek.sioyek import Sioyek
|
|
250
|
-
|
|
251
|
-
SIOYEK = Sioyek(
|
|
231
|
+
global SIOYEK
|
|
232
|
+
SIOYEK = Sioyek(args.sioyek)
|
|
233
|
+
if args.out:
|
|
234
|
+
args.out = os.path.join(
|
|
235
|
+
os.path.dirname(args.filename),
|
|
236
|
+
args.out
|
|
237
|
+
)
|
|
252
238
|
# local_db = args.sioyek[1]
|
|
253
239
|
# shared_db = args.sioyek[2]
|
|
254
240
|
# pdf_path = args.sioyek[3]
|
|
@@ -256,7 +242,10 @@ def main():
|
|
|
256
242
|
|
|
257
243
|
if args.edit or args.superedit:
|
|
258
244
|
doc = pymupdf.Document(args.filename)
|
|
259
|
-
|
|
245
|
+
if args.superedit:
|
|
246
|
+
generate_txtfile(doc.get_toc(False))
|
|
247
|
+
else:
|
|
248
|
+
generate_txtfile(get_toc_custom(doc))
|
|
260
249
|
edit_txtfile()
|
|
261
250
|
toc_entries = parse_txtfile(tablevel=args.tablevel)
|
|
262
251
|
embed_toc(args.filename, toc_entries, args.out)
|
|
@@ -267,6 +256,7 @@ def main():
|
|
|
267
256
|
start = perf_counter()
|
|
268
257
|
toc_entries = generate_toc_nnet(args.filename, args.multiprocess)
|
|
269
258
|
end = perf_counter()
|
|
259
|
+
log('')
|
|
270
260
|
log(f"finished in {end - start:<4.1f} s")
|
|
271
261
|
toc_entries = align_toc_lvls(toc_entries)
|
|
272
262
|
if args.straight:
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: pdf-auto-outline
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: Automatically generate and edit PDF table of contents / outline
|
|
5
|
+
Author: Rossikos
|
|
6
|
+
Author-email: Rossikos <216631970+rossikos@users.noreply.github.com>
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Requires-Dist: pymupdf-layout>=1.26.6
|
|
11
|
+
Requires-Dist: sioyek ; extra == 'sioyek'
|
|
12
|
+
Requires-Python: >=3.13
|
|
13
|
+
Project-URL: Bug Tracker, https://github.com/rossikos/pdf-auto-outline/issues
|
|
14
|
+
Project-URL: Homepage, https://github.com/rossikos/pdf-auto-outline
|
|
15
|
+
Provides-Extra: sioyek
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# PDF Auto Outline
|
|
19
|
+
|
|
20
|
+
Automatically generate and embed a table of contents or outline in a PDF.
|
|
21
|
+
|
|
22
|
+
Install: `python -m pip install pdf-auto-outline`
|
|
23
|
+
|
|
24
|
+
Suggestions and contributions are welcome.
|
|
25
|
+
|
|
26
|
+
## Usage
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
usage: pdfao [-h] [-s] [-o <path>] [-mp <n>] [-e] [-se] [-i <file>] [-t <n>] [--sioyek <path>] [--version] filename
|
|
30
|
+
|
|
31
|
+
positional arguments:
|
|
32
|
+
filename input pdf
|
|
33
|
+
|
|
34
|
+
options:
|
|
35
|
+
-h, --help show this help message and exit
|
|
36
|
+
-s, --straight write toc straight to pdf; skip editing
|
|
37
|
+
-o, --out <path> write changes to new pdf
|
|
38
|
+
-mp, --multiprocess <n>
|
|
39
|
+
spread job over n processes (faster on Linux)
|
|
40
|
+
-e, --edit edit pdf toc
|
|
41
|
+
-se, --superedit edit pdf toc (more attributes available)
|
|
42
|
+
-i, --infile <file> write toc from file to pdf
|
|
43
|
+
-t, --tablevel <n> tab = n toc nesting levels (default 2)
|
|
44
|
+
--sioyek <path> for users of the Sioyek pdf viewer
|
|
45
|
+
--version show program's version number and exit
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Examples
|
|
49
|
+
|
|
50
|
+
Generate toc and edit before saving:
|
|
51
|
+
`pdfao paper.pdf`
|
|
52
|
+
|
|
53
|
+
Generate and save to new pdf:
|
|
54
|
+
`pdfao paper.pdf -o new.pdf`
|
|
55
|
+
|
|
56
|
+
Edit existing pdf toc:
|
|
57
|
+
`pdfao paper.pdf -e`
|
|
58
|
+
|
|
59
|
+
Save toc to new pdf from file:
|
|
60
|
+
`pdfao paper.pdf -o new.pdf -i outline.txt`
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
## For Sioyek Users
|
|
64
|
+
|
|
65
|
+
Example commands; add to `prefs_user.config`.
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
new_command _gen_toc pdfao "%{file_path}" --sioyek path/to/sioyek -mp 4
|
|
69
|
+
new_command _edit_toc pdfao "%{file_path}" --sioyek path/to/sioyek -e
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
If you don't wish to install from PyPI, download source and use `python3 -m path/to/src/pdf_auto_outline` in place of `pdfao`.
|
|
73
|
+
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
pdf_auto_outline/__init__.py,sha256=mdp2CftfqYbdKtP-eWv1z7rAUycYv6X1ntXSMUf8Kss,22
|
|
2
|
+
pdf_auto_outline/__main__.py,sha256=mRKsAFeG5R17vTYubIKregAve4vnKc-nk7jY3tcK4wI,78
|
|
3
|
+
pdf_auto_outline/main.py,sha256=oZJMNGDBLJNGWmjGtDdNoFXsPtSS7Km7uPdwjHXPA00,9638
|
|
4
|
+
pdf_auto_outline-0.1.2.dist-info/WHEEL,sha256=5w2T7AS2mz1-rW9CNagNYWRCaB0iQqBMYLwKdlgiR4Q,78
|
|
5
|
+
pdf_auto_outline-0.1.2.dist-info/entry_points.txt,sha256=HBvhmxJs8hHqbbpJmVTbBH3xy19Hk655O_ySwFC_53w,100
|
|
6
|
+
pdf_auto_outline-0.1.2.dist-info/METADATA,sha256=GyMWMb7y2SDwvxUEEMpVpz4Fxfe3_GwGJ-sdXHxAFZg,2316
|
|
7
|
+
pdf_auto_outline-0.1.2.dist-info/RECORD,,
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.3
|
|
2
|
-
Name: pdf-auto-outline
|
|
3
|
-
Version: 0.1.0
|
|
4
|
-
Summary: Automatically generate and edit PDF table of contents / outline
|
|
5
|
-
Author: Rossikos
|
|
6
|
-
Author-email: Rossikos <216631970+rossikos@users.noreply.github.com>
|
|
7
|
-
Requires-Dist: pymupdf-layout>=1.26.6
|
|
8
|
-
Requires-Python: >=3.13
|
|
9
|
-
Description-Content-Type: text/markdown
|
|
10
|
-
|
|
11
|
-
# PDF Auto Outline
|
|
12
|
-
|
|
13
|
-
A simple python program to automatically generate and embed a table of contents or outline in a PDF.
|
|
14
|
-
|
|
15
|
-
## Usage
|
|
16
|
-
|
|
17
|
-
```
|
|
18
|
-
usage: pdfao [-h] [-s] [-o <path>] [-mp <n>] [-e] [-se] [-i <file>] [-t <n>] [--sioyek <path>] [--version] filename
|
|
19
|
-
|
|
20
|
-
positional arguments:
|
|
21
|
-
filename input pdf
|
|
22
|
-
|
|
23
|
-
options:
|
|
24
|
-
-h, --help show this help message and exit
|
|
25
|
-
-s, --straight write toc straight to pdf; skip editing
|
|
26
|
-
-o, --out <path> write changes to new pdf
|
|
27
|
-
-mp, --multiprocess <n>
|
|
28
|
-
spread job over n processes (faster on linux)
|
|
29
|
-
-e, --edit edit pdf toc
|
|
30
|
-
-se, --superedit edit pdf toc (more attributes available)
|
|
31
|
-
-i, --infile <file> write toc from file to pdf
|
|
32
|
-
-t, --tablevel <n> tab = n toc nesting levels (default 2)
|
|
33
|
-
--sioyek <path> for users of the Sioyek pdf viewer
|
|
34
|
-
--version show program's version number and exit
|
|
35
|
-
```
|
|
36
|
-
|
|
37
|
-
## For Sioyek Users
|
|
38
|
-
|
|
39
|
-
Example commands; add to prefs_user.config.
|
|
40
|
-
|
|
41
|
-
```
|
|
42
|
-
new_command _gen_toc python3 path/to/pdfao.py "%{file_path}" --sioyek path/to/sioyek -mp 4
|
|
43
|
-
new_command _edit_toc python3 path/to/pdfao.py "%{file_path}" --sioyek path/to/sioyek -e
|
|
44
|
-
```
|
|
45
|
-
|
|
46
|
-
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
pdf_auto_outline/__init__.py,sha256=IMjkMO3twhQzluVTo8Z6rE7Eg-9U79_LGKMcsWLKBkY,22
|
|
2
|
-
pdf_auto_outline/__main__.py,sha256=7tzuGbeA5JiJWE_g9pzlcTXSsKlR-iEXNEbdYd4jZMs,62
|
|
3
|
-
pdf_auto_outline/main.py,sha256=KkLEIGCndRql55jjvAb8Y-onmkPoQwdExHcuNG3MPYw,9977
|
|
4
|
-
pdf_auto_outline-0.1.0.dist-info/WHEEL,sha256=5w2T7AS2mz1-rW9CNagNYWRCaB0iQqBMYLwKdlgiR4Q,78
|
|
5
|
-
pdf_auto_outline-0.1.0.dist-info/entry_points.txt,sha256=HBvhmxJs8hHqbbpJmVTbBH3xy19Hk655O_ySwFC_53w,100
|
|
6
|
-
pdf_auto_outline-0.1.0.dist-info/METADATA,sha256=ZeQBy-6lWQbqhTmt2375o5c-CQymnOffWyFeL7vi3bY,1504
|
|
7
|
-
pdf_auto_outline-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|