pdf-auto-outline 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdf_auto_outline/__init__.py +1 -1
- pdf_auto_outline/__main__.py +1 -1
- pdf_auto_outline/main.py +59 -63
- {pdf_auto_outline-0.1.1.dist-info → pdf_auto_outline-0.1.3.dist-info}/METADATA +3 -3
- pdf_auto_outline-0.1.3.dist-info/RECORD +7 -0
- pdf_auto_outline-0.1.1.dist-info/RECORD +0 -7
- {pdf_auto_outline-0.1.1.dist-info → pdf_auto_outline-0.1.3.dist-info}/WHEEL +0 -0
- {pdf_auto_outline-0.1.1.dist-info → pdf_auto_outline-0.1.3.dist-info}/entry_points.txt +0 -0
pdf_auto_outline/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = '0.1.1'
|
|
1
|
+
__version__ = '0.1.3'
|
pdf_auto_outline/__main__.py
CHANGED
pdf_auto_outline/main.py
CHANGED
|
@@ -100,72 +100,52 @@ def generate_toc_nnet(pdfpath, worker_cnt=3) -> list:
|
|
|
100
100
|
return [j for i in pg_nums for j in results[i]]
|
|
101
101
|
|
|
102
102
|
def align_toc_lvls(toc_entries: list) -> list:
|
|
103
|
-
# TODO: fix this spaghetti
|
|
104
103
|
import re
|
|
105
|
-
def act(
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
return e[current]
|
|
113
|
-
# return max(1, lvl - 1)
|
|
104
|
+
def act(current): # cur prev expected lvl
|
|
105
|
+
if current == d['prev_name']: # current is sibling
|
|
106
|
+
pass
|
|
107
|
+
elif current == 'p5': # current is figure/table type
|
|
108
|
+
d['lvl'] += 1
|
|
109
|
+
elif e[current] < d['prev_lvl']: # current is parent
|
|
110
|
+
d['lvl'] = e[current]
|
|
114
111
|
else: # e[current] > prev[1]: # current is child
|
|
115
|
-
e[current] = min(lvl + 1, e[current])
|
|
116
|
-
|
|
117
|
-
# else: #e[current] == prev: # current is sibling
|
|
118
|
-
# return lvl
|
|
112
|
+
e[current] = min(d['lvl'] + 1, e[current])
|
|
113
|
+
d['lvl'] = min(d['lvl'] + 1, e[current])
|
|
119
114
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
p4 = re.compile(r'^([AIVXC\d]+\.){2}[IVXC\d]\.? \w')
|
|
124
|
-
p5 = re.compile(r'^(Fig(ure)?\.?)|(Table\.? [\dIVXC]+)')
|
|
125
|
-
p6 = re.compile(r'''\d?\s?(Introduction)|((Materials and )?Methods)|(Results)|
|
|
126
|
-
(Discussion)|(References)|(Summary)|(Conclusion)|(Acknowledgements)
|
|
127
|
-
''', re.IGNORECASE)
|
|
128
|
-
p7 = re.compile(r'^\d?\s?[A-Z ]{2,}')
|
|
115
|
+
d['prev_name'] = current
|
|
116
|
+
d['prev_lvl'] = e[current]
|
|
117
|
+
toc_entries[i-d['removed']][0] = d['lvl']
|
|
129
118
|
|
|
119
|
+
p1 = re.compile(r'^[A-Z\d]')
|
|
120
|
+
patterns = (
|
|
121
|
+
re.compile(r'^(Contents)|(Chapter)|(Appendix)|(Index)|(Bibliography)|(Preface)'),
|
|
122
|
+
re.compile(r'^([IVXC\d])+\.[IVXC\d]\.? \w'),
|
|
123
|
+
re.compile(r'^([AIVXC\d]+\.){2}[IVXC\d]\.? \w'),
|
|
124
|
+
re.compile(r'^(Fig(ure)?\.?)|(Table\.? [\dIVXC]+)', re.IGNORECASE),
|
|
125
|
+
re.compile(r'''\d?\s?(Introduction)|((Materials? and )?Methods)|(Results)|
|
|
126
|
+
(Discussion)|(References)|(Summary)|(Conclusion)|(Acknowledgements)
|
|
127
|
+
''', re.IGNORECASE),
|
|
128
|
+
re.compile(r'^\d?\s?[A-Z ]{2,}'),
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
# expected nesting levels
|
|
130
132
|
e = {'p1': 1, 'p2': 1, 'p3': 2, 'p4': 3, 'p5': 5, 'p6': 1, 'p7': 1, 'l': 2,}
|
|
133
|
+
# line status
|
|
134
|
+
d = {'lvl': 1, 'prev_name': 'p1', 'prev_lvl': 1, 'titles': set(), 'removed': 0}
|
|
131
135
|
|
|
132
136
|
log('aligning levels..')
|
|
133
|
-
lvl, prev, titles, removed = 1, ('p1', 1), set(), 0
|
|
134
137
|
|
|
135
138
|
for i in range(1, len(toc_entries)):
|
|
136
|
-
title = toc_entries[i-removed][1]
|
|
137
|
-
if (not p1.match(title)) or len(title) < 4 or title in titles: #skip
|
|
138
|
-
toc_entries.pop(i-removed)
|
|
139
|
-
removed += 1
|
|
140
|
-
elif
|
|
141
|
-
|
|
142
|
-
toc_entries[i-removed][0] = lvl
|
|
143
|
-
prev = ('p2', e['p2'])
|
|
144
|
-
elif p7.match(title):
|
|
145
|
-
lvl = act(lvl, 'p7', prev)
|
|
146
|
-
toc_entries[i-removed][0] = lvl
|
|
147
|
-
prev = ('p7', e['p7'])
|
|
148
|
-
elif p6.match(title):
|
|
149
|
-
lvl = act(lvl, 'p6', prev)
|
|
150
|
-
toc_entries[i-removed][0] = lvl
|
|
151
|
-
prev = ('p6', e['p6'])
|
|
152
|
-
elif p3.match(title):
|
|
153
|
-
lvl = act(lvl, 'p3', prev)
|
|
154
|
-
toc_entries[i-removed][0] = lvl
|
|
155
|
-
prev = ('p3', e['p3'])
|
|
156
|
-
elif p4.match(title):
|
|
157
|
-
lvl = act(lvl, 'p4', prev)
|
|
158
|
-
toc_entries[i-removed][0] = lvl
|
|
159
|
-
prev = ('p4', e['p4'])
|
|
160
|
-
elif p5.match(title):
|
|
161
|
-
lvl = act(lvl, 'p5', prev)
|
|
162
|
-
toc_entries[i-removed][0] = lvl
|
|
163
|
-
prev = ('p5', e['p5'])
|
|
139
|
+
title = toc_entries[i-d['removed']][1]
|
|
140
|
+
if (not p1.match(title)) or len(title) < 4 or title in d['titles']: #skip
|
|
141
|
+
toc_entries.pop(i-d['removed'])
|
|
142
|
+
d['removed'] += 1
|
|
143
|
+
elif (name := next((idi for idi, i in enumerate(patterns) if i.match(title)), None)):
|
|
144
|
+
act(f'p{name+2}')
|
|
164
145
|
else:
|
|
165
|
-
titles.add(title)
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
prev = ('l', e['l'])
|
|
146
|
+
d['titles'].add(title)
|
|
147
|
+
act('l')
|
|
148
|
+
|
|
169
149
|
return toc_entries
|
|
170
150
|
|
|
171
151
|
def generate_txtfile(toc_entries, txtfile='outline.txt') -> str:
|
|
@@ -174,6 +154,8 @@ def generate_txtfile(toc_entries, txtfile='outline.txt') -> str:
|
|
|
174
154
|
============================================================
|
|
175
155
|
TABLE OF CONTENTS OUTLINE
|
|
176
156
|
4spaces/lvl text | pg# | {details dictionary} OR y-coord
|
|
157
|
+
|
|
158
|
+
Type 'C' as the first character of this file to cancel
|
|
177
159
|
============================================================
|
|
178
160
|
|
|
179
161
|
""")
|
|
@@ -193,8 +175,11 @@ def generate_txtfile(toc_entries, txtfile='outline.txt') -> str:
|
|
|
193
175
|
def parse_txtfile(txtfile='outline.txt', tablevel=2) -> list:
|
|
194
176
|
toc_entries = []
|
|
195
177
|
with open(txtfile) as f:
|
|
196
|
-
if f.read(1) == '
|
|
197
|
-
|
|
178
|
+
if (c := f.read(1)) == 'C':
|
|
179
|
+
log('Outline not written')
|
|
180
|
+
exit()
|
|
181
|
+
elif c == '=':
|
|
182
|
+
lines = f.readlines()[7:]
|
|
198
183
|
else: lines = f.read()
|
|
199
184
|
|
|
200
185
|
for i in lines:
|
|
@@ -222,11 +207,19 @@ def embed_toc(pdfpath, toc_entries, newfile=''):
|
|
|
222
207
|
doc.saveIncr()
|
|
223
208
|
log(f"toc saved to '{pdfpath}'")
|
|
224
209
|
|
|
225
|
-
|
|
210
|
+
def get_toc_custom(doc) -> list:
|
|
211
|
+
toc_entries = [[*i[:3], i[3].get('to')] for i in doc.get_toc(False)]
|
|
212
|
+
return toc_entries
|
|
226
213
|
|
|
227
214
|
def edit_txtfile(txtfile='outline.txt'):
|
|
228
|
-
editor = os.environ.get('EDITOR', 'notepad' if os.name == 'nt' else 'vi')
|
|
229
|
-
|
|
215
|
+
# editor = os.environ.get('EDITOR', 'notepad' if os.name == 'nt' else 'vi')
|
|
216
|
+
# editor = os.environ.get('EDITOR', 'start' if os.name == 'nt' else 'xdg-open')
|
|
217
|
+
name = os.name
|
|
218
|
+
if name == 'nt':
|
|
219
|
+
subprocess.run(['start', '/WAIT', txtfile], shell=True)
|
|
220
|
+
else: # name == 'posix':
|
|
221
|
+
editor = os.environ.get('EDITOR', 'vi')
|
|
222
|
+
subprocess.run([editor, txtfile])
|
|
230
223
|
|
|
231
224
|
def main():
|
|
232
225
|
parser = argparse.ArgumentParser(prog='pdfao')
|
|
@@ -239,7 +232,7 @@ def main():
|
|
|
239
232
|
parser.add_argument('-i', '--infile', type=str, metavar='<file>', help='write toc from file to pdf')
|
|
240
233
|
parser.add_argument('-t', '--tablevel', type=int, metavar='<n>', help='tab = n toc nesting levels (default 2)', default=2)
|
|
241
234
|
parser.add_argument('--sioyek', type=str, metavar='<path>', help='for users of the Sioyek pdf viewer')
|
|
242
|
-
parser.add_argument('--version', action='version', version='%(prog)s 0.1.1')
|
|
235
|
+
parser.add_argument('--version', action='version', version='%(prog)s 0.1.3')
|
|
243
236
|
|
|
244
237
|
args = parser.parse_args()
|
|
245
238
|
|
|
@@ -259,7 +252,10 @@ def main():
|
|
|
259
252
|
|
|
260
253
|
if args.edit or args.superedit:
|
|
261
254
|
doc = pymupdf.Document(args.filename)
|
|
262
|
-
|
|
255
|
+
if args.superedit:
|
|
256
|
+
generate_txtfile(doc.get_toc(False))
|
|
257
|
+
else:
|
|
258
|
+
generate_txtfile(get_toc_custom(doc))
|
|
263
259
|
edit_txtfile()
|
|
264
260
|
toc_entries = parse_txtfile(tablevel=args.tablevel)
|
|
265
261
|
embed_toc(args.filename, toc_entries, args.out)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: pdf-auto-outline
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: Automatically generate and edit PDF table of contents / outline
|
|
5
5
|
Author: Rossikos
|
|
6
6
|
Author-email: Rossikos <216631970+rossikos@users.noreply.github.com>
|
|
@@ -45,7 +45,7 @@ options:
|
|
|
45
45
|
--version show program's version number and exit
|
|
46
46
|
```
|
|
47
47
|
|
|
48
|
-
|
|
48
|
+
### Examples
|
|
49
49
|
|
|
50
50
|
Generate toc and edit before saving:
|
|
51
51
|
`pdfao paper.pdf`
|
|
@@ -66,7 +66,7 @@ Example commands; add to `prefs_user.config`.
|
|
|
66
66
|
|
|
67
67
|
```
|
|
68
68
|
new_command _gen_toc pdfao "%{file_path}" --sioyek path/to/sioyek -mp 4
|
|
69
|
-
new_command _edit_toc pdfao path/to/pdfao.py "%{file_path}" --sioyek path/to/sioyek -
|
|
69
|
+
new_command _edit_toc pdfao path/to/pdfao.py "%{file_path}" --sioyek path/to/sioyek -e
|
|
70
70
|
```
|
|
71
71
|
|
|
72
72
|
If you don't wish to install from PyPI, download source and use `python3 -m path/to/src/pdf_auto_outline` in place of `pdfao`.
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
pdf_auto_outline/__init__.py,sha256=uZsygMXMKRw-7qhWojAjnpm8GFPXU92xW6XA8O5GwFY,22
|
|
2
|
+
pdf_auto_outline/__main__.py,sha256=mRKsAFeG5R17vTYubIKregAve4vnKc-nk7jY3tcK4wI,78
|
|
3
|
+
pdf_auto_outline/main.py,sha256=nv3w9ORMKDOoQLVnxfy1IB2t6zhRDC3yuKhEUovAYQU,9978
|
|
4
|
+
pdf_auto_outline-0.1.3.dist-info/WHEEL,sha256=5w2T7AS2mz1-rW9CNagNYWRCaB0iQqBMYLwKdlgiR4Q,78
|
|
5
|
+
pdf_auto_outline-0.1.3.dist-info/entry_points.txt,sha256=HBvhmxJs8hHqbbpJmVTbBH3xy19Hk655O_ySwFC_53w,100
|
|
6
|
+
pdf_auto_outline-0.1.3.dist-info/METADATA,sha256=KANKIzlSCmMydXrlkU02hrreQNJJI-JiCFZ__hXmq1M,2316
|
|
7
|
+
pdf_auto_outline-0.1.3.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
pdf_auto_outline/__init__.py,sha256=ls1camlIoMxEZz9gSkZ1OJo-MXqHWwKPtdPbZJmwp7E,22
|
|
2
|
-
pdf_auto_outline/__main__.py,sha256=7tzuGbeA5JiJWE_g9pzlcTXSsKlR-iEXNEbdYd4jZMs,62
|
|
3
|
-
pdf_auto_outline/main.py,sha256=3TEr30H8Rp29-rXxg_vx604sUxbDQRKFA6fNuuqcOqA,10095
|
|
4
|
-
pdf_auto_outline-0.1.1.dist-info/WHEEL,sha256=5w2T7AS2mz1-rW9CNagNYWRCaB0iQqBMYLwKdlgiR4Q,78
|
|
5
|
-
pdf_auto_outline-0.1.1.dist-info/entry_points.txt,sha256=HBvhmxJs8hHqbbpJmVTbBH3xy19Hk655O_ySwFC_53w,100
|
|
6
|
-
pdf_auto_outline-0.1.1.dist-info/METADATA,sha256=f9c0WwifI1VLxvZ0GXMQyNgNgMXNikmgNY-SmhkSw74,2315
|
|
7
|
-
pdf_auto_outline-0.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|