pdf-auto-outline 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdf_auto_outline/__init__.py +1 -1
- pdf_auto_outline/__main__.py +1 -1
- pdf_auto_outline/main.py +46 -60
- {pdf_auto_outline-0.1.1.dist-info → pdf_auto_outline-0.1.2.dist-info}/METADATA +3 -3
- pdf_auto_outline-0.1.2.dist-info/RECORD +7 -0
- pdf_auto_outline-0.1.1.dist-info/RECORD +0 -7
- {pdf_auto_outline-0.1.1.dist-info → pdf_auto_outline-0.1.2.dist-info}/WHEEL +0 -0
- {pdf_auto_outline-0.1.1.dist-info → pdf_auto_outline-0.1.2.dist-info}/entry_points.txt +0 -0
pdf_auto_outline/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = '0.1.
|
|
1
|
+
__version__ = '0.1.2'
|
pdf_auto_outline/__main__.py
CHANGED
pdf_auto_outline/main.py
CHANGED
|
@@ -100,72 +100,52 @@ def generate_toc_nnet(pdfpath, worker_cnt=3) -> list:
|
|
|
100
100
|
return [j for i in pg_nums for j in results[i]]
|
|
101
101
|
|
|
102
102
|
def align_toc_lvls(toc_entries: list) -> list:
|
|
103
|
-
# TODO: fix this spaghetti
|
|
104
103
|
import re
|
|
105
|
-
def act(
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
return e[current]
|
|
113
|
-
# return max(1, lvl - 1)
|
|
104
|
+
def act(current): # cur prev expected lvl
|
|
105
|
+
if current == d['prev_name']: # current is sibling
|
|
106
|
+
pass
|
|
107
|
+
elif current == 'p5': # current is figure/table type
|
|
108
|
+
d['lvl'] += 1
|
|
109
|
+
elif e[current] < d['prev_lvl']: # current is parent
|
|
110
|
+
d['lvl'] = e[current]
|
|
114
111
|
else: # e[current] > prev[1]: # current is child
|
|
115
|
-
e[current] = min(lvl + 1, e[current])
|
|
116
|
-
|
|
117
|
-
# else: #e[current] == prev: # current is sibling
|
|
118
|
-
# return lvl
|
|
112
|
+
e[current] = min(d['lvl'] + 1, e[current])
|
|
113
|
+
d['lvl'] = min(d['lvl'] + 1, e[current])
|
|
119
114
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
p4 = re.compile(r'^([AIVXC\d]+\.){2}[IVXC\d]\.? \w')
|
|
124
|
-
p5 = re.compile(r'^(Fig(ure)?\.?)|(Table\.? [\dIVXC]+)')
|
|
125
|
-
p6 = re.compile(r'''\d?\s?(Introduction)|((Materials and )?Methods)|(Results)|
|
|
126
|
-
(Discussion)|(References)|(Summary)|(Conclusion)|(Acknowledgements)
|
|
127
|
-
''', re.IGNORECASE)
|
|
128
|
-
p7 = re.compile(r'^\d?\s?[A-Z ]{2,}')
|
|
115
|
+
d['prev_name'] = current
|
|
116
|
+
d['prev_lvl'] = e[current]
|
|
117
|
+
toc_entries[i-d['removed']][0] = d['lvl']
|
|
129
118
|
|
|
119
|
+
p1 = re.compile(r'^[A-Z\d]')
|
|
120
|
+
patterns = (
|
|
121
|
+
re.compile(r'^(Contents)|(Chapter)|(Appendix)|(Index)|(Bibliography)|(Preface)'),
|
|
122
|
+
re.compile(r'^([IVXC\d])+\.[IVXC\d]\.? \w'),
|
|
123
|
+
re.compile(r'^([AIVXC\d]+\.){2}[IVXC\d]\.? \w'),
|
|
124
|
+
re.compile(r'^(Fig(ure)?\.?)|(Table\.? [\dIVXC]+)', re.IGNORECASE),
|
|
125
|
+
re.compile(r'''\d?\s?(Introduction)|((Materials? and )?Methods)|(Results)|
|
|
126
|
+
(Discussion)|(References)|(Summary)|(Conclusion)|(Acknowledgements)
|
|
127
|
+
''', re.IGNORECASE),
|
|
128
|
+
re.compile(r'^\d?\s?[A-Z ]{2,}'),
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
# expected nesting levels
|
|
130
132
|
e = {'p1': 1, 'p2': 1, 'p3': 2, 'p4': 3, 'p5': 5, 'p6': 1, 'p7': 1, 'l': 2,}
|
|
133
|
+
# line status
|
|
134
|
+
d = {'lvl': 1, 'prev_name': 'p1', 'prev_lvl': 1, 'titles': set(), 'removed': 0}
|
|
131
135
|
|
|
132
136
|
log('aligning levels..')
|
|
133
|
-
lvl, prev, titles, removed = 1, ('p1', 1), set(), 0
|
|
134
137
|
|
|
135
138
|
for i in range(1, len(toc_entries)):
|
|
136
|
-
title = toc_entries[i-removed][1]
|
|
137
|
-
if (not p1.match(title)) or len(title) < 4 or title in titles: #skip
|
|
138
|
-
toc_entries.pop(i-removed)
|
|
139
|
-
removed += 1
|
|
140
|
-
elif
|
|
141
|
-
|
|
142
|
-
toc_entries[i-removed][0] = lvl
|
|
143
|
-
prev = ('p2', e['p2'])
|
|
144
|
-
elif p7.match(title):
|
|
145
|
-
lvl = act(lvl, 'p7', prev)
|
|
146
|
-
toc_entries[i-removed][0] = lvl
|
|
147
|
-
prev = ('p7', e['p7'])
|
|
148
|
-
elif p6.match(title):
|
|
149
|
-
lvl = act(lvl, 'p6', prev)
|
|
150
|
-
toc_entries[i-removed][0] = lvl
|
|
151
|
-
prev = ('p6', e['p6'])
|
|
152
|
-
elif p3.match(title):
|
|
153
|
-
lvl = act(lvl, 'p3', prev)
|
|
154
|
-
toc_entries[i-removed][0] = lvl
|
|
155
|
-
prev = ('p3', e['p3'])
|
|
156
|
-
elif p4.match(title):
|
|
157
|
-
lvl = act(lvl, 'p4', prev)
|
|
158
|
-
toc_entries[i-removed][0] = lvl
|
|
159
|
-
prev = ('p4', e['p4'])
|
|
160
|
-
elif p5.match(title):
|
|
161
|
-
lvl = act(lvl, 'p5', prev)
|
|
162
|
-
toc_entries[i-removed][0] = lvl
|
|
163
|
-
prev = ('p5', e['p5'])
|
|
139
|
+
title = toc_entries[i-d['removed']][1]
|
|
140
|
+
if (not p1.match(title)) or len(title) < 4 or title in d['titles']: #skip
|
|
141
|
+
toc_entries.pop(i-d['removed'])
|
|
142
|
+
d['removed'] += 1
|
|
143
|
+
elif (name := next((idi for idi, i in enumerate(patterns) if i.match(title)), None)):
|
|
144
|
+
act(f'p{name+2}')
|
|
164
145
|
else:
|
|
165
|
-
titles.add(title)
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
prev = ('l', e['l'])
|
|
146
|
+
d['titles'].add(title)
|
|
147
|
+
act('l')
|
|
148
|
+
|
|
169
149
|
return toc_entries
|
|
170
150
|
|
|
171
151
|
def generate_txtfile(toc_entries, txtfile='outline.txt') -> str:
|
|
@@ -222,10 +202,13 @@ def embed_toc(pdfpath, toc_entries, newfile=''):
|
|
|
222
202
|
doc.saveIncr()
|
|
223
203
|
log(f"toc saved to '{pdfpath}'")
|
|
224
204
|
|
|
225
|
-
|
|
205
|
+
def get_toc_custom(doc) -> list:
|
|
206
|
+
toc_entries = [[*i[:3], i[3].get('to')] for i in doc.get_toc(False)]
|
|
207
|
+
return toc_entries
|
|
226
208
|
|
|
227
209
|
def edit_txtfile(txtfile='outline.txt'):
|
|
228
|
-
editor = os.environ.get('EDITOR', 'notepad' if os.name == 'nt' else 'vi')
|
|
210
|
+
# editor = os.environ.get('EDITOR', 'notepad' if os.name == 'nt' else 'vi')
|
|
211
|
+
editor = os.environ.get('EDITOR', 'start' if os.name == 'nt' else 'xdg-open')
|
|
229
212
|
subprocess.run([editor, txtfile])
|
|
230
213
|
|
|
231
214
|
def main():
|
|
@@ -239,7 +222,7 @@ def main():
|
|
|
239
222
|
parser.add_argument('-i', '--infile', type=str, metavar='<file>', help='write toc from file to pdf')
|
|
240
223
|
parser.add_argument('-t', '--tablevel', type=int, metavar='<n>', help='tab = n toc nesting levels (default 2)', default=2)
|
|
241
224
|
parser.add_argument('--sioyek', type=str, metavar='<path>', help='for users of the Sioyek pdf viewer')
|
|
242
|
-
parser.add_argument('--version', action='version', version='%(prog)s 0.1.
|
|
225
|
+
parser.add_argument('--version', action='version', version='%(prog)s 0.1.2')
|
|
243
226
|
|
|
244
227
|
args = parser.parse_args()
|
|
245
228
|
|
|
@@ -259,7 +242,10 @@ def main():
|
|
|
259
242
|
|
|
260
243
|
if args.edit or args.superedit:
|
|
261
244
|
doc = pymupdf.Document(args.filename)
|
|
262
|
-
|
|
245
|
+
if args.superedit:
|
|
246
|
+
generate_txtfile(doc.get_toc(False))
|
|
247
|
+
else:
|
|
248
|
+
generate_txtfile(get_toc_custom(doc))
|
|
263
249
|
edit_txtfile()
|
|
264
250
|
toc_entries = parse_txtfile(tablevel=args.tablevel)
|
|
265
251
|
embed_toc(args.filename, toc_entries, args.out)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: pdf-auto-outline
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: Automatically generate and edit PDF table of contents / outline
|
|
5
5
|
Author: Rossikos
|
|
6
6
|
Author-email: Rossikos <216631970+rossikos@users.noreply.github.com>
|
|
@@ -45,7 +45,7 @@ options:
|
|
|
45
45
|
--version show program's version number and exit
|
|
46
46
|
```
|
|
47
47
|
|
|
48
|
-
|
|
48
|
+
### Examples
|
|
49
49
|
|
|
50
50
|
Generate toc and edit before saving:
|
|
51
51
|
`pdfao paper.pdf`
|
|
@@ -66,7 +66,7 @@ Example commands; add to `prefs_user.config`.
|
|
|
66
66
|
|
|
67
67
|
```
|
|
68
68
|
new_command _gen_toc pdfao "%{file_path}" --sioyek path/to/sioyek -mp 4
|
|
69
|
-
new_command _edit_toc pdfao path/to/pdfao.py "%{file_path}" --sioyek path/to/sioyek -
|
|
69
|
+
new_command _edit_toc pdfao path/to/pdfao.py "%{file_path}" --sioyek path/to/sioyek -e
|
|
70
70
|
```
|
|
71
71
|
|
|
72
72
|
If you don't wish to install from PyPI, download source and use `python3 -m path/to/src/pdf_auto_outline` in place of `pdfao`.
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
pdf_auto_outline/__init__.py,sha256=mdp2CftfqYbdKtP-eWv1z7rAUycYv6X1ntXSMUf8Kss,22
|
|
2
|
+
pdf_auto_outline/__main__.py,sha256=mRKsAFeG5R17vTYubIKregAve4vnKc-nk7jY3tcK4wI,78
|
|
3
|
+
pdf_auto_outline/main.py,sha256=oZJMNGDBLJNGWmjGtDdNoFXsPtSS7Km7uPdwjHXPA00,9638
|
|
4
|
+
pdf_auto_outline-0.1.2.dist-info/WHEEL,sha256=5w2T7AS2mz1-rW9CNagNYWRCaB0iQqBMYLwKdlgiR4Q,78
|
|
5
|
+
pdf_auto_outline-0.1.2.dist-info/entry_points.txt,sha256=HBvhmxJs8hHqbbpJmVTbBH3xy19Hk655O_ySwFC_53w,100
|
|
6
|
+
pdf_auto_outline-0.1.2.dist-info/METADATA,sha256=GyMWMb7y2SDwvxUEEMpVpz4Fxfe3_GwGJ-sdXHxAFZg,2316
|
|
7
|
+
pdf_auto_outline-0.1.2.dist-info/RECORD,,
|
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
pdf_auto_outline/__init__.py,sha256=ls1camlIoMxEZz9gSkZ1OJo-MXqHWwKPtdPbZJmwp7E,22
|
|
2
|
-
pdf_auto_outline/__main__.py,sha256=7tzuGbeA5JiJWE_g9pzlcTXSsKlR-iEXNEbdYd4jZMs,62
|
|
3
|
-
pdf_auto_outline/main.py,sha256=3TEr30H8Rp29-rXxg_vx604sUxbDQRKFA6fNuuqcOqA,10095
|
|
4
|
-
pdf_auto_outline-0.1.1.dist-info/WHEEL,sha256=5w2T7AS2mz1-rW9CNagNYWRCaB0iQqBMYLwKdlgiR4Q,78
|
|
5
|
-
pdf_auto_outline-0.1.1.dist-info/entry_points.txt,sha256=HBvhmxJs8hHqbbpJmVTbBH3xy19Hk655O_ySwFC_53w,100
|
|
6
|
-
pdf_auto_outline-0.1.1.dist-info/METADATA,sha256=f9c0WwifI1VLxvZ0GXMQyNgNgMXNikmgNY-SmhkSw74,2315
|
|
7
|
-
pdf_auto_outline-0.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|