pdf-auto-outline 0.1.5__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: pdf-auto-outline
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: Automatically generate and edit PDF table of contents / outline
5
5
  Author: Rossikos
6
6
  Author-email: Rossikos <216631970+rossikos@users.noreply.github.com>
@@ -82,7 +82,6 @@ The optional part can be one of:
82
82
  ```
83
83
  | None same as not including it
84
84
  | 241.2 y-ordinate
85
- | Point(72.0, 363.9) x and y coords
86
85
  | {<dictionary>} dictionary with more attributes for the ToC entry
87
86
  ```
88
87
 
@@ -92,8 +91,10 @@ Example commands; add to `prefs_user.config`.
92
91
 
93
92
  ```
94
93
  new_command _gen_toc pdfao "%{file_path}" --sioyek path/to/sioyek -mp 4
95
- new_command _edit_toc pdfao "%{file_path}" --sioyek path/to/sioyek -e
94
+ new_command _edit_toc pdfao "%{file_path}" -e
96
95
  ```
97
96
 
97
+ The sioyek library and the `--sioyek` flag are optional; they enable logging to Sioyek's status bar. This is most useful for ToC generation, where you may want a progress bar.
98
+
98
99
  If you don't wish to install from PyPI, download source and use `python3 -m path/to/src/pdf_auto_outline` in place of `pdfao`.
99
100
 
@@ -65,7 +65,6 @@ The optional part can be one of:
65
65
  ```
66
66
  | None same as not including it
67
67
  | 241.2 y-ordinate
68
- | Point(72.0, 363.9) x and y coords
69
68
  | {<dictionary>} dictionary with more attributes for the ToC entry
70
69
  ```
71
70
 
@@ -75,8 +74,10 @@ Example commands; add to `prefs_user.config`.
75
74
 
76
75
  ```
77
76
  new_command _gen_toc pdfao "%{file_path}" --sioyek path/to/sioyek -mp 4
78
- new_command _edit_toc pdfao "%{file_path}" --sioyek path/to/sioyek -e
77
+ new_command _edit_toc pdfao "%{file_path}" -e
79
78
  ```
80
79
 
80
+ The sioyek library and the `--sioyek` flag are optional; they enable logging to Sioyek's status bar. This is most useful for ToC generation, where you may want a progress bar.
81
+
81
82
  If you don't wish to install from PyPI, download source and use `python3 -m path/to/src/pdf_auto_outline` in place of `pdfao`.
82
83
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pdf-auto-outline"
3
- version = "0.1.5"
3
+ version = "0.1.7"
4
4
  description = "Automatically generate and edit PDF table of contents / outline"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -0,0 +1 @@
1
+ __version__ = '0.1.7'
@@ -1,10 +1,10 @@
1
1
  import pymupdf.layout
2
- from pymupdf import Point
3
2
  from time import perf_counter
4
3
  from multiprocessing import Pool
5
4
  import os
6
5
  import subprocess
7
6
  import argparse
7
+ import tempfile
8
8
 
9
9
  SIOYEK = None
10
10
 
@@ -148,14 +148,19 @@ def align_toc_lvls(toc_entries: list) -> list:
148
148
 
149
149
  return toc_entries
150
150
 
151
- def generate_txtfile(toc_entries, txtfile='outline.txt') -> str:
151
+ def get_tmpfile():
152
+ return tempfile.NamedTemporaryFile(
153
+ mode='w+', encoding='utf-8', delete=False, suffix='.txt'
154
+ )
155
+
156
+ def generate_txtfile(toc_entries, txtfile=get_tmpfile()):
152
157
  import textwrap
153
158
  txt = textwrap.dedent("""\
154
159
  ============================================================
155
160
  TABLE OF CONTENTS OUTLINE
156
161
  4spaces/lvl text | pg# | {details dictionary} OR y-coord
157
162
 
158
- Type 'C' as the first character of this file to cancel
163
+ Type '\\' as the first character of this file to cancel
159
164
  ============================================================
160
165
 
161
166
  """)
@@ -168,60 +173,77 @@ def generate_txtfile(toc_entries, txtfile='outline.txt') -> str:
168
173
  txt += '\n'.join(f"{' '*4 * (i[0] - 1)}{i[1]} | {i[2]}"
169
174
  for i in toc_entries)
170
175
 
171
- with open(txtfile, 'w', encoding='utf-8') as f:
172
- f.write(txt)
176
+ txtfile.write(txt)
177
+ txtfile.flush()
178
+ txtfile.seek(0)
173
179
 
174
180
  return txtfile
175
181
 
176
182
 
177
- def parse_txtfile(txtfile='outline.txt', tablevel=2) -> list:
183
+
184
+ def parse_txtfile(f, tablevel=2) -> list:
178
185
  toc_entries = []
179
- with open(txtfile) as f:
180
- if (c := f.read(1)) == 'C':
181
- log('Outline not written')
186
+ if (c := f.read(1)) == '\\':
187
+ log('Outline not written')
188
+ exit()
189
+ elif c == '=':
190
+ for _ in range(7):
191
+ f.readline()
192
+ else:
193
+ f.seek(0)
194
+
195
+ for ln, i in enumerate(f):
196
+ i = i.replace('\t', ' '*tablevel)
197
+ lvl = (len(i) - len(i.lstrip())) // 4 + 1
198
+ a = i.lstrip().split(' | ')
199
+ # print(i)
200
+ if (l := len(a)) == 2:
201
+ toc_entries.append(
202
+ [lvl, a[0], int(a[1])]
203
+ )
204
+ elif l == 3:
205
+ toc_entries.append(
206
+ [lvl, a[0], int(a[1]), eval(a[2])]
207
+ )
208
+ else:
209
+ log(f'Error parsing line {ln+1}: {i}')
182
210
  exit()
183
- elif c == '=':
184
- lines = f.readlines()[7:]
185
- else: lines = f.read()
186
-
187
- for i in lines:
188
- i = i.replace('\t', ' '*tablevel)
189
- lvl = (len(i) - len(i.lstrip())) // 4 + 1
190
- a = i.lstrip().split(' | ')
191
- if len(a) < 3:
192
- toc_entries.append(
193
- [lvl, a[0], int(a[1])]
194
- )
195
- else:
196
- toc_entries.append(
197
- [lvl, a[0], int(a[1]), eval(a[2])]
198
- )
211
+
212
+ f.close()
199
213
 
200
214
  return toc_entries
201
215
 
202
- def embed_toc(pdfpath, toc_entries, newfile=''):
216
+ def embed_toc(pdfpath, toc_entries, newfile='', offset=0):
217
+ if offset != 0:
218
+ toc_entries = [[a, b, c + offset, *d] for a, b, c, *d in toc_entries]
203
219
  doc = pymupdf.open(pdfpath)
204
220
  doc.set_toc(toc_entries, collapse=2)
205
221
  if newfile:
206
222
  doc.save(newfile)
207
223
  log(f"toc written to '{newfile}'")
208
- else:
224
+ elif doc.can_save_incrementally():
209
225
  doc.saveIncr()
210
226
  log(f"toc saved to '{pdfpath}'")
227
+ else:
228
+ log('cannot save to original; saving to new file...')
229
+ new_path = '_new.'.join(pdfpath.split('.'))
230
+ try:
231
+ doc.save(new_path, garbage=4, deflate=True, use_objstms=True)
232
+ except KeyboardInterrupt as e:
233
+ log('Cancelled')
234
+ exit()
235
+ log(f"toc written to '{new_path}'")
211
236
 
212
237
  def get_toc_custom(doc) -> list:
213
238
  toc_entries = [[*i[:3], i[3].get('to')[1]] for i in doc.get_toc(False)]
214
239
  return toc_entries
215
240
 
216
- def edit_txtfile(txtfile='outline.txt'):
217
- # editor = os.environ.get('EDITOR', 'notepad' if os.name == 'nt' else 'vi')
218
- # editor = os.environ.get('EDITOR', 'start' if os.name == 'nt' else 'xdg-open')
219
- name = os.name
220
- if name == 'nt':
221
- subprocess.run(['start', '/WAIT', txtfile], shell=True)
241
+ def edit_txtfile(f):
242
+ if os.name == 'nt':
243
+ subprocess.run(['start', '/WAIT', f.name], shell=True)
222
244
  else: # name == 'posix':
223
245
  editor = os.environ.get('EDITOR', 'vi')
224
- subprocess.run([editor, txtfile])
246
+ subprocess.run([editor, f.name])
225
247
 
226
248
  def main():
227
249
  parser = argparse.ArgumentParser(prog='pdfao')
@@ -233,20 +255,21 @@ def main():
233
255
  parser.add_argument('-se', '--superedit', action='store_true', help='edit pdf toc (more attibutes available)')
234
256
  parser.add_argument('-i', '--infile', type=str, metavar='<file>', help='write toc from file to pdf')
235
257
  parser.add_argument('-t', '--tablevel', type=int, metavar='<n>', help='tab = n toc nesting levels (default 2)', default=2)
258
+ parser.add_argument('-os', '--offset', type=int, metavar='<n>', help='toc page offset; use with infile', default=0)
236
259
  parser.add_argument('--sioyek', type=str, metavar='<path>', help='for users of the Sioyek pdf viewer')
237
- parser.add_argument('--version', action='version', version='%(prog)s 0.1.5')
260
+ parser.add_argument('--version', action='version', version='%(prog)s 0.1.7')
238
261
 
239
262
  args = parser.parse_args()
240
263
 
264
+ if args.out:
265
+ args.out = os.path.join(
266
+ os.path.dirname(args.filename),
267
+ args.out)
268
+
241
269
  if args.sioyek:
242
270
  from sioyek.sioyek import Sioyek
243
271
  global SIOYEK
244
272
  SIOYEK = Sioyek(args.sioyek)
245
- if args.out:
246
- args.out = os.path.join(
247
- os.path.dirname(args.filename),
248
- args.out
249
- )
250
273
  # local_db = args.sioyek[1]
251
274
  # shared_db = args.sioyek[2]
252
275
  # pdf_path = args.sioyek[3]
@@ -255,15 +278,16 @@ def main():
255
278
  if args.edit or args.superedit:
256
279
  doc = pymupdf.Document(args.filename)
257
280
  if args.superedit:
258
- generate_txtfile(doc.get_toc(False))
281
+ f = generate_txtfile(doc.get_toc(False))
259
282
  else:
260
- generate_txtfile(get_toc_custom(doc))
261
- edit_txtfile()
262
- toc_entries = parse_txtfile(tablevel=args.tablevel)
263
- embed_toc(args.filename, toc_entries, args.out)
283
+ f = generate_txtfile(get_toc_custom(doc))
284
+ edit_txtfile(f)
285
+ toc_entries = parse_txtfile(f, args.tablevel)
286
+ embed_toc(args.filename, toc_entries, args.out, args.offset)
287
+ os.remove(f.name)
264
288
  elif args.infile:
265
- toc_entries = parse_txtfile(args.infile, args.tablevel)
266
- embed_toc(args.filename, toc_entries, args.out)
289
+ toc_entries = parse_txtfile(open(args.infile, encoding='utf-8'), args.tablevel)
290
+ embed_toc(args.filename, toc_entries, args.out, args.offset)
267
291
  else: # generate toc
268
292
  start = perf_counter()
269
293
  toc_entries = generate_toc_nnet(args.filename, args.multiprocess)
@@ -272,12 +296,13 @@ def main():
272
296
  log(f"finished in {end - start:<4.1f} s")
273
297
  toc_entries = align_toc_lvls(toc_entries)
274
298
  if args.straight:
275
- embed_toc(args.filename, toc_entries, args.out)
299
+ embed_toc(args.filename, toc_entries, args.out, args.offset)
276
300
  else:
277
- generate_txtfile(toc_entries)
278
- edit_txtfile()
279
- toc_entries = parse_txtfile(tablevel=args.tablevel)
280
- embed_toc(args.filename, toc_entries, args.out)
301
+ f = generate_txtfile(toc_entries)
302
+ edit_txtfile(f)
303
+ toc_entries = parse_txtfile(f, args.tablevel)
304
+ embed_toc(args.filename, toc_entries, args.out, args.offset)
305
+ os.remove(f.name)
281
306
 
282
307
  # if args.sioyek and not args.out:
283
308
  # to_hash = get_md5_hash(args.filename)
@@ -0,0 +1,10 @@
1
+ f = 'test.txt'
2
+
3
+ with open(f) as f:
4
+ for _ in range(7):
5
+ f.readline()
6
+
7
+ for idi, i in enumerate(f):
8
+ print(idi, i)
9
+
10
+
@@ -0,0 +1,13 @@
1
+ ============================================================
2
+ TABLE OF CONTENTS OUTLINE
3
+ 4spaces/lvl text | pg# | {details dictionary} OR y-coord
4
+
5
+ Type '\\' as the first character of this file to cancel
6
+ ============================================================
7
+
8
+ 1
9
+ 2
10
+ 3
11
+ 4
12
+ 5
13
+ 6
@@ -1 +0,0 @@
1
- __version__ = '0.1.5'