pdf-auto-outline 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- __version__ = '0.1.0'
1
+ __version__ = '0.1.2'
@@ -1,4 +1,4 @@
1
- from .main import main
1
+ from pdf_auto_outline.main import main
2
2
 
3
3
  if __name__ == '__main__':
4
4
  main()
pdf_auto_outline/main.py CHANGED
@@ -96,77 +96,56 @@ def generate_toc_nnet(pdfpath, worker_cnt=3) -> list:
96
96
  log('\nCancelled')
97
97
  exit()
98
98
 
99
- log('')
100
99
 
101
100
  return [j for i in pg_nums for j in results[i]]
102
101
 
103
102
  def align_toc_lvls(toc_entries: list) -> list:
104
- # TODO: fix this spaghetti
105
103
  import re
106
- def act(lvl, current, prev): # cur prev expected lvl
107
- # if current == prev - 1: # current is parent
108
- if current == prev[0]: # current is sibling
109
- return lvl
110
- elif current == 'p5':
111
- return lvl + 1
112
- elif e[current] < prev[1]: # current is parent
113
- return e[current]
114
- # return max(1, lvl - 1)
104
+ def act(current): # cur prev expected lvl
105
+ if current == d['prev_name']: # current is sibling
106
+ pass
107
+ elif current == 'p5': # current is figure/table type
108
+ d['lvl'] += 1
109
+ elif e[current] < d['prev_lvl']: # current is parent
110
+ d['lvl'] = e[current]
115
111
  else: # e[current] > prev[1]: # current is child
116
- e[current] = min(lvl + 1, e[current])
117
- return min(lvl + 1, e[current])
118
- # else: #e[current] == prev: # current is sibling
119
- # return lvl
112
+ e[current] = min(d['lvl'] + 1, e[current])
113
+ d['lvl'] = min(d['lvl'] + 1, e[current])
120
114
 
121
- p1 = re.compile(r'^[A-Z\d]')
122
- p2 = re.compile(r'^(Contents)|(Chapter)|(Appendix)|(Index)|(Bibliograph)|(Preface)')
123
- p3 = re.compile(r'^([IVXC\d])+\.[IVXC\d]\.? \w')
124
- p4 = re.compile(r'^([AIVXC\d]+\.){2}[IVXC\d]\.? \w')
125
- p5 = re.compile(r'^(Fig(ure)?\.?)|(Table\.? [\dIVXC]+)')
126
- p6 = re.compile(r'''\d?\s?(Introduction)|((Materials and )?Methods)|(Results)|
127
- (Discussion)|(References)|(Summary)|(Conclusion)|(Acknowledgements)
128
- ''', re.IGNORECASE)
129
- p7 = re.compile(r'^\d?\s?[A-Z ]{2,}')
115
+ d['prev_name'] = current
116
+ d['prev_lvl'] = e[current]
117
+ toc_entries[i-d['removed']][0] = d['lvl']
130
118
 
119
+ p1 = re.compile(r'^[A-Z\d]')
120
+ patterns = (
121
+ re.compile(r'^(Contents)|(Chapter)|(Appendix)|(Index)|(Bibliography)|(Preface)'),
122
+ re.compile(r'^([IVXC\d])+\.[IVXC\d]\.? \w'),
123
+ re.compile(r'^([AIVXC\d]+\.){2}[IVXC\d]\.? \w'),
124
+ re.compile(r'^(Fig(ure)?\.?)|(Table\.? [\dIVXC]+)', re.IGNORECASE),
125
+ re.compile(r'''\d?\s?(Introduction)|((Materials? and )?Methods)|(Results)|
126
+ (Discussion)|(References)|(Summary)|(Conclusion)|(Acknowledgements)
127
+ ''', re.IGNORECASE),
128
+ re.compile(r'^\d?\s?[A-Z ]{2,}'),
129
+ )
130
+
131
+ # expected nesting levels
131
132
  e = {'p1': 1, 'p2': 1, 'p3': 2, 'p4': 3, 'p5': 5, 'p6': 1, 'p7': 1, 'l': 2,}
133
+ # line status
134
+ d = {'lvl': 1, 'prev_name': 'p1', 'prev_lvl': 1, 'titles': set(), 'removed': 0}
132
135
 
133
136
  log('aligning levels..')
134
- lvl, prev, titles, removed = 1, ('p1', 1), set(), 0
135
137
 
136
138
  for i in range(1, len(toc_entries)):
137
- title = toc_entries[i-removed][1]
138
- if (not p1.match(title)) or len(title) < 4 or title in titles: #skip
139
- toc_entries.pop(i-removed)
140
- removed += 1
141
- elif p2.match(title):
142
- lvl = act(lvl, 'p2', prev)
143
- toc_entries[i-removed][0] = lvl
144
- prev = ('p2', e['p2'])
145
- elif p7.match(title):
146
- lvl = act(lvl, 'p7', prev)
147
- toc_entries[i-removed][0] = lvl
148
- prev = ('p7', e['p7'])
149
- elif p6.match(title):
150
- lvl = act(lvl, 'p6', prev)
151
- toc_entries[i-removed][0] = lvl
152
- prev = ('p6', e['p6'])
153
- elif p3.match(title):
154
- lvl = act(lvl, 'p3', prev)
155
- toc_entries[i-removed][0] = lvl
156
- prev = ('p3', e['p3'])
157
- elif p4.match(title):
158
- lvl = act(lvl, 'p4', prev)
159
- toc_entries[i-removed][0] = lvl
160
- prev = ('p4', e['p4'])
161
- elif p5.match(title):
162
- lvl = act(lvl, 'p5', prev)
163
- toc_entries[i-removed][0] = lvl
164
- prev = ('p5', e['p5'])
139
+ title = toc_entries[i-d['removed']][1]
140
+ if (not p1.match(title)) or len(title) < 4 or title in d['titles']: #skip
141
+ toc_entries.pop(i-d['removed'])
142
+ d['removed'] += 1
143
+ elif (name := next((idi for idi, i in enumerate(patterns) if i.match(title)), None)):
144
+ act(f'p{name+2}')
165
145
  else:
166
- titles.add(title)
167
- lvl = act(lvl, 'l', prev)
168
- toc_entries[i-removed][0] = lvl
169
- prev = ('l', e['l'])
146
+ d['titles'].add(title)
147
+ act('l')
148
+
170
149
  return toc_entries
171
150
 
172
151
  def generate_txtfile(toc_entries, txtfile='outline.txt') -> str:
@@ -214,7 +193,6 @@ def parse_txtfile(txtfile='outline.txt', tablevel=2) -> list:
214
193
  return toc_entries
215
194
 
216
195
  def embed_toc(pdfpath, toc_entries, newfile=''):
217
- print(len(toc_entries))
218
196
  doc = pymupdf.open(pdfpath)
219
197
  doc.set_toc(toc_entries, collapse=2)
220
198
  if newfile:
@@ -224,10 +202,13 @@ def embed_toc(pdfpath, toc_entries, newfile=''):
224
202
  doc.saveIncr()
225
203
  log(f"toc saved to '{pdfpath}'")
226
204
 
227
-
205
+ def get_toc_custom(doc) -> list:
206
+ toc_entries = [[*i[:3], i[3].get('to')] for i in doc.get_toc(False)]
207
+ return toc_entries
228
208
 
229
209
  def edit_txtfile(txtfile='outline.txt'):
230
- editor = os.environ.get('EDITOR', 'notepad' if os.name == 'nt' else 'vi')
210
+ # editor = os.environ.get('EDITOR', 'notepad' if os.name == 'nt' else 'vi')
211
+ editor = os.environ.get('EDITOR', 'start' if os.name == 'nt' else 'xdg-open')
231
212
  subprocess.run([editor, txtfile])
232
213
 
233
214
  def main():
@@ -241,14 +222,19 @@ def main():
241
222
  parser.add_argument('-i', '--infile', type=str, metavar='<file>', help='write toc from file to pdf')
242
223
  parser.add_argument('-t', '--tablevel', type=int, metavar='<n>', help='tab = n toc nesting levels (default 2)', default=2)
243
224
  parser.add_argument('--sioyek', type=str, metavar='<path>', help='for users of the Sioyek pdf viewer')
244
- parser.add_argument('--version', action='version', version='%(prog)s 0.1.0')
225
+ parser.add_argument('--version', action='version', version='%(prog)s 0.1.2')
245
226
 
246
227
  args = parser.parse_args()
247
228
 
248
229
  if args.sioyek:
249
230
  from sioyek.sioyek import Sioyek
250
- sioyek_path = args.sioyek[0]
251
- SIOYEK = Sioyek(sioyek_path)
231
+ global SIOYEK
232
+ SIOYEK = Sioyek(args.sioyek)
233
+ if args.out:
234
+ args.out = os.path.join(
235
+ os.path.dirname(args.filename),
236
+ args.out
237
+ )
252
238
  # local_db = args.sioyek[1]
253
239
  # shared_db = args.sioyek[2]
254
240
  # pdf_path = args.sioyek[3]
@@ -256,7 +242,10 @@ def main():
256
242
 
257
243
  if args.edit or args.superedit:
258
244
  doc = pymupdf.Document(args.filename)
259
- generate_txtfile(doc.get_toc(not args.superedit))
245
+ if args.superedit:
246
+ generate_txtfile(doc.get_toc(False))
247
+ else:
248
+ generate_txtfile(get_toc_custom(doc))
260
249
  edit_txtfile()
261
250
  toc_entries = parse_txtfile(tablevel=args.tablevel)
262
251
  embed_toc(args.filename, toc_entries, args.out)
@@ -267,6 +256,7 @@ def main():
267
256
  start = perf_counter()
268
257
  toc_entries = generate_toc_nnet(args.filename, args.multiprocess)
269
258
  end = perf_counter()
259
+ log('')
270
260
  log(f"finished in {end - start:<4.1f} s")
271
261
  toc_entries = align_toc_lvls(toc_entries)
272
262
  if args.straight:
@@ -0,0 +1,73 @@
1
+ Metadata-Version: 2.3
2
+ Name: pdf-auto-outline
3
+ Version: 0.1.2
4
+ Summary: Automatically generate and edit PDF table of contents / outline
5
+ Author: Rossikos
6
+ Author-email: Rossikos <216631970+rossikos@users.noreply.github.com>
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
9
+ Classifier: Operating System :: OS Independent
10
+ Requires-Dist: pymupdf-layout>=1.26.6
11
+ Requires-Dist: sioyek ; extra == 'sioyek'
12
+ Requires-Python: >=3.13
13
+ Project-URL: Bug Tracker, https://github.com/rossikos/pdf-auto-outline/issues
14
+ Project-URL: Homepage, https://github.com/rossikos/pdf-auto-outline
15
+ Provides-Extra: sioyek
16
+ Description-Content-Type: text/markdown
17
+
18
+ # PDF Auto Outline
19
+
20
+ Automatically generate and embed a table of contents or outline in a PDF.
21
+
22
+ Install: `python -m pip install pdf-auto-outline`
23
+
24
+ Suggestions and contributions are welcome.
25
+
26
+ ## Usage
27
+
28
+ ```
29
+ usage: pdfao [-h] [-s] [-o <path>] [-mp <n>] [-e] [-se] [-i <file>] [-t <n>] [--sioyek <path>] [--version] filename
30
+
31
+ positional arguments:
32
+ filename input pdf
33
+
34
+ options:
35
+ -h, --help show this help message and exit
36
+ -s, --straight write toc straight to pdf; skip editing
37
+ -o, --out <path> write changes to new pdf
38
+ -mp, --multiprocess <n>
39
+ spread job over n processes (faster on Linux)
40
+ -e, --edit edit pdf toc
41
+ -se, --superedit edit pdf toc (more attibutes available)
42
+ -i, --infile <file> write toc from file to pdf
43
+ -t, --tablevel <n> tab = n toc nesting levels (default 2)
44
+ --sioyek <path> for users of the Sioyek pdf viewer
45
+ --version show program's version number and exit
46
+ ```
47
+
48
+ ### Examples
49
+
50
+ Generate toc and edit before saving:
51
+ `pdfao paper.pdf`
52
+
53
+ Generate and save to new pdf:
54
+ `pdfao paper.pdf -o new.pdf`
55
+
56
+ Edit existing pdf toc:
57
+ `pdfao paper.pdf -e`
58
+
59
+ Save toc to new pdf from file:
60
+ `pdfao paper.pdf -o new.pdf -i outline.txt`
61
+
62
+
63
+ ## For Sioyek Users
64
+
65
+ Example commands; add to `prefs_user.config`.
66
+
67
+ ```
68
+ new_command _gen_toc pdfao "%{file_path}" --sioyek path/to/sioyek -mp 4
69
+ new_command _edit_toc pdfao "%{file_path}" --sioyek path/to/sioyek -e
70
+ ```
71
+
72
+ If you don't wish to install from PyPI, download the source and, from the `path/to/src` directory, run `python3 -m pdf_auto_outline` in place of `pdfao`.
73
+
@@ -0,0 +1,7 @@
1
+ pdf_auto_outline/__init__.py,sha256=mdp2CftfqYbdKtP-eWv1z7rAUycYv6X1ntXSMUf8Kss,22
2
+ pdf_auto_outline/__main__.py,sha256=mRKsAFeG5R17vTYubIKregAve4vnKc-nk7jY3tcK4wI,78
3
+ pdf_auto_outline/main.py,sha256=oZJMNGDBLJNGWmjGtDdNoFXsPtSS7Km7uPdwjHXPA00,9638
4
+ pdf_auto_outline-0.1.2.dist-info/WHEEL,sha256=5w2T7AS2mz1-rW9CNagNYWRCaB0iQqBMYLwKdlgiR4Q,78
5
+ pdf_auto_outline-0.1.2.dist-info/entry_points.txt,sha256=HBvhmxJs8hHqbbpJmVTbBH3xy19Hk655O_ySwFC_53w,100
6
+ pdf_auto_outline-0.1.2.dist-info/METADATA,sha256=GyMWMb7y2SDwvxUEEMpVpz4Fxfe3_GwGJ-sdXHxAFZg,2316
7
+ pdf_auto_outline-0.1.2.dist-info/RECORD,,
@@ -1,46 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: pdf-auto-outline
3
- Version: 0.1.0
4
- Summary: Automatically generate and edit PDF table of contents / outline
5
- Author: Rossikos
6
- Author-email: Rossikos <216631970+rossikos@users.noreply.github.com>
7
- Requires-Dist: pymupdf-layout>=1.26.6
8
- Requires-Python: >=3.13
9
- Description-Content-Type: text/markdown
10
-
11
- # PDF Auto Outline
12
-
13
- A simple python program to automatically generate and embed a table of contents or outline in a PDF.
14
-
15
- ## Usage
16
-
17
- ```
18
- usage: pdfao [-h] [-s] [-o <path>] [-mp <n>] [-e] [-se] [-i <file>] [-t <n>] [--sioyek <path>] [--version] filename
19
-
20
- positional arguments:
21
- filename input pdf
22
-
23
- options:
24
- -h, --help show this help message and exit
25
- -s, --straight write toc straight to pdf; skip editing
26
- -o, --out <path> write changes to new pdf
27
- -mp, --multiprocess <n>
28
- spread job over n processes (faster on linux)
29
- -e, --edit edit pdf toc
30
- -se, --superedit edit pdf toc (more attibutes available)
31
- -i, --infile <file> write toc from file to pdf
32
- -t, --tablevel <n> tab = n toc nesting levels (default 2)
33
- --sioyek <path> for users of the Sioyek pdf viewer
34
- --version show program's version number and exit
35
- ```
36
-
37
- ## For Sioyek Users
38
-
39
- Example commands; add to prefs_user.config.
40
-
41
- ```
42
- new_command _gen_toc python3 path/to/pdfao.py "%{file_path}" --sioyek path/to/sioyek -mp 4
43
- new_command _edit_toc python3 path/to/pdfao.py "%{file_path}" --sioyek path/to/sioyek -e
44
- ```
45
-
46
-
@@ -1,7 +0,0 @@
1
- pdf_auto_outline/__init__.py,sha256=IMjkMO3twhQzluVTo8Z6rE7Eg-9U79_LGKMcsWLKBkY,22
2
- pdf_auto_outline/__main__.py,sha256=7tzuGbeA5JiJWE_g9pzlcTXSsKlR-iEXNEbdYd4jZMs,62
3
- pdf_auto_outline/main.py,sha256=KkLEIGCndRql55jjvAb8Y-onmkPoQwdExHcuNG3MPYw,9977
4
- pdf_auto_outline-0.1.0.dist-info/WHEEL,sha256=5w2T7AS2mz1-rW9CNagNYWRCaB0iQqBMYLwKdlgiR4Q,78
5
- pdf_auto_outline-0.1.0.dist-info/entry_points.txt,sha256=HBvhmxJs8hHqbbpJmVTbBH3xy19Hk655O_ySwFC_53w,100
6
- pdf_auto_outline-0.1.0.dist-info/METADATA,sha256=ZeQBy-6lWQbqhTmt2375o5c-CQymnOffWyFeL7vi3bY,1504
7
- pdf_auto_outline-0.1.0.dist-info/RECORD,,