pdf-auto-outline 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: pdf-auto-outline
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Automatically generate and edit PDF table of contents / outline
5
5
  Author: Rossikos
6
6
  Author-email: Rossikos <216631970+rossikos@users.noreply.github.com>
@@ -45,7 +45,7 @@ options:
45
45
  --version show program's version number and exit
46
46
  ```
47
47
 
48
- # Examples
48
+ ### Examples
49
49
 
50
50
  Generate toc and edit before saving:
51
51
  `pdfao paper.pdf`
@@ -66,7 +66,7 @@ Example commands; add to `prefs_user.config`.
66
66
 
67
67
  ```
68
68
  new_command _gen_toc pdfao "%{file_path}" --sioyek path/to/sioyek -mp 4
69
- new_command _edit_toc pdfao path/to/pdfao.py "%{file_path}" --sioyek path/to/sioyek -se
69
+ new_command _edit_toc pdfao "%{file_path}" --sioyek path/to/sioyek -e
70
70
  ```
71
71
 
72
72
  If you don't wish to install from PyPI, download source and use `python3 -m path/to/src/pdf_auto_outline` in place of `pdfao`.
@@ -28,7 +28,7 @@ options:
28
28
  --version show program's version number and exit
29
29
  ```
30
30
 
31
- # Examples
31
+ ### Examples
32
32
 
33
33
  Generate toc and edit before saving:
34
34
  `pdfao paper.pdf`
@@ -49,7 +49,7 @@ Example commands; add to `prefs_user.config`.
49
49
 
50
50
  ```
51
51
  new_command _gen_toc pdfao "%{file_path}" --sioyek path/to/sioyek -mp 4
52
- new_command _edit_toc pdfao path/to/pdfao.py "%{file_path}" --sioyek path/to/sioyek -se
52
+ new_command _edit_toc pdfao "%{file_path}" --sioyek path/to/sioyek -e
53
53
  ```
54
54
 
55
55
  If you don't wish to install from PyPI, download source and use `python3 -m path/to/src/pdf_auto_outline` in place of `pdfao`.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pdf-auto-outline"
3
- version = "0.1.1"
3
+ version = "0.1.3"
4
4
  description = "Automatically generate and edit PDF table of contents / outline"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -0,0 +1 @@
1
+ __version__ = '0.1.3'
@@ -1,4 +1,4 @@
1
- from .main import main
1
+ from pdf_auto_outline.main import main
2
2
 
3
3
  if __name__ == '__main__':
4
4
  main()
@@ -100,72 +100,52 @@ def generate_toc_nnet(pdfpath, worker_cnt=3) -> list:
100
100
  return [j for i in pg_nums for j in results[i]]
101
101
 
102
102
  def align_toc_lvls(toc_entries: list) -> list:
103
- # TODO: fix this spaghetti
104
103
  import re
105
- def act(lvl, current, prev): # cur prev expected lvl
106
- # if current == prev - 1: # current is parent
107
- if current == prev[0]: # current is sibling
108
- return lvl
109
- elif current == 'p5':
110
- return lvl + 1
111
- elif e[current] < prev[1]: # current is parent
112
- return e[current]
113
- # return max(1, lvl - 1)
104
+ def act(current): # cur prev expected lvl
105
+ if current == d['prev_name']: # current is sibling
106
+ pass
107
+ elif current == 'p5': # current is figure/table type
108
+ d['lvl'] += 1
109
+ elif e[current] < d['prev_lvl']: # current is parent
110
+ d['lvl'] = e[current]
114
111
  else: # e[current] > prev[1]: # current is child
115
- e[current] = min(lvl + 1, e[current])
116
- return min(lvl + 1, e[current])
117
- # else: #e[current] == prev: # current is sibling
118
- # return lvl
112
+ e[current] = min(d['lvl'] + 1, e[current])
113
+ d['lvl'] = min(d['lvl'] + 1, e[current])
119
114
 
120
- p1 = re.compile(r'^[A-Z\d]')
121
- p2 = re.compile(r'^(Contents)|(Chapter)|(Appendix)|(Index)|(Bibliograph)|(Preface)')
122
- p3 = re.compile(r'^([IVXC\d])+\.[IVXC\d]\.? \w')
123
- p4 = re.compile(r'^([AIVXC\d]+\.){2}[IVXC\d]\.? \w')
124
- p5 = re.compile(r'^(Fig(ure)?\.?)|(Table\.? [\dIVXC]+)')
125
- p6 = re.compile(r'''\d?\s?(Introduction)|((Materials and )?Methods)|(Results)|
126
- (Discussion)|(References)|(Summary)|(Conclusion)|(Acknowledgements)
127
- ''', re.IGNORECASE)
128
- p7 = re.compile(r'^\d?\s?[A-Z ]{2,}')
115
+ d['prev_name'] = current
116
+ d['prev_lvl'] = e[current]
117
+ toc_entries[i-d['removed']][0] = d['lvl']
129
118
 
119
+ p1 = re.compile(r'^[A-Z\d]')
120
+ patterns = (
121
+ re.compile(r'^(Contents)|(Chapter)|(Appendix)|(Index)|(Bibliography)|(Preface)'),
122
+ re.compile(r'^([IVXC\d])+\.[IVXC\d]\.? \w'),
123
+ re.compile(r'^([AIVXC\d]+\.){2}[IVXC\d]\.? \w'),
124
+ re.compile(r'^(Fig(ure)?\.?)|(Table\.? [\dIVXC]+)', re.IGNORECASE),
125
+ re.compile(r'''\d?\s?(Introduction)|((Materials? and )?Methods)|(Results)|
126
+ (Discussion)|(References)|(Summary)|(Conclusion)|(Acknowledgements)
127
+ ''', re.IGNORECASE),
128
+ re.compile(r'^\d?\s?[A-Z ]{2,}'),
129
+ )
130
+
131
+ # expected nesting levels
130
132
  e = {'p1': 1, 'p2': 1, 'p3': 2, 'p4': 3, 'p5': 5, 'p6': 1, 'p7': 1, 'l': 2,}
133
+ # line status
134
+ d = {'lvl': 1, 'prev_name': 'p1', 'prev_lvl': 1, 'titles': set(), 'removed': 0}
131
135
 
132
136
  log('aligning levels..')
133
- lvl, prev, titles, removed = 1, ('p1', 1), set(), 0
134
137
 
135
138
  for i in range(1, len(toc_entries)):
136
- title = toc_entries[i-removed][1]
137
- if (not p1.match(title)) or len(title) < 4 or title in titles: #skip
138
- toc_entries.pop(i-removed)
139
- removed += 1
140
- elif p2.match(title):
141
- lvl = act(lvl, 'p2', prev)
142
- toc_entries[i-removed][0] = lvl
143
- prev = ('p2', e['p2'])
144
- elif p7.match(title):
145
- lvl = act(lvl, 'p7', prev)
146
- toc_entries[i-removed][0] = lvl
147
- prev = ('p7', e['p7'])
148
- elif p6.match(title):
149
- lvl = act(lvl, 'p6', prev)
150
- toc_entries[i-removed][0] = lvl
151
- prev = ('p6', e['p6'])
152
- elif p3.match(title):
153
- lvl = act(lvl, 'p3', prev)
154
- toc_entries[i-removed][0] = lvl
155
- prev = ('p3', e['p3'])
156
- elif p4.match(title):
157
- lvl = act(lvl, 'p4', prev)
158
- toc_entries[i-removed][0] = lvl
159
- prev = ('p4', e['p4'])
160
- elif p5.match(title):
161
- lvl = act(lvl, 'p5', prev)
162
- toc_entries[i-removed][0] = lvl
163
- prev = ('p5', e['p5'])
139
+ title = toc_entries[i-d['removed']][1]
140
+ if (not p1.match(title)) or len(title) < 4 or title in d['titles']: #skip
141
+ toc_entries.pop(i-d['removed'])
142
+ d['removed'] += 1
143
+ elif (name := next((idi for idi, i in enumerate(patterns) if i.match(title)), None)):
144
+ act(f'p{name+2}')
164
145
  else:
165
- titles.add(title)
166
- lvl = act(lvl, 'l', prev)
167
- toc_entries[i-removed][0] = lvl
168
- prev = ('l', e['l'])
146
+ d['titles'].add(title)
147
+ act('l')
148
+
169
149
  return toc_entries
170
150
 
171
151
  def generate_txtfile(toc_entries, txtfile='outline.txt') -> str:
@@ -174,6 +154,8 @@ def generate_txtfile(toc_entries, txtfile='outline.txt') -> str:
174
154
  ============================================================
175
155
  TABLE OF CONTENTS OUTLINE
176
156
  4spaces/lvl text | pg# | {details dictionary} OR y-coord
157
+
158
+ Type 'C' as the first character of this file to cancel
177
159
  ============================================================
178
160
 
179
161
  """)
@@ -193,8 +175,11 @@ def generate_txtfile(toc_entries, txtfile='outline.txt') -> str:
193
175
  def parse_txtfile(txtfile='outline.txt', tablevel=2) -> list:
194
176
  toc_entries = []
195
177
  with open(txtfile) as f:
196
- if f.read(1) == '=':
197
- lines = f.readlines()[5:]
178
+ if (c := f.read(1)) == 'C':
179
+ log('Outline not written')
180
+ exit()
181
+ elif c == '=':
182
+ lines = f.readlines()[7:]
198
183
  else: lines = f.read()
199
184
 
200
185
  for i in lines:
@@ -222,11 +207,19 @@ def embed_toc(pdfpath, toc_entries, newfile=''):
222
207
  doc.saveIncr()
223
208
  log(f"toc saved to '{pdfpath}'")
224
209
 
225
-
210
def get_toc_custom(doc) -> list:
    """Return the outline of *doc* as ``[item0, item1, item2, to]`` rows.

    Each row from ``doc.get_toc(False)`` keeps its first three items; the
    fourth item is replaced by the value stored under its ``'to'`` key, or
    ``None`` when that key is absent.
    """
    # NOTE(review): presumably the first three items are level, title and page,
    # and 'to' is the destination position on the page; confirm against the
    # PyMuPDF get_toc documentation.
    rows = []
    for entry in doc.get_toc(False):
        head = entry[:3]
        details = entry[3]
        rows.append([*head, details.get('to')])
    return rows
226
213
 
227
214
  def edit_txtfile(txtfile='outline.txt'):
228
- editor = os.environ.get('EDITOR', 'notepad' if os.name == 'nt' else 'vi')
229
- subprocess.run([editor, txtfile])
215
+ # editor = os.environ.get('EDITOR', 'notepad' if os.name == 'nt' else 'vi')
216
+ # editor = os.environ.get('EDITOR', 'start' if os.name == 'nt' else 'xdg-open')
217
+ name = os.name
218
+ if name == 'nt':
219
+ subprocess.run(['start', '/WAIT', txtfile], shell=True)
220
+ else: # name == 'posix':
221
+ editor = os.environ.get('EDITOR', 'vi')
222
+ subprocess.run([editor, txtfile])
230
223
 
231
224
  def main():
232
225
  parser = argparse.ArgumentParser(prog='pdfao')
@@ -239,7 +232,7 @@ def main():
239
232
  parser.add_argument('-i', '--infile', type=str, metavar='<file>', help='write toc from file to pdf')
240
233
  parser.add_argument('-t', '--tablevel', type=int, metavar='<n>', help='tab = n toc nesting levels (default 2)', default=2)
241
234
  parser.add_argument('--sioyek', type=str, metavar='<path>', help='for users of the Sioyek pdf viewer')
242
- parser.add_argument('--version', action='version', version='%(prog)s 0.1.1')
235
+ parser.add_argument('--version', action='version', version='%(prog)s 0.1.3')
243
236
 
244
237
  args = parser.parse_args()
245
238
 
@@ -259,7 +252,10 @@ def main():
259
252
 
260
253
  if args.edit or args.superedit:
261
254
  doc = pymupdf.Document(args.filename)
262
- generate_txtfile(doc.get_toc(not args.superedit))
255
+ if args.superedit:
256
+ generate_txtfile(doc.get_toc(False))
257
+ else:
258
+ generate_txtfile(get_toc_custom(doc))
263
259
  edit_txtfile()
264
260
  toc_entries = parse_txtfile(tablevel=args.tablevel)
265
261
  embed_toc(args.filename, toc_entries, args.out)
@@ -1 +0,0 @@
1
- __version__ = '0.1.1'