doctra 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. doctra/__init__.py +4 -0
  2. doctra/cli/main.py +168 -0
  3. doctra/engines/image_restoration/__init__.py +10 -0
  4. doctra/engines/image_restoration/docres_engine.py +566 -0
  5. doctra/engines/vlm/service.py +0 -12
  6. doctra/parsers/enhanced_pdf_parser.py +370 -0
  7. doctra/parsers/structured_pdf_parser.py +11 -60
  8. doctra/parsers/table_chart_extractor.py +8 -44
  9. doctra/third_party/docres/data/MBD/MBD.py +110 -0
  10. doctra/third_party/docres/data/MBD/MBD_utils.py +291 -0
  11. doctra/third_party/docres/data/MBD/infer.py +151 -0
  12. doctra/third_party/docres/data/MBD/model/deep_lab_model/aspp.py +95 -0
  13. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/__init__.py +13 -0
  14. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/drn.py +402 -0
  15. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/mobilenet.py +151 -0
  16. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/resnet.py +170 -0
  17. doctra/third_party/docres/data/MBD/model/deep_lab_model/backbone/xception.py +288 -0
  18. doctra/third_party/docres/data/MBD/model/deep_lab_model/decoder.py +59 -0
  19. doctra/third_party/docres/data/MBD/model/deep_lab_model/deeplab.py +81 -0
  20. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/__init__.py +12 -0
  21. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/batchnorm.py +282 -0
  22. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/comm.py +129 -0
  23. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/replicate.py +88 -0
  24. doctra/third_party/docres/data/MBD/model/deep_lab_model/sync_batchnorm/unittest.py +29 -0
  25. doctra/third_party/docres/data/preprocess/crop_merge_image.py +142 -0
  26. doctra/third_party/docres/inference.py +370 -0
  27. doctra/third_party/docres/models/restormer_arch.py +308 -0
  28. doctra/third_party/docres/utils.py +464 -0
  29. doctra/ui/app.py +5 -32
  30. doctra/utils/progress.py +13 -98
  31. doctra/utils/structured_utils.py +45 -49
  32. doctra/version.py +1 -1
  33. {doctra-0.3.2.dist-info → doctra-0.4.0.dist-info}/METADATA +1 -1
  34. doctra-0.4.0.dist-info/RECORD +67 -0
  35. doctra-0.3.2.dist-info/RECORD +0 -44
  36. {doctra-0.3.2.dist-info → doctra-0.4.0.dist-info}/WHEEL +0 -0
  37. {doctra-0.3.2.dist-info → doctra-0.4.0.dist-info}/licenses/LICENSE +0 -0
  38. {doctra-0.3.2.dist-info → doctra-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,464 @@
1
+ from collections import OrderedDict
2
+ import os
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn.functional as F
6
+ import os
7
+ from skimage.filters import threshold_sauvola
8
+ import cv2
9
+
10
def second2hours(seconds):
    """Format a duration in seconds as ``'<hours> H : <minutes> Min'``.

    Hours and minutes are truncated to integers; leftover seconds are
    dropped from the output.
    """
    hours, remainder = divmod(seconds, 3600)
    minutes = remainder // 60
    return '{:d} H : {:d} Min'.format(int(hours), int(minutes))
18
+
19
+
20
def dict2string(loss_dict):
    """Render a mapping of names to numbers as ``'name 0.1234, other 5.6789'``.

    Each value is formatted with four decimals; entries are joined with
    ``', '`` in the mapping's iteration order. An empty mapping yields ``''``.
    """
    entries = [key + ' {:.4f}'.format(value) for key, value in loss_dict.items()]
    return ', '.join(entries)
25
def mkdir(dir):
    """Create directory `dir`, including missing parents, if it does not exist.

    The directory is created unconditionally and an "already exists" error
    is ignored, instead of checking for existence first. This avoids the
    race where another process creates the path between the check and the
    creation. As before, nothing happens when the path already exists.
    """
    try:
        os.makedirs(dir)
    except FileExistsError:
        # The path is already present (possibly created concurrently).
        pass
28
+
29
def convert_state_dict(state_dict):
    """Return a copy of `state_dict` with the leading ``module.`` prefix removed.

    Intended for state dicts saved from a DataParallel-wrapped module, whose
    keys are prefixed with ``module.``. Keys that do not start with that
    prefix are copied unchanged, rather than losing their first seven
    characters.

    :param state_dict: the loaded DataParallel model state (a mapping)
    :return: a new OrderedDict; the input mapping is not modified
    """
    prefix = 'module.'
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[len(prefix):] if k.startswith(prefix) else k
        new_state_dict[name] = v
    return new_state_dict
40
+
41
+
42
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    The value stored under ``'lr'`` is converted to ``float``. When the
    optimizer has no parameter groups, ``None`` is returned.
    """
    groups = iter(optimizer.param_groups)
    try:
        first_group = next(groups)
    except StopIteration:
        return None
    return float(first_group['lr'])
45
+
46
+
47
def torch2cvimg(tensor,min=0,max=1):
    '''
    Convert a batch of image tensors into a list of uint8 images.

    input:
        tensor -> torch.tensor BxCxHxW C can be 1,3
        min, max -> value range of `tensor`; values are clipped to
                    [min, max] and then mapped linearly onto [0, 255]
    return
        im_list -> list of B ndarrays, each uint8 HxWxC
    '''
    # Move the whole batch to host memory once instead of once per image.
    batch = tensor.detach().cpu().numpy()
    im_list = []
    for chw in batch:
        hwc = np.clip(chw.transpose(1, 2, 0), min, max)
        im_list.append(((hwc - min) / (max - min) * 255).astype(np.uint8))
    return im_list
62
def cvimg2torch(img,min=0,max=1):
    '''
    Convert a single image array into a batched float tensor.

    input:
        img -> ndarray uint8 HxWxC
        min, max -> accepted but not used by this function
    return
        tensor -> torch.tensor (float32) 1xCxHxW, pixel values divided by 255
    '''
    scaled = img.astype(float) / 255.0
    chw = np.transpose(scaled, (2, 0, 1))  # HWC -> CHW
    batched = chw[np.newaxis]              # add the leading batch axis
    return torch.from_numpy(batched).float()
74
+
75
+
76
def setup_seed(seed):
    """Set the cuDNN ``deterministic`` flag to True.

    NOTE(review): `seed` is currently unused. Every RNG-seeding call below
    is commented out, so this function only sets the cuDNN flag.
    """
    # np.random.seed(seed)
    # random.seed(seed)
    # torch.manual_seed(seed) # CPU
    # torch.cuda.manual_seed_all(seed) # parallel GPUs
    torch.backends.cudnn.deterministic = True # make CPU/GPU results consistent
    # torch.backends.cudnn.benchmark = False # speeds up training when the training set does not vary much
83
+
84
def SauvolaModBinarization(image,n1=51,n2=51,k1=0.3,k2=0.3,default=True):
    '''
    Two-pass binarization using Sauvola's algorithm.

    The first pass computes a Sauvola threshold on the gray image and turns
    it into a contrast map: pixels above the threshold are rescaled to
    (0, 255], the rest are set to 0. The second pass thresholds that map
    with Sauvola again to produce the final binary image.

    @name : SauvolaModBinarization
    parameters
        @param image (numpy array of type np.uint8): color (3-D, BGR) or gray scale (2-D) image
    optional parameters
        @param n1 (int) : window size for running sauvola during the first pass
        @param n2 (int): window size for running sauvola during the second pass
        @param k1 (float): k value corresponding to sauvola during the first pass
        @param k2 (float): k value corresponding to sauvola during the second pass
        @param default (bool) : when True (the default), the values passed for
                        n1, n2, k1 and k2 are ignored and replaced by
                            n1 = 5 % of min(image height, image width), made odd
                            n2 = 10 % of min(image height, image width), made odd
                            k1 = 0.5
                            k2 = 0.5
    Returns
        @return (binary, T2): binary is an image with the same shape and dtype as
                        the gray image, holding 0 or 255; T2 is the second-pass
                        Sauvola threshold map

    @cite https://drive.google.com/file/d/1D3CyI5vtodPJeZaD2UV5wdcaIMtkBbdZ/view?usp=sharing
    '''
    if default:
        shorter_side = min(image.shape[0], image.shape[1])
        n1 = int(0.05 * shorter_side)
        if n1 % 2 == 0:
            n1 = n1 + 1  # the window size is kept odd
        n2 = int(0.1 * shorter_side)
        if n2 % 2 == 0:
            n2 = n2 + 1
        k1 = 0.5
        k2 = 0.5

    if image.ndim == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = np.copy(image)

    # First pass: local threshold, then contrast map of the pixels above it.
    T1 = threshold_sauvola(gray, window_size=n1, k=k1)
    max_val = np.amax(gray)
    above = gray > T1  # computed once and reused for every masked access
    C = T1.astype(np.float32)
    # gray > T1 implies T1 < max_val, so the denominator is strictly positive.
    C[above] = (gray[above] - T1[above]) / (max_val - T1[above])
    C[gray <= T1] = 0
    C = C * 255.0
    new_in = C.astype(np.uint8)

    # Second pass: threshold the contrast map.
    T2 = threshold_sauvola(new_in, window_size=n2, k=k2)
    binary = np.copy(gray)
    binary[new_in <= T2] = 0
    binary[new_in > T2] = 255
    return binary,T2
134
+
135
+
136
def getBasecoord(h,w):
    """Build an (h, w, 2) float32 coordinate grid.

    Element ``[y, x]`` of the result is ``(x, y)``: the column index comes
    first on the last axis, followed by the row index.
    """
    rows, cols = np.indices((h, w))
    return np.stack((cols, rows), axis=-1).astype(np.float32)
141
+
142
+
143
+
144
+
145
+
146
+
147
+ import numpy as np
148
+ from scipy import ndimage as ndi
149
+
150
+ # lookup tables for bwmorph_thin
151
+
152
# Deletion lookup tables for the two sub-iterations of the thinning pass.
# Each table is indexed by the 8-bit neighborhood code produced in bwmorph().
# The builtin `bool` is used as dtype because the `np.bool` alias does not
# exist in every NumPy release.
G123_LUT = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,
                     0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0,
                     1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
                     0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1,
                     0, 0, 0], dtype=bool)

G123P_LUT = np.array([0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
                      1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0,
                      0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0,
                      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1,
                      0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      0, 0, 0], dtype=bool)

def bwmorph(image, n_iter=None):
    """
    Perform morphological thinning of a binary image

    Parameters
    ----------
    image : binary (M, N) ndarray or nested sequence
        The image to be thinned. It may only contain the values 0 and 1.
        The input is not modified.

    n_iter : int, number of iterations, optional
        Regardless of the value of this parameter, the thinned image
        is returned immediately if an iteration produces no change.
        If this parameter is specified it thus sets an upper bound on
        the number of iterations performed.

    Returns
    -------
    out : ndarray of bools
        Thinned image.

    Raises
    ------
    ValueError
        If `n_iter` is not positive, if `image` is not two-dimensional, or
        if `image` contains values other than 0 and 1.

    See also
    --------
    skeletonize

    Notes
    -----
    This algorithm [1]_ works by making multiple passes over the image,
    removing pixels matching a set of criteria designed to thin
    connected regions while preserving eight-connected components and
    2 x 2 squares [2]_. In each of the two sub-iterations the algorithm
    correlates the intermediate skeleton image with a neighborhood mask,
    then looks up each neighborhood in a lookup table indicating whether
    the central pixel should be deleted in that sub-iteration.

    References
    ----------
    .. [1] Z. Guo and R. W. Hall, "Parallel thinning with
           two-subiteration algorithms," Comm. ACM, vol. 32, no. 3,
           pp. 359-373, 1989.
    .. [2] Lam, L., Seong-Whan Lee, and Ching Y. Suen, "Thinning
           Methodologies-A Comprehensive Survey," IEEE Transactions on
           Pattern Analysis and Machine Intelligence, Vol 14, No. 9,
           September 1992, p. 879

    Examples
    --------
    >>> square = np.zeros((7, 7), dtype=np.uint8)
    >>> square[1:-1, 2:-2] = 1
    >>> square[0,1] = 1
    >>> square
    array([[0, 1, 0, 0, 0, 0, 0],
           [0, 0, 1, 1, 1, 0, 0],
           [0, 0, 1, 1, 1, 0, 0],
           [0, 0, 1, 1, 1, 0, 0],
           [0, 0, 1, 1, 1, 0, 0],
           [0, 0, 1, 1, 1, 0, 0],
           [0, 0, 0, 0, 0, 0, 0]], dtype=uint8)
    >>> skel = bwmorph(square)
    >>> skel.astype(np.uint8)
    array([[0, 1, 0, 0, 0, 0, 0],
           [0, 0, 1, 0, 0, 0, 0],
           [0, 0, 0, 1, 0, 0, 0],
           [0, 0, 0, 1, 0, 0, 0],
           [0, 0, 0, 1, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0]], dtype=uint8)
    """
    # check parameters
    if n_iter is None:
        n = -1
    elif n_iter <= 0:
        raise ValueError('n_iter must be > 0')
    else:
        n = n_iter

    # check that we have a 2d binary image, and convert it to uint8;
    # astype() copies, so the caller's data is never modified
    source = np.asarray(image)
    skel = source.astype(np.uint8)

    if skel.ndim != 2:
        raise ValueError('2D array required')
    # validate the values as given (before the uint8 cast), so that e.g. 0.5
    # is rejected rather than silently truncated to 0
    if not np.all(np.isin(source, (0, 1))):
        raise ValueError('Image contains values other than 0 and 1')

    # neighborhood mask: each of the 8 neighbors contributes one bit, so the
    # correlation result (0..255) indexes directly into the lookup tables
    mask = np.array([[ 8,  4,  2],
                     [16,  0,  1],
                     [32, 64,128]],dtype=np.uint8)

    # iterate either 1) indefinitely or 2) up to iteration limit
    while n != 0:
        before = np.sum(skel)  # count points before thinning

        # for each subiteration
        for lut in [G123_LUT, G123P_LUT]:
            # correlate image with neighborhood mask
            N = ndi.correlate(skel, mask, mode='constant')
            # take deletion decision from this subiteration's LUT
            D = np.take(lut, N)
            # perform deletion
            skel[D] = 0

        after = np.sum(skel)  # count points after thinning

        if before == after:
            # iteration had no effect: finish
            break

        # count down to iteration limit (or endlessly negative)
        n -= 1

    return skel.astype(bool)
290
+
291
+ """
292
+ # here's how to make the LUTs
293
+ def nabe(n):
294
+ return np.array([n>>i&1 for i in range(0,9)]).astype(np.bool)
295
+ def hood(n):
296
+ return np.take(nabe(n), np.array([[3, 2, 1],
297
+ [4, 8, 0],
298
+ [5, 6, 7]]))
299
+ def G1(n):
300
+ s = 0
301
+ bits = nabe(n)
302
+ for i in (0,2,4,6):
303
+ if not(bits[i]) and (bits[i+1] or bits[(i+2) % 8]):
304
+ s += 1
305
+ return s==1
306
+
307
+ g1_lut = np.array([G1(n) for n in range(256)])
308
+ def G2(n):
309
+ n1, n2 = 0, 0
310
+ bits = nabe(n)
311
+ for k in (1,3,5,7):
312
+ if bits[k] or bits[k-1]:
313
+ n1 += 1
314
+ if bits[k] or bits[(k+1) % 8]:
315
+ n2 += 1
316
+ return min(n1,n2) in [2,3]
317
+ g2_lut = np.array([G2(n) for n in range(256)])
318
+ g12_lut = g1_lut & g2_lut
319
+ def G3(n):
320
+ bits = nabe(n)
321
+ return not((bits[1] or bits[2] or not(bits[7])) and bits[0])
322
+ def G3p(n):
323
+ bits = nabe(n)
324
+ return not((bits[5] or bits[6] or not(bits[3])) and bits[4])
325
+ g3_lut = np.array([G3(n) for n in range(256)])
326
+ g3p_lut = np.array([G3p(n) for n in range(256)])
327
+ g123_lut = g12_lut & g3_lut
328
+ g123p_lut = g12_lut & g3p_lut
329
+ """
330
+
331
+ """
332
+ author : Peb Ruswono Aryan
333
+
334
+ metric for evaluating binarization algorithms
335
+ implemented :
336
+
337
+ * F-Measure
338
+ * pseudo F-Measure (as in H-DIBCO 2010 & 2012)
339
+ * Peak Signal to Noise Ratio (PSNR)
340
+ * Negative Rate Measure (NRM)
341
+ * Misclassification Penalty Measure (MPM)
342
+ * Distance Reciprocal Distortion (DRD)
343
+
344
+ usage:
345
+ python metric.py test-image.png ground-truth-image.png
346
+ """
347
+
348
+
349
def drd_fn(im, im_gt):
    """Compute the Distance Reciprocal Distortion (DRD) of `im` against `im_gt`.

    Every pixel where the two images differ contributes the weighted count
    of ground-truth pixels in its 5x5 neighborhood that differ from the
    pixel's value in `im`. The sum of these contributions is divided by
    NUBN, the number of 8x8 ground-truth blocks that are not uniform.

    Parameters
    ----------
    im : 2-D ndarray holding 0/1 values
        Binarization result under evaluation.
    im_gt : 2-D ndarray holding 0/1 values, same shape as `im`
        Ground-truth binarization.

    Returns
    -------
    float
        The DRD value. When the ground truth has no non-uniform block the
        ratio is undefined; 0.0 is returned if the images are identical
        and ``inf`` otherwise.
    """
    height, width = im.shape
    # coordinates of all differing pixels, in row-major order
    y, x = np.nonzero(im_gt != im)

    # 5x5 weight matrix: reciprocal Euclidean distance to the centre, with a
    # zero centre weight, normalised so that the weights sum to 1
    n = 2
    offsets = np.arange(-n, n + 1)
    dist = np.hypot(offsets[:, None], offsets[None, :])
    dist[n, n] = 1.  # placeholder so the centre does not divide by zero
    W = 1. / dist
    W[n, n] = 0.
    W /= W.sum()

    # NUBN: number of 8x8 ground-truth blocks containing both 0s and 1s
    nubn = 0
    block_size = 8
    for y1 in range(0, height, block_size):
        for x1 in range(0, width, block_size):
            # slicing clips at the image border, so edge blocks may be smaller
            block = im_gt[y1:y1 + block_size, x1:x1 + block_size]
            n_zero = block.size - np.count_nonzero(block)
            if 0 < n_zero < block.size:
                nubn += 1

    drd_sum = 0.
    tmp = np.zeros(W.shape)
    for yi, xi in zip(y, x):
        tmp[:, :] = 0

        # neighborhood window clipped to the image
        x1 = max(0, xi - n)
        y1 = max(0, yi - n)
        x2 = min(width - 1, xi + n)
        y2 = min(height - 1, yi + n)

        # position of the clipped window inside the 5x5 weight matrix
        yy1 = y1 - yi + n
        yy2 = y2 - yi + n
        xx1 = x1 - xi + n
        xx2 = x2 - xi + n

        # a comparison is used instead of an absolute difference so that
        # unsigned integer images cannot wrap around (0 - 1 == 255 for uint8)
        tmp[yy1:yy2 + 1, xx1:xx2 + 1] = im_gt[y1:y2 + 1, x1:x2 + 1] != im[yi, xi]

        drd_sum += np.sum(tmp * W)

    if nubn == 0:
        return 0.0 if drd_sum == 0 else float('inf')
    return drd_sum / nubn
397
+
398
def bin_metric(im,im_gt):
    """Compute binarization quality metrics of `im` against ground truth `im_gt`.

    Both images are binarized (any value > 0 becomes 1) on internal copies,
    so the caller's arrays are not modified. Pixels equal to 0 are counted
    as the positive class in the confusion counts below.

    Parameters
    ----------
    im : 2-D ndarray
        Binarization result under evaluation.
    im_gt : 2-D ndarray, same shape as `im`
        Ground-truth binarization.

    Returns
    -------
    tuple
        ``(fmeasure, pfmeasure, psnr, nrm, mpm, drd)``: F-Measure,
        pseudo F-Measure, PSNR, Negative Rate Metric, Misclassification
        Penalty Metric and Distance Reciprocal Distortion.

    NOTE(review): degenerate inputs (e.g. no positive pixels, or a perfect
    match for PSNR) lead to divisions by zero in the ratios below; they are
    not guarded here.
    """
    im = np.asarray(im)
    im_gt = np.asarray(im_gt)
    height, width = im.shape
    npixel = height*width

    # binarize into fresh uint8 arrays instead of writing into the inputs
    im = (im > 0).astype(np.uint8)
    im_gt = (im_gt > 0).astype(np.uint8)

    # skeleton of the ground-truth zero pixels, stored as 0 on a background of 1
    sk = bwmorph(1-im_gt)
    im_sk = np.ones(im_gt.shape)
    im_sk[sk] = 0

    # ground-truth contour (image minus its erosion) and distance to it
    kernel = np.ones((3,3), dtype=np.uint8)
    im_eroded = cv2.erode(im_gt, kernel)
    im_gtb = im_gt-im_eroded
    im_gtbd = cv2.distanceTransform(1-im_gtb, cv2.DIST_L2, 3)

    nd = im_gtbd.sum()

    # pseudo true positives: zero in the result and on the skeleton
    ptp = np.zeros(im_gt.shape)
    ptp[(im==0) & (im_sk==0)] = 1
    numptp = ptp.sum()

    tp = np.zeros(im_gt.shape)
    tp[(im==0) & (im_gt==0)] = 1
    numtp = tp.sum()

    tn = np.zeros(im_gt.shape)
    tn[(im==1) & (im_gt==1)] = 1
    numtn = tn.sum()

    fp = np.zeros(im_gt.shape)
    fp[(im==0) & (im_gt==1)] = 1
    numfp = fp.sum()

    fn = np.zeros(im_gt.shape)
    fn[(im==1) & (im_gt==0)] = 1
    numfn = fn.sum()

    precision = numtp / (numtp + numfp)
    recall = numtp / (numtp + numfn)
    precall = numptp / np.sum(1-im_sk)
    fmeasure = (2*recall*precision)/(recall+precision)
    pfmeasure = (2*precall*precision)/(precall+precision)

    mse = (numfp+numfn)/npixel
    psnr = 10.*np.log10(1./mse)

    nrfn = numfn / (numfn + numtp)
    nrfp = numfp / (numfp + numtn)
    nrm = (nrfn + nrfp)/2

    # contour distance summed over false negatives / false positives
    im_dn = im_gtbd.copy()
    im_dn[fn==0] = 0
    dn = np.sum(im_dn)
    mpfn = dn / nd

    im_dp = im_gtbd.copy()
    im_dp[fp==0] = 0
    dp = np.sum(im_dp)
    mpfp = dp / nd

    mpm = (mpfp + mpfn) / 2
    drd = drd_fn(im, im_gt)

    return fmeasure, pfmeasure,psnr,nrm, mpm,drd
464
+ # print("F-measure\t: {0}\npF-measure\t: {1}\nPSNR\t\t: {2}\nNRM\t\t: {3}\nMPM\t\t: {4}\nDRD\t\t: {5}".format(fmeasure, pfmeasure, psnr, nrm, mpm, drd))
doctra/ui/app.py CHANGED
@@ -17,13 +17,10 @@ def _gather_outputs(out_dir: Path, allowed_kinds: Optional[List[str]] = None, zi
17
17
 
18
18
  if out_dir.exists():
19
19
  if is_structured_parsing:
20
- # For structured parsing, show ALL files in the directory
21
20
  for file_path in sorted(out_dir.rglob("*")):
22
21
  if file_path.is_file():
23
22
  file_paths.append(str(file_path))
24
23
  else:
25
- # For full parsing, use the original logic
26
- # Always add main output files (HTML, Markdown, etc.) regardless of allowed_kinds
27
24
  main_files = [
28
25
  "result.html",
29
26
  "result.md",
@@ -36,22 +33,18 @@ def _gather_outputs(out_dir: Path, allowed_kinds: Optional[List[str]] = None, zi
36
33
  if file_path.exists():
37
34
  file_paths.append(str(file_path))
38
35
 
39
- # Add image files based on allowed_kinds or all images if not specified
40
36
  if allowed_kinds:
41
37
  for kind in allowed_kinds:
42
- # ChartTablePDFParser saves directly to charts/ and tables/ directories
43
38
  p = out_dir / kind
44
39
  if p.exists():
45
- for img in sorted(p.glob("*.png")): # ChartTablePDFParser saves as .png
40
+ for img in sorted(p.glob("*.png")):
46
41
  file_paths.append(str(img))
47
42
 
48
- # Also check images/ subdirectories (for StructuredPDFParser)
49
43
  images_dir = out_dir / "images" / kind
50
44
  if images_dir.exists():
51
- for img in sorted(images_dir.glob("*.jpg")): # StructuredPDFParser saves as .jpg
45
+ for img in sorted(images_dir.glob("*.jpg")):
52
46
  file_paths.append(str(img))
53
47
  else:
54
- # Fallback: look in both direct directories and images/ subdirectories
55
48
  for p in (out_dir / "charts").glob("*.png"):
56
49
  file_paths.append(str(p))
57
50
  for p in (out_dir / "tables").glob("*.png"):
@@ -59,7 +52,6 @@ def _gather_outputs(out_dir: Path, allowed_kinds: Optional[List[str]] = None, zi
59
52
  for p in (out_dir / "images").rglob("*.jpg"):
60
53
  file_paths.append(str(p))
61
54
 
62
- # Add Excel files based on extraction target (for structured parsing)
63
55
  if allowed_kinds:
64
56
  if "charts" in allowed_kinds and "tables" in allowed_kinds:
65
57
  excel_files = ["parsed_tables_charts.xlsx"]
@@ -77,30 +69,24 @@ def _gather_outputs(out_dir: Path, allowed_kinds: Optional[List[str]] = None, zi
77
69
 
78
70
  kinds = allowed_kinds if allowed_kinds else ["tables", "charts", "figures"]
79
71
  for sub in kinds:
80
- # Look in both direct directories and images/ subdirectories
81
- # First try direct directories (for ChartTablePDFParser)
82
72
  p = out_dir / sub
83
73
  if p.exists():
84
- for img in sorted(p.glob("*.png")): # ChartTablePDFParser saves as .png
74
+ for img in sorted(p.glob("*.png")):
85
75
  gallery_items.append((str(img), f"{sub}: {img.name}"))
86
76
 
87
- # Also try images/ subdirectories (for StructuredPDFParser)
88
77
  images_dir = out_dir / "images" / sub
89
78
  if images_dir.exists():
90
- for img in sorted(images_dir.glob("*.jpg")): # StructuredPDFParser saves as .jpg
79
+ for img in sorted(images_dir.glob("*.jpg")):
91
80
  gallery_items.append((str(img), f"{sub}: {img.name}"))
92
81
 
93
82
  tmp_zip_dir = Path(tempfile.mkdtemp(prefix="doctra_zip_"))
94
83
 
95
- # Use custom filename if provided, otherwise use default
96
84
  if zip_filename:
97
- # Clean the filename to be safe for file systems
98
85
  safe_filename = re.sub(r'[<>:"/\\|?*]', '_', zip_filename)
99
86
  zip_base = tmp_zip_dir / safe_filename
100
87
  else:
101
88
  zip_base = tmp_zip_dir / "doctra_outputs"
102
89
 
103
- # Create a filtered copy of the output directory excluding temp files
104
90
  filtered_dir = tmp_zip_dir / "filtered_outputs"
105
91
  shutil.copytree(out_dir, filtered_dir, ignore=shutil.ignore_patterns('~$*', '*.tmp', '*.temp'))
106
92
 
@@ -125,13 +111,10 @@ def _parse_markdown_by_pages(md_content: str) -> List[Dict[str, Any]]:
125
111
  while i < len(lines):
126
112
  line = lines[i].strip()
127
113
 
128
- # Check for page header
129
114
  if line.startswith('## Page '):
130
- # Save previous page if exists
131
115
  if current_page:
132
116
  pages.append(current_page)
133
117
 
134
- # Start new page
135
118
  page_num = line.replace('## Page ', '').strip()
136
119
  current_page = {
137
120
  'page_num': page_num,
@@ -145,15 +128,12 @@ def _parse_markdown_by_pages(md_content: str) -> List[Dict[str, Any]]:
145
128
  i += 1
146
129
  continue
147
130
 
148
- # Check for images (tables, charts, figures)
149
131
  if line.startswith('![') and '](images/' in line:
150
- # Extract image info
151
132
  match = re.match(r'!\[([^\]]+)\]\(([^)]+)\)', line)
152
133
  if match:
153
134
  caption = match.group(1)
154
135
  img_path = match.group(2)
155
136
 
156
- # Categorize by type
157
137
  if 'Table' in caption:
158
138
  current_page['tables'].append({'caption': caption, 'path': img_path})
159
139
  elif 'Chart' in caption:
@@ -163,18 +143,15 @@ def _parse_markdown_by_pages(md_content: str) -> List[Dict[str, Any]]:
163
143
 
164
144
  current_page['images'].append({'caption': caption, 'path': img_path})
165
145
 
166
- # Add to full content with proper markdown formatting
167
146
  current_page['full_content'].append(f"![{caption}]({img_path})")
168
147
 
169
- # Regular content
170
148
  elif current_page:
171
- if line: # Only add non-empty lines
149
+ if line:
172
150
  current_page['content'].append(line)
173
151
  current_page['full_content'].append(line)
174
152
 
175
153
  i += 1
176
154
 
177
- # Add the last page
178
155
  if current_page:
179
156
  pages.append(current_page)
180
157
 
@@ -198,12 +175,9 @@ def run_full_parse(
198
175
  if not pdf_file:
199
176
  return ("No file provided.", None, [], [], "")
200
177
 
201
- # Extract filename from the uploaded file path
202
- # Gradio provides the original filename in the file path
203
178
  original_filename = Path(pdf_file).stem
204
179
 
205
180
  tmp_dir = Path(tempfile.mkdtemp(prefix="doctra_"))
206
- # Use original filename for temp file so parser creates correct output directory
207
181
  input_pdf = tmp_dir / f"{original_filename}.pdf"
208
182
  shutil.copy2(pdf_file, input_pdf)
209
183
 
@@ -295,7 +269,6 @@ def run_extract(
295
269
  original_filename = Path(pdf_file).stem
296
270
 
297
271
  tmp_dir = Path(tempfile.mkdtemp(prefix="doctra_"))
298
- # Use original filename for temp file so parser creates correct output directory
299
272
  input_pdf = tmp_dir / f"{original_filename}.pdf"
300
273
  shutil.copy2(pdf_file, input_pdf)
301
274