label-studio-converter 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,6 +1,560 @@
1
+ // src/lib/schema.ts
2
+ import z from "zod";
3
/**
 * Zod schema for a full-format Label Studio OCR export: an array of tasks,
 * each holding its annotations, its drafts and task-level bookkeeping fields.
 *
 * Every entry of an annotation's or draft's `result` array must match one of
 * six region shapes: rectangle or polygon geometry, each either carrying
 * `text`, carrying `labels`, or carrying geometry only. Within each geometry
 * family the `text` / `labels` variants are listed ahead of the bare variant
 * so that the most specific shape is tried first.
 */
var FullOCRLabelStudioSchema = (() => {
  // Each factory returns fresh schema instances on every call, so no schema
  // object is shared between union members.
  const rectangleFields = () => ({
    x: z.number(),
    y: z.number(),
    width: z.number(),
    height: z.number(),
    rotation: z.number()
  });
  const polygonFields = () => ({
    points: z.array(z.array(z.number())),
    closed: z.boolean()
  });
  const stringList = () => z.array(z.string());

  // Envelope common to every region; only the `value` payload differs.
  const region = (valueShape) => z.object({
    original_width: z.number(),
    original_height: z.number(),
    image_rotation: z.number(),
    value: z.object(valueShape),
    id: z.string(),
    from_name: z.string(),
    to_name: z.string(),
    type: z.string(),
    origin: z.string()
  });

  const regionResults = () => z.array(
    z.union([
      // Most specific rectangle variants first (with text or labels)
      region({ ...rectangleFields(), text: stringList() }),
      region({ ...rectangleFields(), labels: stringList() }),
      // Base rectangle without text or labels
      region(rectangleFields()),
      // Most specific polygon variants first (with text or labels)
      region({ ...polygonFields(), text: stringList() }),
      region({ ...polygonFields(), labels: stringList() }),
      // Base polygon without text or labels
      region(polygonFields())
    ])
  );

  const annotation = z.object({
    id: z.number(),
    completed_by: z.number(),
    result: regionResults(),
    was_cancelled: z.boolean(),
    ground_truth: z.boolean(),
    created_at: z.string(),
    updated_at: z.string(),
    draft_created_at: z.string(),
    lead_time: z.number(),
    prediction: z.object({}),
    result_count: z.number(),
    unique_id: z.string(),
    import_id: z.null(),
    last_action: z.null(),
    bulk_created: z.boolean(),
    task: z.number(),
    project: z.number(),
    updated_by: z.number(),
    parent_prediction: z.null(),
    parent_annotation: z.null(),
    last_created_by: z.null()
  });

  const draft = z.object({
    id: z.number(),
    user: z.string(),
    created_username: z.string(),
    created_ago: z.string(),
    result: regionResults(),
    lead_time: z.number(),
    was_postponed: z.boolean(),
    import_id: z.null(),
    created_at: z.string(),
    updated_at: z.string(),
    task: z.number(),
    annotation: z.number()
  });

  const task = z.object({
    id: z.number(),
    annotations: z.array(annotation),
    file_upload: z.string(),
    drafts: z.array(draft),
    predictions: z.array(z.unknown()),
    data: z.object({ ocr: z.string() }),
    meta: z.object({}),
    created_at: z.string(),
    updated_at: z.string(),
    allow_skip: z.boolean(),
    inner_id: z.number(),
    total_annotations: z.number(),
    cancelled_annotations: z.number(),
    total_predictions: z.number(),
    comment_count: z.number(),
    unresolved_comment_count: z.number(),
    last_comment_updated_at: z.null(),
    project: z.number(),
    updated_by: z.number(),
    comment_authors: z.array(z.unknown())
  });

  return z.array(task);
})();
274
/**
 * Zod schema for a minimal-format Label Studio OCR export: an array of items,
 * one per exported annotation.
 *
 * `bbox`, `label` and `poly` are optional and default to an empty array.
 * `transcription` may be a string, an array of strings or absent; it is
 * always transformed into an array (absent or empty string becomes `[]`,
 * a non-empty string becomes a one-element array).
 */
var MinOCRLabelStudioSchema = (() => {
  // Factories hand out fresh schema instances so nothing is shared.
  const rectangleFields = () => ({
    x: z.number(),
    y: z.number(),
    width: z.number(),
    height: z.number(),
    rotation: z.number()
  });
  const polygonFields = () => ({
    points: z.array(z.array(z.number())),
    closed: z.boolean()
  });
  const imageSizeFields = () => ({
    original_width: z.number(),
    original_height: z.number()
  });
  const labelsField = () => ({ labels: z.array(z.string()) });

  const boundingBox = z.object({ ...rectangleFields(), ...imageSizeFields() });
  const labelledRegion = z.union([
    z.object({ ...rectangleFields(), ...labelsField(), ...imageSizeFields() }),
    z.object({ ...polygonFields(), ...labelsField(), ...imageSizeFields() })
  ]);
  const polygon = z.object({ ...polygonFields(), ...imageSizeFields() });

  // Normalise every accepted transcription input to an array of strings.
  const transcription = z
    .union([z.string(), z.array(z.string())])
    .optional()
    .transform((val) => (!val ? [] : Array.isArray(val) ? val : [val]));

  const item = z.object({
    ocr: z.string(),
    id: z.number(),
    bbox: z.array(boundingBox).optional().default([]),
    label: z.array(labelledRegion).optional().default([]),
    transcription,
    poly: z.array(polygon).optional().default([]),
    annotator: z.number(),
    annotation_id: z.number(),
    created_at: z.string(),
    updated_at: z.string(),
    lead_time: z.number()
  });

  return z.array(item);
})();
329
/**
 * Zod schema for the PPOCRLabel annotation list of a single image: an array
 * of entries, each with a transcription string and a list of points.
 */
var PPOCRLabelSchema = (() => {
  const entry = z.object({
    transcription: z.string(),
    points: z.array(z.array(z.number())),
    // Detection score (from PaddleOCR)
    dt_score: z.number().optional(),
    // Difficult flag (from PPOCRLabel tool)
    difficult: z.boolean().optional()
  });
  return z.array(entry);
})();
339
+
1
340
  // src/lib/label-studio.ts
2
341
  import * as turf from "@turf/turf";
3
- var labelStudioToPPOCR = async (data, baseImageDir) => {
342
+
343
+ // src/lib/geometry.ts
344
/**
 * Round a number to a fixed count of decimal places.
 *
 * @param {number} value - Number to round.
 * @param {number} precision - Decimal places to keep; any negative value
 *   disables rounding and `value` is returned untouched.
 * @returns {number} The rounded (or original) number.
 */
function roundToPrecision(value, precision) {
  const scale = precision < 0 ? null : Math.pow(10, precision);
  return scale === null ? value : Math.round(value * scale) / scale;
}
351
/**
 * Round both coordinates of every point to `precision` decimal places.
 *
 * @param {number[][]} points - List of `[x, y]` pairs.
 * @param {number} precision - Decimal places to keep; a negative value
 *   returns the very same `points` array without copying.
 * @returns {number[][]} A new list of rounded `[x, y]` pairs, or `points`
 *   itself when rounding is disabled.
 */
function roundPoints(points, precision) {
  if (precision < 0) {
    return points;
  }
  // The scale factor is identical for every coordinate, so build it once.
  const scale = Math.pow(10, precision);
  const snap = (coordinate) => Math.round(coordinate * scale) / scale;
  return points.map(([x, y]) => [snap(x), snap(y)]);
}
359
/**
 * Compute the arithmetic mean of a list of points (the centroid of the
 * vertices, not of the enclosed area).
 *
 * @param {number[][]} points - List of `[x, y]` pairs.
 * @returns {number[]} `[meanX, meanY]`; both entries are NaN for an empty
 *   list because the sums are divided by a length of zero.
 */
function calculateCenter(points) {
  let totalX = 0;
  let totalY = 0;
  for (const [x, y] of points) {
    totalX += x;
    totalY += y;
  }
  const count = points.length;
  return [totalX / count, totalY / count];
}
366
/**
 * Compute the axis-aligned bounding rectangle of a list of points.
 *
 * The extremes are gathered in one pass. Spreading the coordinates into
 * `Math.min(...)` / `Math.max(...)` would allocate four temporary arrays and
 * can throw a RangeError once the point list exceeds the engine's argument
 * limit.
 *
 * @param {number[][]} points - List of `[x, y]` pairs.
 * @returns {{minX: number, minY: number, maxX: number, maxY: number, width: number, height: number}}
 *   The extremes and extent. For an empty list the minima are `Infinity`,
 *   the maxima `-Infinity`, and width/height `-Infinity`.
 */
function getMinimumBoundingRect(points) {
  let minX = Infinity;
  let minY = Infinity;
  let maxX = -Infinity;
  let maxY = -Infinity;
  for (const [x, y] of points) {
    // Math.min/Math.max keep NaN propagation for malformed coordinates.
    minX = Math.min(minX, x);
    maxX = Math.max(maxX, x);
    minY = Math.min(minY, y);
    maxY = Math.max(maxY, y);
  }
  return {
    minX,
    minY,
    maxX,
    maxY,
    width: maxX - minX,
    height: maxY - minY
  };
}
380
/**
 * Replace a polygon by its axis-aligned bounding rectangle.
 *
 * @param {number[][]} points - List of `[x, y]` pairs.
 * @returns {number[][]} Four corners in the order (minX, minY), (maxX, minY),
 *   (maxX, maxY), (minX, maxY). Inputs with fewer than three points are
 *   returned as the same array, unchanged.
 */
function normalizeShape(points) {
  const tooFewPoints = points.length < 3;
  if (tooFewPoints) {
    return points;
  }
  const rect = getMinimumBoundingRect(points);
  const firstCorner = [rect.minX, rect.minY];
  const secondCorner = [rect.maxX, rect.minY];
  const thirdCorner = [rect.maxX, rect.maxY];
  const fourthCorner = [rect.minX, rect.maxY];
  return [firstCorner, secondCorner, thirdCorner, fourthCorner];
}
392
/**
 * Grow or shrink a shape by scaling it around the mean of its vertices so
 * that its bounding box gains `widthIncrement` / `heightIncrement` units.
 *
 * The target width and height are clamped to a minimum of 1. An axis whose
 * current extent is zero (all points share the same x or y) cannot be
 * scaled: dividing by that extent gives Infinity, and multiplying a zero
 * offset by Infinity gives NaN. Such an axis is therefore left untouched
 * instead of producing NaN coordinates.
 *
 * @param {number[][]} points - List of `[x, y]` pairs.
 * @param {number} widthIncrement - Amount added to the bounding-box width.
 * @param {number} heightIncrement - Amount added to the bounding-box height.
 * @returns {number[][]} New list of scaled points; the same `points` array
 *   when it is empty.
 */
function resizeBoundingBox(points, widthIncrement, heightIncrement) {
  if (points.length === 0) {
    return points;
  }

  // One pass collects the vertex sums (for the center) and the extremes.
  let sumX = 0;
  let sumY = 0;
  let minX = Infinity;
  let minY = Infinity;
  let maxX = -Infinity;
  let maxY = -Infinity;
  for (const [x, y] of points) {
    sumX += x;
    sumY += y;
    minX = Math.min(minX, x);
    maxX = Math.max(maxX, x);
    minY = Math.min(minY, y);
    maxY = Math.max(maxY, y);
  }
  const centerX = sumX / points.length;
  const centerY = sumY / points.length;
  const width = maxX - minX;
  const height = maxY - minY;

  const newWidth = Math.max(1, width + widthIncrement);
  const newHeight = Math.max(1, height + heightIncrement);
  // A zero extent has nothing to scale; a factor of 1 keeps it as is.
  const scaleX = width > 0 ? newWidth / width : 1;
  const scaleY = height > 0 ? newHeight / height : 1;

  return points.map(([x, y]) => [
    centerX + (x - centerX) * scaleX,
    centerY + (y - centerY) * scaleY
  ]);
}
408
/**
 * Apply the optional shape transforms to a list of points, in this order:
 * bounding-rectangle normalisation, then bounding-box resizing.
 *
 * Resizing is skipped when both increments are zero. Callers in this file
 * pass `0` as the default increment; running the resize for that case would
 * re-derive every coordinate from the center (floating-point drift), inflate
 * boxes thinner than one unit up to the minimum size of 1, and hit the
 * division by a zero extent for degenerate boxes.
 *
 * @param {number[][]} points - List of `[x, y]` pairs.
 * @param {object} [options] - Transform options.
 * @param {string} [options.normalizeShape] - Only the value "rectangle"
 *   triggers normalisation.
 * @param {number} [options.widthIncrement] - Width change; missing means 0.
 * @param {number} [options.heightIncrement] - Height change; missing means 0.
 * @returns {number[][]} The transformed points, or `points` itself when no
 *   transform applies.
 */
function transformPoints(points, options = {}) {
  const {
    normalizeShape: shapeMode,
    widthIncrement,
    heightIncrement
  } = options ?? {};

  let result = points;
  if (shapeMode === "rectangle") {
    result = normalizeShape(result);
  }

  const widthDelta = widthIncrement ?? 0;
  const heightDelta = heightIncrement ?? 0;
  if (widthDelta !== 0 || heightDelta !== 0) {
    result = resizeBoundingBox(result, widthDelta, heightDelta);
  }
  return result;
}
422
+
423
+ // src/constants.ts
424
// Default value of the `labelName` option in ppocrToLabelStudio and of the
// `labelName` parameter of ppocrToFullLabelStudio.
+ var DEFAULT_LABEL_NAME = "Text";
425
// Vertical sort modes compared against in sortBoundingBoxes. Any value other
// than "none" or "top-bottom" is ordered by descending center y there.
+ var SORT_VERTICAL_NONE = "none";
426
+ var SORT_VERTICAL_TOP_BOTTOM = "top-bottom";
427
// Horizontal sort modes compared against in sortBoundingBoxes: "none"
// disables the horizontal pass, "ltr" orders by ascending center x, and any
// other value orders by descending center x.
+ var SORT_HORIZONTAL_NONE = "none";
428
+ var SORT_HORIZONTAL_LTR = "ltr";
429
+ var SORT_HORIZONTAL_RTL = "rtl";
430
// Default `precision` of the PPOCR -> Label Studio converters. roundToPrecision
// returns its input unchanged for a negative precision, so -1 means
// "do not round".
+ var DEFAULT_LABEL_STUDIO_PRECISION = -1;
431
+
432
+ // src/lib/sort.ts
433
// Distance threshold used by sortBoundingBoxes, in the same units as the
// point coordinates: a box joins a column when its center x is closer than
// this to the column's average x, and two boxes count as being on the same
// row when their center y values differ by no more than this.
+ var GROUPING_TOLERANCE = 50;
434
/**
 * Measure the axis-aligned bounding box of a list of points.
 *
 * Points with an undefined x or y are ignored.
 *
 * @param {number[][]} points - List of `[x, y]` pairs.
 * @returns {{x: number, y: number, width: number, height: number}} Center of
 *   the bounding box and its extent. When no usable point exists, `x` and
 *   `y` are NaN and `width` / `height` are `-Infinity`.
 */
function getBoundingBoxCenter(points) {
  const bounds = {
    left: Infinity,
    top: Infinity,
    right: -Infinity,
    bottom: -Infinity
  };
  for (const [x, y] of points) {
    if (x === void 0 || y === void 0) {
      continue;
    }
    bounds.left = Math.min(bounds.left, x);
    bounds.top = Math.min(bounds.top, y);
    bounds.right = Math.max(bounds.right, x);
    bounds.bottom = Math.max(bounds.bottom, y);
  }
  const { left, top, right, bottom } = bounds;
  return {
    x: (left + right) / 2,
    y: (top + bottom) / 2,
    width: right - left,
    height: bottom - top
  };
}
454
/**
 * Order annotations by the position of their bounding boxes.
 *
 * Two strategies exist:
 * - Column mode, used when `horizontalSort` is "rtl", a vertical sort is
 *   requested and more than half of the boxes are taller than 1.5x their
 *   width: boxes are grouped into columns by center x (within
 *   GROUPING_TOLERANCE of the column average), columns are ordered from the
 *   largest average x to the smallest, and each column is ordered vertically.
 * - Row mode otherwise: boxes are compared by center y first; when the y
 *   difference is within GROUPING_TOLERANCE they are treated as being on the
 *   same row and compared by center x.
 *
 * Every bounding box is measured exactly once up front instead of twice per
 * comparison inside the sort callback; the resulting order is unchanged.
 *
 * NOTE(review): the row-mode comparator is tolerance based and therefore not
 * transitive for chains of boxes whose y values each differ by less than the
 * tolerance; the outcome for such inputs depends on the engine's sort.
 *
 * @param {Array<{points: number[][]}>} annotations - Items to order.
 * @param {string} verticalSort - "none", "top-bottom", or any other value
 *   for descending y.
 * @param {string} horizontalSort - "none", "ltr", or any other value for
 *   descending x.
 * @returns {Array} The input array itself when both sorts are "none",
 *   otherwise a new, ordered array holding the same annotation objects.
 */
function sortBoundingBoxes(annotations, verticalSort, horizontalSort) {
  if (verticalSort === SORT_VERTICAL_NONE && horizontalSort === SORT_HORIZONTAL_NONE) {
    return annotations;
  }

  // Decorate once so no comparison has to re-measure a box.
  const measured = Array.from(annotations, (annotation) => ({
    annotation,
    center: getBoundingBoxCenter(annotation.points)
  }));
  const unwrap = (items) => items.map((item) => item.annotation);

  const tallCount = measured.filter(
    ({ center }) => center.height > center.width * 1.5
  ).length;
  const isVerticalText = measured.length > 0 && tallCount > measured.length / 2;

  const compareVertically = (a, b) =>
    verticalSort === SORT_VERTICAL_TOP_BOTTOM
      ? a.center.y - b.center.y
      : b.center.y - a.center.y;

  if (horizontalSort === SORT_HORIZONTAL_RTL && verticalSort !== SORT_VERTICAL_NONE && isVerticalText) {
    const averageX = (column) =>
      column.reduce((sum, member) => sum + member.center.x, 0) / column.length;

    // Greedy grouping: a box joins the first column whose running average x
    // is close enough, otherwise it opens a new column.
    const columns = [];
    for (const item of measured) {
      const home = columns.find(
        (column) => Math.abs(item.center.x - averageX(column)) < GROUPING_TOLERANCE
      );
      if (home) {
        home.push(item);
      } else {
        columns.push([item]);
      }
    }

    // Columns from the largest average x to the smallest.
    columns.sort((colA, colB) => averageX(colB) - averageX(colA));
    for (const column of columns) {
      column.sort(compareVertically);
    }
    return unwrap(columns.flat());
  }

  measured.sort((a, b) => {
    if (verticalSort !== SORT_VERTICAL_NONE) {
      const yDiff = compareVertically(a, b);
      // Only a clear vertical gap decides; otherwise fall through to x.
      if (Math.abs(yDiff) > GROUPING_TOLERANCE) {
        return yDiff;
      }
    }
    if (horizontalSort !== SORT_HORIZONTAL_NONE) {
      return horizontalSort === SORT_HORIZONTAL_LTR
        ? a.center.x - b.center.x
        : b.center.x - a.center.x;
    }
    return 0;
  });
  return unwrap(measured);
}
514
+
515
+ // src/lib/enhance.ts
516
/**
 * Apply the optional enhancements to a list of PPOCR annotations: reading
 * order sorting first, then shape normalisation / resizing with rounding.
 *
 * Sorting runs when at least one sort option is set; the missing one is
 * treated as "none". Requiring both options would silently ignore a request
 * to sort along a single axis, although sortBoundingBoxes supports that.
 *
 * Rounding to `precision` is applied only together with a shape transform;
 * annotations that are merely sorted keep their coordinates as they are.
 *
 * @param {Array<{points: number[][]}>} data - Annotations to enhance.
 * @param {object} [options] - Enhancement options.
 * @param {string} [options.sortVertical] - Vertical sort mode.
 * @param {string} [options.sortHorizontal] - Horizontal sort mode.
 * @param {string} [options.normalizeShape] - Shape normalisation mode.
 * @param {number} [options.widthIncrement=0] - Bounding-box width change.
 * @param {number} [options.heightIncrement=0] - Bounding-box height change.
 * @param {number} [options.precision=0] - Decimal places kept after a
 *   shape transform.
 * @returns {Array} `data` itself when nothing applies, otherwise a new array.
 */
function enhancePPOCRLabel(data, options = {}) {
  const {
    sortVertical,
    sortHorizontal,
    normalizeShape: shapeMode,
    widthIncrement = 0,
    heightIncrement = 0,
    precision = 0
  } = options;

  let enhanced = data;

  if (sortVertical || sortHorizontal) {
    // Never hand `undefined` to sortBoundingBoxes: it would be read as a
    // non-"none" mode and trigger a descending sort on that axis.
    enhanced = sortBoundingBoxes(
      enhanced,
      sortVertical || SORT_VERTICAL_NONE,
      sortHorizontal || SORT_HORIZONTAL_NONE
    );
  }

  const reshapes = Boolean(shapeMode) || widthIncrement !== 0 || heightIncrement !== 0;
  if (reshapes) {
    enhanced = enhanced.map((annotation) => {
      const transformed = transformPoints(annotation.points, {
        normalizeShape: shapeMode,
        widthIncrement,
        heightIncrement
      });
      return {
        ...annotation,
        points: roundPoints(transformed, precision)
      };
    });
  }

  return enhanced;
}
545
/**
 * Tell whether an options object requests any enhancement.
 *
 * A missing increment counts as 0; comparing `undefined !== 0` directly
 * would report an enhancement for an empty options object. A sort mode of
 * "none" does not count either, because sortBoundingBoxes returns its input
 * unchanged when both modes are "none".
 *
 * @param {object} [options] - Enhancement options.
 * @param {string} [options.sortVertical] - Vertical sort mode.
 * @param {string} [options.sortHorizontal] - Horizontal sort mode.
 * @param {string} [options.normalizeShape] - Shape normalisation mode.
 * @param {number} [options.widthIncrement] - Bounding-box width change.
 * @param {number} [options.heightIncrement] - Bounding-box height change.
 * @returns {boolean} True when at least one option would change the data.
 */
function hasEnhancementOptions(options) {
  const {
    sortVertical,
    sortHorizontal,
    normalizeShape: shapeMode,
    widthIncrement,
    heightIncrement
  } = options ?? {};

  if ((widthIncrement ?? 0) !== 0 || (heightIncrement ?? 0) !== 0) {
    return true;
  }
  if (shapeMode) {
    return true;
  }
  const sortsVertically = Boolean(sortVertical) && sortVertical !== SORT_VERTICAL_NONE;
  const sortsHorizontally = Boolean(sortHorizontal) && sortHorizontal !== SORT_HORIZONTAL_NONE;
  return sortsVertically || sortsHorizontally;
}
548
+
549
+ // src/lib/label-studio.ts
550
+ var labelStudioToPPOCR = async (data, options) => {
551
+ const {
552
+ baseImageDir,
553
+ normalizeShape: normalizeShape2,
554
+ widthIncrement = 0,
555
+ heightIncrement = 0,
556
+ precision = 0
557
+ } = options || {};
4
558
  const resultMap = /* @__PURE__ */ new Map();
5
559
  for (const task of data) {
6
560
  let imagePath = task.file_upload || "";
@@ -51,6 +605,12 @@ var labelStudioToPPOCR = async (data, baseImageDir) => {
51
605
  }
52
606
  }
53
607
  if (points && points.length > 0) {
608
+ points = transformPoints(points, {
609
+ normalizeShape: normalizeShape2,
610
+ widthIncrement,
611
+ heightIncrement
612
+ });
613
+ points = roundPoints(points, precision);
54
614
  let dt_score = 1;
55
615
  try {
56
616
  const firstPoint = points[0];
@@ -76,7 +636,14 @@ var labelStudioToPPOCR = async (data, baseImageDir) => {
76
636
  }
77
637
  return resultMap;
78
638
  };
79
- var minLabelStudioToPPOCR = async (data, baseImageDir) => {
639
+ var minLabelStudioToPPOCR = async (data, options) => {
640
+ const {
641
+ baseImageDir,
642
+ normalizeShape: normalizeShape2,
643
+ widthIncrement = 0,
644
+ heightIncrement = 0,
645
+ precision = 0
646
+ } = options || {};
80
647
  const resultMap = /* @__PURE__ */ new Map();
81
648
  for (const item of data) {
82
649
  let imagePath = item.ocr || "";
@@ -88,57 +655,242 @@ var minLabelStudioToPPOCR = async (data, baseImageDir) => {
88
655
  if (baseImageDir) {
89
656
  imagePath = `${baseImageDir}/${imagePath.split("/").pop() || imagePath}`;
90
657
  }
91
- let points;
92
- if (item.poly.length > 0 && item.poly[0]) {
93
- const { points: polyPoints } = item.poly[0];
94
- points = polyPoints;
95
- } else if (item.bbox.length > 0 && item.bbox[0]) {
96
- const bbox = item.bbox[0];
97
- const { x, y, width, height } = bbox;
98
- points = [
99
- [x, y],
100
- [x + width, y],
101
- [x + width, y + height],
102
- [x, y + height]
103
- ];
104
- } else {
105
- continue;
106
- }
107
- const transcription = item.transcription.length > 0 ? item.transcription[0] : "";
108
- let dt_score = 1;
109
- try {
110
- const firstPoint = points[0];
111
- if (firstPoint) {
112
- const polygon2 = turf.polygon([points.concat([firstPoint])]);
113
- const area2 = turf.area(polygon2);
114
- dt_score = Math.min(1, Math.max(0.5, area2 / 1e4));
658
+ const numAnnotations = Math.max(
659
+ item.poly?.length || 0,
660
+ item.bbox?.length || 0,
661
+ item.transcription?.length || 0
662
+ );
663
+ for (let i = 0; i < numAnnotations; i++) {
664
+ let points;
665
+ if (item.poly && item.poly.length > i && item.poly[i]) {
666
+ const poly = item.poly[i];
667
+ if (poly) {
668
+ const { points: polyPoints } = poly;
669
+ points = polyPoints;
670
+ }
671
+ } else if (item.bbox && item.bbox.length > i && item.bbox[i]) {
672
+ const bbox = item.bbox[i];
673
+ if (bbox) {
674
+ const { x, y, width, height } = bbox;
675
+ points = [
676
+ [x, y],
677
+ [x + width, y],
678
+ [x + width, y + height],
679
+ [x, y + height]
680
+ ];
681
+ }
115
682
  }
116
- } catch {
117
- dt_score = 0.8;
118
- }
119
- const annotation = {
120
- transcription: transcription ?? "",
121
- points,
122
- dt_score
123
- };
124
- if (!resultMap.has(imagePath)) {
125
- resultMap.set(imagePath, []);
683
+ if (!points) {
684
+ continue;
685
+ }
686
+ points = transformPoints(points, {
687
+ normalizeShape: normalizeShape2,
688
+ widthIncrement,
689
+ heightIncrement
690
+ });
691
+ points = roundPoints(points, precision);
692
+ const transcription = item.transcription && item.transcription.length > i ? item.transcription[i] : "";
693
+ let dt_score = 1;
694
+ try {
695
+ const firstPoint = points[0];
696
+ if (firstPoint) {
697
+ const polygon2 = turf.polygon([points.concat([firstPoint])]);
698
+ const area2 = turf.area(polygon2);
699
+ dt_score = Math.min(1, Math.max(0.5, area2 / 1e4));
700
+ }
701
+ } catch {
702
+ dt_score = 0.8;
703
+ }
704
+ const annotation = {
705
+ transcription: transcription ?? "",
706
+ points,
707
+ dt_score
708
+ };
709
+ if (!resultMap.has(imagePath)) {
710
+ resultMap.set(imagePath, []);
711
+ }
712
+ resultMap.get(imagePath).push(annotation);
126
713
  }
127
- resultMap.get(imagePath).push(annotation);
128
714
  }
129
715
  return resultMap;
130
716
  };
717
/**
 * Apply enhancePPOCRLabel (sorting, shape normalisation, resizing, rounding)
 * to Label Studio data and return the data in its original format.
 *
 * Full format (`isFull` truthy): for every annotation of every task the
 * result items are grouped by `id`. Items whose `value` has `points` or
 * x/y/width/height are turned into absolute points by multiplying with
 * original_width / 100 and original_height / 100, the group is enhanced, and
 * the enhanced geometry is divided back and written into copies of the
 * result items. Items without geometry are passed through unchanged.
 *
 * Minimal format (`isFull` falsy): for every item one annotation per index
 * is built from `poly[i]`, falling back to `bbox[i]`; the list is enhanced
 * and the item is returned without `bbox` and with `poly` replaced by the
 * enhanced points. Items that yield no annotation are returned as they are.
 *
 * The function is declared async but awaits nothing.
 *
 * @param data - Array in full or minimal Label Studio format.
 * @param isFull - Selects the full-format branch when truthy.
 * @param options - sortVertical, sortHorizontal, normalizeShape,
 *   widthIncrement (default 0), heightIncrement (default 0) and
 *   precision (default 0); must be an object, it is destructured directly.
 * @returns A promise for a new array in the same format as `data`.
 */
+ var enhanceLabelStudioData = async (data, isFull, options) => {
718
+ const {
719
+ sortVertical,
720
+ sortHorizontal,
721
+ normalizeShape: normalizeShape2,
722
+ widthIncrement = 0,
723
+ heightIncrement = 0,
724
+ precision = 0
725
+ } = options;
726
+ if (isFull) {
727
+ const fullData = data;
728
+ return fullData.map((task) => ({
729
+ ...task,
730
+ annotations: task.annotations.map((annotation) => {
// Group the result items by their `id`, keeping first-seen order of the ids.
731
+ const groupedById = /* @__PURE__ */ new Map();
732
+ for (const resultItem of annotation.result) {
733
+ const { id } = resultItem;
734
+ if (!groupedById.has(id)) {
735
+ groupedById.set(id, []);
736
+ }
737
+ groupedById.get(id).push(resultItem);
738
+ }
739
+ const enhancedResult = [];
740
+ for (const [_, resultItems] of groupedById) {
741
+ let ppocrAnnotations = [];
742
+ for (const resultItem of resultItems) {
743
+ let points;
// Polygon geometry: scale each point by original size / 100.
744
+ if ("points" in resultItem.value && resultItem.value.points) {
745
+ const { points: valuePoints } = resultItem.value;
746
+ const { original_width, original_height } = resultItem;
747
+ points = valuePoints.map(([x, y]) => [
748
+ (x ?? 0) * original_width / 100,
749
+ (y ?? 0) * original_height / 100
750
+ ]);
// Rectangle geometry: scale, then expand into its four corners.
// NOTE(review): `value.rotation` is not applied here - confirm that
// rotated rectangles are meant to be treated as axis-aligned.
751
+ } else if ("x" in resultItem.value && "y" in resultItem.value && "width" in resultItem.value && "height" in resultItem.value) {
752
+ const { x, y, width, height } = resultItem.value;
753
+ const { original_width, original_height } = resultItem;
754
+ const absX = x * original_width / 100;
755
+ const absY = y * original_height / 100;
756
+ const absWidth = width * original_width / 100;
757
+ const absHeight = height * original_height / 100;
758
+ points = [
759
+ [absX, absY],
760
+ [absX + absWidth, absY],
761
+ [absX + absWidth, absY + absHeight],
762
+ [absX, absY + absHeight]
763
+ ];
764
+ }
// Items without geometry add nothing, so ppocrAnnotations can be shorter
// than resultItems.
765
+ if (points) {
766
+ ppocrAnnotations.push({
767
+ transcription: "",
768
+ points,
769
+ dt_score: 1
770
+ });
771
+ }
772
+ }
773
+ if (ppocrAnnotations.length > 0) {
774
+ ppocrAnnotations = enhancePPOCRLabel(ppocrAnnotations, {
775
+ sortVertical,
776
+ sortHorizontal,
777
+ normalizeShape: normalizeShape2,
778
+ widthIncrement,
779
+ heightIncrement,
780
+ precision
781
+ });
// NOTE(review): enhanced entries are paired with result items purely by
// index. That pairing looks unreliable when enhancePPOCRLabel reorders the
// list or when an earlier item in the group had no geometry - verify.
782
+ for (let i = 0; i < resultItems.length; i++) {
783
+ const resultItem = resultItems[i];
784
+ const enhanced = ppocrAnnotations[i];
785
+ if (!enhanced) {
786
+ enhancedResult.push(resultItem);
787
+ continue;
788
+ }
789
+ if ("points" in resultItem.value && resultItem.value.points) {
790
+ const { original_width, original_height } = resultItem;
791
+ enhancedResult.push({
792
+ ...resultItem,
793
+ value: {
794
+ ...resultItem.value,
795
+ points: enhanced.points.map(
796
+ ([x, y]) => [
797
+ (x ?? 0) / original_width * 100,
798
+ (y ?? 0) / original_height * 100
799
+ ]
800
+ )
801
+ }
802
+ });
803
+ } else if ("x" in resultItem.value && "y" in resultItem.value && "width" in resultItem.value && "height" in resultItem.value) {
// Rectangle: collapse the enhanced points to their bounding box and divide
// back by the original size.
804
+ const { original_width, original_height } = resultItem;
805
+ const xs = enhanced.points.map(([x]) => x ?? 0);
806
+ const ys = enhanced.points.map(([, y]) => y ?? 0);
807
+ const minX = Math.min(...xs);
808
+ const maxX = Math.max(...xs);
809
+ const minY = Math.min(...ys);
810
+ const maxY = Math.max(...ys);
811
+ enhancedResult.push({
812
+ ...resultItem,
813
+ value: {
814
+ ...resultItem.value,
815
+ x: minX / original_width * 100,
816
+ y: minY / original_height * 100,
817
+ width: (maxX - minX) / original_width * 100,
818
+ height: (maxY - minY) / original_height * 100
819
+ }
820
+ });
821
+ } else {
822
+ enhancedResult.push(resultItem);
823
+ }
824
+ }
825
+ } else {
826
+ enhancedResult.push(...resultItems);
827
+ }
828
+ }
829
+ return {
830
+ ...annotation,
831
+ result: enhancedResult
832
+ };
833
+ })
834
+ }));
835
+ } else {
836
+ const minData = data;
837
+ return minData.map((item) => {
838
+ let ppocrAnnotations = [];
// The longest of poly / bbox / transcription decides how many indexes are
// visited.
839
+ const numAnnotations = Math.max(
840
+ item.poly?.length || 0,
841
+ item.bbox?.length || 0,
842
+ item.transcription?.length || 0
843
+ );
844
+ for (let i = 0; i < numAnnotations; i++) {
845
+ let points;
// NOTE(review): unlike the full branch, coordinates are used as stored,
// without the original_width / original_height scaling - confirm the
// expected units of the increments for this format.
846
+ if (item.poly && item.poly.length > i && item.poly[i]) {
847
+ const { points: polyPoints } = item.poly[i];
848
+ points = polyPoints;
849
+ } else if (item.bbox && item.bbox.length > i && item.bbox[i]) {
850
+ const { x, y, width, height } = item.bbox[i];
851
+ points = [
852
+ [x, y],
853
+ [x + width, y],
854
+ [x + width, y + height],
855
+ [x, y + height]
856
+ ];
857
+ }
858
+ if (points) {
859
+ ppocrAnnotations.push({
860
+ transcription: item.transcription && item.transcription.length > i ? item.transcription[i] ?? "" : "",
861
+ points,
862
+ dt_score: 1
863
+ });
864
+ }
865
+ }
866
+ if (ppocrAnnotations.length > 0) {
867
+ ppocrAnnotations = enhancePPOCRLabel(ppocrAnnotations, {
868
+ sortVertical,
869
+ sortHorizontal,
870
+ normalizeShape: normalizeShape2,
871
+ widthIncrement,
872
+ heightIncrement,
873
+ precision
874
+ });
// NOTE(review): the rebuilt poly entries carry only `points`; `closed`,
// `original_width` and `original_height` are not copied over, and
// `item.transcription` / `item.label` keep their original order even if the
// annotations were reordered - verify against consumers of this output.
875
+ const newPoly = ppocrAnnotations.map((ann) => ({
876
+ points: ann.points
877
+ }));
878
+ const { bbox: _, ...itemWithoutBbox } = item;
879
+ return {
880
+ ...itemWithoutBbox,
881
+ poly: newPoly
882
+ };
883
+ }
884
+ return item;
885
+ });
886
+ }
887
+ };
131
888
 
132
889
  // src/lib/ppocr-label.ts
133
890
  import { randomUUID } from "crypto";
134
891
  import { existsSync, readFileSync } from "fs";
135
892
  import { join } from "path";
136
893
  import sizeOf from "image-size";
137
-
138
- // src/constants.ts
139
- var DEFAULT_LABEL_NAME = "Text";
140
-
141
- // src/lib/ppocr-label.ts
142
894
  var ppocrToLabelStudio = async (data, options) => {
143
895
  const {
144
896
  imagePath,
@@ -146,7 +898,11 @@ var ppocrToLabelStudio = async (data, options) => {
146
898
  inputDir,
147
899
  toFullJson = true,
148
900
  taskId = 1,
149
- labelName = DEFAULT_LABEL_NAME
901
+ labelName = DEFAULT_LABEL_NAME,
902
+ normalizeShape: normalizeShape2,
903
+ widthIncrement = 0,
904
+ heightIncrement = 0,
905
+ precision = DEFAULT_LABEL_STUDIO_PRECISION
150
906
  } = options || {};
151
907
  if (toFullJson) {
152
908
  return ppocrToFullLabelStudio(
@@ -155,7 +911,11 @@ var ppocrToLabelStudio = async (data, options) => {
155
911
  baseServerUrl,
156
912
  inputDir,
157
913
  taskId,
158
- labelName
914
+ labelName,
915
+ normalizeShape2,
916
+ widthIncrement,
917
+ heightIncrement,
918
+ precision
159
919
  );
160
920
  } else {
161
921
  return ppocrToMinLabelStudio(
@@ -163,11 +923,15 @@ var ppocrToLabelStudio = async (data, options) => {
163
923
  imagePath,
164
924
  baseServerUrl,
165
925
  inputDir,
166
- labelName
926
+ labelName,
927
+ normalizeShape2,
928
+ widthIncrement,
929
+ heightIncrement,
930
+ precision
167
931
  );
168
932
  }
169
933
  };
170
- var ppocrToFullLabelStudio = (data, imagePath, baseServerUrl, inputDir, taskId = 1, labelName = DEFAULT_LABEL_NAME) => {
934
+ var ppocrToFullLabelStudio = (data, imagePath, baseServerUrl, inputDir, taskId = 1, labelName = DEFAULT_LABEL_NAME, normalizeShape2, widthIncrement = 0, heightIncrement = 0, precision = DEFAULT_LABEL_STUDIO_PRECISION) => {
171
935
  const newBaseServerUrl = baseServerUrl.replace(/\/+$/, "") + (baseServerUrl === "" ? "" : "/");
172
936
  const now = (/* @__PURE__ */ new Date()).toISOString();
173
937
  let original_width = 1920;
@@ -194,11 +958,16 @@ var ppocrToFullLabelStudio = (data, imagePath, baseServerUrl, inputDir, taskId =
194
958
  id: taskId,
195
959
  completed_by: 1,
196
960
  result: data.map((item) => {
197
- const { points } = item;
961
+ let { points } = item;
962
+ points = transformPoints(points, {
963
+ normalizeShape: normalizeShape2,
964
+ widthIncrement,
965
+ heightIncrement
966
+ });
198
967
  const annotationId = randomUUID().slice(0, 10);
199
968
  const polygonPoints = points.map(([x, y]) => [
200
- (x ?? 0) / original_width * 100,
201
- (y ?? 0) / original_height * 100
969
+ roundToPrecision((x ?? 0) / original_width * 100, precision),
970
+ roundToPrecision((y ?? 0) / original_height * 100, precision)
202
971
  ]);
203
972
  return [
204
973
  // 1. Polygon geometry only
@@ -292,7 +1061,7 @@ var ppocrToFullLabelStudio = (data, imagePath, baseServerUrl, inputDir, taskId =
292
1061
  ];
293
1062
  return result;
294
1063
  };
295
- var ppocrToMinLabelStudio = (data, imagePath, baseServerUrl, inputDir, labelName = "text") => {
1064
+ var ppocrToMinLabelStudio = (data, imagePath, baseServerUrl, inputDir, labelName = "text", normalizeShape2, widthIncrement = 0, heightIncrement = 0, precision = DEFAULT_LABEL_STUDIO_PRECISION) => {
296
1065
  const newBaseServerUrl = baseServerUrl.replace(/\/+$/, "") + (baseServerUrl === "" ? "" : "/");
297
1066
  const now = (/* @__PURE__ */ new Date()).toISOString();
298
1067
  let original_width = 1920;
@@ -311,12 +1080,23 @@ var ppocrToMinLabelStudio = (data, imagePath, baseServerUrl, inputDir, labelName
311
1080
  original_width = dimensions.width;
312
1081
  original_height = dimensions.height;
313
1082
  return data.map((item, index) => {
314
- const { points } = item;
1083
+ let { points } = item;
1084
+ points = transformPoints(points, {
1085
+ normalizeShape: normalizeShape2,
1086
+ widthIncrement,
1087
+ heightIncrement
1088
+ });
1089
+ const roundedPoints = points.map(
1090
+ ([x, y]) => [
1091
+ roundToPrecision(x ?? 0, precision),
1092
+ roundToPrecision(y ?? 0, precision)
1093
+ ]
1094
+ );
315
1095
  let minX = Infinity;
316
1096
  let minY = Infinity;
317
1097
  let maxX = -Infinity;
318
1098
  let maxY = -Infinity;
319
- for (const point of points) {
1099
+ for (const point of roundedPoints) {
320
1100
  const [x, y] = point;
321
1101
  if (x !== void 0 && y !== void 0) {
322
1102
  minX = Math.min(minX, x);
@@ -343,7 +1123,7 @@ var ppocrToMinLabelStudio = (data, imagePath, baseServerUrl, inputDir, labelName
343
1123
  ],
344
1124
  label: [
345
1125
  {
346
- points,
1126
+ points: roundedPoints,
347
1127
  closed: true,
348
1128
  labels: [labelName],
349
1129
  original_width,
@@ -353,7 +1133,7 @@ var ppocrToMinLabelStudio = (data, imagePath, baseServerUrl, inputDir, labelName
353
1133
  transcription: [item.transcription],
354
1134
  poly: [
355
1135
  {
356
- points,
1136
+ points: roundedPoints,
357
1137
  closed: true,
358
1138
  original_width,
359
1139
  original_height
@@ -368,10 +1148,24 @@ var ppocrToMinLabelStudio = (data, imagePath, baseServerUrl, inputDir, labelName
368
1148
  });
369
1149
  };
370
1150
  export {
1151
+ FullOCRLabelStudioSchema,
1152
+ MinOCRLabelStudioSchema,
1153
+ PPOCRLabelSchema,
1154
+ calculateCenter,
1155
+ enhanceLabelStudioData,
1156
+ enhancePPOCRLabel,
1157
+ getMinimumBoundingRect,
1158
+ hasEnhancementOptions,
371
1159
  labelStudioToPPOCR,
372
1160
  minLabelStudioToPPOCR,
1161
+ normalizeShape,
373
1162
  ppocrToFullLabelStudio,
374
1163
  ppocrToLabelStudio,
375
- ppocrToMinLabelStudio
1164
+ ppocrToMinLabelStudio,
1165
+ resizeBoundingBox,
1166
+ roundPoints,
1167
+ roundToPrecision,
1168
+ sortBoundingBoxes,
1169
+ transformPoints
376
1170
  };
377
1171
  //# sourceMappingURL=index.js.map