label-studio-converter 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +580 -20
- package/dist/bash-complete.cjs +316 -74
- package/dist/bash-complete.cjs.map +1 -1
- package/dist/bash-complete.js +316 -74
- package/dist/bash-complete.js.map +1 -1
- package/dist/cli.cjs +316 -74
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +316 -74
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +199 -49
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +31 -16
- package/dist/index.d.ts +31 -16
- package/dist/index.js +199 -49
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.cjs
CHANGED
|
@@ -38,7 +38,7 @@ var init_cjs_shims = __esm({
|
|
|
38
38
|
});
|
|
39
39
|
|
|
40
40
|
// src/constants.ts
|
|
41
|
-
var OUTPUT_BASE_DIR, DEFAULT_LABEL_NAME, DEFAULT_LABEL_STUDIO_FULL_JSON, DEFAULT_CREATE_FILE_PER_IMAGE, DEFAULT_CREATE_FILE_LIST_FOR_SERVING, DEFAULT_FILE_LIST_NAME, DEFAULT_BASE_SERVER_URL, DEFAULT_PPOCR_FILE_NAME, SORT_VERTICAL_NONE, SORT_VERTICAL_TOP_BOTTOM, SORT_VERTICAL_BOTTOM_TOP, DEFAULT_SORT_VERTICAL, SORT_HORIZONTAL_NONE, SORT_HORIZONTAL_LTR, SORT_HORIZONTAL_RTL, DEFAULT_SORT_HORIZONTAL;
|
|
41
|
+
var OUTPUT_BASE_DIR, DEFAULT_LABEL_NAME, DEFAULT_LABEL_STUDIO_FULL_JSON, DEFAULT_CREATE_FILE_PER_IMAGE, DEFAULT_CREATE_FILE_LIST_FOR_SERVING, DEFAULT_FILE_LIST_NAME, DEFAULT_BASE_SERVER_URL, DEFAULT_PPOCR_FILE_NAME, SORT_VERTICAL_NONE, SORT_VERTICAL_TOP_BOTTOM, SORT_VERTICAL_BOTTOM_TOP, DEFAULT_SORT_VERTICAL, SORT_HORIZONTAL_NONE, SORT_HORIZONTAL_LTR, SORT_HORIZONTAL_RTL, DEFAULT_SORT_HORIZONTAL, SHAPE_NORMALIZE_NONE, SHAPE_NORMALIZE_RECTANGLE, DEFAULT_SHAPE_NORMALIZE, DEFAULT_WIDTH_INCREMENT, DEFAULT_HEIGHT_INCREMENT, DEFAULT_LABEL_STUDIO_PRECISION, DEFAULT_PPOCR_PRECISION;
|
|
42
42
|
var init_constants = __esm({
|
|
43
43
|
"src/constants.ts"() {
|
|
44
44
|
"use strict";
|
|
@@ -59,6 +59,99 @@ var init_constants = __esm({
|
|
|
59
59
|
SORT_HORIZONTAL_LTR = "ltr";
|
|
60
60
|
SORT_HORIZONTAL_RTL = "rtl";
|
|
61
61
|
DEFAULT_SORT_HORIZONTAL = SORT_HORIZONTAL_NONE;
|
|
62
|
+
SHAPE_NORMALIZE_NONE = "none";
|
|
63
|
+
SHAPE_NORMALIZE_RECTANGLE = "rectangle";
|
|
64
|
+
DEFAULT_SHAPE_NORMALIZE = SHAPE_NORMALIZE_NONE;
|
|
65
|
+
DEFAULT_WIDTH_INCREMENT = 0;
|
|
66
|
+
DEFAULT_HEIGHT_INCREMENT = 0;
|
|
67
|
+
DEFAULT_LABEL_STUDIO_PRECISION = -1;
|
|
68
|
+
DEFAULT_PPOCR_PRECISION = 0;
|
|
69
|
+
}
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
// src/lib/geometry.ts
|
|
73
|
+
/**
 * Round a number to a fixed number of decimal places.
 *
 * @param {number} value - The number to round.
 * @param {number} precision - Number of decimal places to keep. A negative
 *   value means "full precision": `value` is returned untouched. A fractional
 *   value is floored, since a fractional count of decimal places is meaningless.
 * @returns {number} The rounded value, or `value` unchanged whenever rounding
 *   cannot be carried out safely.
 */
function roundToPrecision(value, precision) {
  // A NaN/Infinity precision (e.g. the result of a failed Number() parse) would
  // make the multiplier NaN/Infinity and silently turn every coordinate into
  // NaN, so treat it the same as "no rounding requested".
  if (!Number.isFinite(precision) || precision < 0) {
    return value;
  }
  const multiplier = 10 ** Math.floor(precision);
  const scaled = value * multiplier;
  // Very large precisions overflow to Infinity; Infinity / Infinity is NaN, so
  // bail out and keep the original value instead.
  if (!Number.isFinite(scaled)) {
    return value;
  }
  return Math.round(scaled) / multiplier;
}
|
|
80
|
+
/**
 * Round both coordinates of every point to the requested number of decimals.
 *
 * @param {Array<[number, number]>} points - List of `[x, y]` pairs.
 * @param {number} precision - Decimal places to keep; a negative value
 *   disables rounding and hands back the input array itself.
 * @returns {Array<[number, number]>} A new array of rounded pairs, or the
 *   original `points` array when `precision` is negative.
 */
function roundPoints(points, precision) {
  const roundPair = ([x, y]) => {
    const roundedX = roundToPrecision(x, precision);
    const roundedY = roundToPrecision(y, precision);
    return [roundedX, roundedY];
  };
  return precision < 0 ? points : points.map(roundPair);
}
|
|
88
|
+
/**
 * Compute the arithmetic mean of a list of points (the vertex centroid).
 *
 * Note this is the average of the vertices, not the centre of their bounding
 * box; the two differ for shapes whose vertices are unevenly distributed.
 *
 * @param {Array<[number, number]>} points - List of `[x, y]` pairs.
 * @returns {[number, number]} The mean `[x, y]`. An empty list yields
 *   `[0, 0]` rather than dividing by zero and producing `[NaN, NaN]`.
 */
function calculateCenter(points) {
  const count = points.length;
  if (count === 0) {
    return [0, 0];
  }
  let sumX = 0;
  let sumY = 0;
  for (const [x, y] of points) {
    sumX += x;
    sumY += y;
  }
  return [sumX / count, sumY / count];
}
|
|
95
|
+
function getMinimumBoundingRect(points) {
|
|
96
|
+
const minX = Math.min(...points.map(([x]) => x));
|
|
97
|
+
const maxX = Math.max(...points.map(([x]) => x));
|
|
98
|
+
const minY = Math.min(...points.map(([, y]) => y));
|
|
99
|
+
const maxY = Math.max(...points.map(([, y]) => y));
|
|
100
|
+
return {
|
|
101
|
+
minX,
|
|
102
|
+
minY,
|
|
103
|
+
maxX,
|
|
104
|
+
maxY,
|
|
105
|
+
width: maxX - minX,
|
|
106
|
+
height: maxY - minY
|
|
107
|
+
};
|
|
108
|
+
}
|
|
109
|
+
/**
 * Replace a polygon with the four corners of its axis-aligned bounding box.
 *
 * @param {Array<[number, number]>} points - Polygon vertices as `[x, y]` pairs.
 * @returns {Array<[number, number]>} The corners in the order top-left,
 *   top-right, bottom-right, bottom-left (taking smaller y as "top"). Inputs
 *   with fewer than three points are returned as-is.
 */
function normalizeShape(points) {
  if (points.length < 3) {
    return points;
  }
  const rect = getMinimumBoundingRect(points);
  const left = rect.minX;
  const top = rect.minY;
  const right = rect.maxX;
  const bottom = rect.maxY;
  const topLeft = [left, top];
  const topRight = [right, top];
  const bottomRight = [right, bottom];
  const bottomLeft = [left, bottom];
  return [topLeft, topRight, bottomRight, bottomLeft];
}
|
|
121
|
+
/**
 * Grow or shrink a shape so that its bounding box changes by the given amounts.
 *
 * Every point is scaled about the shape's vertex centroid, independently per
 * axis, so that the bounding-box extent becomes `extent + increment` (clamped
 * to a minimum of 1 so a negative increment cannot collapse or flip the shape).
 *
 * @param {Array<[number, number]>} points - Shape vertices as `[x, y]` pairs.
 * @param {number} widthIncrement - Amount added to the bounding-box width; may
 *   be negative. `0` (or a non-finite value) leaves x coordinates untouched.
 * @param {number} heightIncrement - Amount added to the bounding-box height;
 *   may be negative. `0` (or a non-finite value) leaves y coordinates untouched.
 * @returns {Array<[number, number]>} A new array of resized points, or the
 *   input itself when it is empty.
 */
function resizeBoundingBox(points, widthIncrement, heightIncrement) {
  if (points.length === 0) {
    return points;
  }
  // Scale factor for one axis; 1 means "leave this axis exactly as it is".
  const axisScale = (extent, increment) => {
    // A zero increment is a no-op by definition, and a NaN/Infinity increment
    // (e.g. from a failed numeric parse) would turn every coordinate into NaN.
    if (!Number.isFinite(increment) || increment === 0) {
      return 1;
    }
    // All points share this coordinate: dividing by the zero extent gives
    // Infinity, and `0 * Infinity` is NaN. There is nothing to stretch.
    if (!(extent > 0)) {
      return 1;
    }
    return Math.max(1, extent + increment) / extent;
  };
  const [centerX, centerY] = calculateCenter(points);
  const { width, height } = getMinimumBoundingRect(points);
  const scaleX = axisScale(width, widthIncrement);
  const scaleY = axisScale(height, heightIncrement);
  return points.map(([x, y]) => [
    // Skip the arithmetic for an unscaled axis so the coordinate is returned
    // bit-for-bit instead of picking up floating-point drift.
    scaleX === 1 ? x : centerX + (x - centerX) * scaleX,
    scaleY === 1 ? y : centerY + (y - centerY) * scaleY
  ]);
}
|
|
137
|
+
/**
 * Apply the optional geometry transformations to a shape, in a fixed order:
 * rectangle normalisation first, then bounding-box resizing.
 *
 * @param {Array<[number, number]>} points - Shape vertices as `[x, y]` pairs.
 * @param {object} [options] - Transformation settings; may be omitted.
 * @param {string} [options.normalizeShape] - Only the value `"rectangle"`
 *   triggers normalisation; anything else leaves the shape as it is.
 * @param {number} [options.widthIncrement] - Change in bounding-box width.
 * @param {number} [options.heightIncrement] - Change in bounding-box height.
 * @returns {Array<[number, number]>} The transformed points, or `points`
 *   itself when no transformation is requested.
 */
function transformPoints(points, options) {
  // Tolerate a missing options object instead of throwing on property access.
  const settings = options ?? {};
  const widthDelta = settings.widthIncrement ?? 0;
  const heightDelta = settings.heightIncrement ?? 0;
  let result = points;
  if (settings.normalizeShape === "rectangle") {
    result = normalizeShape(result);
  }
  // Resize only when there is something to change. Testing merely for
  // "defined" would send every shape through the resize arithmetic even for
  // the default increments of 0, which are meant to be a no-op.
  if (widthDelta !== 0 || heightDelta !== 0) {
    result = resizeBoundingBox(result, widthDelta, heightDelta);
  }
  return result;
}
|
|
151
|
+
var init_geometry = __esm({
|
|
152
|
+
"src/lib/geometry.ts"() {
|
|
153
|
+
"use strict";
|
|
154
|
+
init_cjs_shims();
|
|
62
155
|
}
|
|
63
156
|
});
|
|
64
157
|
|
|
@@ -73,6 +166,7 @@ var init_ppocr_label = __esm({
|
|
|
73
166
|
import_node_path = require("path");
|
|
74
167
|
import_image_size = __toESM(require("image-size"), 1);
|
|
75
168
|
init_constants();
|
|
169
|
+
init_geometry();
|
|
76
170
|
ppocrToLabelStudio = async (data, options) => {
|
|
77
171
|
const {
|
|
78
172
|
imagePath,
|
|
@@ -80,7 +174,11 @@ var init_ppocr_label = __esm({
|
|
|
80
174
|
inputDir,
|
|
81
175
|
toFullJson = true,
|
|
82
176
|
taskId = 1,
|
|
83
|
-
labelName = DEFAULT_LABEL_NAME
|
|
177
|
+
labelName = DEFAULT_LABEL_NAME,
|
|
178
|
+
normalizeShape: normalizeShape2,
|
|
179
|
+
widthIncrement = 0,
|
|
180
|
+
heightIncrement = 0,
|
|
181
|
+
precision = DEFAULT_LABEL_STUDIO_PRECISION
|
|
84
182
|
} = options || {};
|
|
85
183
|
if (toFullJson) {
|
|
86
184
|
return ppocrToFullLabelStudio(
|
|
@@ -89,7 +187,11 @@ var init_ppocr_label = __esm({
|
|
|
89
187
|
baseServerUrl,
|
|
90
188
|
inputDir,
|
|
91
189
|
taskId,
|
|
92
|
-
labelName
|
|
190
|
+
labelName,
|
|
191
|
+
normalizeShape2,
|
|
192
|
+
widthIncrement,
|
|
193
|
+
heightIncrement,
|
|
194
|
+
precision
|
|
93
195
|
);
|
|
94
196
|
} else {
|
|
95
197
|
return ppocrToMinLabelStudio(
|
|
@@ -97,11 +199,15 @@ var init_ppocr_label = __esm({
|
|
|
97
199
|
imagePath,
|
|
98
200
|
baseServerUrl,
|
|
99
201
|
inputDir,
|
|
100
|
-
labelName
|
|
202
|
+
labelName,
|
|
203
|
+
normalizeShape2,
|
|
204
|
+
widthIncrement,
|
|
205
|
+
heightIncrement,
|
|
206
|
+
precision
|
|
101
207
|
);
|
|
102
208
|
}
|
|
103
209
|
};
|
|
104
|
-
ppocrToFullLabelStudio = (data, imagePath, baseServerUrl, inputDir, taskId = 1, labelName = DEFAULT_LABEL_NAME) => {
|
|
210
|
+
ppocrToFullLabelStudio = (data, imagePath, baseServerUrl, inputDir, taskId = 1, labelName = DEFAULT_LABEL_NAME, normalizeShape2, widthIncrement = 0, heightIncrement = 0, precision = DEFAULT_LABEL_STUDIO_PRECISION) => {
|
|
105
211
|
const newBaseServerUrl = baseServerUrl.replace(/\/+$/, "") + (baseServerUrl === "" ? "" : "/");
|
|
106
212
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
107
213
|
let original_width = 1920;
|
|
@@ -128,11 +234,16 @@ var init_ppocr_label = __esm({
|
|
|
128
234
|
id: taskId,
|
|
129
235
|
completed_by: 1,
|
|
130
236
|
result: data.map((item) => {
|
|
131
|
-
|
|
237
|
+
let { points } = item;
|
|
238
|
+
points = transformPoints(points, {
|
|
239
|
+
normalizeShape: normalizeShape2,
|
|
240
|
+
widthIncrement,
|
|
241
|
+
heightIncrement
|
|
242
|
+
});
|
|
132
243
|
const annotationId = (0, import_node_crypto.randomUUID)().slice(0, 10);
|
|
133
244
|
const polygonPoints = points.map(([x, y]) => [
|
|
134
|
-
(x ?? 0) / original_width * 100,
|
|
135
|
-
(y ?? 0) / original_height * 100
|
|
245
|
+
roundToPrecision((x ?? 0) / original_width * 100, precision),
|
|
246
|
+
roundToPrecision((y ?? 0) / original_height * 100, precision)
|
|
136
247
|
]);
|
|
137
248
|
return [
|
|
138
249
|
// 1. Polygon geometry only
|
|
@@ -226,7 +337,7 @@ var init_ppocr_label = __esm({
|
|
|
226
337
|
];
|
|
227
338
|
return result;
|
|
228
339
|
};
|
|
229
|
-
ppocrToMinLabelStudio = (data, imagePath, baseServerUrl, inputDir, labelName = "text") => {
|
|
340
|
+
ppocrToMinLabelStudio = (data, imagePath, baseServerUrl, inputDir, labelName = "text", normalizeShape2, widthIncrement = 0, heightIncrement = 0, precision = DEFAULT_LABEL_STUDIO_PRECISION) => {
|
|
230
341
|
const newBaseServerUrl = baseServerUrl.replace(/\/+$/, "") + (baseServerUrl === "" ? "" : "/");
|
|
231
342
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
232
343
|
let original_width = 1920;
|
|
@@ -245,12 +356,23 @@ var init_ppocr_label = __esm({
|
|
|
245
356
|
original_width = dimensions.width;
|
|
246
357
|
original_height = dimensions.height;
|
|
247
358
|
return data.map((item, index) => {
|
|
248
|
-
|
|
359
|
+
let { points } = item;
|
|
360
|
+
points = transformPoints(points, {
|
|
361
|
+
normalizeShape: normalizeShape2,
|
|
362
|
+
widthIncrement,
|
|
363
|
+
heightIncrement
|
|
364
|
+
});
|
|
365
|
+
const roundedPoints = points.map(
|
|
366
|
+
([x, y]) => [
|
|
367
|
+
roundToPrecision(x ?? 0, precision),
|
|
368
|
+
roundToPrecision(y ?? 0, precision)
|
|
369
|
+
]
|
|
370
|
+
);
|
|
249
371
|
let minX = Infinity;
|
|
250
372
|
let minY = Infinity;
|
|
251
373
|
let maxX = -Infinity;
|
|
252
374
|
let maxY = -Infinity;
|
|
253
|
-
for (const point of points) {
|
|
375
|
+
for (const point of roundedPoints) {
|
|
254
376
|
const [x, y] = point;
|
|
255
377
|
if (x !== void 0 && y !== void 0) {
|
|
256
378
|
minX = Math.min(minX, x);
|
|
@@ -277,7 +399,7 @@ var init_ppocr_label = __esm({
|
|
|
277
399
|
],
|
|
278
400
|
label: [
|
|
279
401
|
{
|
|
280
|
-
points,
|
|
402
|
+
points: roundedPoints,
|
|
281
403
|
closed: true,
|
|
282
404
|
labels: [labelName],
|
|
283
405
|
original_width,
|
|
@@ -287,7 +409,7 @@ var init_ppocr_label = __esm({
|
|
|
287
409
|
transcription: [item.transcription],
|
|
288
410
|
poly: [
|
|
289
411
|
{
|
|
290
|
-
points,
|
|
412
|
+
points: roundedPoints,
|
|
291
413
|
closed: true,
|
|
292
414
|
original_width,
|
|
293
415
|
original_height
|
|
@@ -320,6 +442,7 @@ var init_schema = __esm({
|
|
|
320
442
|
completed_by: import_zod.default.number(),
|
|
321
443
|
result: import_zod.default.array(
|
|
322
444
|
import_zod.default.union([
|
|
445
|
+
// Most specific rectangle variants first (with text or labels)
|
|
323
446
|
import_zod.default.object({
|
|
324
447
|
original_width: import_zod.default.number(),
|
|
325
448
|
original_height: import_zod.default.number(),
|
|
@@ -329,7 +452,8 @@ var init_schema = __esm({
|
|
|
329
452
|
y: import_zod.default.number(),
|
|
330
453
|
width: import_zod.default.number(),
|
|
331
454
|
height: import_zod.default.number(),
|
|
332
|
-
rotation: import_zod.default.number()
|
|
455
|
+
rotation: import_zod.default.number(),
|
|
456
|
+
text: import_zod.default.array(import_zod.default.string())
|
|
333
457
|
}),
|
|
334
458
|
id: import_zod.default.string(),
|
|
335
459
|
from_name: import_zod.default.string(),
|
|
@@ -355,6 +479,7 @@ var init_schema = __esm({
|
|
|
355
479
|
type: import_zod.default.string(),
|
|
356
480
|
origin: import_zod.default.string()
|
|
357
481
|
}),
|
|
482
|
+
// Base rectangle without text or labels
|
|
358
483
|
import_zod.default.object({
|
|
359
484
|
original_width: import_zod.default.number(),
|
|
360
485
|
original_height: import_zod.default.number(),
|
|
@@ -364,8 +489,7 @@ var init_schema = __esm({
|
|
|
364
489
|
y: import_zod.default.number(),
|
|
365
490
|
width: import_zod.default.number(),
|
|
366
491
|
height: import_zod.default.number(),
|
|
367
|
-
rotation: import_zod.default.number()
|
|
368
|
-
text: import_zod.default.array(import_zod.default.string())
|
|
492
|
+
rotation: import_zod.default.number()
|
|
369
493
|
}),
|
|
370
494
|
id: import_zod.default.string(),
|
|
371
495
|
from_name: import_zod.default.string(),
|
|
@@ -373,13 +497,15 @@ var init_schema = __esm({
|
|
|
373
497
|
type: import_zod.default.string(),
|
|
374
498
|
origin: import_zod.default.string()
|
|
375
499
|
}),
|
|
500
|
+
// Most specific polygon variants first (with text or labels)
|
|
376
501
|
import_zod.default.object({
|
|
377
502
|
original_width: import_zod.default.number(),
|
|
378
503
|
original_height: import_zod.default.number(),
|
|
379
504
|
image_rotation: import_zod.default.number(),
|
|
380
505
|
value: import_zod.default.object({
|
|
381
506
|
points: import_zod.default.array(import_zod.default.array(import_zod.default.number())),
|
|
382
|
-
closed: import_zod.default.boolean()
|
|
507
|
+
closed: import_zod.default.boolean(),
|
|
508
|
+
text: import_zod.default.array(import_zod.default.string())
|
|
383
509
|
}),
|
|
384
510
|
id: import_zod.default.string(),
|
|
385
511
|
from_name: import_zod.default.string(),
|
|
@@ -402,14 +528,14 @@ var init_schema = __esm({
|
|
|
402
528
|
type: import_zod.default.string(),
|
|
403
529
|
origin: import_zod.default.string()
|
|
404
530
|
}),
|
|
531
|
+
// Base polygon without text or labels
|
|
405
532
|
import_zod.default.object({
|
|
406
533
|
original_width: import_zod.default.number(),
|
|
407
534
|
original_height: import_zod.default.number(),
|
|
408
535
|
image_rotation: import_zod.default.number(),
|
|
409
536
|
value: import_zod.default.object({
|
|
410
537
|
points: import_zod.default.array(import_zod.default.array(import_zod.default.number())),
|
|
411
|
-
closed: import_zod.default.boolean()
|
|
412
|
-
text: import_zod.default.array(import_zod.default.string())
|
|
538
|
+
closed: import_zod.default.boolean()
|
|
413
539
|
}),
|
|
414
540
|
id: import_zod.default.string(),
|
|
415
541
|
from_name: import_zod.default.string(),
|
|
@@ -448,6 +574,7 @@ var init_schema = __esm({
|
|
|
448
574
|
created_ago: import_zod.default.string(),
|
|
449
575
|
result: import_zod.default.array(
|
|
450
576
|
import_zod.default.union([
|
|
577
|
+
// Most specific rectangle variants first (with text or labels)
|
|
451
578
|
import_zod.default.object({
|
|
452
579
|
original_width: import_zod.default.number(),
|
|
453
580
|
original_height: import_zod.default.number(),
|
|
@@ -457,7 +584,8 @@ var init_schema = __esm({
|
|
|
457
584
|
y: import_zod.default.number(),
|
|
458
585
|
width: import_zod.default.number(),
|
|
459
586
|
height: import_zod.default.number(),
|
|
460
|
-
rotation: import_zod.default.number()
|
|
587
|
+
rotation: import_zod.default.number(),
|
|
588
|
+
text: import_zod.default.array(import_zod.default.string())
|
|
461
589
|
}),
|
|
462
590
|
id: import_zod.default.string(),
|
|
463
591
|
from_name: import_zod.default.string(),
|
|
@@ -483,6 +611,7 @@ var init_schema = __esm({
|
|
|
483
611
|
type: import_zod.default.string(),
|
|
484
612
|
origin: import_zod.default.string()
|
|
485
613
|
}),
|
|
614
|
+
// Base rectangle without text or labels
|
|
486
615
|
import_zod.default.object({
|
|
487
616
|
original_width: import_zod.default.number(),
|
|
488
617
|
original_height: import_zod.default.number(),
|
|
@@ -492,8 +621,7 @@ var init_schema = __esm({
|
|
|
492
621
|
y: import_zod.default.number(),
|
|
493
622
|
width: import_zod.default.number(),
|
|
494
623
|
height: import_zod.default.number(),
|
|
495
|
-
rotation: import_zod.default.number()
|
|
496
|
-
text: import_zod.default.array(import_zod.default.string())
|
|
624
|
+
rotation: import_zod.default.number()
|
|
497
625
|
}),
|
|
498
626
|
id: import_zod.default.string(),
|
|
499
627
|
from_name: import_zod.default.string(),
|
|
@@ -501,13 +629,15 @@ var init_schema = __esm({
|
|
|
501
629
|
type: import_zod.default.string(),
|
|
502
630
|
origin: import_zod.default.string()
|
|
503
631
|
}),
|
|
632
|
+
// Most specific polygon variants first (with text or labels)
|
|
504
633
|
import_zod.default.object({
|
|
505
634
|
original_width: import_zod.default.number(),
|
|
506
635
|
original_height: import_zod.default.number(),
|
|
507
636
|
image_rotation: import_zod.default.number(),
|
|
508
637
|
value: import_zod.default.object({
|
|
509
638
|
points: import_zod.default.array(import_zod.default.array(import_zod.default.number())),
|
|
510
|
-
closed: import_zod.default.boolean()
|
|
639
|
+
closed: import_zod.default.boolean(),
|
|
640
|
+
text: import_zod.default.array(import_zod.default.string())
|
|
511
641
|
}),
|
|
512
642
|
id: import_zod.default.string(),
|
|
513
643
|
from_name: import_zod.default.string(),
|
|
@@ -530,14 +660,14 @@ var init_schema = __esm({
|
|
|
530
660
|
type: import_zod.default.string(),
|
|
531
661
|
origin: import_zod.default.string()
|
|
532
662
|
}),
|
|
663
|
+
// Base polygon without text or labels
|
|
533
664
|
import_zod.default.object({
|
|
534
665
|
original_width: import_zod.default.number(),
|
|
535
666
|
original_height: import_zod.default.number(),
|
|
536
667
|
image_rotation: import_zod.default.number(),
|
|
537
668
|
value: import_zod.default.object({
|
|
538
669
|
points: import_zod.default.array(import_zod.default.array(import_zod.default.number())),
|
|
539
|
-
closed: import_zod.default.boolean()
|
|
540
|
-
text: import_zod.default.array(import_zod.default.string())
|
|
670
|
+
closed: import_zod.default.boolean()
|
|
541
671
|
}),
|
|
542
672
|
id: import_zod.default.string(),
|
|
543
673
|
from_name: import_zod.default.string(),
|
|
@@ -588,7 +718,7 @@ var init_schema = __esm({
|
|
|
588
718
|
original_width: import_zod.default.number(),
|
|
589
719
|
original_height: import_zod.default.number()
|
|
590
720
|
})
|
|
591
|
-
),
|
|
721
|
+
).optional().default([]),
|
|
592
722
|
label: import_zod.default.array(
|
|
593
723
|
import_zod.default.union([
|
|
594
724
|
import_zod.default.object({
|
|
@@ -609,8 +739,11 @@ var init_schema = __esm({
|
|
|
609
739
|
original_height: import_zod.default.number()
|
|
610
740
|
})
|
|
611
741
|
])
|
|
612
|
-
),
|
|
613
|
-
transcription: import_zod.default.array(import_zod.default.string())
|
|
742
|
+
).optional().default([]),
|
|
743
|
+
transcription: import_zod.default.union([import_zod.default.string(), import_zod.default.array(import_zod.default.string())]).optional().transform((val) => {
|
|
744
|
+
if (!val) return [];
|
|
745
|
+
return Array.isArray(val) ? val : [val];
|
|
746
|
+
}),
|
|
614
747
|
poly: import_zod.default.array(
|
|
615
748
|
import_zod.default.object({
|
|
616
749
|
points: import_zod.default.array(import_zod.default.array(import_zod.default.number())),
|
|
@@ -618,7 +751,7 @@ var init_schema = __esm({
|
|
|
618
751
|
original_width: import_zod.default.number(),
|
|
619
752
|
original_height: import_zod.default.number()
|
|
620
753
|
})
|
|
621
|
-
),
|
|
754
|
+
).optional().default([]),
|
|
622
755
|
annotator: import_zod.default.number(),
|
|
623
756
|
annotation_id: import_zod.default.number(),
|
|
624
757
|
created_at: import_zod.default.string(),
|
|
@@ -742,7 +875,11 @@ async function convertToLabelStudio(flags, ...inputDirs) {
|
|
|
742
875
|
fileListName = DEFAULT_FILE_LIST_NAME,
|
|
743
876
|
baseServerUrl = DEFAULT_BASE_SERVER_URL,
|
|
744
877
|
sortVertical = DEFAULT_SORT_VERTICAL,
|
|
745
|
-
sortHorizontal = DEFAULT_SORT_HORIZONTAL
|
|
878
|
+
sortHorizontal = DEFAULT_SORT_HORIZONTAL,
|
|
879
|
+
normalizeShape: normalizeShape2 = DEFAULT_SHAPE_NORMALIZE,
|
|
880
|
+
widthIncrement = DEFAULT_WIDTH_INCREMENT,
|
|
881
|
+
heightIncrement = DEFAULT_HEIGHT_INCREMENT,
|
|
882
|
+
precision = DEFAULT_LABEL_STUDIO_PRECISION
|
|
746
883
|
} = flags;
|
|
747
884
|
const newBaseServerUrl = baseServerUrl.replace(/\/+$/, "") + (baseServerUrl === "" ? "" : "/");
|
|
748
885
|
await (0, import_promises.mkdir)(outDir, { recursive: true });
|
|
@@ -785,7 +922,11 @@ async function convertToLabelStudio(flags, ...inputDirs) {
|
|
|
785
922
|
baseServerUrl: newBaseServerUrl,
|
|
786
923
|
inputDir,
|
|
787
924
|
taskId,
|
|
788
|
-
labelName: defaultLabelName
|
|
925
|
+
labelName: defaultLabelName,
|
|
926
|
+
normalizeShape: normalizeShape2 !== SHAPE_NORMALIZE_NONE ? normalizeShape2 : void 0,
|
|
927
|
+
widthIncrement,
|
|
928
|
+
heightIncrement,
|
|
929
|
+
precision
|
|
789
930
|
});
|
|
790
931
|
if (toFullJson) {
|
|
791
932
|
allLabelStudioData.push(labelStudioData[0]);
|
|
@@ -870,7 +1011,15 @@ var init_label_studio = __esm({
|
|
|
870
1011
|
"use strict";
|
|
871
1012
|
init_cjs_shims();
|
|
872
1013
|
turf = __toESM(require("@turf/turf"), 1);
|
|
873
|
-
|
|
1014
|
+
init_geometry();
|
|
1015
|
+
labelStudioToPPOCR = async (data, options) => {
|
|
1016
|
+
const {
|
|
1017
|
+
baseImageDir,
|
|
1018
|
+
normalizeShape: normalizeShape2,
|
|
1019
|
+
widthIncrement = 0,
|
|
1020
|
+
heightIncrement = 0,
|
|
1021
|
+
precision = 0
|
|
1022
|
+
} = options || {};
|
|
874
1023
|
const resultMap = /* @__PURE__ */ new Map();
|
|
875
1024
|
for (const task of data) {
|
|
876
1025
|
let imagePath = task.file_upload || "";
|
|
@@ -921,6 +1070,12 @@ var init_label_studio = __esm({
|
|
|
921
1070
|
}
|
|
922
1071
|
}
|
|
923
1072
|
if (points && points.length > 0) {
|
|
1073
|
+
points = transformPoints(points, {
|
|
1074
|
+
normalizeShape: normalizeShape2,
|
|
1075
|
+
widthIncrement,
|
|
1076
|
+
heightIncrement
|
|
1077
|
+
});
|
|
1078
|
+
points = roundPoints(points, precision);
|
|
924
1079
|
let dt_score = 1;
|
|
925
1080
|
try {
|
|
926
1081
|
const firstPoint = points[0];
|
|
@@ -946,7 +1101,14 @@ var init_label_studio = __esm({
|
|
|
946
1101
|
}
|
|
947
1102
|
return resultMap;
|
|
948
1103
|
};
|
|
949
|
-
minLabelStudioToPPOCR = async (data, baseImageDir) => {
|
|
1104
|
+
minLabelStudioToPPOCR = async (data, options) => {
|
|
1105
|
+
const {
|
|
1106
|
+
baseImageDir,
|
|
1107
|
+
normalizeShape: normalizeShape2,
|
|
1108
|
+
widthIncrement = 0,
|
|
1109
|
+
heightIncrement = 0,
|
|
1110
|
+
precision = 0
|
|
1111
|
+
} = options || {};
|
|
950
1112
|
const resultMap = /* @__PURE__ */ new Map();
|
|
951
1113
|
for (const item of data) {
|
|
952
1114
|
let imagePath = item.ocr || "";
|
|
@@ -958,43 +1120,62 @@ var init_label_studio = __esm({
|
|
|
958
1120
|
if (baseImageDir) {
|
|
959
1121
|
imagePath = `${baseImageDir}/${imagePath.split("/").pop() || imagePath}`;
|
|
960
1122
|
}
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
[
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
1123
|
+
const numAnnotations = Math.max(
|
|
1124
|
+
item.poly?.length || 0,
|
|
1125
|
+
item.bbox?.length || 0,
|
|
1126
|
+
item.transcription?.length || 0
|
|
1127
|
+
);
|
|
1128
|
+
for (let i = 0; i < numAnnotations; i++) {
|
|
1129
|
+
let points;
|
|
1130
|
+
if (item.poly && item.poly.length > i && item.poly[i]) {
|
|
1131
|
+
const poly = item.poly[i];
|
|
1132
|
+
if (poly) {
|
|
1133
|
+
const { points: polyPoints } = poly;
|
|
1134
|
+
points = polyPoints;
|
|
1135
|
+
}
|
|
1136
|
+
} else if (item.bbox && item.bbox.length > i && item.bbox[i]) {
|
|
1137
|
+
const bbox = item.bbox[i];
|
|
1138
|
+
if (bbox) {
|
|
1139
|
+
const { x, y, width, height } = bbox;
|
|
1140
|
+
points = [
|
|
1141
|
+
[x, y],
|
|
1142
|
+
[x + width, y],
|
|
1143
|
+
[x + width, y + height],
|
|
1144
|
+
[x, y + height]
|
|
1145
|
+
];
|
|
1146
|
+
}
|
|
985
1147
|
}
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
1148
|
+
if (!points) {
|
|
1149
|
+
continue;
|
|
1150
|
+
}
|
|
1151
|
+
points = transformPoints(points, {
|
|
1152
|
+
normalizeShape: normalizeShape2,
|
|
1153
|
+
widthIncrement,
|
|
1154
|
+
heightIncrement
|
|
1155
|
+
});
|
|
1156
|
+
points = roundPoints(points, precision);
|
|
1157
|
+
const transcription = item.transcription && item.transcription.length > i ? item.transcription[i] : "";
|
|
1158
|
+
let dt_score = 1;
|
|
1159
|
+
try {
|
|
1160
|
+
const firstPoint = points[0];
|
|
1161
|
+
if (firstPoint) {
|
|
1162
|
+
const polygon2 = turf.polygon([points.concat([firstPoint])]);
|
|
1163
|
+
const area2 = turf.area(polygon2);
|
|
1164
|
+
dt_score = Math.min(1, Math.max(0.5, area2 / 1e4));
|
|
1165
|
+
}
|
|
1166
|
+
} catch {
|
|
1167
|
+
dt_score = 0.8;
|
|
1168
|
+
}
|
|
1169
|
+
const annotation = {
|
|
1170
|
+
transcription: transcription ?? "",
|
|
1171
|
+
points,
|
|
1172
|
+
dt_score
|
|
1173
|
+
};
|
|
1174
|
+
if (!resultMap.has(imagePath)) {
|
|
1175
|
+
resultMap.set(imagePath, []);
|
|
1176
|
+
}
|
|
1177
|
+
resultMap.get(imagePath).push(annotation);
|
|
996
1178
|
}
|
|
997
|
-
resultMap.get(imagePath).push(annotation);
|
|
998
1179
|
}
|
|
999
1180
|
return resultMap;
|
|
1000
1181
|
};
|
|
@@ -1012,7 +1193,11 @@ async function convertToPPOCR(flags, ...inputDirs) {
|
|
|
1012
1193
|
fileName = DEFAULT_PPOCR_FILE_NAME,
|
|
1013
1194
|
baseImageDir,
|
|
1014
1195
|
sortVertical = DEFAULT_SORT_VERTICAL,
|
|
1015
|
-
sortHorizontal = DEFAULT_SORT_HORIZONTAL
|
|
1196
|
+
sortHorizontal = DEFAULT_SORT_HORIZONTAL,
|
|
1197
|
+
normalizeShape: normalizeShape2 = DEFAULT_SHAPE_NORMALIZE,
|
|
1198
|
+
widthIncrement = DEFAULT_WIDTH_INCREMENT,
|
|
1199
|
+
heightIncrement = DEFAULT_HEIGHT_INCREMENT,
|
|
1200
|
+
precision = DEFAULT_PPOCR_PRECISION
|
|
1016
1201
|
} = flags;
|
|
1017
1202
|
await (0, import_promises2.mkdir)(outDir, { recursive: true });
|
|
1018
1203
|
for (const inputDir of inputDirs) {
|
|
@@ -1028,10 +1213,19 @@ async function convertToPPOCR(flags, ...inputDirs) {
|
|
|
1028
1213
|
const fileData = await (0, import_promises2.readFile)(filePath, "utf-8");
|
|
1029
1214
|
const labelStudioData = JSON.parse(fileData);
|
|
1030
1215
|
const { data, isFull } = isLabelStudioFullJSON(labelStudioData);
|
|
1031
|
-
const ppocrDataMap = isFull ? await labelStudioToPPOCR(data,
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1216
|
+
const ppocrDataMap = isFull ? await labelStudioToPPOCR(data, {
|
|
1217
|
+
baseImageDir,
|
|
1218
|
+
normalizeShape: normalizeShape2 !== SHAPE_NORMALIZE_NONE ? normalizeShape2 : void 0,
|
|
1219
|
+
widthIncrement,
|
|
1220
|
+
heightIncrement,
|
|
1221
|
+
precision
|
|
1222
|
+
}) : await minLabelStudioToPPOCR(data, {
|
|
1223
|
+
baseImageDir,
|
|
1224
|
+
normalizeShape: normalizeShape2 !== SHAPE_NORMALIZE_NONE ? normalizeShape2 : void 0,
|
|
1225
|
+
widthIncrement,
|
|
1226
|
+
heightIncrement,
|
|
1227
|
+
precision
|
|
1228
|
+
});
|
|
1035
1229
|
const outputLines = [];
|
|
1036
1230
|
for (const [imagePath, annotations] of ppocrDataMap.entries()) {
|
|
1037
1231
|
const sortedAnnotations = sortBoundingBoxes(
|
|
@@ -1099,7 +1293,7 @@ var import_auto_complete = require("@stricli/auto-complete");
|
|
|
1099
1293
|
var import_core3 = require("@stricli/core");
|
|
1100
1294
|
|
|
1101
1295
|
// package.json
|
|
1102
|
-
var version = "1.0.0";
|
|
1296
|
+
var version = "1.1.0";
|
|
1103
1297
|
var description = "Convert between Label Studio OCR format and PPOCRLabelv2 format";
|
|
1104
1298
|
|
|
1105
1299
|
// src/commands/toLabelStudio/command.ts
|
|
@@ -1171,6 +1365,30 @@ var toLabelStudioCommand = (0, import_core.buildCommand)({
|
|
|
1171
1365
|
brief: `Sort bounding boxes horizontally. Options: "${SORT_HORIZONTAL_NONE}" (default), "${SORT_HORIZONTAL_LTR}", "${SORT_HORIZONTAL_RTL}"`,
|
|
1172
1366
|
parse: String,
|
|
1173
1367
|
optional: true
|
|
1368
|
+
},
|
|
1369
|
+
normalizeShape: {
|
|
1370
|
+
kind: "parsed",
|
|
1371
|
+
brief: `Normalize diamond-like shapes to axis-aligned rectangles. Options: "${SHAPE_NORMALIZE_NONE}" (default), "${SHAPE_NORMALIZE_RECTANGLE}"`,
|
|
1372
|
+
parse: String,
|
|
1373
|
+
optional: true
|
|
1374
|
+
},
|
|
1375
|
+
widthIncrement: {
|
|
1376
|
+
kind: "parsed",
|
|
1377
|
+
brief: `Increase bounding box width by this amount (in pixels). Can be negative to decrease. Default: ${DEFAULT_WIDTH_INCREMENT}`,
|
|
1378
|
+
parse: Number,
|
|
1379
|
+
optional: true
|
|
1380
|
+
},
|
|
1381
|
+
heightIncrement: {
|
|
1382
|
+
kind: "parsed",
|
|
1383
|
+
brief: `Increase bounding box height by this amount (in pixels). Can be negative to decrease. Default: ${DEFAULT_HEIGHT_INCREMENT}`,
|
|
1384
|
+
parse: Number,
|
|
1385
|
+
optional: true
|
|
1386
|
+
},
|
|
1387
|
+
precision: {
|
|
1388
|
+
kind: "parsed",
|
|
1389
|
+
brief: `Number of decimal places for coordinates. Use -1 for full precision (no rounding). Default: ${DEFAULT_LABEL_STUDIO_PRECISION}`,
|
|
1390
|
+
parse: Number,
|
|
1391
|
+
optional: true
|
|
1174
1392
|
}
|
|
1175
1393
|
}
|
|
1176
1394
|
},
|
|
@@ -1227,6 +1445,30 @@ var toPPOCRCommand = (0, import_core2.buildCommand)({
|
|
|
1227
1445
|
brief: `Sort bounding boxes horizontally. Options: "${SORT_HORIZONTAL_NONE}" (default), "${SORT_HORIZONTAL_LTR}", "${SORT_HORIZONTAL_RTL}"`,
|
|
1228
1446
|
parse: String,
|
|
1229
1447
|
optional: true
|
|
1448
|
+
},
|
|
1449
|
+
normalizeShape: {
|
|
1450
|
+
kind: "parsed",
|
|
1451
|
+
brief: `Normalize diamond-like shapes to axis-aligned rectangles. Options: "${SHAPE_NORMALIZE_NONE}" (default), "${SHAPE_NORMALIZE_RECTANGLE}"`,
|
|
1452
|
+
parse: String,
|
|
1453
|
+
optional: true
|
|
1454
|
+
},
|
|
1455
|
+
widthIncrement: {
|
|
1456
|
+
kind: "parsed",
|
|
1457
|
+
brief: `Increase bounding box width by this amount (in pixels). Can be negative to decrease. Default: ${DEFAULT_WIDTH_INCREMENT}`,
|
|
1458
|
+
parse: Number,
|
|
1459
|
+
optional: true
|
|
1460
|
+
},
|
|
1461
|
+
heightIncrement: {
|
|
1462
|
+
kind: "parsed",
|
|
1463
|
+
brief: `Increase bounding box height by this amount (in pixels). Can be negative to decrease. Default: ${DEFAULT_HEIGHT_INCREMENT}`,
|
|
1464
|
+
parse: Number,
|
|
1465
|
+
optional: true
|
|
1466
|
+
},
|
|
1467
|
+
precision: {
|
|
1468
|
+
kind: "parsed",
|
|
1469
|
+
brief: `Number of decimal places for coordinates. Use -1 for full precision (no rounding). Default: ${DEFAULT_PPOCR_PRECISION} (integers)`,
|
|
1470
|
+
parse: Number,
|
|
1471
|
+
optional: true
|
|
1230
1472
|
}
|
|
1231
1473
|
}
|
|
1232
1474
|
},
|