label-studio-converter 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +721 -258
- package/dist/bash-complete.cjs +1142 -629
- package/dist/bash-complete.cjs.map +1 -1
- package/dist/bash-complete.js +1153 -641
- package/dist/bash-complete.js.map +1 -1
- package/dist/cli.cjs +1141 -628
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +1153 -641
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +666 -8
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +110 -6
- package/dist/index.d.ts +110 -6
- package/dist/index.js +651 -7
- package/dist/index.js.map +1 -1
- package/package.json +6 -3
package/dist/cli.cjs
CHANGED
|
@@ -155,273 +155,482 @@ var init_geometry = __esm({
|
|
|
155
155
|
}
|
|
156
156
|
});
|
|
157
157
|
|
|
158
|
-
// src/lib/
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
158
|
+
// src/lib/sort.ts
|
|
159
|
+
/**
 * Computes the axis-aligned bounding box of a point list and returns its
 * center together with its width and height.
 *
 * Points whose x or y is `undefined` are ignored. When no usable point is
 * present (empty, missing, or all-incomplete list) an all-zero box is returned;
 * without that guard the Infinity sentinels would produce `NaN` centers and
 * `-Infinity` sizes, which make any comparator built on them inconsistent.
 *
 * @param {Array<Array<number>>} points - `[x, y]` pairs.
 * @returns {{x: number, y: number, width: number, height: number}} Center
 *   coordinates and extent of the bounding box.
 */
function getBoundingBoxCenter(points) {
  let minX = Infinity;
  let minY = Infinity;
  let maxX = -Infinity;
  let maxY = -Infinity;
  let hasPoint = false;

  for (const [x, y] of points ?? []) {
    if (x === undefined || y === undefined) {
      continue;
    }
    hasPoint = true;
    minX = Math.min(minX, x);
    minY = Math.min(minY, y);
    maxX = Math.max(maxX, x);
    maxY = Math.max(maxY, y);
  }

  if (!hasPoint) {
    // Nothing to measure: report a degenerate box at the origin instead of NaN/-Infinity.
    return { x: 0, y: 0, width: 0, height: 0 };
  }

  return {
    x: (minX + maxX) / 2,
    y: (minY + maxY) / 2,
    width: maxX - minX,
    height: maxY - minY
  };
}
|
|
179
|
+
/**
 * Orders annotations into reading order using the centers of their bounding boxes.
 *
 * - When both directions equal their `*_NONE` constant the input array itself is
 *   returned, unsorted and uncopied.
 * - When the horizontal direction is right-to-left, a vertical direction is set and
 *   more than half of the boxes are tall (height > 1.5 x width), boxes are clustered
 *   into columns by center x (within GROUPING_TOLERANCE of the column's running
 *   mean), columns are ordered right to left, and each column is ordered by center y.
 * - Otherwise boxes are ordered by center y; two boxes whose y distance does not
 *   exceed GROUPING_TOLERANCE are treated as one row and ordered by center x.
 *
 * Every box is measured exactly once up front; the comparators only read the cached
 * centers instead of recomputing the bounding box on each comparison.
 *
 * NOTE(review): the row comparator treats "within tolerance" as equal, which is not
 * a transitive relation, so chains of slightly offset boxes may not sort as expected.
 *
 * @param {Array<{points: Array<Array<number>>}>} annotations - Annotations to order.
 * @param {string} verticalSort - One of the SORT_VERTICAL_* constants.
 * @param {string} horizontalSort - One of the SORT_HORIZONTAL_* constants.
 * @returns {Array} A new array in reading order (or `annotations` itself when no
 *   sorting is requested).
 */
function sortBoundingBoxes(annotations, verticalSort, horizontalSort) {
  if (verticalSort === SORT_VERTICAL_NONE && horizontalSort === SORT_HORIZONTAL_NONE) {
    return annotations;
  }

  // Decorate: pair each annotation with its measured box so it is computed once.
  const entries = Array.from(annotations, (annotation) => ({
    annotation,
    center: getBoundingBoxCenter(annotation.points)
  }));
  const topToBottom = verticalSort === SORT_VERTICAL_TOP_BOTTOM;
  const compareY = (a, b) => (topToBottom ? a.center.y - b.center.y : b.center.y - a.center.y);

  const tallCount = entries.filter(({ center }) => center.height > center.width * 1.5).length;
  const isVerticalText = entries.length > 0 && tallCount > entries.length / 2;

  if (horizontalSort === SORT_HORIZONTAL_RTL && verticalSort !== SORT_VERTICAL_NONE && isVerticalText) {
    // Each column keeps a running sum of center x so its mean is available in O(1).
    const columns = [];
    for (const entry of entries) {
      const column = columns.find(
        (candidate) => Math.abs(entry.center.x - candidate.sumX / candidate.items.length) < GROUPING_TOLERANCE
      );
      if (column) {
        column.items.push(entry);
        column.sumX += entry.center.x;
      } else {
        columns.push({ items: [entry], sumX: entry.center.x });
      }
    }
    // Right-most column first.
    columns.sort((a, b) => b.sumX / b.items.length - a.sumX / a.items.length);
    return columns.flatMap((column) => column.items.sort(compareY).map((entry) => entry.annotation));
  }

  entries.sort((a, b) => {
    if (verticalSort !== SORT_VERTICAL_NONE) {
      const yDiff = compareY(a, b);
      if (Math.abs(yDiff) > GROUPING_TOLERANCE) {
        return yDiff;
      }
    }
    if (horizontalSort !== SORT_HORIZONTAL_NONE) {
      return horizontalSort === SORT_HORIZONTAL_LTR ? a.center.x - b.center.x : b.center.x - a.center.x;
    }
    return 0;
  });
  return entries.map((entry) => entry.annotation);
}
|
|
239
|
+
var GROUPING_TOLERANCE;
|
|
240
|
+
var init_sort = __esm({
|
|
241
|
+
"src/lib/sort.ts"() {
|
|
162
242
|
"use strict";
|
|
163
243
|
init_cjs_shims();
|
|
164
|
-
import_node_crypto = require("crypto");
|
|
165
|
-
import_node_fs = require("fs");
|
|
166
|
-
import_node_path = require("path");
|
|
167
|
-
import_image_size = __toESM(require("image-size"), 1);
|
|
168
244
|
init_constants();
|
|
245
|
+
GROUPING_TOLERANCE = 50;
|
|
246
|
+
}
|
|
247
|
+
});
|
|
248
|
+
|
|
249
|
+
// src/lib/enhance.ts
|
|
250
|
+
/**
 * Optionally reorders and reshapes a list of PPOCR annotations.
 *
 * Sorting runs only when BOTH `sortVertical` and `sortHorizontal` are truthy.
 * Points are passed through `transformPoints` and then `roundPoints` only when a
 * shape normalization is requested or a width/height increment is non-zero; note
 * that `precision` alone therefore does not trigger rounding. When neither step
 * applies the input array is returned as is.
 *
 * `options` may be omitted (treated as `{}`), matching the sibling converters that
 * read `options || {}`; previously a missing argument threw during destructuring.
 *
 * @param {Array<object>} data - Annotations, each carrying a `points` array.
 * @param {object} [options]
 * @param {string} [options.sortVertical] - Vertical ordering mode.
 * @param {string} [options.sortHorizontal] - Horizontal ordering mode.
 * @param {string} [options.normalizeShape] - Shape normalization mode.
 * @param {number} [options.widthIncrement=0]
 * @param {number} [options.heightIncrement=0]
 * @param {number} [options.precision=0] - Forwarded to `roundPoints`.
 * @returns {Array<object>} The enhanced annotations; reshaped entries are shallow
 *   copies with a replaced `points` field.
 */
function enhancePPOCRLabel(data, options) {
  const {
    sortVertical,
    sortHorizontal,
    normalizeShape: shapeMode,
    widthIncrement = 0,
    heightIncrement = 0,
    precision = 0
  } = options || {};

  const ordered = sortVertical && sortHorizontal
    ? sortBoundingBoxes(data, sortVertical, sortHorizontal)
    : data;

  const reshapes = Boolean(shapeMode) || widthIncrement !== 0 || heightIncrement !== 0;
  if (!reshapes) {
    return ordered;
  }

  return ordered.map((annotation) => ({
    ...annotation,
    points: roundPoints(
      transformPoints(annotation.points, {
        normalizeShape: shapeMode,
        widthIncrement,
        heightIncrement
      }),
      precision
    )
  }));
}
|
|
279
|
+
var init_enhance = __esm({
|
|
280
|
+
"src/lib/enhance.ts"() {
|
|
281
|
+
"use strict";
|
|
282
|
+
init_cjs_shims();
|
|
169
283
|
init_geometry();
|
|
170
|
-
|
|
284
|
+
init_sort();
|
|
285
|
+
}
|
|
286
|
+
});
|
|
287
|
+
|
|
288
|
+
// src/lib/label-studio.ts
|
|
289
|
+
var turf, labelStudioToPPOCR, minLabelStudioToPPOCR, enhanceLabelStudioData;
|
|
290
|
+
var init_label_studio = __esm({
|
|
291
|
+
"src/lib/label-studio.ts"() {
|
|
292
|
+
"use strict";
|
|
293
|
+
init_cjs_shims();
|
|
294
|
+
turf = __toESM(require("@turf/turf"), 1);
|
|
295
|
+
init_enhance();
|
|
296
|
+
init_geometry();
|
|
297
|
+
/**
 * Converts Label Studio "full" export tasks into PPOCR annotations grouped by
 * image path.
 *
 * Image path: `task.data.ocr` with any leading `http(s)://host/` removed and
 * URI-decoded, falling back to `task.file_upload`; when `baseImageDir` is given the
 * path becomes `<baseImageDir>/<file_upload or basename>`.
 *
 * Result items are grouped by their `id`; within a group the last item carrying
 * geometry supplies the points (polygon percentages or an x/y/width/height
 * rectangle, both scaled by `original_width`/`original_height`) and the last item
 * carrying a `text` array supplies the transcription (its first entry).
 *
 * Annotations of several tasks that resolve to the same image path are appended to
 * one list, consistent with `minLabelStudioToPPOCR`; previously a later task
 * silently replaced the annotations of an earlier one.
 *
 * NOTE(review): `dt_score` is derived from `turf.area`, which is documented for
 * geographic (lon/lat) input, while the points here are pixel coordinates — confirm
 * the score is meaningful.
 *
 * @param {Array<object>} data - Label Studio tasks (`file_upload`, `data.ocr`,
 *   `annotations[].result[]`).
 * @param {object} [options]
 * @param {string} [options.baseImageDir]
 * @param {string} [options.normalizeShape]
 * @param {number} [options.widthIncrement=0]
 * @param {number} [options.heightIncrement=0]
 * @param {number} [options.precision=0]
 * @returns {Promise<Map<string, Array<{transcription: string, points: Array, dt_score: number}>>>}
 */
labelStudioToPPOCR = async (data, options) => {
  const {
    baseImageDir,
    normalizeShape: normalizeShape2,
    widthIncrement = 0,
    heightIncrement = 0,
    precision = 0
  } = options || {};
  const resultMap = new Map();

  for (const task of data) {
    let imagePath = task.file_upload || "";
    if (task.data.ocr) {
      const urlPath = task.data.ocr.replace(/^https?:\/\/[^/]+\//, "");
      imagePath = decodeURIComponent(urlPath);
    }
    if (baseImageDir) {
      imagePath = `${baseImageDir}/${task.file_upload || imagePath.split("/").pop() || imagePath}`;
    }

    const imageAnnotations = [];
    for (const annotation of task.annotations) {
      // Result items that describe the same region share one id.
      const groupedById = new Map();
      for (const resultItem of annotation.result) {
        const group = groupedById.get(resultItem.id);
        if (group) {
          group.push(resultItem);
        } else {
          groupedById.set(resultItem.id, [resultItem]);
        }
      }

      for (const resultItems of groupedById.values()) {
        let points;
        let transcription = "";
        for (const resultItem of resultItems) {
          const { value, original_width, original_height } = resultItem;
          if ("points" in value && value.points) {
            // Polygon: percentages -> pixels.
            points = value.points.map(([x, y]) => [
              (x ?? 0) * original_width / 100,
              (y ?? 0) * original_height / 100
            ]);
          } else if ("x" in value && "y" in value && "width" in value && "height" in value) {
            // Rectangle: percentages -> four pixel corners, clockwise from top-left.
            const absX = value.x * original_width / 100;
            const absY = value.y * original_height / 100;
            const absWidth = value.width * original_width / 100;
            const absHeight = value.height * original_height / 100;
            points = [
              [absX, absY],
              [absX + absWidth, absY],
              [absX + absWidth, absY + absHeight],
              [absX, absY + absHeight]
            ];
          }
          if ("text" in value && Array.isArray(value.text)) {
            transcription = value.text[0] || "";
          }
        }

        if (!points || points.length === 0) {
          continue;
        }
        points = transformPoints(points, {
          normalizeShape: normalizeShape2,
          widthIncrement,
          heightIncrement
        });
        points = roundPoints(points, precision);

        let dt_score = 1;
        try {
          const firstPoint = points[0];
          if (firstPoint) {
            // Close the ring before handing it to turf.
            const polygon2 = turf.polygon([points.concat([firstPoint])]);
            const area2 = turf.area(polygon2);
            dt_score = Math.min(1, Math.max(0.5, area2 / 1e4));
          }
        } catch {
          // Invalid polygon (e.g. too few points): fall back to a fixed score.
          dt_score = 0.8;
        }
        imageAnnotations.push({ transcription, points, dt_score });
      }
    }

    if (imageAnnotations.length > 0) {
      // Merge instead of overwrite when another task already produced this path.
      const existing = resultMap.get(imagePath);
      if (existing) {
        existing.push(...imageAnnotations);
      } else {
        resultMap.set(imagePath, imageAnnotations);
      }
    }
  }
  return resultMap;
};
|
|
210
|
-
|
|
211
|
-
const
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
const
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
}
|
|
226
|
-
original_width = dimensions.width;
|
|
227
|
-
original_height = dimensions.height;
|
|
228
|
-
const fileName = imagePath.split("/").pop() || imagePath;
|
|
229
|
-
const result = [
|
|
230
|
-
{
|
|
231
|
-
id: taskId,
|
|
232
|
-
annotations: [
|
|
233
|
-
{
|
|
234
|
-
id: taskId,
|
|
235
|
-
completed_by: 1,
|
|
236
|
-
result: data.map((item) => {
|
|
237
|
-
let { points } = item;
|
|
238
|
-
points = transformPoints(points, {
|
|
239
|
-
normalizeShape: normalizeShape2,
|
|
240
|
-
widthIncrement,
|
|
241
|
-
heightIncrement
|
|
242
|
-
});
|
|
243
|
-
const annotationId = (0, import_node_crypto.randomUUID)().slice(0, 10);
|
|
244
|
-
const polygonPoints = points.map(([x, y]) => [
|
|
245
|
-
roundToPrecision((x ?? 0) / original_width * 100, precision),
|
|
246
|
-
roundToPrecision((y ?? 0) / original_height * 100, precision)
|
|
247
|
-
]);
|
|
248
|
-
return [
|
|
249
|
-
// 1. Polygon geometry only
|
|
250
|
-
{
|
|
251
|
-
original_width,
|
|
252
|
-
original_height,
|
|
253
|
-
image_rotation: 0,
|
|
254
|
-
value: {
|
|
255
|
-
points: polygonPoints,
|
|
256
|
-
closed: true
|
|
257
|
-
},
|
|
258
|
-
id: annotationId,
|
|
259
|
-
from_name: "poly",
|
|
260
|
-
to_name: "image",
|
|
261
|
-
type: "polygon",
|
|
262
|
-
origin: "manual"
|
|
263
|
-
},
|
|
264
|
-
// 2. Labels with polygon geometry
|
|
265
|
-
{
|
|
266
|
-
original_width,
|
|
267
|
-
original_height,
|
|
268
|
-
image_rotation: 0,
|
|
269
|
-
value: {
|
|
270
|
-
points: polygonPoints,
|
|
271
|
-
closed: true,
|
|
272
|
-
labels: [labelName]
|
|
273
|
-
},
|
|
274
|
-
id: annotationId,
|
|
275
|
-
from_name: "label",
|
|
276
|
-
to_name: "image",
|
|
277
|
-
type: "labels",
|
|
278
|
-
origin: "manual"
|
|
279
|
-
},
|
|
280
|
-
// 3. Textarea with polygon geometry and text
|
|
281
|
-
{
|
|
282
|
-
original_width,
|
|
283
|
-
original_height,
|
|
284
|
-
image_rotation: 0,
|
|
285
|
-
value: {
|
|
286
|
-
points: polygonPoints,
|
|
287
|
-
closed: true,
|
|
288
|
-
text: [item.transcription]
|
|
289
|
-
},
|
|
290
|
-
id: annotationId,
|
|
291
|
-
from_name: "transcription",
|
|
292
|
-
to_name: "image",
|
|
293
|
-
type: "textarea",
|
|
294
|
-
origin: "manual"
|
|
295
|
-
}
|
|
296
|
-
];
|
|
297
|
-
}).flat(),
|
|
298
|
-
was_cancelled: false,
|
|
299
|
-
ground_truth: false,
|
|
300
|
-
created_at: now,
|
|
301
|
-
updated_at: now,
|
|
302
|
-
draft_created_at: now,
|
|
303
|
-
lead_time: 0,
|
|
304
|
-
prediction: {},
|
|
305
|
-
result_count: data.length * 3,
|
|
306
|
-
unique_id: (0, import_node_crypto.randomUUID)(),
|
|
307
|
-
import_id: null,
|
|
308
|
-
last_action: null,
|
|
309
|
-
bulk_created: false,
|
|
310
|
-
task: taskId,
|
|
311
|
-
project: 1,
|
|
312
|
-
updated_by: 1,
|
|
313
|
-
parent_prediction: null,
|
|
314
|
-
parent_annotation: null,
|
|
315
|
-
last_created_by: null
|
|
316
|
-
}
|
|
317
|
-
],
|
|
318
|
-
file_upload: fileName,
|
|
319
|
-
drafts: [],
|
|
320
|
-
predictions: [],
|
|
321
|
-
data: { ocr: `${newBaseServerUrl}${imagePath}` },
|
|
322
|
-
meta: {},
|
|
323
|
-
created_at: now,
|
|
324
|
-
updated_at: now,
|
|
325
|
-
allow_skip: false,
|
|
326
|
-
inner_id: taskId,
|
|
327
|
-
total_annotations: 1,
|
|
328
|
-
cancelled_annotations: 0,
|
|
329
|
-
total_predictions: 0,
|
|
330
|
-
comment_count: 0,
|
|
331
|
-
unresolved_comment_count: 0,
|
|
332
|
-
last_comment_updated_at: null,
|
|
333
|
-
project: 1,
|
|
334
|
-
updated_by: 1,
|
|
335
|
-
comment_authors: []
|
|
386
|
+
/**
 * Converts Label Studio "min" export rows into PPOCR annotations keyed by image path.
 *
 * Each row may carry parallel `poly`, `bbox` and `transcription` arrays. For every
 * index up to the longest of the three, the polygon entry is preferred, the bbox
 * entry (expanded to four corners) is the fallback, and indices with neither are
 * skipped. Coordinates are used as stored in the row — no percentage scaling is
 * applied here. Points go through `transformPoints` and `roundPoints`; the score is
 * derived from `turf.area` (0.8 when turf throws).
 *
 * @param {Array<object>} data - Rows with `ocr`, and optional `poly`, `bbox`,
 *   `transcription`.
 * @param {object} [options] - `baseImageDir`, `normalizeShape`, `widthIncrement`,
 *   `heightIncrement`, `precision`.
 * @returns {Promise<Map<string, Array<{transcription: string, points: Array, dt_score: number}>>>}
 */
minLabelStudioToPPOCR = async (data, options) => {
  const {
    baseImageDir,
    normalizeShape: shapeMode,
    widthIncrement = 0,
    heightIncrement = 0,
    precision = 0
  } = options || {};

  // Map key for a row: host-stripped, URI-decoded `ocr`, optionally re-rooted.
  const resolveImagePath = (row) => {
    let path = row.ocr || "";
    if (path) {
      path = decodeURIComponent(path.replace(/^https?:\/\/[^/]+\//, ""));
    }
    if (!baseImageDir) {
      return path;
    }
    return `${baseImageDir}/${path.split("/").pop() || path}`;
  };

  // Geometry for one index: polygon entry first, bbox entry second, else undefined.
  const pointsAt = (row, index) => {
    const polygon = row.poly && row.poly.length > index && row.poly[index];
    if (polygon) {
      return polygon.points;
    }
    const box = row.bbox && row.bbox.length > index && row.bbox[index];
    if (!box) {
      return void 0;
    }
    const { x, y, width, height } = box;
    return [
      [x, y],
      [x + width, y],
      [x + width, y + height],
      [x, y + height]
    ];
  };

  // Area-based score clamped to [0.5, 1]; 0.8 when the polygon cannot be built.
  const scoreFor = (points) => {
    try {
      const firstPoint = points[0];
      if (!firstPoint) {
        return 1;
      }
      const ring = points.concat([firstPoint]);
      return Math.min(1, Math.max(0.5, turf.area(turf.polygon([ring])) / 1e4));
    } catch {
      return 0.8;
    }
  };

  const resultMap = new Map();
  for (const row of data) {
    const imagePath = resolveImagePath(row);
    const total = Math.max(
      row.poly?.length || 0,
      row.bbox?.length || 0,
      row.transcription?.length || 0
    );
    for (let index = 0; index < total; index += 1) {
      const rawPoints = pointsAt(row, index);
      if (!rawPoints) {
        continue;
      }
      const points = roundPoints(
        transformPoints(rawPoints, {
          normalizeShape: shapeMode,
          widthIncrement,
          heightIncrement
        }),
        precision
      );
      const text = row.transcription && row.transcription.length > index ? row.transcription[index] : "";
      const entry = {
        transcription: text ?? "",
        points,
        dt_score: scoreFor(points)
      };
      const bucket = resultMap.get(imagePath);
      if (bucket) {
        bucket.push(entry);
      } else {
        resultMap.set(imagePath, [entry]);
      }
    }
  }
  return resultMap;
};
|
|
464
|
+
/**
 * Runs `enhancePPOCRLabel` over the geometry inside Label Studio export data and
 * writes the result back in Label Studio form.
 *
 * Full export (`isFull` truthy): result items are regrouped by `id` (groups in
 * first-seen order). Every item that carries geometry — a `points` polygon or an
 * x/y/width/height rectangle, both in percent — is converted to pixels, enhanced
 * on its own, and converted back to percent; items without geometry pass through
 * untouched. Enhancing item by item keeps each result item paired with its own
 * geometry; the previous index-based pairing broke as soon as a group contained an
 * item without geometry. Because each call sees one annotation, the sort options
 * cannot reorder anything in this mode.
 *
 * Min export: for each row the `poly`/`bbox` entries are collected (polygon
 * preferred per index), enhanced together, and written back as `poly` (points
 * only) with `bbox` removed. When the row has a `transcription` array it is
 * rewritten in the same order as the enhanced polygons — so text stays attached to
 * its box after sorting — followed by any transcriptions that had no geometry.
 * Rows without geometry are returned unchanged.
 *
 * @param {Array<object>} data - Full tasks or min rows, as selected by `isFull`.
 * @param {boolean} isFull - Whether `data` is the full export format.
 * @param {object} [options] - `sortVertical`, `sortHorizontal`, `normalizeShape`,
 *   `widthIncrement`, `heightIncrement`, `precision`; may be omitted.
 * @returns {Promise<Array<object>>} New array in the same format as `data`.
 */
enhanceLabelStudioData = async (data, isFull, options) => {
  const {
    sortVertical,
    sortHorizontal,
    normalizeShape: normalizeShape2,
    widthIncrement = 0,
    heightIncrement = 0,
    precision = 0
  } = options || {};
  const enhanceOptions = {
    sortVertical,
    sortHorizontal,
    normalizeShape: normalizeShape2,
    widthIncrement,
    heightIncrement,
    precision
  };

  const hasPolygon = (value) => "points" in value && Boolean(value.points);
  const hasRect = (value) => "x" in value && "y" in value && "width" in value && "height" in value;

  // Percent geometry of a result item -> pixel points, or undefined without geometry.
  const toPixelPoints = (resultItem) => {
    const { value, original_width, original_height } = resultItem;
    if (hasPolygon(value)) {
      return value.points.map(([x, y]) => [
        (x ?? 0) * original_width / 100,
        (y ?? 0) * original_height / 100
      ]);
    }
    if (hasRect(value)) {
      const absX = value.x * original_width / 100;
      const absY = value.y * original_height / 100;
      const absWidth = value.width * original_width / 100;
      const absHeight = value.height * original_height / 100;
      return [
        [absX, absY],
        [absX + absWidth, absY],
        [absX + absWidth, absY + absHeight],
        [absX, absY + absHeight]
      ];
    }
    return void 0;
  };

  // Enhances one result item and converts the outcome back to percent units.
  const enhanceResultItem = (resultItem) => {
    const points = toPixelPoints(resultItem);
    if (!points) {
      return resultItem;
    }
    const [enhanced] = enhancePPOCRLabel([{ transcription: "", points, dt_score: 1 }], enhanceOptions);
    if (!enhanced) {
      return resultItem;
    }
    const { original_width, original_height } = resultItem;
    if (hasPolygon(resultItem.value)) {
      return {
        ...resultItem,
        value: {
          ...resultItem.value,
          points: enhanced.points.map(([x, y]) => [
            (x ?? 0) / original_width * 100,
            (y ?? 0) / original_height * 100
          ])
        }
      };
    }
    // Rectangle: collapse the enhanced corners back to their bounding box.
    const xs = enhanced.points.map(([x]) => x ?? 0);
    const ys = enhanced.points.map(([, y]) => y ?? 0);
    const minX = Math.min(...xs);
    const maxX = Math.max(...xs);
    const minY = Math.min(...ys);
    const maxY = Math.max(...ys);
    return {
      ...resultItem,
      value: {
        ...resultItem.value,
        x: minX / original_width * 100,
        y: minY / original_height * 100,
        width: (maxX - minX) / original_width * 100,
        height: (maxY - minY) / original_height * 100
      }
    };
  };

  if (isFull) {
    return data.map((task) => ({
      ...task,
      annotations: task.annotations.map((annotation) => {
        const groupedById = new Map();
        for (const resultItem of annotation.result) {
          const group = groupedById.get(resultItem.id);
          if (group) {
            group.push(resultItem);
          } else {
            groupedById.set(resultItem.id, [resultItem]);
          }
        }
        const result = [];
        for (const group of groupedById.values()) {
          result.push(...group.map(enhanceResultItem));
        }
        return { ...annotation, result };
      })
    }));
  }

  return data.map((item) => {
    const hasTranscriptionList = Array.isArray(item.transcription);
    const annotated = [];
    const orphanTranscriptions = [];
    const numAnnotations = Math.max(
      item.poly?.length || 0,
      item.bbox?.length || 0,
      item.transcription?.length || 0
    );
    for (let i = 0; i < numAnnotations; i++) {
      let points;
      if (item.poly && item.poly.length > i && item.poly[i]) {
        points = item.poly[i].points;
      } else if (item.bbox && item.bbox.length > i && item.bbox[i]) {
        const { x, y, width, height } = item.bbox[i];
        points = [
          [x, y],
          [x + width, y],
          [x + width, y + height],
          [x, y + height]
        ];
      }
      if (points) {
        annotated.push({
          transcription: item.transcription && item.transcription.length > i ? item.transcription[i] ?? "" : "",
          points,
          dt_score: 1
        });
      } else if (hasTranscriptionList && i < item.transcription.length) {
        // Text without geometry: keep it, but after the aligned entries.
        orphanTranscriptions.push(item.transcription[i]);
      }
    }
    if (annotated.length === 0) {
      return item;
    }

    const enhanced = enhancePPOCRLabel(annotated, enhanceOptions);
    const { bbox: _droppedBbox, ...itemWithoutBbox } = item;
    const output = {
      ...itemWithoutBbox,
      poly: enhanced.map((ann) => ({ points: ann.points }))
    };
    if (hasTranscriptionList) {
      // Enhanced annotations still carry their own text, so this follows any reordering.
      output.transcription = [...enhanced.map((ann) => ann.transcription), ...orphanTranscriptions];
    }
    return output;
  });
};
|
|
426
635
|
}
|
|
427
636
|
});
|
|
@@ -763,139 +972,129 @@ var init_schema = __esm({
|
|
|
763
972
|
import_zod.default.object({
|
|
764
973
|
transcription: import_zod.default.string(),
|
|
765
974
|
points: import_zod.default.array(import_zod.default.array(import_zod.default.number())),
|
|
766
|
-
dt_score: import_zod.default.number()
|
|
975
|
+
dt_score: import_zod.default.number().optional(),
|
|
976
|
+
// Detection score (from PaddleOCR)
|
|
977
|
+
difficult: import_zod.default.boolean().optional()
|
|
978
|
+
// Difficult flag (from PPOCRLabel tool)
|
|
767
979
|
})
|
|
768
980
|
);
|
|
769
981
|
}
|
|
770
982
|
});
|
|
771
983
|
|
|
772
|
-
// src/
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
if (verticalSort === SORT_VERTICAL_NONE && horizontalSort === SORT_HORIZONTAL_NONE) {
|
|
795
|
-
return annotations;
|
|
796
|
-
}
|
|
797
|
-
const sorted = [...annotations];
|
|
798
|
-
const isVerticalText = sorted.length > 0 && (() => {
|
|
799
|
-
const verticalCount = sorted.filter((ann) => {
|
|
800
|
-
const center = getBoundingBoxCenter(ann.points);
|
|
801
|
-
return center.height > center.width * 1.5;
|
|
802
|
-
}).length;
|
|
803
|
-
return verticalCount > sorted.length / 2;
|
|
804
|
-
})();
|
|
805
|
-
if (horizontalSort === SORT_HORIZONTAL_RTL && verticalSort !== SORT_VERTICAL_NONE && isVerticalText) {
|
|
806
|
-
const annotationsWithCenters = sorted.map((ann) => ({
|
|
807
|
-
annotation: ann,
|
|
808
|
-
center: getBoundingBoxCenter(ann.points)
|
|
809
|
-
}));
|
|
810
|
-
const columns = [];
|
|
811
|
-
for (const item of annotationsWithCenters) {
|
|
812
|
-
let addedToColumn = false;
|
|
813
|
-
for (const column of columns) {
|
|
814
|
-
const avgX = column.reduce((sum, c) => sum + c.center.x, 0) / column.length;
|
|
815
|
-
if (Math.abs(item.center.x - avgX) < GROUPING_TOLERANCE) {
|
|
816
|
-
column.push(item);
|
|
817
|
-
addedToColumn = true;
|
|
818
|
-
break;
|
|
819
|
-
}
|
|
984
|
+
// src/commands/enhance-labelstudio/impl.ts
|
|
985
|
+
var impl_exports = {};
|
|
986
|
+
__export(impl_exports, {
|
|
987
|
+
enhanceLabelStudio: () => enhanceLabelStudio
|
|
988
|
+
});
|
|
989
|
+
/**
 * CLI command: enhances every `.json` Label Studio export found directly inside the
 * given input directories and writes the result, under the same file name, into
 * `outDir`.
 *
 * Files are handled one after another. A failure while reading, parsing, enhancing
 * or writing a single file is reported on stderr and does not stop the remaining
 * files; a failure to create `outDir` or to list an input directory rejects.
 *
 * @param {object} flags - Command flags; each missing flag falls back to the
 *   corresponding default constant.
 * @param {...string} inputDirs - Directories to scan (not recursive).
 * @returns {Promise<void>}
 */
async function enhanceLabelStudio(flags, ...inputDirs) {
  const {
    outDir = OUTPUT_BASE_DIR,
    sortVertical = DEFAULT_SORT_VERTICAL,
    sortHorizontal = DEFAULT_SORT_HORIZONTAL,
    normalizeShape: shapeMode = DEFAULT_SHAPE_NORMALIZE,
    widthIncrement = DEFAULT_WIDTH_INCREMENT,
    heightIncrement = DEFAULT_HEIGHT_INCREMENT,
    precision = DEFAULT_LABEL_STUDIO_PRECISION
  } = flags;
  const { mkdir, readdir, readFile, writeFile } = import_promises;
  const { join } = import_path;
  const chalk = import_chalk.default;

  // The "none" shape mode is forwarded as "no normalization requested".
  const enhanceOptions = {
    sortVertical,
    sortHorizontal,
    normalizeShape: shapeMode !== SHAPE_NORMALIZE_NONE ? shapeMode : void 0,
    widthIncrement,
    heightIncrement,
    precision
  };

  // Reads, enhances and writes one export file; errors are logged, never thrown.
  const processFile = async (inputDir, file) => {
    const filePath = join(inputDir, file);
    console.log(chalk.gray(`Processing file: ${file}`));
    try {
      const labelStudioData = JSON.parse(await readFile(filePath, "utf-8"));
      const { data, isFull } = isLabelStudioFullJSON(labelStudioData);
      const enhanced = await enhanceLabelStudioData(data, isFull, enhanceOptions);
      const outputFilePath = join(outDir, file);
      await writeFile(outputFilePath, JSON.stringify(enhanced, null, 2), "utf-8");
      console.log(chalk.green(`\u2713 Enhanced file saved: ${outputFilePath}`));
    } catch (error) {
      console.error(
        chalk.red(`Error processing file ${file}:`),
        error instanceof Error ? error.message : String(error)
      );
    }
  };

  await mkdir(outDir, { recursive: true });
  for (const inputDir of inputDirs) {
    console.log(chalk.blue(`Processing input directory: ${inputDir}`));
    const jsonFiles = (await readdir(inputDir)).filter((name) => name.endsWith(".json"));
    for (const file of jsonFiles) {
      await processFile(inputDir, file);
    }
  }
  console.log(chalk.green("\n\u2713 Enhancement complete!"));
}
|
|
853
|
-
var
|
|
854
|
-
var
|
|
855
|
-
"src/
|
|
1038
|
+
var import_promises, import_path, import_chalk, isLabelStudioFullJSON;
|
|
1039
|
+
var init_impl = __esm({
|
|
1040
|
+
"src/commands/enhance-labelstudio/impl.ts"() {
|
|
856
1041
|
"use strict";
|
|
857
1042
|
init_cjs_shims();
|
|
1043
|
+
import_promises = require("fs/promises");
|
|
1044
|
+
import_path = require("path");
|
|
1045
|
+
import_chalk = __toESM(require("chalk"), 1);
|
|
858
1046
|
init_constants();
|
|
859
|
-
|
|
1047
|
+
init_label_studio();
|
|
1048
|
+
init_schema();
|
|
1049
|
+
/**
 * Classifies parsed JSON as a Label Studio full or min export and returns the
 * schema-parsed data.
 *
 * Attempts, in order, stopping at the first that validates:
 *   1. the input against the full schema;
 *   2. a single non-array object wrapped in an array, against the full schema;
 *   3. the input against the min schema.
 *
 * @param {unknown} data - Parsed JSON content of an export file.
 * @returns {{isFull: boolean, data: any}} Detected format and the parsed data.
 * @throws {Error} When none of the attempts validates.
 */
isLabelStudioFullJSON = (data) => {
  const isSingleObject = data !== null && typeof data === "object" && !Array.isArray(data);

  // [schema, candidate input, isFull] in the order the attempts are made.
  const attempts = [[FullOCRLabelStudioSchema, data, true]];
  if (isSingleObject) {
    attempts.push([FullOCRLabelStudioSchema, [data], true]);
  }
  attempts.push([MinOCRLabelStudioSchema, data, false]);

  for (const [schema, candidate, isFull] of attempts) {
    const parsed = schema.safeParse(candidate);
    if (parsed.success) {
      return { isFull, data: parsed.data };
    }
  }
  throw new Error("Input data is not valid Label Studio JSON format.");
};
|
|
860
1066
|
}
|
|
861
1067
|
});
|
|
862
1068
|
|
|
863
|
-
// src/commands/
|
|
864
|
-
var
|
|
865
|
-
__export(
|
|
866
|
-
|
|
1069
|
+
// src/commands/enhance-ppocr/impl.ts
|
|
1070
|
+
var impl_exports2 = {};
|
|
1071
|
+
__export(impl_exports2, {
|
|
1072
|
+
enhancePPOCR: () => enhancePPOCR
|
|
867
1073
|
});
|
|
868
|
-
async function
|
|
1074
|
+
async function enhancePPOCR(flags, ...inputDirs) {
|
|
869
1075
|
const {
|
|
870
1076
|
outDir = OUTPUT_BASE_DIR,
|
|
871
|
-
defaultLabelName = DEFAULT_LABEL_NAME,
|
|
872
|
-
toFullJson = DEFAULT_LABEL_STUDIO_FULL_JSON,
|
|
873
|
-
createFilePerImage = DEFAULT_CREATE_FILE_PER_IMAGE,
|
|
874
|
-
createFileListForServing = DEFAULT_CREATE_FILE_LIST_FOR_SERVING,
|
|
875
|
-
fileListName = DEFAULT_FILE_LIST_NAME,
|
|
876
|
-
baseServerUrl = DEFAULT_BASE_SERVER_URL,
|
|
877
1077
|
sortVertical = DEFAULT_SORT_VERTICAL,
|
|
878
1078
|
sortHorizontal = DEFAULT_SORT_HORIZONTAL,
|
|
879
1079
|
normalizeShape: normalizeShape2 = DEFAULT_SHAPE_NORMALIZE,
|
|
880
1080
|
widthIncrement = DEFAULT_WIDTH_INCREMENT,
|
|
881
1081
|
heightIncrement = DEFAULT_HEIGHT_INCREMENT,
|
|
882
|
-
precision =
|
|
1082
|
+
precision = DEFAULT_PPOCR_PRECISION
|
|
883
1083
|
} = flags;
|
|
884
|
-
|
|
885
|
-
await (0, import_promises.mkdir)(outDir, { recursive: true });
|
|
1084
|
+
await (0, import_promises2.mkdir)(outDir, { recursive: true });
|
|
886
1085
|
for (const inputDir of inputDirs) {
|
|
887
|
-
console.log(
|
|
888
|
-
const files = await (0,
|
|
1086
|
+
console.log(import_chalk2.default.blue(`Processing input directory: ${inputDir}`));
|
|
1087
|
+
const files = await (0, import_promises2.readdir)(inputDir);
|
|
889
1088
|
for (const file of files) {
|
|
890
1089
|
if (!file.endsWith(".txt")) {
|
|
891
1090
|
continue;
|
|
892
1091
|
}
|
|
893
|
-
const filePath = (0,
|
|
894
|
-
console.log(
|
|
1092
|
+
const filePath = (0, import_path2.join)(inputDir, file);
|
|
1093
|
+
console.log(import_chalk2.default.gray(`Processing file: ${file}`));
|
|
895
1094
|
try {
|
|
896
|
-
const fileData = await (0,
|
|
1095
|
+
const fileData = await (0, import_promises2.readFile)(filePath, "utf-8");
|
|
897
1096
|
const lines = fileData.trim().split("\n");
|
|
898
|
-
const
|
|
1097
|
+
const enhancedLines = [];
|
|
899
1098
|
for (const line of lines) {
|
|
900
1099
|
const parts = line.split(" ");
|
|
901
1100
|
if (parts.length !== 2) {
|
|
@@ -904,287 +1103,463 @@ async function convertToLabelStudio(flags, ...inputDirs) {
|
|
|
904
1103
|
const [imagePath, annotationsStr] = parts;
|
|
905
1104
|
const annotations = JSON.parse(annotationsStr);
|
|
906
1105
|
PPOCRLabelSchema.parse(annotations);
|
|
907
|
-
|
|
908
|
-
}
|
|
909
|
-
const allLabelStudioData = [];
|
|
910
|
-
const fileList = [];
|
|
911
|
-
let taskId = 1;
|
|
912
|
-
for (const [imagePath, ppocrData] of imageDataMap.entries()) {
|
|
913
|
-
const sortedPpocrData = sortBoundingBoxes(
|
|
914
|
-
ppocrData,
|
|
1106
|
+
const enhanced = enhancePPOCRLabel(annotations, {
|
|
915
1107
|
sortVertical,
|
|
916
|
-
sortHorizontal
|
|
917
|
-
);
|
|
918
|
-
const finalImagePath = createFileListForServing ? encodeURI(`${newBaseServerUrl}${imagePath}`) : imagePath;
|
|
919
|
-
const labelStudioData = await ppocrToLabelStudio(sortedPpocrData, {
|
|
920
|
-
toFullJson,
|
|
921
|
-
imagePath,
|
|
922
|
-
baseServerUrl: newBaseServerUrl,
|
|
923
|
-
inputDir,
|
|
924
|
-
taskId,
|
|
925
|
-
labelName: defaultLabelName,
|
|
1108
|
+
sortHorizontal,
|
|
926
1109
|
normalizeShape: normalizeShape2 !== SHAPE_NORMALIZE_NONE ? normalizeShape2 : void 0,
|
|
927
1110
|
widthIncrement,
|
|
928
1111
|
heightIncrement,
|
|
929
1112
|
precision
|
|
930
1113
|
});
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
}
|
|
934
|
-
allLabelStudioData.push(...labelStudioData);
|
|
935
|
-
}
|
|
936
|
-
if (createFilePerImage) {
|
|
937
|
-
const imageBaseName = imagePath.replace(/\//g, "_").replace(/\.[^.]+$/, "");
|
|
938
|
-
const individualOutputPath = (0, import_path.join)(
|
|
939
|
-
outDir,
|
|
940
|
-
`${imageBaseName}_${toFullJson ? "full" : "min"}.json`
|
|
941
|
-
);
|
|
942
|
-
await (0, import_promises.writeFile)(
|
|
943
|
-
individualOutputPath,
|
|
944
|
-
JSON.stringify(
|
|
945
|
-
toFullJson ? labelStudioData[0] : labelStudioData,
|
|
946
|
-
null,
|
|
947
|
-
2
|
|
948
|
-
),
|
|
949
|
-
"utf-8"
|
|
950
|
-
);
|
|
951
|
-
console.log(
|
|
952
|
-
import_chalk.default.gray(
|
|
953
|
-
` \u2713 Created individual file: ${individualOutputPath}`
|
|
954
|
-
)
|
|
955
|
-
);
|
|
956
|
-
}
|
|
957
|
-
if (createFileListForServing) {
|
|
958
|
-
fileList.push(finalImagePath);
|
|
959
|
-
}
|
|
960
|
-
taskId++;
|
|
961
|
-
}
|
|
962
|
-
const baseName = file.replace(".txt", "");
|
|
963
|
-
const outputPath = (0, import_path.join)(
|
|
964
|
-
outDir,
|
|
965
|
-
`${baseName}_${toFullJson ? "full" : "min"}.json`
|
|
966
|
-
);
|
|
967
|
-
await (0, import_promises.writeFile)(
|
|
968
|
-
outputPath,
|
|
969
|
-
JSON.stringify(allLabelStudioData, null, 2),
|
|
970
|
-
"utf-8"
|
|
971
|
-
);
|
|
972
|
-
console.log(import_chalk.default.green(`\u2713 Converted ${file} -> ${outputPath}`));
|
|
973
|
-
if (createFileListForServing && fileList.length > 0) {
|
|
974
|
-
const fileListPath = (0, import_path.join)(outDir, fileListName);
|
|
975
|
-
await (0, import_promises.writeFile)(fileListPath, fileList.join("\n"), "utf-8");
|
|
976
|
-
console.log(
|
|
977
|
-
import_chalk.default.green(
|
|
978
|
-
`\u2713 Created file list: ${fileListPath} (${fileList.length} files)`
|
|
979
|
-
)
|
|
980
|
-
);
|
|
1114
|
+
PPOCRLabelSchema.parse(enhanced);
|
|
1115
|
+
const jsonArray = JSON.stringify(enhanced);
|
|
1116
|
+
enhancedLines.push(`${imagePath} ${jsonArray}`);
|
|
981
1117
|
}
|
|
1118
|
+
const outputFilePath = (0, import_path2.join)(outDir, file);
|
|
1119
|
+
await (0, import_promises2.writeFile)(outputFilePath, enhancedLines.join("\n"), "utf-8");
|
|
1120
|
+
console.log(import_chalk2.default.green(`\u2713 Enhanced file saved: ${outputFilePath}`));
|
|
982
1121
|
} catch (error) {
|
|
983
1122
|
console.error(
|
|
984
|
-
|
|
985
|
-
error instanceof Error ? error.message : error
|
|
1123
|
+
import_chalk2.default.red(`Error processing file ${file}:`),
|
|
1124
|
+
error instanceof Error ? error.message : String(error)
|
|
986
1125
|
);
|
|
987
1126
|
}
|
|
988
1127
|
}
|
|
989
1128
|
}
|
|
990
|
-
console.log(
|
|
1129
|
+
console.log(import_chalk2.default.green("\n\u2713 Enhancement complete!"));
|
|
991
1130
|
}
|
|
992
|
-
var
|
|
993
|
-
var
|
|
994
|
-
"src/commands/
|
|
1131
|
+
var import_promises2, import_path2, import_chalk2;
|
|
1132
|
+
var init_impl2 = __esm({
|
|
1133
|
+
"src/commands/enhance-ppocr/impl.ts"() {
|
|
995
1134
|
"use strict";
|
|
996
1135
|
init_cjs_shims();
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1136
|
+
import_promises2 = require("fs/promises");
|
|
1137
|
+
import_path2 = require("path");
|
|
1138
|
+
import_chalk2 = __toESM(require("chalk"), 1);
|
|
1000
1139
|
init_constants();
|
|
1001
|
-
|
|
1140
|
+
init_enhance();
|
|
1002
1141
|
init_schema();
|
|
1003
|
-
init_sort();
|
|
1004
1142
|
}
|
|
1005
1143
|
});
|
|
1006
1144
|
|
|
1007
|
-
// src/lib/label
|
|
1008
|
-
var
|
|
1009
|
-
var
|
|
1010
|
-
"src/lib/label
|
|
1145
|
+
// src/lib/ppocr-label.ts
|
|
1146
|
+
var import_node_crypto, import_node_fs, import_node_path, import_image_size, ppocrToLabelStudio, ppocrToFullLabelStudio, ppocrToMinLabelStudio;
|
|
1147
|
+
var init_ppocr_label = __esm({
|
|
1148
|
+
"src/lib/ppocr-label.ts"() {
|
|
1011
1149
|
"use strict";
|
|
1012
1150
|
init_cjs_shims();
|
|
1013
|
-
|
|
1151
|
+
import_node_crypto = require("crypto");
|
|
1152
|
+
import_node_fs = require("fs");
|
|
1153
|
+
import_node_path = require("path");
|
|
1154
|
+
import_image_size = __toESM(require("image-size"), 1);
|
|
1155
|
+
init_constants();
|
|
1014
1156
|
init_geometry();
|
|
1015
|
-
|
|
1157
|
+
ppocrToLabelStudio = async (data, options) => {
|
|
1016
1158
|
const {
|
|
1017
|
-
|
|
1159
|
+
imagePath,
|
|
1160
|
+
baseServerUrl,
|
|
1161
|
+
inputDir,
|
|
1162
|
+
toFullJson = true,
|
|
1163
|
+
taskId = 1,
|
|
1164
|
+
labelName = DEFAULT_LABEL_NAME,
|
|
1018
1165
|
normalizeShape: normalizeShape2,
|
|
1019
1166
|
widthIncrement = 0,
|
|
1020
1167
|
heightIncrement = 0,
|
|
1021
|
-
precision =
|
|
1168
|
+
precision = DEFAULT_LABEL_STUDIO_PRECISION
|
|
1022
1169
|
} = options || {};
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1170
|
+
if (toFullJson) {
|
|
1171
|
+
return ppocrToFullLabelStudio(
|
|
1172
|
+
data,
|
|
1173
|
+
imagePath,
|
|
1174
|
+
baseServerUrl,
|
|
1175
|
+
inputDir,
|
|
1176
|
+
taskId,
|
|
1177
|
+
labelName,
|
|
1178
|
+
normalizeShape2,
|
|
1179
|
+
widthIncrement,
|
|
1180
|
+
heightIncrement,
|
|
1181
|
+
precision
|
|
1182
|
+
);
|
|
1183
|
+
} else {
|
|
1184
|
+
return ppocrToMinLabelStudio(
|
|
1185
|
+
data,
|
|
1186
|
+
imagePath,
|
|
1187
|
+
baseServerUrl,
|
|
1188
|
+
inputDir,
|
|
1189
|
+
labelName,
|
|
1190
|
+
normalizeShape2,
|
|
1191
|
+
widthIncrement,
|
|
1192
|
+
heightIncrement,
|
|
1193
|
+
precision
|
|
1194
|
+
);
|
|
1195
|
+
}
|
|
1196
|
+
};
|
|
1197
|
+
ppocrToFullLabelStudio = (data, imagePath, baseServerUrl, inputDir, taskId = 1, labelName = DEFAULT_LABEL_NAME, normalizeShape2, widthIncrement = 0, heightIncrement = 0, precision = DEFAULT_LABEL_STUDIO_PRECISION) => {
|
|
1198
|
+
const newBaseServerUrl = baseServerUrl.replace(/\/+$/, "") + (baseServerUrl === "" ? "" : "/");
|
|
1199
|
+
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1200
|
+
let original_width = 1920;
|
|
1201
|
+
let original_height = 1080;
|
|
1202
|
+
const resolvedImagePath = inputDir ? (0, import_node_path.join)(inputDir, imagePath) : imagePath;
|
|
1203
|
+
if (!(0, import_node_fs.existsSync)(resolvedImagePath)) {
|
|
1204
|
+
throw new Error(`Image file not found: ${resolvedImagePath}`);
|
|
1205
|
+
}
|
|
1206
|
+
const buffer = (0, import_node_fs.readFileSync)(resolvedImagePath);
|
|
1207
|
+
const dimensions = (0, import_image_size.default)(buffer);
|
|
1208
|
+
if (!dimensions.width || !dimensions.height) {
|
|
1209
|
+
throw new Error(
|
|
1210
|
+
`Failed to read image dimensions from: ${resolvedImagePath}`
|
|
1211
|
+
);
|
|
1212
|
+
}
|
|
1213
|
+
original_width = dimensions.width;
|
|
1214
|
+
original_height = dimensions.height;
|
|
1215
|
+
const fileName = imagePath.split("/").pop() || imagePath;
|
|
1216
|
+
const result = [
|
|
1217
|
+
{
|
|
1218
|
+
id: taskId,
|
|
1219
|
+
annotations: [
|
|
1220
|
+
{
|
|
1221
|
+
id: taskId,
|
|
1222
|
+
completed_by: 1,
|
|
1223
|
+
result: data.map((item) => {
|
|
1224
|
+
let { points } = item;
|
|
1225
|
+
points = transformPoints(points, {
|
|
1226
|
+
normalizeShape: normalizeShape2,
|
|
1227
|
+
widthIncrement,
|
|
1228
|
+
heightIncrement
|
|
1229
|
+
});
|
|
1230
|
+
const annotationId = (0, import_node_crypto.randomUUID)().slice(0, 10);
|
|
1231
|
+
const polygonPoints = points.map(([x, y]) => [
|
|
1232
|
+
roundToPrecision((x ?? 0) / original_width * 100, precision),
|
|
1233
|
+
roundToPrecision((y ?? 0) / original_height * 100, precision)
|
|
1053
1234
|
]);
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1235
|
+
return [
|
|
1236
|
+
// 1. Polygon geometry only
|
|
1237
|
+
{
|
|
1238
|
+
original_width,
|
|
1239
|
+
original_height,
|
|
1240
|
+
image_rotation: 0,
|
|
1241
|
+
value: {
|
|
1242
|
+
points: polygonPoints,
|
|
1243
|
+
closed: true
|
|
1244
|
+
},
|
|
1245
|
+
id: annotationId,
|
|
1246
|
+
from_name: "poly",
|
|
1247
|
+
to_name: "image",
|
|
1248
|
+
type: "polygon",
|
|
1249
|
+
origin: "manual"
|
|
1250
|
+
},
|
|
1251
|
+
// 2. Labels with polygon geometry
|
|
1252
|
+
{
|
|
1253
|
+
original_width,
|
|
1254
|
+
original_height,
|
|
1255
|
+
image_rotation: 0,
|
|
1256
|
+
value: {
|
|
1257
|
+
points: polygonPoints,
|
|
1258
|
+
closed: true,
|
|
1259
|
+
labels: [labelName]
|
|
1260
|
+
},
|
|
1261
|
+
id: annotationId,
|
|
1262
|
+
from_name: "label",
|
|
1263
|
+
to_name: "image",
|
|
1264
|
+
type: "labels",
|
|
1265
|
+
origin: "manual"
|
|
1266
|
+
},
|
|
1267
|
+
// 3. Textarea with polygon geometry and text
|
|
1268
|
+
{
|
|
1269
|
+
original_width,
|
|
1270
|
+
original_height,
|
|
1271
|
+
image_rotation: 0,
|
|
1272
|
+
value: {
|
|
1273
|
+
points: polygonPoints,
|
|
1274
|
+
closed: true,
|
|
1275
|
+
text: [item.transcription]
|
|
1276
|
+
},
|
|
1277
|
+
id: annotationId,
|
|
1278
|
+
from_name: "transcription",
|
|
1279
|
+
to_name: "image",
|
|
1280
|
+
type: "textarea",
|
|
1281
|
+
origin: "manual"
|
|
1282
|
+
}
|
|
1066
1283
|
];
|
|
1067
|
-
}
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
}
|
|
1087
|
-
} catch {
|
|
1088
|
-
dt_score = 0.8;
|
|
1089
|
-
}
|
|
1090
|
-
imageAnnotations.push({
|
|
1091
|
-
transcription,
|
|
1092
|
-
points,
|
|
1093
|
-
dt_score
|
|
1094
|
-
});
|
|
1284
|
+
}).flat(),
|
|
1285
|
+
was_cancelled: false,
|
|
1286
|
+
ground_truth: false,
|
|
1287
|
+
created_at: now,
|
|
1288
|
+
updated_at: now,
|
|
1289
|
+
draft_created_at: now,
|
|
1290
|
+
lead_time: 0,
|
|
1291
|
+
prediction: {},
|
|
1292
|
+
result_count: data.length * 3,
|
|
1293
|
+
unique_id: (0, import_node_crypto.randomUUID)(),
|
|
1294
|
+
import_id: null,
|
|
1295
|
+
last_action: null,
|
|
1296
|
+
bulk_created: false,
|
|
1297
|
+
task: taskId,
|
|
1298
|
+
project: 1,
|
|
1299
|
+
updated_by: 1,
|
|
1300
|
+
parent_prediction: null,
|
|
1301
|
+
parent_annotation: null,
|
|
1302
|
+
last_created_by: null
|
|
1095
1303
|
}
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1304
|
+
],
|
|
1305
|
+
file_upload: fileName,
|
|
1306
|
+
drafts: [],
|
|
1307
|
+
predictions: [],
|
|
1308
|
+
data: { ocr: `${newBaseServerUrl}${imagePath}` },
|
|
1309
|
+
meta: {},
|
|
1310
|
+
created_at: now,
|
|
1311
|
+
updated_at: now,
|
|
1312
|
+
allow_skip: false,
|
|
1313
|
+
inner_id: taskId,
|
|
1314
|
+
total_annotations: 1,
|
|
1315
|
+
cancelled_annotations: 0,
|
|
1316
|
+
total_predictions: 0,
|
|
1317
|
+
comment_count: 0,
|
|
1318
|
+
unresolved_comment_count: 0,
|
|
1319
|
+
last_comment_updated_at: null,
|
|
1320
|
+
project: 1,
|
|
1321
|
+
updated_by: 1,
|
|
1322
|
+
comment_authors: []
|
|
1100
1323
|
}
|
|
1101
|
-
|
|
1102
|
-
return
|
|
1324
|
+
];
|
|
1325
|
+
return result;
|
|
1103
1326
|
};
|
|
1104
|
-
|
|
1105
|
-
const
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
);
|
|
1119
|
-
}
|
|
1120
|
-
if (baseImageDir) {
|
|
1121
|
-
imagePath = `${baseImageDir}/${imagePath.split("/").pop() || imagePath}`;
|
|
1122
|
-
}
|
|
1123
|
-
const numAnnotations = Math.max(
|
|
1124
|
-
item.poly?.length || 0,
|
|
1125
|
-
item.bbox?.length || 0,
|
|
1126
|
-
item.transcription?.length || 0
|
|
1327
|
+
ppocrToMinLabelStudio = (data, imagePath, baseServerUrl, inputDir, labelName = "text", normalizeShape2, widthIncrement = 0, heightIncrement = 0, precision = DEFAULT_LABEL_STUDIO_PRECISION) => {
|
|
1328
|
+
const newBaseServerUrl = baseServerUrl.replace(/\/+$/, "") + (baseServerUrl === "" ? "" : "/");
|
|
1329
|
+
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1330
|
+
let original_width = 1920;
|
|
1331
|
+
let original_height = 1080;
|
|
1332
|
+
const resolvedImagePath = inputDir ? (0, import_node_path.join)(inputDir, imagePath) : imagePath;
|
|
1333
|
+
if (!(0, import_node_fs.existsSync)(resolvedImagePath)) {
|
|
1334
|
+
throw new Error(`Image file not found: ${resolvedImagePath}`);
|
|
1335
|
+
}
|
|
1336
|
+
const buffer = (0, import_node_fs.readFileSync)(resolvedImagePath);
|
|
1337
|
+
const dimensions = (0, import_image_size.default)(buffer);
|
|
1338
|
+
if (!dimensions.width || !dimensions.height) {
|
|
1339
|
+
throw new Error(
|
|
1340
|
+
`Failed to read image dimensions from: ${resolvedImagePath}`
|
|
1127
1341
|
);
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1342
|
+
}
|
|
1343
|
+
original_width = dimensions.width;
|
|
1344
|
+
original_height = dimensions.height;
|
|
1345
|
+
return data.map((item, index) => {
|
|
1346
|
+
let { points } = item;
|
|
1347
|
+
points = transformPoints(points, {
|
|
1348
|
+
normalizeShape: normalizeShape2,
|
|
1349
|
+
widthIncrement,
|
|
1350
|
+
heightIncrement
|
|
1351
|
+
});
|
|
1352
|
+
const roundedPoints = points.map(
|
|
1353
|
+
([x, y]) => [
|
|
1354
|
+
roundToPrecision(x ?? 0, precision),
|
|
1355
|
+
roundToPrecision(y ?? 0, precision)
|
|
1356
|
+
]
|
|
1357
|
+
);
|
|
1358
|
+
let minX = Infinity;
|
|
1359
|
+
let minY = Infinity;
|
|
1360
|
+
let maxX = -Infinity;
|
|
1361
|
+
let maxY = -Infinity;
|
|
1362
|
+
for (const point of roundedPoints) {
|
|
1363
|
+
const [x, y] = point;
|
|
1364
|
+
if (x !== void 0 && y !== void 0) {
|
|
1365
|
+
minX = Math.min(minX, x);
|
|
1366
|
+
minY = Math.min(minY, y);
|
|
1367
|
+
maxX = Math.max(maxX, x);
|
|
1368
|
+
maxY = Math.max(maxY, y);
|
|
1369
|
+
}
|
|
1370
|
+
}
|
|
1371
|
+
const width = maxX - minX;
|
|
1372
|
+
const height = maxY - minY;
|
|
1373
|
+
return {
|
|
1374
|
+
ocr: encodeURI(`${newBaseServerUrl}${imagePath}`),
|
|
1375
|
+
id: index + 1,
|
|
1376
|
+
bbox: [
|
|
1377
|
+
{
|
|
1378
|
+
x: minX,
|
|
1379
|
+
y: minY,
|
|
1380
|
+
width,
|
|
1381
|
+
height,
|
|
1382
|
+
rotation: 0,
|
|
1383
|
+
original_width,
|
|
1384
|
+
original_height
|
|
1135
1385
|
}
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
[x, y + height]
|
|
1145
|
-
];
|
|
1386
|
+
],
|
|
1387
|
+
label: [
|
|
1388
|
+
{
|
|
1389
|
+
points: roundedPoints,
|
|
1390
|
+
closed: true,
|
|
1391
|
+
labels: [labelName],
|
|
1392
|
+
original_width,
|
|
1393
|
+
original_height
|
|
1146
1394
|
}
|
|
1395
|
+
],
|
|
1396
|
+
transcription: [item.transcription],
|
|
1397
|
+
poly: [
|
|
1398
|
+
{
|
|
1399
|
+
points: roundedPoints,
|
|
1400
|
+
closed: true,
|
|
1401
|
+
original_width,
|
|
1402
|
+
original_height
|
|
1403
|
+
}
|
|
1404
|
+
],
|
|
1405
|
+
annotator: 1,
|
|
1406
|
+
annotation_id: index + 1,
|
|
1407
|
+
created_at: now,
|
|
1408
|
+
updated_at: now,
|
|
1409
|
+
lead_time: 0
|
|
1410
|
+
};
|
|
1411
|
+
});
|
|
1412
|
+
};
|
|
1413
|
+
}
|
|
1414
|
+
});
|
|
1415
|
+
|
|
1416
|
+
// src/commands/toLabelStudio/impl.ts
|
|
1417
|
+
var impl_exports3 = {};
|
|
1418
|
+
__export(impl_exports3, {
|
|
1419
|
+
convertToLabelStudio: () => convertToLabelStudio
|
|
1420
|
+
});
|
|
1421
|
+
async function convertToLabelStudio(flags, ...inputDirs) {
|
|
1422
|
+
const {
|
|
1423
|
+
outDir = OUTPUT_BASE_DIR,
|
|
1424
|
+
defaultLabelName = DEFAULT_LABEL_NAME,
|
|
1425
|
+
toFullJson = DEFAULT_LABEL_STUDIO_FULL_JSON,
|
|
1426
|
+
createFilePerImage = DEFAULT_CREATE_FILE_PER_IMAGE,
|
|
1427
|
+
createFileListForServing = DEFAULT_CREATE_FILE_LIST_FOR_SERVING,
|
|
1428
|
+
fileListName = DEFAULT_FILE_LIST_NAME,
|
|
1429
|
+
baseServerUrl = DEFAULT_BASE_SERVER_URL,
|
|
1430
|
+
sortVertical = DEFAULT_SORT_VERTICAL,
|
|
1431
|
+
sortHorizontal = DEFAULT_SORT_HORIZONTAL,
|
|
1432
|
+
normalizeShape: normalizeShape2 = DEFAULT_SHAPE_NORMALIZE,
|
|
1433
|
+
widthIncrement = DEFAULT_WIDTH_INCREMENT,
|
|
1434
|
+
heightIncrement = DEFAULT_HEIGHT_INCREMENT,
|
|
1435
|
+
precision = DEFAULT_LABEL_STUDIO_PRECISION
|
|
1436
|
+
} = flags;
|
|
1437
|
+
const newBaseServerUrl = baseServerUrl.replace(/\/+$/, "") + (baseServerUrl === "" ? "" : "/");
|
|
1438
|
+
await (0, import_promises3.mkdir)(outDir, { recursive: true });
|
|
1439
|
+
for (const inputDir of inputDirs) {
|
|
1440
|
+
console.log(import_chalk3.default.blue(`Processing input directory: ${inputDir}`));
|
|
1441
|
+
const files = await (0, import_promises3.readdir)(inputDir);
|
|
1442
|
+
for (const file of files) {
|
|
1443
|
+
if (!file.endsWith(".txt")) {
|
|
1444
|
+
continue;
|
|
1445
|
+
}
|
|
1446
|
+
const filePath = (0, import_path3.join)(inputDir, file);
|
|
1447
|
+
console.log(import_chalk3.default.gray(`Processing file: ${file}`));
|
|
1448
|
+
try {
|
|
1449
|
+
const fileData = await (0, import_promises3.readFile)(filePath, "utf-8");
|
|
1450
|
+
const lines = fileData.trim().split("\n");
|
|
1451
|
+
const imageDataMap = /* @__PURE__ */ new Map();
|
|
1452
|
+
for (const line of lines) {
|
|
1453
|
+
const parts = line.split(" ");
|
|
1454
|
+
if (parts.length !== 2) {
|
|
1455
|
+
throw new Error(`Invalid PPOCRLabelV2 format in line: ${line}`);
|
|
1147
1456
|
}
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1457
|
+
const [imagePath, annotationsStr] = parts;
|
|
1458
|
+
const annotations = JSON.parse(annotationsStr);
|
|
1459
|
+
PPOCRLabelSchema.parse(annotations);
|
|
1460
|
+
imageDataMap.set(imagePath, annotations);
|
|
1461
|
+
}
|
|
1462
|
+
const allLabelStudioData = [];
|
|
1463
|
+
const fileList = [];
|
|
1464
|
+
let taskId = 1;
|
|
1465
|
+
for (const [imagePath, ppocrData] of imageDataMap.entries()) {
|
|
1466
|
+
const sortedPpocrData = sortBoundingBoxes(
|
|
1467
|
+
ppocrData,
|
|
1468
|
+
sortVertical,
|
|
1469
|
+
sortHorizontal
|
|
1470
|
+
);
|
|
1471
|
+
const finalImagePath = createFileListForServing ? encodeURI(`${newBaseServerUrl}${imagePath}`) : imagePath;
|
|
1472
|
+
const labelStudioData = await ppocrToLabelStudio(sortedPpocrData, {
|
|
1473
|
+
toFullJson,
|
|
1474
|
+
imagePath,
|
|
1475
|
+
baseServerUrl: newBaseServerUrl,
|
|
1476
|
+
inputDir,
|
|
1477
|
+
taskId,
|
|
1478
|
+
labelName: defaultLabelName,
|
|
1479
|
+
normalizeShape: normalizeShape2 !== SHAPE_NORMALIZE_NONE ? normalizeShape2 : void 0,
|
|
1153
1480
|
widthIncrement,
|
|
1154
|
-
heightIncrement
|
|
1481
|
+
heightIncrement,
|
|
1482
|
+
precision
|
|
1155
1483
|
});
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
const firstPoint = points[0];
|
|
1161
|
-
if (firstPoint) {
|
|
1162
|
-
const polygon2 = turf.polygon([points.concat([firstPoint])]);
|
|
1163
|
-
const area2 = turf.area(polygon2);
|
|
1164
|
-
dt_score = Math.min(1, Math.max(0.5, area2 / 1e4));
|
|
1165
|
-
}
|
|
1166
|
-
} catch {
|
|
1167
|
-
dt_score = 0.8;
|
|
1484
|
+
if (toFullJson) {
|
|
1485
|
+
allLabelStudioData.push(labelStudioData[0]);
|
|
1486
|
+
} else {
|
|
1487
|
+
allLabelStudioData.push(...labelStudioData);
|
|
1168
1488
|
}
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1489
|
+
if (createFilePerImage) {
|
|
1490
|
+
const imageBaseName = imagePath.replace(/\//g, "_").replace(/\.[^.]+$/, "");
|
|
1491
|
+
const individualOutputPath = (0, import_path3.join)(
|
|
1492
|
+
outDir,
|
|
1493
|
+
`${imageBaseName}_${toFullJson ? "full" : "min"}.json`
|
|
1494
|
+
);
|
|
1495
|
+
await (0, import_promises3.writeFile)(
|
|
1496
|
+
individualOutputPath,
|
|
1497
|
+
JSON.stringify(
|
|
1498
|
+
toFullJson ? labelStudioData[0] : labelStudioData,
|
|
1499
|
+
null,
|
|
1500
|
+
2
|
|
1501
|
+
),
|
|
1502
|
+
"utf-8"
|
|
1503
|
+
);
|
|
1504
|
+
console.log(
|
|
1505
|
+
import_chalk3.default.gray(
|
|
1506
|
+
` \u2713 Created individual file: ${individualOutputPath}`
|
|
1507
|
+
)
|
|
1508
|
+
);
|
|
1176
1509
|
}
|
|
1177
|
-
|
|
1510
|
+
if (createFileListForServing) {
|
|
1511
|
+
fileList.push(finalImagePath);
|
|
1512
|
+
}
|
|
1513
|
+
taskId++;
|
|
1514
|
+
}
|
|
1515
|
+
const baseName = file.replace(".txt", "");
|
|
1516
|
+
const outputPath = (0, import_path3.join)(
|
|
1517
|
+
outDir,
|
|
1518
|
+
`${baseName}_${toFullJson ? "full" : "min"}.json`
|
|
1519
|
+
);
|
|
1520
|
+
await (0, import_promises3.writeFile)(
|
|
1521
|
+
outputPath,
|
|
1522
|
+
JSON.stringify(allLabelStudioData, null, 2),
|
|
1523
|
+
"utf-8"
|
|
1524
|
+
);
|
|
1525
|
+
console.log(import_chalk3.default.green(`\u2713 Converted ${file} -> ${outputPath}`));
|
|
1526
|
+
if (createFileListForServing && fileList.length > 0) {
|
|
1527
|
+
const fileListPath = (0, import_path3.join)(outDir, fileListName);
|
|
1528
|
+
await (0, import_promises3.writeFile)(fileListPath, fileList.join("\n"), "utf-8");
|
|
1529
|
+
console.log(
|
|
1530
|
+
import_chalk3.default.green(
|
|
1531
|
+
`\u2713 Created file list: ${fileListPath} (${fileList.length} files)`
|
|
1532
|
+
)
|
|
1533
|
+
);
|
|
1178
1534
|
}
|
|
1535
|
+
} catch (error) {
|
|
1536
|
+
console.error(
|
|
1537
|
+
import_chalk3.default.red(`\u2717 Failed to process ${file}:`),
|
|
1538
|
+
error instanceof Error ? error.message : error
|
|
1539
|
+
);
|
|
1179
1540
|
}
|
|
1180
|
-
|
|
1181
|
-
|
|
1541
|
+
}
|
|
1542
|
+
}
|
|
1543
|
+
console.log(import_chalk3.default.green("\n\u2713 Conversion completed!"));
|
|
1544
|
+
}
|
|
1545
|
+
var import_promises3, import_path3, import_chalk3;
|
|
1546
|
+
var init_impl3 = __esm({
|
|
1547
|
+
"src/commands/toLabelStudio/impl.ts"() {
|
|
1548
|
+
"use strict";
|
|
1549
|
+
init_cjs_shims();
|
|
1550
|
+
import_promises3 = require("fs/promises");
|
|
1551
|
+
import_path3 = require("path");
|
|
1552
|
+
import_chalk3 = __toESM(require("chalk"), 1);
|
|
1553
|
+
init_constants();
|
|
1554
|
+
init_ppocr_label();
|
|
1555
|
+
init_schema();
|
|
1556
|
+
init_sort();
|
|
1182
1557
|
}
|
|
1183
1558
|
});
|
|
1184
1559
|
|
|
1185
1560
|
// src/commands/toPPOCR/impl.ts
|
|
1186
|
-
var
|
|
1187
|
-
__export(
|
|
1561
|
+
var impl_exports4 = {};
|
|
1562
|
+
__export(impl_exports4, {
|
|
1188
1563
|
convertToPPOCR: () => convertToPPOCR
|
|
1189
1564
|
});
|
|
1190
1565
|
async function convertToPPOCR(flags, ...inputDirs) {
|
|
@@ -1199,20 +1574,20 @@ async function convertToPPOCR(flags, ...inputDirs) {
|
|
|
1199
1574
|
heightIncrement = DEFAULT_HEIGHT_INCREMENT,
|
|
1200
1575
|
precision = DEFAULT_PPOCR_PRECISION
|
|
1201
1576
|
} = flags;
|
|
1202
|
-
await (0,
|
|
1577
|
+
await (0, import_promises4.mkdir)(outDir, { recursive: true });
|
|
1203
1578
|
for (const inputDir of inputDirs) {
|
|
1204
|
-
console.log(
|
|
1205
|
-
const files = await (0,
|
|
1579
|
+
console.log(import_chalk4.default.blue(`Processing input directory: ${inputDir}`));
|
|
1580
|
+
const files = await (0, import_promises4.readdir)(inputDir);
|
|
1206
1581
|
for (const file of files) {
|
|
1207
1582
|
if (!file.endsWith(".json")) {
|
|
1208
1583
|
continue;
|
|
1209
1584
|
}
|
|
1210
|
-
const filePath = (0,
|
|
1211
|
-
console.log(
|
|
1585
|
+
const filePath = (0, import_path4.join)(inputDir, file);
|
|
1586
|
+
console.log(import_chalk4.default.gray(`Processing file: ${file}`));
|
|
1212
1587
|
try {
|
|
1213
|
-
const fileData = await (0,
|
|
1588
|
+
const fileData = await (0, import_promises4.readFile)(filePath, "utf-8");
|
|
1214
1589
|
const labelStudioData = JSON.parse(fileData);
|
|
1215
|
-
const { data, isFull } =
|
|
1590
|
+
const { data, isFull } = isLabelStudioFullJSON2(labelStudioData);
|
|
1216
1591
|
const ppocrDataMap = isFull ? await labelStudioToPPOCR(data, {
|
|
1217
1592
|
baseImageDir,
|
|
1218
1593
|
normalizeShape: normalizeShape2 !== SHAPE_NORMALIZE_NONE ? normalizeShape2 : void 0,
|
|
@@ -1238,32 +1613,32 @@ async function convertToPPOCR(flags, ...inputDirs) {
|
|
|
1238
1613
|
outputLines.push(`${imagePath} ${jsonArray}`);
|
|
1239
1614
|
}
|
|
1240
1615
|
const baseName = file.replace(".json", "");
|
|
1241
|
-
const outputPath = (0,
|
|
1242
|
-
await (0,
|
|
1243
|
-
console.log(
|
|
1616
|
+
const outputPath = (0, import_path4.join)(outDir, `${baseName}_${fileName}`);
|
|
1617
|
+
await (0, import_promises4.writeFile)(outputPath, outputLines.join("\n"), "utf-8");
|
|
1618
|
+
console.log(import_chalk4.default.green(`\u2713 Converted ${file} -> ${outputPath}`));
|
|
1244
1619
|
} catch (error) {
|
|
1245
1620
|
console.error(
|
|
1246
|
-
|
|
1621
|
+
import_chalk4.default.red(`\u2717 Failed to process ${file}:`),
|
|
1247
1622
|
error instanceof Error ? error.message : error
|
|
1248
1623
|
);
|
|
1249
1624
|
}
|
|
1250
1625
|
}
|
|
1251
1626
|
}
|
|
1252
|
-
console.log(
|
|
1627
|
+
console.log(import_chalk4.default.green("\n\u2713 Conversion completed!"));
|
|
1253
1628
|
}
|
|
1254
|
-
var
|
|
1255
|
-
var
|
|
1629
|
+
var import_promises4, import_path4, import_chalk4, isLabelStudioFullJSON2;
|
|
1630
|
+
var init_impl4 = __esm({
|
|
1256
1631
|
"src/commands/toPPOCR/impl.ts"() {
|
|
1257
1632
|
"use strict";
|
|
1258
1633
|
init_cjs_shims();
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1634
|
+
import_promises4 = require("fs/promises");
|
|
1635
|
+
import_path4 = require("path");
|
|
1636
|
+
import_chalk4 = __toESM(require("chalk"), 1);
|
|
1262
1637
|
init_constants();
|
|
1263
1638
|
init_label_studio();
|
|
1264
1639
|
init_schema();
|
|
1265
1640
|
init_sort();
|
|
1266
|
-
|
|
1641
|
+
isLabelStudioFullJSON2 = (data) => {
|
|
1267
1642
|
const parsedFull = FullOCRLabelStudioSchema.safeParse(data);
|
|
1268
1643
|
if (parsedFull.success) {
|
|
1269
1644
|
return { isFull: true, data: parsedFull.data };
|
|
@@ -1285,24 +1660,160 @@ var init_impl2 = __esm({
|
|
|
1285
1660
|
|
|
1286
1661
|
// src/bin/cli.ts
|
|
1287
1662
|
init_cjs_shims();
|
|
1288
|
-
var
|
|
1663
|
+
var import_core6 = require("@stricli/core");
|
|
1289
1664
|
|
|
1290
1665
|
// src/app.ts
|
|
1291
1666
|
init_cjs_shims();
|
|
1292
1667
|
var import_auto_complete = require("@stricli/auto-complete");
|
|
1293
|
-
var
|
|
1668
|
+
var import_core5 = require("@stricli/core");
|
|
1294
1669
|
|
|
1295
1670
|
// package.json
|
|
1296
|
-
var version = "1.
|
|
1671
|
+
var version = "1.2.0";
|
|
1297
1672
|
var description = "Convert between Label Studio OCR format and PPOCRLabelv2 format";
|
|
1298
1673
|
|
|
1299
|
-
// src/commands/
|
|
1674
|
+
// src/commands/enhance-labelstudio/command.ts
|
|
1300
1675
|
init_cjs_shims();
|
|
1301
1676
|
var import_core = require("@stricli/core");
|
|
1302
1677
|
init_constants();
|
|
1303
|
-
var
|
|
1678
|
+
var enhanceLabelStudioCommand = (0, import_core.buildCommand)({
|
|
1679
|
+
loader: async () => {
|
|
1680
|
+
const { enhanceLabelStudio: enhanceLabelStudio2 } = await Promise.resolve().then(() => (init_impl(), impl_exports));
|
|
1681
|
+
return enhanceLabelStudio2;
|
|
1682
|
+
},
|
|
1683
|
+
parameters: {
|
|
1684
|
+
positional: {
|
|
1685
|
+
kind: "array",
|
|
1686
|
+
parameter: {
|
|
1687
|
+
brief: "Input directories containing Label Studio JSON files",
|
|
1688
|
+
parse: String
|
|
1689
|
+
},
|
|
1690
|
+
minimum: 1
|
|
1691
|
+
},
|
|
1692
|
+
flags: {
|
|
1693
|
+
outDir: {
|
|
1694
|
+
kind: "parsed",
|
|
1695
|
+
brief: `Output directory. Default: "${OUTPUT_BASE_DIR}"`,
|
|
1696
|
+
parse: String,
|
|
1697
|
+
optional: true
|
|
1698
|
+
},
|
|
1699
|
+
sortVertical: {
|
|
1700
|
+
kind: "parsed",
|
|
1701
|
+
brief: `Sort bounding boxes vertically. Options: "${SORT_VERTICAL_NONE}", "${SORT_VERTICAL_TOP_BOTTOM}", "${SORT_VERTICAL_BOTTOM_TOP}". Default: "${DEFAULT_SORT_VERTICAL}"`,
|
|
1702
|
+
parse: String,
|
|
1703
|
+
optional: true
|
|
1704
|
+
},
|
|
1705
|
+
sortHorizontal: {
|
|
1706
|
+
kind: "parsed",
|
|
1707
|
+
brief: `Sort bounding boxes horizontally. Options: "${SORT_HORIZONTAL_NONE}", "${SORT_HORIZONTAL_LTR}", "${SORT_HORIZONTAL_RTL}". Default: "${DEFAULT_SORT_HORIZONTAL}"`,
|
|
1708
|
+
parse: String,
|
|
1709
|
+
optional: true
|
|
1710
|
+
},
|
|
1711
|
+
normalizeShape: {
|
|
1712
|
+
kind: "parsed",
|
|
1713
|
+
brief: `Normalize diamond-like shapes to axis-aligned rectangles. Options: "${SHAPE_NORMALIZE_NONE}", "${SHAPE_NORMALIZE_RECTANGLE}". Default: "${DEFAULT_SHAPE_NORMALIZE}"`,
|
|
1714
|
+
parse: String,
|
|
1715
|
+
optional: true
|
|
1716
|
+
},
|
|
1717
|
+
widthIncrement: {
|
|
1718
|
+
kind: "parsed",
|
|
1719
|
+
brief: `Increase bounding box width by this amount (in pixels). Can be negative to decrease. Default: ${DEFAULT_WIDTH_INCREMENT}`,
|
|
1720
|
+
parse: Number,
|
|
1721
|
+
optional: true
|
|
1722
|
+
},
|
|
1723
|
+
heightIncrement: {
|
|
1724
|
+
kind: "parsed",
|
|
1725
|
+
brief: `Increase bounding box height by this amount (in pixels). Can be negative to decrease. Default: ${DEFAULT_HEIGHT_INCREMENT}`,
|
|
1726
|
+
parse: Number,
|
|
1727
|
+
optional: true
|
|
1728
|
+
},
|
|
1729
|
+
precision: {
|
|
1730
|
+
kind: "parsed",
|
|
1731
|
+
brief: `Number of decimal places for coordinates. Use -1 for full precision (no rounding). Default: ${DEFAULT_LABEL_STUDIO_PRECISION}`,
|
|
1732
|
+
parse: Number,
|
|
1733
|
+
optional: true
|
|
1734
|
+
}
|
|
1735
|
+
}
|
|
1736
|
+
},
|
|
1737
|
+
docs: {
|
|
1738
|
+
brief: "Enhance Label Studio files with sorting, normalization, and resizing"
|
|
1739
|
+
}
|
|
1740
|
+
});
|
|
1741
|
+
|
|
1742
|
+
// src/commands/enhance-ppocr/command.ts
|
|
1743
|
+
init_cjs_shims();
|
|
1744
|
+
var import_core2 = require("@stricli/core");
|
|
1745
|
+
init_constants();
|
|
1746
|
+
var enhancePPOCRCommand = (0, import_core2.buildCommand)({
|
|
1747
|
+
loader: async () => {
|
|
1748
|
+
const { enhancePPOCR: enhancePPOCR2 } = await Promise.resolve().then(() => (init_impl2(), impl_exports2));
|
|
1749
|
+
return enhancePPOCR2;
|
|
1750
|
+
},
|
|
1751
|
+
parameters: {
|
|
1752
|
+
positional: {
|
|
1753
|
+
kind: "array",
|
|
1754
|
+
parameter: {
|
|
1755
|
+
brief: "Input directories containing PPOCRLabel files",
|
|
1756
|
+
parse: String
|
|
1757
|
+
},
|
|
1758
|
+
minimum: 1
|
|
1759
|
+
},
|
|
1760
|
+
flags: {
|
|
1761
|
+
outDir: {
|
|
1762
|
+
kind: "parsed",
|
|
1763
|
+
brief: `Output directory. Default: "${OUTPUT_BASE_DIR}"`,
|
|
1764
|
+
parse: String,
|
|
1765
|
+
optional: true
|
|
1766
|
+
},
|
|
1767
|
+
sortVertical: {
|
|
1768
|
+
kind: "parsed",
|
|
1769
|
+
brief: `Sort bounding boxes vertically. Options: "${SORT_VERTICAL_NONE}", "${SORT_VERTICAL_TOP_BOTTOM}", "${SORT_VERTICAL_BOTTOM_TOP}". Default: "${DEFAULT_SORT_VERTICAL}"`,
|
|
1770
|
+
parse: String,
|
|
1771
|
+
optional: true
|
|
1772
|
+
},
|
|
1773
|
+
sortHorizontal: {
|
|
1774
|
+
kind: "parsed",
|
|
1775
|
+
brief: `Sort bounding boxes horizontally. Options: "${SORT_HORIZONTAL_NONE}", "${SORT_HORIZONTAL_LTR}", "${SORT_HORIZONTAL_RTL}". Default: "${DEFAULT_SORT_HORIZONTAL}"`,
|
|
1776
|
+
parse: String,
|
|
1777
|
+
optional: true
|
|
1778
|
+
},
|
|
1779
|
+
normalizeShape: {
|
|
1780
|
+
kind: "parsed",
|
|
1781
|
+
brief: `Normalize diamond-like shapes to axis-aligned rectangles. Options: "${SHAPE_NORMALIZE_NONE}", "${SHAPE_NORMALIZE_RECTANGLE}". Default: "${DEFAULT_SHAPE_NORMALIZE}"`,
|
|
1782
|
+
parse: String,
|
|
1783
|
+
optional: true
|
|
1784
|
+
},
|
|
1785
|
+
widthIncrement: {
|
|
1786
|
+
kind: "parsed",
|
|
1787
|
+
brief: `Increase bounding box width by this amount (in pixels). Can be negative to decrease. Default: ${DEFAULT_WIDTH_INCREMENT}`,
|
|
1788
|
+
parse: Number,
|
|
1789
|
+
optional: true
|
|
1790
|
+
},
|
|
1791
|
+
heightIncrement: {
|
|
1792
|
+
kind: "parsed",
|
|
1793
|
+
brief: `Increase bounding box height by this amount (in pixels). Can be negative to decrease. Default: ${DEFAULT_HEIGHT_INCREMENT}`,
|
|
1794
|
+
parse: Number,
|
|
1795
|
+
optional: true
|
|
1796
|
+
},
|
|
1797
|
+
precision: {
|
|
1798
|
+
kind: "parsed",
|
|
1799
|
+
brief: `Number of decimal places for coordinates. Use -1 for full precision (no rounding). Default: ${DEFAULT_PPOCR_PRECISION} (integers)`,
|
|
1800
|
+
parse: Number,
|
|
1801
|
+
optional: true
|
|
1802
|
+
}
|
|
1803
|
+
}
|
|
1804
|
+
},
|
|
1805
|
+
docs: {
|
|
1806
|
+
brief: "Enhance PPOCRLabel files with sorting, normalization, and resizing"
|
|
1807
|
+
}
|
|
1808
|
+
});
|
|
1809
|
+
|
|
1810
|
+
// src/commands/toLabelStudio/command.ts
|
|
1811
|
+
init_cjs_shims();
|
|
1812
|
+
var import_core3 = require("@stricli/core");
|
|
1813
|
+
init_constants();
|
|
1814
|
+
var toLabelStudioCommand = (0, import_core3.buildCommand)({
|
|
1304
1815
|
loader: async () => {
|
|
1305
|
-
const { convertToLabelStudio: convertToLabelStudio2 } = await Promise.resolve().then(() => (
|
|
1816
|
+
const { convertToLabelStudio: convertToLabelStudio2 } = await Promise.resolve().then(() => (init_impl3(), impl_exports3));
|
|
1306
1817
|
return convertToLabelStudio2;
|
|
1307
1818
|
},
|
|
1308
1819
|
parameters: {
|
|
@@ -1317,58 +1828,58 @@ var toLabelStudioCommand = (0, import_core.buildCommand)({
|
|
|
1317
1828
|
flags: {
|
|
1318
1829
|
outDir: {
|
|
1319
1830
|
kind: "parsed",
|
|
1320
|
-
brief: `Output directory. Default
|
|
1831
|
+
brief: `Output directory. Default: "${OUTPUT_BASE_DIR}"`,
|
|
1321
1832
|
parse: String,
|
|
1322
1833
|
optional: true
|
|
1323
1834
|
},
|
|
1324
1835
|
defaultLabelName: {
|
|
1325
1836
|
kind: "parsed",
|
|
1326
|
-
brief: `Default label name for text annotations. Default
|
|
1837
|
+
brief: `Default label name for text annotations. Default: "${DEFAULT_LABEL_NAME}"`,
|
|
1327
1838
|
parse: String,
|
|
1328
1839
|
optional: true
|
|
1329
1840
|
},
|
|
1330
1841
|
toFullJson: {
|
|
1331
1842
|
kind: "boolean",
|
|
1332
|
-
brief: `Convert to Full OCR Label Studio format. Default
|
|
1843
|
+
brief: `Convert to Full OCR Label Studio format. Default: "${DEFAULT_LABEL_STUDIO_FULL_JSON}"`,
|
|
1333
1844
|
optional: true
|
|
1334
1845
|
},
|
|
1335
1846
|
createFilePerImage: {
|
|
1336
1847
|
kind: "boolean",
|
|
1337
|
-
brief: `Create a separate Label Studio JSON file for each image. Default
|
|
1848
|
+
brief: `Create a separate Label Studio JSON file for each image. Default: "${DEFAULT_CREATE_FILE_PER_IMAGE}"`,
|
|
1338
1849
|
optional: true
|
|
1339
1850
|
},
|
|
1340
1851
|
createFileListForServing: {
|
|
1341
1852
|
kind: "boolean",
|
|
1342
|
-
brief: `Create a file list for serving in Label Studio. Default
|
|
1853
|
+
brief: `Create a file list for serving in Label Studio. Default: "${DEFAULT_CREATE_FILE_LIST_FOR_SERVING}"`,
|
|
1343
1854
|
optional: true
|
|
1344
1855
|
},
|
|
1345
1856
|
fileListName: {
|
|
1346
1857
|
kind: "parsed",
|
|
1347
|
-
brief: `Name of the file list for serving. Default
|
|
1858
|
+
brief: `Name of the file list for serving. Default: "${DEFAULT_FILE_LIST_NAME}"`,
|
|
1348
1859
|
parse: String,
|
|
1349
1860
|
optional: true
|
|
1350
1861
|
},
|
|
1351
1862
|
baseServerUrl: {
|
|
1352
1863
|
kind: "parsed",
|
|
1353
|
-
brief: `Base server URL for constructing image URLs in the file list. Default
|
|
1864
|
+
brief: `Base server URL for constructing image URLs in the file list. Default: "${DEFAULT_BASE_SERVER_URL}"`,
|
|
1354
1865
|
parse: String,
|
|
1355
1866
|
optional: true
|
|
1356
1867
|
},
|
|
1357
1868
|
sortVertical: {
|
|
1358
1869
|
kind: "parsed",
|
|
1359
|
-
brief: `Sort bounding boxes vertically. Options: "${SORT_VERTICAL_NONE}"
|
|
1870
|
+
brief: `Sort bounding boxes vertically. Options: "${SORT_VERTICAL_NONE}", "${SORT_VERTICAL_TOP_BOTTOM}", "${SORT_VERTICAL_BOTTOM_TOP}". Default: "${DEFAULT_SORT_VERTICAL}"`,
|
|
1360
1871
|
parse: String,
|
|
1361
1872
|
optional: true
|
|
1362
1873
|
},
|
|
1363
1874
|
sortHorizontal: {
|
|
1364
1875
|
kind: "parsed",
|
|
1365
|
-
brief: `Sort bounding boxes horizontally. Options: "${SORT_HORIZONTAL_NONE}"
|
|
1876
|
+
brief: `Sort bounding boxes horizontally. Options: "${SORT_HORIZONTAL_NONE}", "${SORT_HORIZONTAL_LTR}", "${SORT_HORIZONTAL_RTL}". Default: "${DEFAULT_SORT_HORIZONTAL}"`,
|
|
1366
1877
|
parse: String,
|
|
1367
1878
|
optional: true
|
|
1368
1879
|
},
|
|
1369
1880
|
normalizeShape: {
|
|
1370
1881
|
kind: "parsed",
|
|
1371
|
-
brief: `Normalize diamond-like shapes to axis-aligned rectangles. Options: "${SHAPE_NORMALIZE_NONE}"
|
|
1882
|
+
brief: `Normalize diamond-like shapes to axis-aligned rectangles. Options: "${SHAPE_NORMALIZE_NONE}", "${SHAPE_NORMALIZE_RECTANGLE}". Default: "${DEFAULT_SHAPE_NORMALIZE}"`,
|
|
1372
1883
|
parse: String,
|
|
1373
1884
|
optional: true
|
|
1374
1885
|
},
|
|
@@ -1399,11 +1910,11 @@ var toLabelStudioCommand = (0, import_core.buildCommand)({
|
|
|
1399
1910
|
|
|
1400
1911
|
// src/commands/toPPOCR/commands.ts
|
|
1401
1912
|
init_cjs_shims();
|
|
1402
|
-
var
|
|
1913
|
+
var import_core4 = require("@stricli/core");
|
|
1403
1914
|
init_constants();
|
|
1404
|
-
var toPPOCRCommand = (0,
|
|
1915
|
+
var toPPOCRCommand = (0, import_core4.buildCommand)({
|
|
1405
1916
|
loader: async () => {
|
|
1406
|
-
const { convertToPPOCR: convertToPPOCR2 } = await Promise.resolve().then(() => (
|
|
1917
|
+
const { convertToPPOCR: convertToPPOCR2 } = await Promise.resolve().then(() => (init_impl4(), impl_exports4));
|
|
1407
1918
|
return convertToPPOCR2;
|
|
1408
1919
|
},
|
|
1409
1920
|
parameters: {
|
|
@@ -1418,13 +1929,13 @@ var toPPOCRCommand = (0, import_core2.buildCommand)({
|
|
|
1418
1929
|
flags: {
|
|
1419
1930
|
outDir: {
|
|
1420
1931
|
kind: "parsed",
|
|
1421
|
-
brief: `Output directory. Default
|
|
1932
|
+
brief: `Output directory. Default: "${OUTPUT_BASE_DIR}"`,
|
|
1422
1933
|
parse: String,
|
|
1423
1934
|
optional: true
|
|
1424
1935
|
},
|
|
1425
1936
|
fileName: {
|
|
1426
1937
|
kind: "parsed",
|
|
1427
|
-
brief: `Output PPOCR file name. Default
|
|
1938
|
+
brief: `Output PPOCR file name. Default: "${DEFAULT_PPOCR_FILE_NAME}"`,
|
|
1428
1939
|
parse: String,
|
|
1429
1940
|
optional: true
|
|
1430
1941
|
},
|
|
@@ -1436,19 +1947,19 @@ var toPPOCRCommand = (0, import_core2.buildCommand)({
|
|
|
1436
1947
|
},
|
|
1437
1948
|
sortVertical: {
|
|
1438
1949
|
kind: "parsed",
|
|
1439
|
-
brief: `Sort bounding boxes vertically. Options: "${SORT_VERTICAL_NONE}"
|
|
1950
|
+
brief: `Sort bounding boxes vertically. Options: "${SORT_VERTICAL_NONE}", "${SORT_VERTICAL_TOP_BOTTOM}", "${SORT_VERTICAL_BOTTOM_TOP}". Default: "${DEFAULT_SORT_VERTICAL}"`,
|
|
1440
1951
|
parse: String,
|
|
1441
1952
|
optional: true
|
|
1442
1953
|
},
|
|
1443
1954
|
sortHorizontal: {
|
|
1444
1955
|
kind: "parsed",
|
|
1445
|
-
brief: `Sort bounding boxes horizontally. Options: "${SORT_HORIZONTAL_NONE}"
|
|
1956
|
+
brief: `Sort bounding boxes horizontally. Options: "${SORT_HORIZONTAL_NONE}", "${SORT_HORIZONTAL_LTR}", "${SORT_HORIZONTAL_RTL}". Default: "${DEFAULT_SORT_HORIZONTAL}"`,
|
|
1446
1957
|
parse: String,
|
|
1447
1958
|
optional: true
|
|
1448
1959
|
},
|
|
1449
1960
|
normalizeShape: {
|
|
1450
1961
|
kind: "parsed",
|
|
1451
|
-
brief: `Normalize diamond-like shapes to axis-aligned rectangles. Options: "${SHAPE_NORMALIZE_NONE}"
|
|
1962
|
+
brief: `Normalize diamond-like shapes to axis-aligned rectangles. Options: "${SHAPE_NORMALIZE_NONE}", "${SHAPE_NORMALIZE_RECTANGLE}". Default: "${DEFAULT_SHAPE_NORMALIZE}"`,
|
|
1452
1963
|
parse: String,
|
|
1453
1964
|
optional: true
|
|
1454
1965
|
},
|
|
@@ -1478,10 +1989,12 @@ var toPPOCRCommand = (0, import_core2.buildCommand)({
|
|
|
1478
1989
|
});
|
|
1479
1990
|
|
|
1480
1991
|
// src/app.ts
|
|
1481
|
-
var routes = (0,
|
|
1992
|
+
var routes = (0, import_core5.buildRouteMap)({
|
|
1482
1993
|
routes: {
|
|
1483
1994
|
toLabelStudio: toLabelStudioCommand,
|
|
1484
1995
|
toPPOCR: toPPOCRCommand,
|
|
1996
|
+
"enhance-labelstudio": enhanceLabelStudioCommand,
|
|
1997
|
+
"enhance-ppocr": enhancePPOCRCommand,
|
|
1485
1998
|
install: (0, import_auto_complete.buildInstallCommand)("label-studio-converter", {
|
|
1486
1999
|
bash: "__label-studio-converter_bash_complete"
|
|
1487
2000
|
}),
|
|
@@ -1495,7 +2008,7 @@ var routes = (0, import_core3.buildRouteMap)({
|
|
|
1495
2008
|
}
|
|
1496
2009
|
}
|
|
1497
2010
|
});
|
|
1498
|
-
var app = (0,
|
|
2011
|
+
var app = (0, import_core5.buildApplication)(routes, {
|
|
1499
2012
|
name: "label-studio-converter",
|
|
1500
2013
|
versionInfo: {
|
|
1501
2014
|
currentVersion: version
|
|
@@ -1518,6 +2031,6 @@ function buildContext(process2) {
|
|
|
1518
2031
|
|
|
1519
2032
|
// src/bin/cli.ts
|
|
1520
2033
|
(async () => {
|
|
1521
|
-
(0,
|
|
2034
|
+
(0, import_core6.run)(app, process.argv.slice(2), buildContext(process));
|
|
1522
2035
|
})();
|
|
1523
2036
|
//# sourceMappingURL=cli.cjs.map
|