label-studio-converter 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +580 -20
- package/dist/bash-complete.cjs +316 -74
- package/dist/bash-complete.cjs.map +1 -1
- package/dist/bash-complete.js +316 -74
- package/dist/bash-complete.js.map +1 -1
- package/dist/cli.cjs +316 -74
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +316 -74
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +199 -49
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +31 -16
- package/dist/index.d.ts +31 -16
- package/dist/index.js +199 -49
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
- [Interface setup](#interface-setup)
|
|
25
25
|
- [Serving annotation files locally](#serving-annotation-files-locally)
|
|
26
26
|
- [Using generated files with PPOCRLabelv2](#using-generated-files-with-ppocrlabelv2)
|
|
27
|
+
- [Conversion Margin of Error](#conversion-margin-of-error)
|
|
27
28
|
- [Roadmap](#compass-roadmap)
|
|
28
29
|
- [Contributing](#wave-contributing)
|
|
29
30
|
- [Code of Conduct](#scroll-code-of-conduct)
|
|
@@ -39,12 +40,20 @@
|
|
|
39
40
|
|
|
40
41
|
### :bangbang: Prerequisites
|
|
41
42
|
|
|
42
|
-
This project uses [pnpm](https://pnpm.io/) as package manager:
|
|
43
|
+
- This project uses [pnpm](https://pnpm.io/) as its package manager:
|
|
43
44
|
|
|
44
|
-
```bash
|
|
45
|
-
npm install --global pnpm
|
|
46
|
-
```
|
|
45
|
+
```bash
|
|
46
|
+
npm install --global pnpm
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
- [Label Studio](https://labelstud.io/): Tested with version `1.22.0` and above.
|
|
47
50
|
|
|
51
|
+
- PPOCRLabelv2 from
|
|
52
|
+
[`PFCCLab/PPOCRLabel`](https://github.com/PFCCLab/PPOCRLabel): Tested with
|
|
53
|
+
latest commit
|
|
54
|
+
[04928bf](https://github.com/PFCCLab/PPOCRLabel/tree/04928bf015656e41ba5569877df9b0666ca90f89)
|
|
55
|
+
|
|
56
|
+
- [Node.js](https://nodejs.org/): Tested with version `22.x` and above.
|
|
48
57
|
<!-- Run Locally -->
|
|
49
58
|
|
|
50
59
|
### :running: Run Locally
|
|
@@ -71,6 +80,13 @@ pnpm install
|
|
|
71
80
|
|
|
72
81
|
## :eyes: Usage
|
|
73
82
|
|
|
83
|
+
> [!IMPORTANT]
|
|
84
|
+
> This tool only supports conversion between PPOCRLabelv2 format and Label
|
|
85
|
+
> Studio ["OCR"
|
|
86
|
+
> template](https://labelstud.io/templates/optical_character_recognition). For
|
|
87
|
+
> setting up Label Studio for OCR tasks, please refer to the [Using generated
|
|
88
|
+
> files with Label Studio](#using-generated-files-with-label-studio) section.
|
|
89
|
+
|
|
74
90
|
### Basic Usage
|
|
75
91
|
|
|
76
92
|
```ts
|
|
@@ -88,6 +104,10 @@ await toLabelStudio({
|
|
|
88
104
|
baseServerUrl: 'http://localhost:8081',
|
|
89
105
|
sortVertical: 'none',
|
|
90
106
|
sortHorizontal: 'none',
|
|
107
|
+
normalizeShape: 'none', // Options: 'none', 'rectangle'
|
|
108
|
+
widthIncrement: 0, // Increase width in pixels (can be negative)
|
|
109
|
+
heightIncrement: 0, // Increase height in pixels (can be negative)
|
|
110
|
+
precision: -1, // Number precision: -1 = full precision (default for Label Studio)
|
|
91
111
|
});
|
|
92
112
|
|
|
93
113
|
// Convert Label Studio files to PPOCRLabel format
|
|
@@ -98,15 +118,19 @@ await toPPOCR({
|
|
|
98
118
|
baseImageDir: 'images/ch',
|
|
99
119
|
sortVertical: 'none',
|
|
100
120
|
sortHorizontal: 'none',
|
|
121
|
+
normalizeShape: 'none', // Options: 'none', 'rectangle'
|
|
122
|
+
widthIncrement: 0, // Increase width in pixels (can be negative)
|
|
123
|
+
heightIncrement: 0, // Increase height in pixels (can be negative)
|
|
124
|
+
precision: 0, // Number precision: 0 = integers (default for PPOCR)
|
|
101
125
|
});
|
|
102
126
|
```
|
|
103
127
|
|
|
104
128
|
### CLI Usage
|
|
105
129
|
|
|
106
|
-
```
|
|
130
|
+
```bash
|
|
107
131
|
USAGE
|
|
108
|
-
label-studio-converter toLabelStudio [--outDir value] [--defaultLabelName value] [--toFullJson] [--createFilePerImage] [--createFileListForServing] [--fileListName value] [--baseServerUrl value] [--sortVertical value] [--sortHorizontal value] <args>...
|
|
109
|
-
label-studio-converter toPPOCR [--outDir value] [--fileName value] [--baseImageDir value] [--sortVertical value] [--sortHorizontal value] <args>...
|
|
132
|
+
label-studio-converter toLabelStudio [--outDir value] [--defaultLabelName value] [--toFullJson] [--createFilePerImage] [--createFileListForServing] [--fileListName value] [--baseServerUrl value] [--sortVertical value] [--sortHorizontal value] [--normalizeShape value] [--widthIncrement value] [--heightIncrement value] [--precision value] <args>...
|
|
133
|
+
label-studio-converter toPPOCR [--outDir value] [--fileName value] [--baseImageDir value] [--sortVertical value] [--sortHorizontal value] [--normalizeShape value] [--widthIncrement value] [--heightIncrement value] [--precision value] <args>...
|
|
110
134
|
label-studio-converter --help
|
|
111
135
|
label-studio-converter --version
|
|
112
136
|
|
|
@@ -123,9 +147,11 @@ COMMANDS
|
|
|
123
147
|
|
|
124
148
|
Subcommands:
|
|
125
149
|
|
|
126
|
-
|
|
150
|
+
**toLabelStudio**:
|
|
151
|
+
|
|
152
|
+
```bash
|
|
127
153
|
USAGE
|
|
128
|
-
label-studio-converter toLabelStudio [--outDir value] [--defaultLabelName value] [--toFullJson] [--createFilePerImage] [--createFileListForServing] [--fileListName value] [--baseServerUrl value] [--sortVertical value] [--sortHorizontal value] <args>...
|
|
154
|
+
label-studio-converter toLabelStudio [--outDir value] [--defaultLabelName value] [--toFullJson] [--createFilePerImage] [--createFileListForServing] [--fileListName value] [--baseServerUrl value] [--sortVertical value] [--sortHorizontal value] [--normalizeShape value] [--widthIncrement value] [--heightIncrement value] [--precision value] <args>...
|
|
129
155
|
label-studio-converter toLabelStudio --help
|
|
130
156
|
|
|
131
157
|
Convert PPOCRLabel files to Label Studio format
|
|
@@ -140,26 +166,36 @@ FLAGS
|
|
|
140
166
|
[--baseServerUrl] Base server URL for constructing image URLs in the file list. Default to "http://localhost:8081"
|
|
141
167
|
[--sortVertical] Sort bounding boxes vertically. Options: "none" (default), "top-bottom", "bottom-top"
|
|
142
168
|
[--sortHorizontal] Sort bounding boxes horizontally. Options: "none" (default), "ltr", "rtl"
|
|
169
|
+
[--normalizeShape] Normalize diamond-like shapes to axis-aligned rectangles. Options: "none" (default), "rectangle"
|
|
170
|
+
[--widthIncrement] Increase bounding box width by this amount (in pixels). Can be negative to decrease. Default: 0
|
|
171
|
+
[--heightIncrement] Increase bounding box height by this amount (in pixels). Can be negative to decrease. Default: 0
|
|
172
|
+
[--precision] Number of decimal places for coordinates. Use -1 for full precision (no rounding). Default: -1
|
|
143
173
|
-h --help Print help information and exit
|
|
144
174
|
|
|
145
175
|
ARGUMENTS
|
|
146
176
|
args... Input directories containing PPOCRLabel files
|
|
147
177
|
```
|
|
148
178
|
|
|
149
|
-
|
|
179
|
+
**toPPOCR**:
|
|
180
|
+
|
|
181
|
+
```bash
|
|
150
182
|
USAGE
|
|
151
|
-
label-studio-converter toPPOCR [--outDir value] [--fileName value] [--baseImageDir value] [--sortVertical value] [--sortHorizontal value] <args>...
|
|
183
|
+
label-studio-converter toPPOCR [--outDir value] [--fileName value] [--baseImageDir value] [--sortVertical value] [--sortHorizontal value] [--normalizeShape value] [--widthIncrement value] [--heightIncrement value] [--precision value] <args>...
|
|
152
184
|
label-studio-converter toPPOCR --help
|
|
153
185
|
|
|
154
186
|
Convert Label Studio files to PPOCRLabel format
|
|
155
187
|
|
|
156
188
|
FLAGS
|
|
157
|
-
[--outDir]
|
|
158
|
-
[--fileName]
|
|
159
|
-
[--baseImageDir]
|
|
160
|
-
[--sortVertical]
|
|
161
|
-
[--sortHorizontal]
|
|
162
|
-
|
|
189
|
+
[--outDir] Output directory. Default to "./output"
|
|
190
|
+
[--fileName] Output PPOCR file name. Default to "Label.txt"
|
|
191
|
+
[--baseImageDir] Base directory path to prepend to image filenames in output (e.g., "ch" or "images/ch")
|
|
192
|
+
[--sortVertical] Sort bounding boxes vertically. Options: "none" (default), "top-bottom", "bottom-top"
|
|
193
|
+
[--sortHorizontal] Sort bounding boxes horizontally. Options: "none" (default), "ltr", "rtl"
|
|
194
|
+
[--normalizeShape] Normalize diamond-like shapes to axis-aligned rectangles. Options: "none" (default), "rectangle"
|
|
195
|
+
[--widthIncrement] Increase bounding box width by this amount (in pixels). Can be negative to decrease. Default: 0
|
|
196
|
+
[--heightIncrement] Increase bounding box height by this amount (in pixels). Can be negative to decrease. Default: 0
|
|
197
|
+
[--precision] Number of decimal places for coordinates. Use -1 for full precision (no rounding). Default: 0 (integers)
|
|
198
|
+
-h --help Print help information and exit
|
|
163
199
|
|
|
164
200
|
ARGUMENTS
|
|
165
201
|
args... Input directories containing Label Studio files
|
|
@@ -173,6 +209,9 @@ ARGUMENTS
|
|
|
173
209
|
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output-label-studio --defaultLabelName Text --toFullJson --createFileListForServing --fileListName files.txt --baseServerUrl http://localhost:8081 --sortVertical none --sortHorizontal none
|
|
174
210
|
```
|
|
175
211
|
|
|
212
|
+
> [!NOTE]
|
|
213
|
+
> By default, all PPOCRLabel positions are treated as **polygons** in Label Studio.
|
|
214
|
+
|
|
176
215
|
**Convert Label Studio files to PPOCRLabel format:**
|
|
177
216
|
|
|
178
217
|
```bash
|
|
@@ -191,11 +230,134 @@ label-studio-converter toLabelStudio ./input-ppocr --outDir ./output-label-studi
|
|
|
191
230
|
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output-label-studio --defaultLabelName Text --noToFullJson --sortVertical none --sortHorizontal none
|
|
192
231
|
```
|
|
193
232
|
|
|
233
|
+
> [!IMPORTANT]
|
|
234
|
+
> Minimal Label Studio format cannot be used for serving in Label Studio, as it
|
|
235
|
+
> lacks necessary fields such as `id` and `data`. You can only use the minimal
|
|
236
|
+
> format for conversion back to PPOCRLabelv2 format or other purposes.
|
|
237
|
+
|
|
238
|
+
**Shape Normalization**
|
|
239
|
+
|
|
240
|
+
Convert diamond-like or irregular quadrilateral shapes to axis-aligned
|
|
241
|
+
rectangles. This is useful when your annotations have irregular shapes that you
|
|
242
|
+
want to normalize to clean, horizontal/vertical bounding boxes:
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
# Convert to axis-aligned rectangles
|
|
246
|
+
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output --normalizeShape rectangle
|
|
247
|
+
|
|
248
|
+
# For toPPOCR command
|
|
249
|
+
label-studio-converter toPPOCR ./input-label-studio --outDir ./output --normalizeShape rectangle
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
<details>
|
|
253
|
+
<summary>
|
|
254
|
+
<b>Before normalization</b> (diamond-like shapes):
|
|
255
|
+
</summary>
|
|
256
|
+
|
|
257
|
+

|
|
258
|
+
|
|
259
|
+
</details>
|
|
260
|
+
|
|
261
|
+
<details>
|
|
262
|
+
<summary>
|
|
263
|
+
<b>After normalization</b> (axis-aligned rectangles):
|
|
264
|
+
</summary>
|
|
265
|
+
|
|
266
|
+
Command:
|
|
267
|
+
|
|
268
|
+
```bash
|
|
269
|
+
./dist/cli.js toPPOCR ./tmp --baseImageDir output --normalizeShape rectangle
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+

|
|
273
|
+
|
|
274
|
+
</details>
|
|
275
|
+
|
|
276
|
+
<details>
|
|
277
|
+
<summary>
|
|
278
|
+
<b>Before normalization</b> (diamond-like vertical shapes):
|
|
279
|
+
</summary>
|
|
280
|
+
|
|
281
|
+

|
|
282
|
+
|
|
283
|
+
</details>
|
|
284
|
+
|
|
285
|
+
<details>
|
|
286
|
+
<summary>
|
|
287
|
+
<b>After normalization</b> (axis-aligned vertical rectangles):
|
|
288
|
+
</summary>
|
|
289
|
+
|
|
290
|
+
Command:
|
|
291
|
+
|
|
292
|
+
```bash
|
|
293
|
+
./dist/cli.js toPPOCR ./tmp --baseImageDir output --normalizeShape rectangle
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+

|
|
297
|
+
|
|
298
|
+
</details>
|
|
299
|
+
|
|
300
|
+
**Bounding Box Resizing**
|
|
301
|
+
|
|
302
|
+
Increase or decrease bounding box dimensions while keeping them centered. This
|
|
303
|
+
is useful for adjusting annotation margins:
|
|
304
|
+
|
|
305
|
+
```bash
|
|
306
|
+
# Increase width by 10 pixels and height by 20 pixels
|
|
307
|
+
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output --widthIncrement 10 --heightIncrement 20
|
|
308
|
+
|
|
309
|
+
# Decrease width by 5 pixels (negative increment)
|
|
310
|
+
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output --widthIncrement -5
|
|
311
|
+
|
|
312
|
+
# Works with toPPOCR as well
|
|
313
|
+
label-studio-converter toPPOCR ./input-label-studio --outDir ./output --widthIncrement 10 --heightIncrement 10
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
**Combining Features**
|
|
317
|
+
|
|
318
|
+
You can combine shape normalization and resizing:
|
|
319
|
+
|
|
320
|
+
```bash
|
|
321
|
+
# Normalize to rectangle and increase size
|
|
322
|
+
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output --normalizeShape rectangle --widthIncrement 5 --heightIncrement 5
|
|
323
|
+
|
|
324
|
+
# Also works with sorting
|
|
325
|
+
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output --normalizeShape rectangle --widthIncrement 10 --sortVertical top-bottom --sortHorizontal ltr
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
**Number Precision Control**
|
|
329
|
+
|
|
330
|
+
Control the precision of coordinate values in the output. This is useful for
|
|
331
|
+
matching format expectations or reducing file size:
|
|
332
|
+
|
|
333
|
+
```bash
|
|
334
|
+
# Convert to Label Studio with full precision (default: -1)
|
|
335
|
+
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output --precision -1
|
|
336
|
+
|
|
337
|
+
# Convert to PPOCR with integer coordinates (default: 0)
|
|
338
|
+
label-studio-converter toPPOCR ./input-label-studio --outDir ./output --precision 0
|
|
339
|
+
|
|
340
|
+
# Use 2 decimal places for more compact but still precise coordinates
|
|
341
|
+
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output --precision 2
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
Precision values:
|
|
345
|
+
|
|
346
|
+
- `-1`: Full floating-point precision (default for Label Studio output)
|
|
347
|
+
- `0`: Round to integers (default for PPOCR output)
|
|
348
|
+
- `1+`: Round to specified number of decimal places
|
|
349
|
+
|
|
350
|
+
> [!NOTE]
|
|
351
|
+
> The default precision matches typical format conventions: Label Studio uses
|
|
352
|
+
> full precision for percentage-based coordinates, while PPOCR format typically
|
|
353
|
+
> uses integer pixel coordinates.
|
|
354
|
+
|
|
194
355
|
### Using generated files with Label Studio
|
|
195
356
|
|
|
196
357
|
#### Interface setup
|
|
197
358
|
|
|
198
|
-
When creating a new labeling project in Label Studio, choose the "OCR"
|
|
359
|
+
When creating a new labeling project in Label Studio, choose the ["OCR"
|
|
360
|
+
template](https://labelstud.io/templates/optical_character_recognition).
|
|
199
361
|
This will set up the appropriate interface for text recognition tasks.
|
|
200
362
|
|
|
201
363
|
This project uses the following Label Studio interface configuration:
|
|
@@ -216,8 +378,8 @@ This project uses the following Label Studio interface configuration:
|
|
|
216
378
|
This setup includes:
|
|
217
379
|
|
|
218
380
|
- An `Image` tag to display the image to be annotated.
|
|
219
|
-
- A `Labels` tag with two label options:
|
|
220
|
-
all annotations will be labeled as
|
|
381
|
+
- A `Labels` tag with two label options: `Text` and `Handwriting`. By default,
|
|
382
|
+
all annotations will be labeled as `Text`. You can modify this based on your
|
|
221
383
|
needs.
|
|
222
384
|
- A `Rectangle` tag to allow annotators to draw bounding boxes around text regions.
|
|
223
385
|
- A `Polygon` tag to allow annotators to draw polygons around text regions.
|
|
@@ -298,6 +460,404 @@ files in the dataset directories.
|
|
|
298
460
|
If the images are put in a different directory, make sure to update the image
|
|
299
461
|
directory path by specifying the `baseImageDir` option during conversion.
|
|
300
462
|
|
|
463
|
+
### Conversion Margin of Error
|
|
464
|
+
|
|
465
|
+
During conversion between the two formats, PPOCRLabelv2 and Label Studio,
|
|
466
|
+
small margins of error may occur due to differences in how each format handles certain
|
|
467
|
+
aspects of the data.
|
|
468
|
+
|
|
469
|
+
**Convert from Label Studio to PPOCRLabelv2**
|
|
470
|
+
|
|
471
|
+
Label Studio annotation:
|
|
472
|
+
|
|
473
|
+

|
|
474
|
+
|
|
475
|
+
Generated PPOCRLabelv2 annotation:
|
|
476
|
+
|
|
477
|
+

|
|
478
|
+
|
|
479
|
+
Converted back to Label Studio annotation:
|
|
480
|
+
|
|
481
|
+

|
|
482
|
+
|
|
483
|
+
<details>
|
|
484
|
+
<summary>
|
|
485
|
+
<b>Original data</b> (<code>full_label_studio.json</code>):
|
|
486
|
+
</summary>
|
|
487
|
+
|
|
488
|
+
```json
|
|
489
|
+
[
|
|
490
|
+
{
|
|
491
|
+
"id": 1,
|
|
492
|
+
"annotations": [
|
|
493
|
+
{
|
|
494
|
+
"id": 201,
|
|
495
|
+
"completed_by": 1,
|
|
496
|
+
"result": [
|
|
497
|
+
{
|
|
498
|
+
"original_width": 889,
|
|
499
|
+
"original_height": 520,
|
|
500
|
+
"image_rotation": 0,
|
|
501
|
+
"value": {
|
|
502
|
+
"x": 27.44656917885264,
|
|
503
|
+
"y": 58.07692307692308,
|
|
504
|
+
"width": 42.63217097862767,
|
|
505
|
+
"height": 5.961538461538453,
|
|
506
|
+
"rotation": 0
|
|
507
|
+
},
|
|
508
|
+
"id": "JQAipC-2LH",
|
|
509
|
+
"from_name": "bbox",
|
|
510
|
+
"to_name": "image",
|
|
511
|
+
"type": "rectangle",
|
|
512
|
+
"origin": "manual"
|
|
513
|
+
},
|
|
514
|
+
{
|
|
515
|
+
"original_width": 889,
|
|
516
|
+
"original_height": 520,
|
|
517
|
+
"image_rotation": 0,
|
|
518
|
+
"value": {
|
|
519
|
+
"x": 27.44656917885264,
|
|
520
|
+
"y": 58.07692307692308,
|
|
521
|
+
"width": 42.63217097862767,
|
|
522
|
+
"height": 5.961538461538453,
|
|
523
|
+
"rotation": 0,
|
|
524
|
+
"labels": ["Text"]
|
|
525
|
+
},
|
|
526
|
+
"id": "JQAipC-2LH",
|
|
527
|
+
"from_name": "label",
|
|
528
|
+
"to_name": "image",
|
|
529
|
+
"type": "labels",
|
|
530
|
+
"origin": "manual"
|
|
531
|
+
},
|
|
532
|
+
{
|
|
533
|
+
"original_width": 889,
|
|
534
|
+
"original_height": 520,
|
|
535
|
+
"image_rotation": 0,
|
|
536
|
+
"value": {
|
|
537
|
+
"x": 27.44656917885264,
|
|
538
|
+
"y": 58.07692307692308,
|
|
539
|
+
"width": 42.63217097862767,
|
|
540
|
+
"height": 5.961538461538453,
|
|
541
|
+
"rotation": 0,
|
|
542
|
+
"text": ["ACUTE CORONARY SYNDROME"]
|
|
543
|
+
},
|
|
544
|
+
"id": "JQAipC-2LH",
|
|
545
|
+
"from_name": "transcription",
|
|
546
|
+
"to_name": "image",
|
|
547
|
+
"type": "textarea",
|
|
548
|
+
"origin": "manual"
|
|
549
|
+
},
|
|
550
|
+
{
|
|
551
|
+
"original_width": 889,
|
|
552
|
+
"original_height": 520,
|
|
553
|
+
"image_rotation": 0,
|
|
554
|
+
"value": {
|
|
555
|
+
"x": 27.559055118110237,
|
|
556
|
+
"y": 64.8076923076923,
|
|
557
|
+
"width": 26.884374807767497,
|
|
558
|
+
"height": 4.423038206853052,
|
|
559
|
+
"rotation": 359.76027010391914
|
|
560
|
+
},
|
|
561
|
+
"id": "gydCl1Q9Nt",
|
|
562
|
+
"from_name": "bbox",
|
|
563
|
+
"to_name": "image",
|
|
564
|
+
"type": "rectangle",
|
|
565
|
+
"origin": "manual"
|
|
566
|
+
},
|
|
567
|
+
{
|
|
568
|
+
"original_width": 889,
|
|
569
|
+
"original_height": 520,
|
|
570
|
+
"image_rotation": 0,
|
|
571
|
+
"value": {
|
|
572
|
+
"x": 27.559055118110237,
|
|
573
|
+
"y": 64.8076923076923,
|
|
574
|
+
"width": 26.884374807767497,
|
|
575
|
+
"height": 4.423038206853052,
|
|
576
|
+
"rotation": 359.76027010391914,
|
|
577
|
+
"labels": ["Handwriting"]
|
|
578
|
+
},
|
|
579
|
+
"id": "gydCl1Q9Nt",
|
|
580
|
+
"from_name": "label",
|
|
581
|
+
"to_name": "image",
|
|
582
|
+
"type": "labels",
|
|
583
|
+
"origin": "manual"
|
|
584
|
+
},
|
|
585
|
+
{
|
|
586
|
+
"original_width": 889,
|
|
587
|
+
"original_height": 520,
|
|
588
|
+
"image_rotation": 0,
|
|
589
|
+
"value": {
|
|
590
|
+
"x": 27.559055118110237,
|
|
591
|
+
"y": 64.8076923076923,
|
|
592
|
+
"width": 26.884374807767497,
|
|
593
|
+
"height": 4.423038206853052,
|
|
594
|
+
"rotation": 359.76027010391914,
|
|
595
|
+
"text": ["UNSTABLE ANGINA"]
|
|
596
|
+
},
|
|
597
|
+
"id": "gydCl1Q9Nt",
|
|
598
|
+
"from_name": "transcription",
|
|
599
|
+
"to_name": "image",
|
|
600
|
+
"type": "textarea",
|
|
601
|
+
"origin": "manual"
|
|
602
|
+
}
|
|
603
|
+
],
|
|
604
|
+
"was_cancelled": false,
|
|
605
|
+
"ground_truth": false,
|
|
606
|
+
"created_at": "2026-01-07T03:14:39.424067Z",
|
|
607
|
+
"updated_at": "2026-01-07T03:14:39.424096Z",
|
|
608
|
+
"draft_created_at": "2026-01-07T03:14:04.596361Z",
|
|
609
|
+
"lead_time": 56.087,
|
|
610
|
+
"prediction": {},
|
|
611
|
+
"result_count": 2,
|
|
612
|
+
"unique_id": "7e8c79f1-49ce-471c-8b26-8b8c6f9c3401",
|
|
613
|
+
"import_id": null,
|
|
614
|
+
"last_action": null,
|
|
615
|
+
"bulk_created": false,
|
|
616
|
+
"task": 1,
|
|
617
|
+
"project": 2,
|
|
618
|
+
"updated_by": 1,
|
|
619
|
+
"parent_prediction": null,
|
|
620
|
+
"parent_annotation": null,
|
|
621
|
+
"last_created_by": null
|
|
622
|
+
}
|
|
623
|
+
],
|
|
624
|
+
"file_upload": "5b1e3483-example.jpg",
|
|
625
|
+
"drafts": [],
|
|
626
|
+
"predictions": [],
|
|
627
|
+
"data": { "ocr": "\/data\/upload\/2\/5b1e3483-example.jpg" },
|
|
628
|
+
"meta": {},
|
|
629
|
+
"created_at": "2026-01-07T03:13:41.175183Z",
|
|
630
|
+
"updated_at": "2026-01-07T03:14:39.478016Z",
|
|
631
|
+
"allow_skip": true,
|
|
632
|
+
"inner_id": 1,
|
|
633
|
+
"total_annotations": 1,
|
|
634
|
+
"cancelled_annotations": 0,
|
|
635
|
+
"total_predictions": 0,
|
|
636
|
+
"comment_count": 0,
|
|
637
|
+
"unresolved_comment_count": 0,
|
|
638
|
+
"last_comment_updated_at": null,
|
|
639
|
+
"project": 2,
|
|
640
|
+
"updated_by": 1,
|
|
641
|
+
"comment_authors": []
|
|
642
|
+
}
|
|
643
|
+
]
|
|
644
|
+
```
|
|
645
|
+
|
|
646
|
+
</details>
|
|
647
|
+
|
|
648
|
+
<details>
|
|
649
|
+
<summary>
|
|
650
|
+
<b>Converted data</b> (<code>output/Label.txt</code>):
|
|
651
|
+
</summary>
|
|
652
|
+
|
|
653
|
+
Command:
|
|
654
|
+
|
|
655
|
+
```bash
|
|
656
|
+
./dist/cli.js toPPOCR ./tmp --baseImageDir output
|
|
657
|
+
```
|
|
658
|
+
|
|
659
|
+
Output:
|
|
660
|
+
|
|
661
|
+
```
|
|
662
|
+
output/5b1e3483-example.jpg [{"transcription":"ACUTE CORONARY SYNDROME","points":[[243.99999999999997,302],[623,302],[623,332.99999999999994],[243.99999999999997,332.99999999999994]],"dt_score":1},{"transcription":"UNSTABLE ANGINA","points":[[245,337],[484.00209204105306,337],[484.00209204105306,359.9997986756359],[245,359.9997986756359]],"dt_score":1}]
|
|
663
|
+
```
|
|
664
|
+
|
|
665
|
+
</details>
|
|
666
|
+
|
|
667
|
+
<details>
|
|
668
|
+
<summary>
|
|
669
|
+
<b>Converted back to Label Studio</b> (<code>output/Label_full.json</code>):
|
|
670
|
+
</summary>
|
|
671
|
+
|
|
672
|
+
Command:
|
|
673
|
+
|
|
674
|
+
```bash
|
|
675
|
+
./dist/cli.js toLabelStudio ./tmp
|
|
676
|
+
```
|
|
677
|
+
|
|
678
|
+
Output:
|
|
679
|
+
|
|
680
|
+
```json
|
|
681
|
+
[
|
|
682
|
+
{
|
|
683
|
+
"id": 1,
|
|
684
|
+
"annotations": [
|
|
685
|
+
{
|
|
686
|
+
"id": 1,
|
|
687
|
+
"completed_by": 1,
|
|
688
|
+
"result": [
|
|
689
|
+
{
|
|
690
|
+
"original_width": 889,
|
|
691
|
+
"original_height": 520,
|
|
692
|
+
"image_rotation": 0,
|
|
693
|
+
"value": {
|
|
694
|
+
"points": [
|
|
695
|
+
[27.44656917885264, 58.07692307692308],
|
|
696
|
+
[70.07874015748031, 58.07692307692308],
|
|
697
|
+
[70.07874015748031, 64.03846153846153],
|
|
698
|
+
[27.44656917885264, 64.03846153846153]
|
|
699
|
+
],
|
|
700
|
+
"closed": true
|
|
701
|
+
},
|
|
702
|
+
"id": "4ebb52a4-d",
|
|
703
|
+
"from_name": "poly",
|
|
704
|
+
"to_name": "image",
|
|
705
|
+
"type": "polygon",
|
|
706
|
+
"origin": "manual"
|
|
707
|
+
},
|
|
708
|
+
{
|
|
709
|
+
"original_width": 889,
|
|
710
|
+
"original_height": 520,
|
|
711
|
+
"image_rotation": 0,
|
|
712
|
+
"value": {
|
|
713
|
+
"points": [
|
|
714
|
+
[27.44656917885264, 58.07692307692308],
|
|
715
|
+
[70.07874015748031, 58.07692307692308],
|
|
716
|
+
[70.07874015748031, 64.03846153846153],
|
|
717
|
+
[27.44656917885264, 64.03846153846153]
|
|
718
|
+
],
|
|
719
|
+
"closed": true,
|
|
720
|
+
"labels": ["Text"]
|
|
721
|
+
},
|
|
722
|
+
"id": "4ebb52a4-d",
|
|
723
|
+
"from_name": "label",
|
|
724
|
+
"to_name": "image",
|
|
725
|
+
"type": "labels",
|
|
726
|
+
"origin": "manual"
|
|
727
|
+
},
|
|
728
|
+
{
|
|
729
|
+
"original_width": 889,
|
|
730
|
+
"original_height": 520,
|
|
731
|
+
"image_rotation": 0,
|
|
732
|
+
"value": {
|
|
733
|
+
"points": [
|
|
734
|
+
[27.44656917885264, 58.07692307692308],
|
|
735
|
+
[70.07874015748031, 58.07692307692308],
|
|
736
|
+
[70.07874015748031, 64.03846153846153],
|
|
737
|
+
[27.44656917885264, 64.03846153846153]
|
|
738
|
+
],
|
|
739
|
+
"closed": true,
|
|
740
|
+
"text": ["ACUTE CORONARY SYNDROME"]
|
|
741
|
+
},
|
|
742
|
+
"id": "4ebb52a4-d",
|
|
743
|
+
"from_name": "transcription",
|
|
744
|
+
"to_name": "image",
|
|
745
|
+
"type": "textarea",
|
|
746
|
+
"origin": "manual"
|
|
747
|
+
},
|
|
748
|
+
{
|
|
749
|
+
"original_width": 889,
|
|
750
|
+
"original_height": 520,
|
|
751
|
+
"image_rotation": 0,
|
|
752
|
+
"value": {
|
|
753
|
+
"points": [
|
|
754
|
+
[27.559055118110237, 64.8076923076923],
|
|
755
|
+
[54.44342992587774, 64.8076923076923],
|
|
756
|
+
[54.44342992587774, 69.23073051454536],
|
|
757
|
+
[27.559055118110237, 69.23073051454536]
|
|
758
|
+
],
|
|
759
|
+
"closed": true
|
|
760
|
+
},
|
|
761
|
+
"id": "06aa0669-d",
|
|
762
|
+
"from_name": "poly",
|
|
763
|
+
"to_name": "image",
|
|
764
|
+
"type": "polygon",
|
|
765
|
+
"origin": "manual"
|
|
766
|
+
},
|
|
767
|
+
{
|
|
768
|
+
"original_width": 889,
|
|
769
|
+
"original_height": 520,
|
|
770
|
+
"image_rotation": 0,
|
|
771
|
+
"value": {
|
|
772
|
+
"points": [
|
|
773
|
+
[27.559055118110237, 64.8076923076923],
|
|
774
|
+
[54.44342992587774, 64.8076923076923],
|
|
775
|
+
[54.44342992587774, 69.23073051454536],
|
|
776
|
+
[27.559055118110237, 69.23073051454536]
|
|
777
|
+
],
|
|
778
|
+
"closed": true,
|
|
779
|
+
"labels": ["Text"]
|
|
780
|
+
},
|
|
781
|
+
"id": "06aa0669-d",
|
|
782
|
+
"from_name": "label",
|
|
783
|
+
"to_name": "image",
|
|
784
|
+
"type": "labels",
|
|
785
|
+
"origin": "manual"
|
|
786
|
+
},
|
|
787
|
+
{
|
|
788
|
+
"original_width": 889,
|
|
789
|
+
"original_height": 520,
|
|
790
|
+
"image_rotation": 0,
|
|
791
|
+
"value": {
|
|
792
|
+
"points": [
|
|
793
|
+
[27.559055118110237, 64.8076923076923],
|
|
794
|
+
[54.44342992587774, 64.8076923076923],
|
|
795
|
+
[54.44342992587774, 69.23073051454536],
|
|
796
|
+
[27.559055118110237, 69.23073051454536]
|
|
797
|
+
],
|
|
798
|
+
"closed": true,
|
|
799
|
+
"text": ["UNSTABLE ANGINA"]
|
|
800
|
+
},
|
|
801
|
+
"id": "06aa0669-d",
|
|
802
|
+
"from_name": "transcription",
|
|
803
|
+
"to_name": "image",
|
|
804
|
+
"type": "textarea",
|
|
805
|
+
"origin": "manual"
|
|
806
|
+
}
|
|
807
|
+
],
|
|
808
|
+
"was_cancelled": false,
|
|
809
|
+
"ground_truth": false,
|
|
810
|
+
"created_at": "2026-01-07T04:16:31.329Z",
|
|
811
|
+
"updated_at": "2026-01-07T04:16:31.329Z",
|
|
812
|
+
"draft_created_at": "2026-01-07T04:16:31.329Z",
|
|
813
|
+
"lead_time": 0,
|
|
814
|
+
"prediction": {},
|
|
815
|
+
"result_count": 6,
|
|
816
|
+
"unique_id": "b471a896-b002-4b52-b3a4-36f810c3ca16",
|
|
817
|
+
"import_id": null,
|
|
818
|
+
"last_action": null,
|
|
819
|
+
"bulk_created": false,
|
|
820
|
+
"task": 1,
|
|
821
|
+
"project": 1,
|
|
822
|
+
"updated_by": 1,
|
|
823
|
+
"parent_prediction": null,
|
|
824
|
+
"parent_annotation": null,
|
|
825
|
+
"last_created_by": null
|
|
826
|
+
}
|
|
827
|
+
],
|
|
828
|
+
"file_upload": "5b1e3483-example.jpg",
|
|
829
|
+
"drafts": [],
|
|
830
|
+
"predictions": [],
|
|
831
|
+
"data": {
|
|
832
|
+
"ocr": "http://localhost:8081/output/5b1e3483-example.jpg"
|
|
833
|
+
},
|
|
834
|
+
"meta": {},
|
|
835
|
+
"created_at": "2026-01-07T04:16:31.329Z",
|
|
836
|
+
"updated_at": "2026-01-07T04:16:31.329Z",
|
|
837
|
+
"allow_skip": false,
|
|
838
|
+
"inner_id": 1,
|
|
839
|
+
"total_annotations": 1,
|
|
840
|
+
"cancelled_annotations": 0,
|
|
841
|
+
"total_predictions": 0,
|
|
842
|
+
"comment_count": 0,
|
|
843
|
+
"unresolved_comment_count": 0,
|
|
844
|
+
"last_comment_updated_at": null,
|
|
845
|
+
"project": 1,
|
|
846
|
+
"updated_by": 1,
|
|
847
|
+
"comment_authors": []
|
|
848
|
+
}
|
|
849
|
+
]
|
|
850
|
+
```
|
|
851
|
+
|
|
852
|
+
</details>
|
|
853
|
+
|
|
854
|
+
> [!IMPORTANT]
|
|
855
|
+
> As you can see, after converting from Label Studio to PPOCRLabelv2 and then
|
|
856
|
+
> back to Label Studio, the positions of the bounding boxes have slight
|
|
857
|
+
> differences due to the conversion process. This may affect the accuracy of the
|
|
858
|
+
> annotations, especially if precise bounding box locations are critical for your
|
|
859
|
+
> application.
|
|
860
|
+
|
|
301
861
|
<!-- Roadmap -->
|
|
302
862
|
|
|
303
863
|
## :compass: Roadmap
|