label-studio-converter 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1076 -53
- package/dist/bash-complete.cjs +1285 -530
- package/dist/bash-complete.cjs.map +1 -1
- package/dist/bash-complete.js +1278 -524
- package/dist/bash-complete.js.map +1 -1
- package/dist/cli.cjs +1284 -529
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +1278 -524
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +864 -56
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +136 -17
- package/dist/index.d.ts +136 -17
- package/dist/index.js +849 -55
- package/dist/index.js.map +1 -1
- package/package.json +6 -3
package/README.md
CHANGED
|
@@ -16,14 +16,17 @@
|
|
|
16
16
|
|
|
17
17
|
- [Getting Started](#toolbox-getting-started)
|
|
18
18
|
- [Prerequisites](#bangbang-prerequisites)
|
|
19
|
+
- [Installation](#package-installation)
|
|
19
20
|
- [Run Locally](#running-run-locally)
|
|
20
21
|
- [Usage](#eyes-usage)
|
|
21
|
-
- [
|
|
22
|
+
- [Library Usage](#library-usage)
|
|
22
23
|
- [CLI Usage](#cli-usage)
|
|
24
|
+
- [Enhancement Features](#enhancement-features)
|
|
23
25
|
- [Using generated files with Label Studio](#using-generated-files-with-label-studio)
|
|
24
26
|
- [Interface setup](#interface-setup)
|
|
25
27
|
- [Serving annotation files locally](#serving-annotation-files-locally)
|
|
26
28
|
- [Using generated files with PPOCRLabelv2](#using-generated-files-with-ppocrlabelv2)
|
|
29
|
+
- [Conversion Margin of Error](#conversion-margin-of-error)
|
|
27
30
|
- [Roadmap](#compass-roadmap)
|
|
28
31
|
- [Contributing](#wave-contributing)
|
|
29
32
|
- [Code of Conduct](#scroll-code-of-conduct)
|
|
@@ -39,10 +42,39 @@
|
|
|
39
42
|
|
|
40
43
|
### :bangbang: Prerequisites
|
|
41
44
|
|
|
42
|
-
This project uses [pnpm](https://pnpm.io/) as package manager:
|
|
45
|
+
- This project uses [pnpm](https://pnpm.io/) as its package manager:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
npm install --global pnpm
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
- [Label Studio](https://labelstud.io/): Tested with version `1.22.0` and above.
|
|
52
|
+
|
|
53
|
+
- PPOCRLabelv2 from
|
|
54
|
+
[`PFCCLab/PPOCRLabel`](https://github.com/PFCCLab/PPOCRLabel): Tested with
|
|
55
|
+
latest commit
|
|
56
|
+
[04928bf](https://github.com/PFCCLab/PPOCRLabel/tree/04928bf015656e41ba5569877df9b0666ca90f89)
|
|
57
|
+
|
|
58
|
+
- [Node.js](https://nodejs.org/): Tested with version `22.x` and above.
|
|
59
|
+
|
|
60
|
+
<!-- Installation -->
|
|
61
|
+
|
|
62
|
+
### :package: Installation
|
|
63
|
+
|
|
64
|
+
**As a CLI tool:**
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
npm install -g label-studio-converter
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
**As a library:**
|
|
43
71
|
|
|
44
72
|
```bash
|
|
45
|
-
npm install
|
|
73
|
+
npm install label-studio-converter
|
|
74
|
+
# or
|
|
75
|
+
pnpm add label-studio-converter
|
|
76
|
+
# or
|
|
77
|
+
yarn add label-studio-converter
|
|
46
78
|
```
|
|
47
79
|
|
|
48
80
|
<!-- Run Locally -->
|
|
@@ -71,42 +103,130 @@ pnpm install
|
|
|
71
103
|
|
|
72
104
|
## :eyes: Usage
|
|
73
105
|
|
|
74
|
-
|
|
106
|
+
> [!IMPORTANT]
|
|
107
|
+
> This tool only supports conversion between PPOCRLabelv2 format and Label
|
|
108
|
+
> Studio ["OCR"
|
|
109
|
+
> template](https://labelstud.io/templates/optical_character_recognition). For
|
|
110
|
+
> setting up Label Studio for OCR tasks, please refer to the [Using generated
|
|
111
|
+
> files with Label Studio](#using-generated-files-with-label-studio) section.
|
|
112
|
+
|
|
113
|
+
> [!NOTE]
|
|
114
|
+
> **This package can be used both as a CLI tool and as a library.**
|
|
115
|
+
>
|
|
116
|
+
> - **CLI**: Run commands directly from the terminal
|
|
117
|
+
> - **Library**: Import and use functions in your TypeScript/JavaScript code
|
|
118
|
+
|
|
119
|
+
### Library Usage
|
|
120
|
+
|
|
121
|
+
**Conversion Functions:**
|
|
75
122
|
|
|
76
123
|
```ts
|
|
77
|
-
import {
|
|
124
|
+
import {
|
|
125
|
+
labelStudioToPPOCR,
|
|
126
|
+
minLabelStudioToPPOCR,
|
|
127
|
+
ppocrToLabelStudio
|
|
128
|
+
} from 'label-studio-converter';
|
|
129
|
+
|
|
130
|
+
// Convert Label Studio Full Format to PPOCRLabel
|
|
131
|
+
const fullData = [...]; // FullOCRLabelStudio type
|
|
132
|
+
const ppocrMap = await labelStudioToPPOCR(fullData, {
|
|
133
|
+
baseImageDir: 'images/ch',
|
|
134
|
+
normalizeShape: 'rectangle',
|
|
135
|
+
widthIncrement: 5,
|
|
136
|
+
heightIncrement: 5,
|
|
137
|
+
precision: 0 // integers
|
|
138
|
+
});
|
|
78
139
|
|
|
79
|
-
// Convert
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
140
|
+
// Convert Label Studio Min Format to PPOCRLabel
|
|
141
|
+
const minData = [...]; // MinOCRLabelStudio type
|
|
142
|
+
const ppocrMap2 = await minLabelStudioToPPOCR(minData, {
|
|
143
|
+
baseImageDir: 'images/ch',
|
|
144
|
+
precision: 0
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
// Convert PPOCRLabel to Label Studio
|
|
148
|
+
const ppocrData = [...]; // PPOCRLabel type
|
|
149
|
+
const labelStudioData = await ppocrToLabelStudio(ppocrData, {
|
|
150
|
+
imagePath: 'example.jpg',
|
|
88
151
|
baseServerUrl: 'http://localhost:8081',
|
|
89
|
-
|
|
90
|
-
|
|
152
|
+
inputDir: './images',
|
|
153
|
+
toFullJson: true,
|
|
154
|
+
labelName: 'Text',
|
|
155
|
+
precision: -1 // full precision
|
|
91
156
|
});
|
|
157
|
+
```
|
|
92
158
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
159
|
+
**Enhancement Functions:**
|
|
160
|
+
|
|
161
|
+
```ts
|
|
162
|
+
import {
|
|
163
|
+
enhancePPOCRLabel,
|
|
164
|
+
enhanceLabelStudioData,
|
|
165
|
+
} from 'label-studio-converter';
|
|
166
|
+
|
|
167
|
+
// Enhance PPOCRLabel data
|
|
168
|
+
const enhanced = enhancePPOCRLabel(ppocrData, {
|
|
169
|
+
sortVertical: 'top-bottom',
|
|
170
|
+
sortHorizontal: 'ltr',
|
|
171
|
+
normalizeShape: 'rectangle',
|
|
172
|
+
widthIncrement: 10,
|
|
173
|
+
heightIncrement: 5,
|
|
174
|
+
precision: 0,
|
|
101
175
|
});
|
|
176
|
+
|
|
177
|
+
// Enhance Label Studio data (Full or Min format)
|
|
178
|
+
const enhancedLS = await enhanceLabelStudioData(
|
|
179
|
+
labelStudioData,
|
|
180
|
+
true, // isFull: true for Full format, false for Min format
|
|
181
|
+
{
|
|
182
|
+
sortVertical: 'top-bottom',
|
|
183
|
+
normalizeShape: 'rectangle',
|
|
184
|
+
precision: 2,
|
|
185
|
+
},
|
|
186
|
+
);
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
**Utility Functions:**
|
|
190
|
+
|
|
191
|
+
```ts
|
|
192
|
+
import {
|
|
193
|
+
transformPoints,
|
|
194
|
+
normalizeShape,
|
|
195
|
+
resizeBoundingBox,
|
|
196
|
+
sortBoundingBoxes,
|
|
197
|
+
} from 'label-studio-converter';
|
|
198
|
+
|
|
199
|
+
// Transform points (normalize + resize)
|
|
200
|
+
const transformed = transformPoints(points, {
|
|
201
|
+
normalizeShape: 'rectangle',
|
|
202
|
+
widthIncrement: 10,
|
|
203
|
+
heightIncrement: 5,
|
|
204
|
+
});
|
|
205
|
+
|
|
206
|
+
// Normalize diamond shapes to rectangles
|
|
207
|
+
const normalized = normalizeShape(points);
|
|
208
|
+
|
|
209
|
+
// Resize bounding box
|
|
210
|
+
const resized = resizeBoundingBox(points, 10, 5);
|
|
211
|
+
|
|
212
|
+
// Sort bounding boxes
|
|
213
|
+
const sorted = sortBoundingBoxes(annotations, 'top-bottom', 'ltr');
|
|
102
214
|
```
|
|
103
215
|
|
|
104
216
|
### CLI Usage
|
|
105
217
|
|
|
218
|
+
**Available Commands:**
|
|
219
|
+
|
|
220
|
+
```bash
|
|
221
|
+
label-studio-converter --help
|
|
106
222
|
```
|
|
223
|
+
|
|
224
|
+
```bash
|
|
107
225
|
USAGE
|
|
108
|
-
label-studio-converter toLabelStudio [--outDir value] [--defaultLabelName value] [--toFullJson] [--createFilePerImage] [--createFileListForServing] [--fileListName value] [--baseServerUrl value] [--sortVertical value] [--sortHorizontal value] <args>...
|
|
109
|
-
label-studio-converter toPPOCR [--outDir value] [--fileName value] [--baseImageDir value] [--sortVertical value] [--sortHorizontal value] <args>...
|
|
226
|
+
label-studio-converter toLabelStudio [--outDir value] [--defaultLabelName value] [--toFullJson] [--createFilePerImage] [--createFileListForServing] [--fileListName value] [--baseServerUrl value] [--sortVertical value] [--sortHorizontal value] [--normalizeShape value] [--widthIncrement value] [--heightIncrement value] [--precision value] <args>...
|
|
227
|
+
label-studio-converter toPPOCR [--outDir value] [--fileName value] [--baseImageDir value] [--sortVertical value] [--sortHorizontal value] [--normalizeShape value] [--widthIncrement value] [--heightIncrement value] [--precision value] <args>...
|
|
228
|
+
label-studio-converter enhance-labelstudio [--outDir value] [--sortVertical value] [--sortHorizontal value] [--normalizeShape value] [--widthIncrement value] [--heightIncrement value] [--precision value] <args>...
|
|
229
|
+
label-studio-converter enhance-ppocr [--outDir value] [--sortVertical value] [--sortHorizontal value] [--normalizeShape value] [--widthIncrement value] [--heightIncrement value] [--precision value] <args>...
|
|
110
230
|
label-studio-converter --help
|
|
111
231
|
label-studio-converter --version
|
|
112
232
|
|
|
@@ -117,66 +237,319 @@ FLAGS
|
|
|
117
237
|
-v --version Print version information and exit
|
|
118
238
|
|
|
119
239
|
COMMANDS
|
|
120
|
-
toLabelStudio
|
|
121
|
-
toPPOCR
|
|
240
|
+
toLabelStudio Convert PPOCRLabel files to Label Studio format
|
|
241
|
+
toPPOCR Convert Label Studio files to PPOCRLabel format
|
|
242
|
+
enhance-labelstudio Enhance Label Studio files with sorting, normalization, and resizing
|
|
243
|
+
enhance-ppocr Enhance PPOCRLabel files with sorting, normalization, and resizing
|
|
122
244
|
```
|
|
123
245
|
|
|
124
|
-
|
|
246
|
+
**Commands:**
|
|
125
247
|
|
|
126
|
-
|
|
248
|
+
- `toLabelStudio` - Convert PPOCRLabel files to Label Studio format
|
|
249
|
+
|
|
250
|
+
```bash
|
|
127
251
|
USAGE
|
|
128
|
-
label-studio-converter toLabelStudio [--outDir value] [--defaultLabelName value] [--toFullJson] [--createFilePerImage] [--createFileListForServing] [--fileListName value] [--baseServerUrl value] [--sortVertical value] [--sortHorizontal value] <args>...
|
|
252
|
+
label-studio-converter toLabelStudio [--outDir value] [--defaultLabelName value] [--toFullJson] [--createFilePerImage] [--createFileListForServing] [--fileListName value] [--baseServerUrl value] [--sortVertical value] [--sortHorizontal value] [--normalizeShape value] [--widthIncrement value] [--heightIncrement value] [--precision value] <args>...
|
|
129
253
|
label-studio-converter toLabelStudio --help
|
|
130
254
|
|
|
131
255
|
Convert PPOCRLabel files to Label Studio format
|
|
132
256
|
|
|
133
257
|
FLAGS
|
|
134
|
-
[--outDir] Output directory. Default
|
|
135
|
-
[--defaultLabelName] Default label name for text annotations. Default
|
|
136
|
-
[--toFullJson/--noToFullJson] Convert to Full OCR Label Studio format. Default
|
|
137
|
-
[--createFilePerImage/--noCreateFilePerImage] Create a separate Label Studio JSON file for each image. Default
|
|
138
|
-
[--createFileListForServing/--noCreateFileListForServing] Create a file list for serving in Label Studio. Default
|
|
139
|
-
[--fileListName] Name of the file list for serving. Default
|
|
140
|
-
[--baseServerUrl] Base server URL for constructing image URLs in the file list. Default
|
|
258
|
+
[--outDir] Output directory. Default: "./output"
|
|
259
|
+
[--defaultLabelName] Default label name for text annotations. Default: "Text"
|
|
260
|
+
[--toFullJson/--noToFullJson] Convert to Full OCR Label Studio format. Default: "true"
|
|
261
|
+
[--createFilePerImage/--noCreateFilePerImage] Create a separate Label Studio JSON file for each image. Default: "false"
|
|
262
|
+
[--createFileListForServing/--noCreateFileListForServing] Create a file list for serving in Label Studio. Default: "true"
|
|
263
|
+
[--fileListName] Name of the file list for serving. Default: "files.txt"
|
|
264
|
+
[--baseServerUrl] Base server URL for constructing image URLs in the file list. Default: "http://localhost:8081"
|
|
141
265
|
[--sortVertical] Sort bounding boxes vertically. Options: "none" (default), "top-bottom", "bottom-top"
|
|
142
266
|
[--sortHorizontal] Sort bounding boxes horizontally. Options: "none" (default), "ltr", "rtl"
|
|
267
|
+
[--normalizeShape] Normalize diamond-like shapes to axis-aligned rectangles. Options: "none" (default), "rectangle"
|
|
268
|
+
[--widthIncrement] Increase bounding box width by this amount (in pixels). Can be negative to decrease. Default: 0
|
|
269
|
+
[--heightIncrement] Increase bounding box height by this amount (in pixels). Can be negative to decrease. Default: 0
|
|
270
|
+
[--precision] Number of decimal places for coordinates. Use -1 for full precision (no rounding). Default: -1
|
|
143
271
|
-h --help Print help information and exit
|
|
144
272
|
|
|
145
273
|
ARGUMENTS
|
|
146
274
|
args... Input directories containing PPOCRLabel files
|
|
147
275
|
```
|
|
148
276
|
|
|
149
|
-
|
|
277
|
+
- `toPPOCR` - Convert Label Studio files to PPOCRLabel format
|
|
278
|
+
|
|
279
|
+
```bash
|
|
150
280
|
USAGE
|
|
151
|
-
label-studio-converter toPPOCR [--outDir value] [--fileName value] [--baseImageDir value] [--sortVertical value] [--sortHorizontal value] <args>...
|
|
281
|
+
label-studio-converter toPPOCR [--outDir value] [--fileName value] [--baseImageDir value] [--sortVertical value] [--sortHorizontal value] [--normalizeShape value] [--widthIncrement value] [--heightIncrement value] [--precision value] <args>...
|
|
152
282
|
label-studio-converter toPPOCR --help
|
|
153
283
|
|
|
154
284
|
Convert Label Studio files to PPOCRLabel format
|
|
155
285
|
|
|
156
286
|
FLAGS
|
|
157
|
-
[--outDir]
|
|
158
|
-
[--fileName]
|
|
159
|
-
[--baseImageDir]
|
|
160
|
-
[--sortVertical]
|
|
161
|
-
[--sortHorizontal]
|
|
162
|
-
|
|
287
|
+
[--outDir] Output directory. Default: "./output"
|
|
288
|
+
[--fileName] Output PPOCR file name. Default: "Label.txt"
|
|
289
|
+
[--baseImageDir] Base directory path to prepend to image filenames in output (e.g., "ch" or "images/ch")
|
|
290
|
+
[--sortVertical] Sort bounding boxes vertically. Options: "none" (default), "top-bottom", "bottom-top"
|
|
291
|
+
[--sortHorizontal] Sort bounding boxes horizontally. Options: "none" (default), "ltr", "rtl"
|
|
292
|
+
[--normalizeShape] Normalize diamond-like shapes to axis-aligned rectangles. Options: "none" (default), "rectangle"
|
|
293
|
+
[--widthIncrement] Increase bounding box width by this amount (in pixels). Can be negative to decrease. Default: 0
|
|
294
|
+
[--heightIncrement] Increase bounding box height by this amount (in pixels). Can be negative to decrease. Default: 0
|
|
295
|
+
[--precision] Number of decimal places for coordinates. Use -1 for full precision (no rounding). Default: 0 (integers)
|
|
296
|
+
-h --help Print help information and exit
|
|
163
297
|
|
|
164
298
|
ARGUMENTS
|
|
165
299
|
args... Input directories containing Label Studio files
|
|
166
300
|
```
|
|
167
301
|
|
|
302
|
+
- `enhance-labelstudio` - Enhance Label Studio files with sorting,
|
|
303
|
+
normalization, and resizing
|
|
304
|
+
|
|
305
|
+
```bash
|
|
306
|
+
USAGE
|
|
307
|
+
label-studio-converter enhance-labelstudio [--outDir value] [--sortVertical value] [--sortHorizontal value] [--normalizeShape value] [--widthIncrement value] [--heightIncrement value] [--precision value] <args>...
|
|
308
|
+
label-studio-converter enhance-labelstudio --help
|
|
309
|
+
|
|
310
|
+
Enhance Label Studio files with sorting, normalization, and resizing
|
|
311
|
+
|
|
312
|
+
FLAGS
|
|
313
|
+
[--outDir] Output directory. Default: "./output"
|
|
314
|
+
[--sortVertical] Sort bounding boxes vertically. Options: "none" (default), "top-bottom", "bottom-top"
|
|
315
|
+
[--sortHorizontal] Sort bounding boxes horizontally. Options: "none" (default), "ltr", "rtl"
|
|
316
|
+
[--normalizeShape] Normalize diamond-like shapes to axis-aligned rectangles. Options: "none" (default), "rectangle"
|
|
317
|
+
[--widthIncrement] Increase bounding box width by this amount (in pixels). Can be negative to decrease. Default: 0
|
|
318
|
+
[--heightIncrement] Increase bounding box height by this amount (in pixels). Can be negative to decrease. Default: 0
|
|
319
|
+
[--precision] Number of decimal places for coordinates. Use -1 for full precision (no rounding). Default: -1
|
|
320
|
+
-h --help Print help information and exit
|
|
321
|
+
|
|
322
|
+
ARGUMENTS
|
|
323
|
+
args... Input directories containing Label Studio JSON files
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
- `enhance-ppocr` - Enhance PPOCRLabel files with sorting, normalization, and resizing
|
|
327
|
+
|
|
328
|
+
```bash
|
|
329
|
+
USAGE
|
|
330
|
+
label-studio-converter enhance-ppocr [--outDir value] [--sortVertical value] [--sortHorizontal value] [--normalizeShape value] [--widthIncrement value] [--heightIncrement value] [--precision value] <args>...
|
|
331
|
+
label-studio-converter enhance-ppocr --help
|
|
332
|
+
|
|
333
|
+
Enhance PPOCRLabel files with sorting, normalization, and resizing
|
|
334
|
+
|
|
335
|
+
FLAGS
|
|
336
|
+
[--outDir] Output directory. Default: "./output"
|
|
337
|
+
[--sortVertical] Sort bounding boxes vertically. Options: "none" (default), "top-bottom", "bottom-top"
|
|
338
|
+
[--sortHorizontal] Sort bounding boxes horizontally. Options: "none" (default), "ltr", "rtl"
|
|
339
|
+
[--normalizeShape] Normalize diamond-like shapes to axis-aligned rectangles. Options: "none" (default), "rectangle"
|
|
340
|
+
[--widthIncrement] Increase bounding box width by this amount (in pixels). Can be negative to decrease. Default: 0
|
|
341
|
+
[--heightIncrement] Increase bounding box height by this amount (in pixels). Can be negative to decrease. Default: 0
|
|
342
|
+
[--precision] Number of decimal places for coordinates. Use -1 for full precision (no rounding). Default: 0 (integers)
|
|
343
|
+
-h --help Print help information and exit
|
|
344
|
+
|
|
345
|
+
ARGUMENTS
|
|
346
|
+
args... Input directories containing PPOCRLabel files
|
|
347
|
+
```
|
|
348
|
+
|
|
168
349
|
#### Examples
|
|
169
350
|
|
|
170
|
-
**
|
|
351
|
+
**Basic Conversions:**
|
|
171
352
|
|
|
172
353
|
```bash
|
|
173
|
-
|
|
354
|
+
# Convert PPOCRLabel files to full Label Studio format
|
|
355
|
+
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output-label-studio
|
|
356
|
+
|
|
357
|
+
# Convert Label Studio files to PPOCRLabel format
|
|
358
|
+
label-studio-converter toPPOCR ./input-label-studio --outDir ./output-ppocr
|
|
359
|
+
|
|
360
|
+
# Convert with custom output filename for PPOCR
|
|
361
|
+
label-studio-converter toPPOCR ./input-label-studio --outDir ./output-ppocr --fileName MyLabels.txt
|
|
362
|
+
|
|
363
|
+
# Convert with base image directory path
|
|
364
|
+
label-studio-converter toPPOCR ./input-label-studio --baseImageDir images/ch
|
|
174
365
|
```
|
|
175
366
|
|
|
176
|
-
|
|
367
|
+
> [!NOTE]
|
|
368
|
+
> By default, all PPOCRLabel positions are treated as **polygons** in Label Studio.
|
|
369
|
+
|
|
370
|
+
**toLabelStudio Options:**
|
|
177
371
|
|
|
178
372
|
```bash
|
|
179
|
-
|
|
373
|
+
# Create separate JSON file for each image
|
|
374
|
+
label-studio-converter toLabelStudio ./input-ppocr \
|
|
375
|
+
--outDir ./output \
|
|
376
|
+
--createFilePerImage
|
|
377
|
+
|
|
378
|
+
# Specify custom label name (default is "Text")
|
|
379
|
+
label-studio-converter toLabelStudio ./input-ppocr \
|
|
380
|
+
--outDir ./output \
|
|
381
|
+
--defaultLabelName Handwriting
|
|
382
|
+
|
|
383
|
+
# Convert to minimal format (without serving support)
|
|
384
|
+
label-studio-converter toLabelStudio ./input-ppocr \
|
|
385
|
+
--outDir ./output \
|
|
386
|
+
--noToFullJson
|
|
387
|
+
|
|
388
|
+
# Disable file list creation for serving
|
|
389
|
+
label-studio-converter toLabelStudio ./input-ppocr \
|
|
390
|
+
--outDir ./output \
|
|
391
|
+
--noCreateFileListForServing
|
|
392
|
+
|
|
393
|
+
# Custom file list name and server URL
|
|
394
|
+
label-studio-converter toLabelStudio ./input-ppocr \
|
|
395
|
+
--outDir ./output \
|
|
396
|
+
--fileListName my-images.txt \
|
|
397
|
+
--baseServerUrl http://192.168.1.100:8080
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
**toPPOCR Options:**
|
|
401
|
+
|
|
402
|
+
```bash
|
|
403
|
+
# Basic conversion with output directory
|
|
404
|
+
label-studio-converter toPPOCR ./input-label-studio \
|
|
405
|
+
--outDir ./output
|
|
406
|
+
|
|
407
|
+
# Custom output filename
|
|
408
|
+
label-studio-converter toPPOCR ./input-label-studio \
|
|
409
|
+
--outDir ./output \
|
|
410
|
+
--fileName CustomLabel.txt
|
|
411
|
+
|
|
412
|
+
# Add base image directory to paths
|
|
413
|
+
label-studio-converter toPPOCR ./input-label-studio \
|
|
414
|
+
--outDir ./output \
|
|
415
|
+
--baseImageDir dataset/images
|
|
416
|
+
```
|
|
417
|
+
|
|
418
|
+
### Enhancement Features
|
|
419
|
+
|
|
420
|
+
The tool provides powerful enhancement capabilities that can be used standalone or integrated with conversion:
|
|
421
|
+
|
|
422
|
+
**Enhance PPOCRLabel files:**
|
|
423
|
+
|
|
424
|
+
```bash
|
|
425
|
+
# Sort annotations from top to bottom, left to right
|
|
426
|
+
label-studio-converter enhance-ppocr ./data --sortVertical top-bottom --sortHorizontal ltr
|
|
427
|
+
|
|
428
|
+
# Normalize diamond shapes to rectangles and resize
|
|
429
|
+
label-studio-converter enhance-ppocr ./data --normalizeShape rectangle --widthIncrement 10 --heightIncrement 5
|
|
430
|
+
|
|
431
|
+
# Apply all enhancements
|
|
432
|
+
label-studio-converter enhance-ppocr ./data \
|
|
433
|
+
--sortVertical top-bottom \
|
|
434
|
+
--sortHorizontal ltr \
|
|
435
|
+
--normalizeShape rectangle \
|
|
436
|
+
--widthIncrement 5 \
|
|
437
|
+
--heightIncrement 5 \
|
|
438
|
+
--precision 0
|
|
439
|
+
```
|
|
440
|
+
|
|
441
|
+
**Enhance Label Studio files:**
|
|
442
|
+
|
|
443
|
+
```bash
|
|
444
|
+
# Sort and normalize Label Studio annotations
|
|
445
|
+
label-studio-converter enhance-labelstudio ./data \
|
|
446
|
+
--sortVertical top-bottom \
|
|
447
|
+
--normalizeShape rectangle \
|
|
448
|
+
--precision 2
|
|
449
|
+
|
|
450
|
+
# Works with both Full and Min formats automatically
|
|
451
|
+
label-studio-converter enhance-labelstudio ./label-studio-files --outDir ./enhanced
|
|
452
|
+
```
|
|
453
|
+
|
|
454
|
+
**Enhancement Options:**
|
|
455
|
+
|
|
456
|
+
- `--sortVertical`: Sort bounding boxes vertically
|
|
457
|
+
- `none` (default): No sorting
|
|
458
|
+
- `top-bottom`: Sort from top to bottom
|
|
459
|
+
- `bottom-top`: Sort from bottom to top
|
|
460
|
+
- Example:
|
|
461
|
+
```bash
|
|
462
|
+
# Sort annotations from top to bottom
|
|
463
|
+
label-studio-converter enhance-ppocr ./data --sortVertical top-bottom
|
|
464
|
+
```
|
|
465
|
+
|
|
466
|
+
- `--sortHorizontal`: Sort bounding boxes horizontally
|
|
467
|
+
- `none` (default): No sorting
|
|
468
|
+
- `ltr`: Sort left to right (useful for English, most European languages)
|
|
469
|
+
- `rtl`: Sort right to left (useful for Arabic, Hebrew)
|
|
470
|
+
- Example:
|
|
471
|
+
|
|
472
|
+
```bash
|
|
473
|
+
# Sort annotations left to right
|
|
474
|
+
label-studio-converter enhance-ppocr ./data --sortHorizontal ltr
|
|
475
|
+
|
|
476
|
+
# Sort annotations right to left
|
|
477
|
+
label-studio-converter enhance-ppocr ./data --sortHorizontal rtl
|
|
478
|
+
```
|
|
479
|
+
|
|
480
|
+
- `--normalizeShape`: Normalize shapes
|
|
481
|
+
- `none` (default): Keep original shape
|
|
482
|
+
- `rectangle`: Convert diamond-like or rotated shapes to axis-aligned rectangles
|
|
483
|
+
- Example:
|
|
484
|
+
```bash
|
|
485
|
+
# Convert irregular shapes to clean rectangles
|
|
486
|
+
label-studio-converter enhance-ppocr ./data --normalizeShape rectangle
|
|
487
|
+
```
|
|
488
|
+
|
|
489
|
+
- `--widthIncrement`: Increase/decrease width (pixels, can be negative)
|
|
490
|
+
- Default: `0`
|
|
491
|
+
- Examples:
|
|
492
|
+
|
|
493
|
+
```bash
|
|
494
|
+
# Increase width by 10 pixels
|
|
495
|
+
label-studio-converter enhance-ppocr ./data --widthIncrement 10
|
|
496
|
+
|
|
497
|
+
# Decrease width by 5 pixels
|
|
498
|
+
label-studio-converter enhance-ppocr ./data --widthIncrement -5
|
|
499
|
+
```
|
|
500
|
+
|
|
501
|
+
- `--heightIncrement`: Increase/decrease height (pixels, can be negative)
|
|
502
|
+
- Default: `0`
|
|
503
|
+
- Examples:
|
|
504
|
+
|
|
505
|
+
```bash
|
|
506
|
+
# Increase height by 15 pixels
|
|
507
|
+
label-studio-converter enhance-ppocr ./data --heightIncrement 15
|
|
508
|
+
|
|
509
|
+
# Decrease height by 3 pixels
|
|
510
|
+
label-studio-converter enhance-ppocr ./data --heightIncrement -3
|
|
511
|
+
```
|
|
512
|
+
|
|
513
|
+
- `--precision`: Control the number of decimal places for coordinate values
|
|
514
|
+
- `-1`: Full precision - no rounding, keeps all decimal places (default for Label Studio output)
|
|
515
|
+
- Example output: `27.44656917885264`
|
|
516
|
+
- `0`: Round to integers (default for PPOCR output)
|
|
517
|
+
- Example output: `27`
|
|
518
|
+
- `1`: Round to 1 decimal place
|
|
519
|
+
- Example output: `27.4`
|
|
520
|
+
- `2`: Round to 2 decimal places
|
|
521
|
+
- Example output: `27.45`
|
|
522
|
+
- Any positive integer for that many decimal places
|
|
523
|
+
- Examples:
|
|
524
|
+
|
|
525
|
+
```bash
|
|
526
|
+
# Use full precision
|
|
527
|
+
label-studio-converter toLabelStudio ./data --precision -1
|
|
528
|
+
|
|
529
|
+
# Use integer coordinates
|
|
530
|
+
label-studio-converter toPPOCR ./data --precision 0
|
|
531
|
+
|
|
532
|
+
# Use 2 decimal places
|
|
533
|
+
label-studio-converter enhance-labelstudio ./data --precision 2
|
|
534
|
+
```
|
|
535
|
+
|
|
536
|
+
**Conversion with Enhancement:**
|
|
537
|
+
|
|
538
|
+
All enhancement options are available in conversion commands:
|
|
539
|
+
|
|
540
|
+
```bash
|
|
541
|
+
# Convert with enhancements applied during conversion
|
|
542
|
+
label-studio-converter toLabelStudio ./input-ppocr \
|
|
543
|
+
--outDir ./output \
|
|
544
|
+
--sortVertical top-bottom \
|
|
545
|
+
--normalizeShape rectangle \
|
|
546
|
+
--widthIncrement 10
|
|
547
|
+
|
|
548
|
+
label-studio-converter toPPOCR ./input-label-studio \
|
|
549
|
+
--outDir ./output \
|
|
550
|
+
--sortVertical top-bottom \
|
|
551
|
+
--sortHorizontal ltr \
|
|
552
|
+
--normalizeShape rectangle
|
|
180
553
|
```
|
|
181
554
|
|
|
182
555
|
**Convert PPOCRLabel files to Label Studio format with one file per image:**
|
|
@@ -191,11 +564,134 @@ label-studio-converter toLabelStudio ./input-ppocr --outDir ./output-label-studi
|
|
|
191
564
|
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output-label-studio --defaultLabelName Text --noToFullJson --sortVertical none --sortHorizontal none
|
|
192
565
|
```
|
|
193
566
|
|
|
567
|
+
> [!IMPORTANT]
|
|
568
|
+
> Minimal Label Studio format cannot be used for serving in Label Studio, as it
|
|
569
|
+
> lacks necessary fields such as `id` and `data`. You can therefore only use the minimal
|
|
570
|
+
> format for conversion back to PPOCRLabelv2 format or other purposes.
|
|
571
|
+
|
|
572
|
+
**Shape Normalization**
|
|
573
|
+
|
|
574
|
+
Convert diamond-like or irregular quadrilateral shapes to axis-aligned
|
|
575
|
+
rectangles. This is useful when your annotations have irregular shapes that you
|
|
576
|
+
want to normalize to clean, horizontal/vertical bounding boxes:
|
|
577
|
+
|
|
578
|
+
```bash
|
|
579
|
+
# Convert to axis-aligned rectangles
|
|
580
|
+
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output --normalizeShape rectangle
|
|
581
|
+
|
|
582
|
+
# For toPPOCR command
|
|
583
|
+
label-studio-converter toPPOCR ./input-label-studio --outDir ./output --normalizeShape rectangle
|
|
584
|
+
```
|
|
585
|
+
|
|
586
|
+
<details>
|
|
587
|
+
<summary>
|
|
588
|
+
<b>Before normalization</b> (diamond-like shapes):
|
|
589
|
+
</summary>
|
|
590
|
+
|
|
591
|
+

|
|
592
|
+
|
|
593
|
+
</details>
|
|
594
|
+
|
|
595
|
+
<details>
|
|
596
|
+
<summary>
|
|
597
|
+
<b>After normalization</b> (axis-aligned rectangles):
|
|
598
|
+
</summary>
|
|
599
|
+
|
|
600
|
+
Command:
|
|
601
|
+
|
|
602
|
+
```bash
|
|
603
|
+
./dist/cli.js toPPOCR ./tmp --baseImageDir output --normalizeShape rectangle
|
|
604
|
+
```
|
|
605
|
+
|
|
606
|
+

|
|
607
|
+
|
|
608
|
+
</details>
|
|
609
|
+
|
|
610
|
+
<details>
|
|
611
|
+
<summary>
|
|
612
|
+
<b>Before normalization</b> (diamond-like vertical shapes):
|
|
613
|
+
</summary>
|
|
614
|
+
|
|
615
|
+

|
|
616
|
+
|
|
617
|
+
</details>
|
|
618
|
+
|
|
619
|
+
<details>
|
|
620
|
+
<summary>
|
|
621
|
+
<b>After normalization</b> (axis-aligned vertical rectangles):
|
|
622
|
+
</summary>
|
|
623
|
+
|
|
624
|
+
Command:
|
|
625
|
+
|
|
626
|
+
```bash
|
|
627
|
+
./dist/cli.js toPPOCR ./tmp --baseImageDir output --normalizeShape rectangle
|
|
628
|
+
```
|
|
629
|
+
|
|
630
|
+

|
|
631
|
+
|
|
632
|
+
</details>
|
|
633
|
+
|
|
634
|
+
**Bounding Box Resizing**
|
|
635
|
+
|
|
636
|
+
Increase or decrease bounding box dimensions while keeping them centered. This
|
|
637
|
+
is useful for adjusting annotation margins:
|
|
638
|
+
|
|
639
|
+
```bash
|
|
640
|
+
# Increase width by 10 pixels and height by 20 pixels
|
|
641
|
+
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output --widthIncrement 10 --heightIncrement 20
|
|
642
|
+
|
|
643
|
+
# Decrease width by 5 pixels (negative increment)
|
|
644
|
+
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output --widthIncrement -5
|
|
645
|
+
|
|
646
|
+
# Works with toPPOCR as well
|
|
647
|
+
label-studio-converter toPPOCR ./input-label-studio --outDir ./output --widthIncrement 10 --heightIncrement 10
|
|
648
|
+
```
|
|
649
|
+
|
|
650
|
+
**Combining Features**
|
|
651
|
+
|
|
652
|
+
You can combine shape normalization and resizing:
|
|
653
|
+
|
|
654
|
+
```bash
|
|
655
|
+
# Normalize to rectangle and increase size
|
|
656
|
+
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output --normalizeShape rectangle --widthIncrement 5 --heightIncrement 5
|
|
657
|
+
|
|
658
|
+
# Also works with sorting
|
|
659
|
+
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output --normalizeShape rectangle --widthIncrement 10 --sortVertical top-bottom --sortHorizontal ltr
|
|
660
|
+
```
|
|
661
|
+
|
|
662
|
+
**Number Precision Control**
|
|
663
|
+
|
|
664
|
+
Control the precision of coordinate values in the output. This is useful for
|
|
665
|
+
matching format expectations or reducing file size:
|
|
666
|
+
|
|
667
|
+
```bash
|
|
668
|
+
# Convert to Label Studio with full precision (default: -1)
|
|
669
|
+
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output --precision -1
|
|
670
|
+
|
|
671
|
+
# Convert to PPOCR with integer coordinates (default: 0)
|
|
672
|
+
label-studio-converter toPPOCR ./input-label-studio --outDir ./output --precision 0
|
|
673
|
+
|
|
674
|
+
# Use 2 decimal places for more compact but still precise coordinates
|
|
675
|
+
label-studio-converter toLabelStudio ./input-ppocr --outDir ./output --precision 2
|
|
676
|
+
```
|
|
677
|
+
|
|
678
|
+
Precision values:
|
|
679
|
+
|
|
680
|
+
- `-1`: Full floating-point precision (default for Label Studio output)
|
|
681
|
+
- `0`: Round to integers (default for PPOCR output)
|
|
682
|
+
- `1+`: Round to specified number of decimal places
|
|
683
|
+
|
|
684
|
+
> [!NOTE]
|
|
685
|
+
> The default precision matches typical format conventions: Label Studio uses
|
|
686
|
+
> full precision for percentage-based coordinates, while PPOCR format typically
|
|
687
|
+
> uses integer pixel coordinates.
|
|
688
|
+
|
|
194
689
|
### Using generated files with Label Studio
|
|
195
690
|
|
|
196
691
|
#### Interface setup
|
|
197
692
|
|
|
198
|
-
When creating a new labeling project in Label Studio, choose the "OCR"
|
|
693
|
+
When creating a new labeling project in Label Studio, choose the ["OCR"
|
|
694
|
+
template](https://labelstud.io/templates/optical_character_recognition).
|
|
199
695
|
This will set up the appropriate interface for text recognition tasks.
|
|
200
696
|
|
|
201
697
|
This project uses the following Label Studio interface configuration:
|
|
@@ -216,8 +712,8 @@ This project uses the following Label Studio interface configuration:
|
|
|
216
712
|
This setup includes:
|
|
217
713
|
|
|
218
714
|
- An `Image` tag to display the image to be annotated.
|
|
219
|
-
- A `Labels` tag with two label options:
|
|
220
|
-
all annotations will be labeled as
|
|
715
|
+
- A `Labels` tag with two label options: `Text` and `Handwriting`. By default,
|
|
716
|
+
all annotations will be labeled as `Text`. You can modify this based on your
|
|
221
717
|
needs.
|
|
222
718
|
- A `Rectangle` tag to allow annotators to draw bounding boxes around text regions.
|
|
223
719
|
- A `Polygon` tag to allow annotators to draw polygons around text regions.
|
|
@@ -298,6 +794,533 @@ files in the dataset directories.
|
|
|
298
794
|
If the images are put in a different directory, make sure to update the image
|
|
299
795
|
directory path by specifying the `baseImageDir` option during conversion.
|
|
300
796
|
|
|
797
|
+
### Conversion Margin of Error
|
|
798
|
+
|
|
799
|
+
During conversion between the two formats, PPOCRLabelv2 and Label Studio,
|
|
800
|
+
small margins of error may occur due to differences in how each format handles certain
|
|
801
|
+
aspects of the data.
|
|
802
|
+
|
|
803
|
+
**Convert from Label Studio to PPOCRLabelv2**
|
|
804
|
+
|
|
805
|
+
Label Studio annotation:
|
|
806
|
+
|
|
807
|
+

|
|
808
|
+
|
|
809
|
+
Generated PPOCRLabelv2 annotation:
|
|
810
|
+
|
|
811
|
+

|
|
812
|
+
|
|
813
|
+
Converted back to Label Studio annotation:
|
|
814
|
+
|
|
815
|
+

|
|
816
|
+
|
|
817
|
+
<details>
|
|
818
|
+
<summary>
|
|
819
|
+
<b>Original data</b>:
|
|
820
|
+
</summary>
|
|
821
|
+
|
|
822
|
+
```json
|
|
823
|
+
[
|
|
824
|
+
{
|
|
825
|
+
"id": 1,
|
|
826
|
+
"annotations": [
|
|
827
|
+
{
|
|
828
|
+
"id": 201,
|
|
829
|
+
"completed_by": 1,
|
|
830
|
+
"result": [
|
|
831
|
+
{
|
|
832
|
+
"original_width": 889,
|
|
833
|
+
"original_height": 520,
|
|
834
|
+
"image_rotation": 0,
|
|
835
|
+
"value": {
|
|
836
|
+
"x": 27.691012033297714,
|
|
837
|
+
"y": 58.08133472367049,
|
|
838
|
+
"width": 42.14645223570203,
|
|
839
|
+
"height": 5.4223149113660085,
|
|
840
|
+
"rotation": 0
|
|
841
|
+
},
|
|
842
|
+
"id": "pa6F68vZpa",
|
|
843
|
+
"from_name": "bbox",
|
|
844
|
+
"to_name": "image",
|
|
845
|
+
"type": "rectangle",
|
|
846
|
+
"origin": "manual"
|
|
847
|
+
},
|
|
848
|
+
{
|
|
849
|
+
"original_width": 889,
|
|
850
|
+
"original_height": 520,
|
|
851
|
+
"image_rotation": 0,
|
|
852
|
+
"value": {
|
|
853
|
+
"x": 27.691012033297714,
|
|
854
|
+
"y": 58.08133472367049,
|
|
855
|
+
"width": 42.14645223570203,
|
|
856
|
+
"height": 5.4223149113660085,
|
|
857
|
+
"rotation": 0,
|
|
858
|
+
"labels": ["Text"]
|
|
859
|
+
},
|
|
860
|
+
"id": "pa6F68vZpa",
|
|
861
|
+
"from_name": "label",
|
|
862
|
+
"to_name": "image",
|
|
863
|
+
"type": "labels",
|
|
864
|
+
"origin": "manual"
|
|
865
|
+
},
|
|
866
|
+
{
|
|
867
|
+
"original_width": 889,
|
|
868
|
+
"original_height": 520,
|
|
869
|
+
"image_rotation": 0,
|
|
870
|
+
"value": {
|
|
871
|
+
"x": 27.691012033297714,
|
|
872
|
+
"y": 58.08133472367049,
|
|
873
|
+
"width": 42.14645223570203,
|
|
874
|
+
"height": 5.4223149113660085,
|
|
875
|
+
"rotation": 0,
|
|
876
|
+
"text": ["ACUTE CORONARY SYNDROME"]
|
|
877
|
+
},
|
|
878
|
+
"id": "pa6F68vZpa",
|
|
879
|
+
"from_name": "transcription",
|
|
880
|
+
"to_name": "image",
|
|
881
|
+
"type": "textarea",
|
|
882
|
+
"origin": "manual"
|
|
883
|
+
},
|
|
884
|
+
{
|
|
885
|
+
"original_width": 889,
|
|
886
|
+
"original_height": 520,
|
|
887
|
+
"image_rotation": 0,
|
|
888
|
+
"value": {
|
|
889
|
+
"x": 27.569025196146622,
|
|
890
|
+
"y": 70.38581856100105,
|
|
891
|
+
"width": 49.03965680633165,
|
|
892
|
+
"height": 4.788140385599174,
|
|
893
|
+
"rotation": 359.64368755661553
|
|
894
|
+
},
|
|
895
|
+
"id": "iIfXbvxhFx",
|
|
896
|
+
"from_name": "bbox",
|
|
897
|
+
"to_name": "image",
|
|
898
|
+
"type": "rectangle",
|
|
899
|
+
"origin": "manual"
|
|
900
|
+
},
|
|
901
|
+
{
|
|
902
|
+
"original_width": 889,
|
|
903
|
+
"original_height": 520,
|
|
904
|
+
"image_rotation": 0,
|
|
905
|
+
"value": {
|
|
906
|
+
"x": 27.569025196146622,
|
|
907
|
+
"y": 70.38581856100105,
|
|
908
|
+
"width": 49.03965680633165,
|
|
909
|
+
"height": 4.788140385599174,
|
|
910
|
+
"rotation": 359.64368755661553,
|
|
911
|
+
"labels": ["Text"]
|
|
912
|
+
},
|
|
913
|
+
"id": "iIfXbvxhFx",
|
|
914
|
+
"from_name": "label",
|
|
915
|
+
"to_name": "image",
|
|
916
|
+
"type": "labels",
|
|
917
|
+
"origin": "manual"
|
|
918
|
+
},
|
|
919
|
+
{
|
|
920
|
+
"original_width": 889,
|
|
921
|
+
"original_height": 520,
|
|
922
|
+
"image_rotation": 0,
|
|
923
|
+
"value": {
|
|
924
|
+
"x": 27.569025196146622,
|
|
925
|
+
"y": 70.38581856100105,
|
|
926
|
+
"width": 49.03965680633165,
|
|
927
|
+
"height": 4.788140385599174,
|
|
928
|
+
"rotation": 359.64368755661553,
|
|
929
|
+
"text": ["MILD CORONARY ARTERY DISEASE"]
|
|
930
|
+
},
|
|
931
|
+
"id": "iIfXbvxhFx",
|
|
932
|
+
"from_name": "transcription",
|
|
933
|
+
"to_name": "image",
|
|
934
|
+
"type": "textarea",
|
|
935
|
+
"origin": "manual"
|
|
936
|
+
},
|
|
937
|
+
{
|
|
938
|
+
"original_width": 889,
|
|
939
|
+
"original_height": 520,
|
|
940
|
+
"image_rotation": 0,
|
|
941
|
+
"value": {
|
|
942
|
+
"points": [
|
|
943
|
+
[27.630018614722168, 81.85610010427528],
|
|
944
|
+
[61.66434617987663, 80.8133472367049],
|
|
945
|
+
[61.969313272754356, 85.71428571428571],
|
|
946
|
+
[28.239952800477624, 86.44421272158499]
|
|
947
|
+
],
|
|
948
|
+
"closed": true
|
|
949
|
+
},
|
|
950
|
+
"id": "mpqixNR8uh",
|
|
951
|
+
"from_name": "poly",
|
|
952
|
+
"to_name": "image",
|
|
953
|
+
"type": "polygon",
|
|
954
|
+
"origin": "manual"
|
|
955
|
+
},
|
|
956
|
+
{
|
|
957
|
+
"original_width": 889,
|
|
958
|
+
"original_height": 520,
|
|
959
|
+
"image_rotation": 0,
|
|
960
|
+
"value": {
|
|
961
|
+
"points": [
|
|
962
|
+
[27.630018614722168, 81.85610010427528],
|
|
963
|
+
[61.66434617987663, 80.8133472367049],
|
|
964
|
+
[61.969313272754356, 85.71428571428571],
|
|
965
|
+
[28.239952800477624, 86.44421272158499]
|
|
966
|
+
],
|
|
967
|
+
"closed": true,
|
|
968
|
+
"labels": ["Handwriting"]
|
|
969
|
+
},
|
|
970
|
+
"id": "mpqixNR8uh",
|
|
971
|
+
"from_name": "label",
|
|
972
|
+
"to_name": "image",
|
|
973
|
+
"type": "labels",
|
|
974
|
+
"origin": "manual"
|
|
975
|
+
},
|
|
976
|
+
{
|
|
977
|
+
"original_width": 889,
|
|
978
|
+
"original_height": 520,
|
|
979
|
+
"image_rotation": 0,
|
|
980
|
+
"value": {
|
|
981
|
+
"points": [
|
|
982
|
+
[27.630018614722168, 81.85610010427528],
|
|
983
|
+
[61.66434617987663, 80.8133472367049],
|
|
984
|
+
[61.969313272754356, 85.71428571428571],
|
|
985
|
+
[28.239952800477624, 86.44421272158499]
|
|
986
|
+
],
|
|
987
|
+
"closed": true,
|
|
988
|
+
"text": ["MEDICAL MANAGEMENT"]
|
|
989
|
+
},
|
|
990
|
+
"id": "mpqixNR8uh",
|
|
991
|
+
"from_name": "transcription",
|
|
992
|
+
"to_name": "image",
|
|
993
|
+
"type": "textarea",
|
|
994
|
+
"origin": "manual"
|
|
995
|
+
}
|
|
996
|
+
],
|
|
997
|
+
"was_cancelled": false,
|
|
998
|
+
"ground_truth": false,
|
|
999
|
+
"created_at": "2026-01-07T03:14:39.424067Z",
|
|
1000
|
+
"updated_at": "2026-01-10T03:21:09.833576Z",
|
|
1001
|
+
"draft_created_at": "2026-01-07T03:14:04.596361Z",
|
|
1002
|
+
"lead_time": 2686.9700000000003,
|
|
1003
|
+
"prediction": {},
|
|
1004
|
+
"result_count": 3,
|
|
1005
|
+
"unique_id": "7e8c79f1-49ce-471c-8b26-8b8c6f9c3401",
|
|
1006
|
+
"import_id": null,
|
|
1007
|
+
"last_action": null,
|
|
1008
|
+
"bulk_created": false,
|
|
1009
|
+
"task": 1,
|
|
1010
|
+
"project": 2,
|
|
1011
|
+
"updated_by": 1,
|
|
1012
|
+
"parent_prediction": null,
|
|
1013
|
+
"parent_annotation": null,
|
|
1014
|
+
"last_created_by": null
|
|
1015
|
+
}
|
|
1016
|
+
],
|
|
1017
|
+
"file_upload": "5b1e3483-example.jpg",
|
|
1018
|
+
"drafts": [],
|
|
1019
|
+
"predictions": [],
|
|
1020
|
+
"data": { "ocr": "\/data\/upload\/2\/5b1e3483-example.jpg" },
|
|
1021
|
+
"meta": {},
|
|
1022
|
+
"created_at": "2026-01-07T03:13:41.175183Z",
|
|
1023
|
+
"updated_at": "2026-01-10T03:21:09.923449Z",
|
|
1024
|
+
"allow_skip": true,
|
|
1025
|
+
"inner_id": 1,
|
|
1026
|
+
"total_annotations": 1,
|
|
1027
|
+
"cancelled_annotations": 0,
|
|
1028
|
+
"total_predictions": 0,
|
|
1029
|
+
"comment_count": 0,
|
|
1030
|
+
"unresolved_comment_count": 0,
|
|
1031
|
+
"last_comment_updated_at": null,
|
|
1032
|
+
"project": 2,
|
|
1033
|
+
"updated_by": 1,
|
|
1034
|
+
"comment_authors": []
|
|
1035
|
+
}
|
|
1036
|
+
]
|
|
1037
|
+
```
|
|
1038
|
+
|
|
1039
|
+
</details>
|
|
1040
|
+
|
|
1041
|
+
<details>
|
|
1042
|
+
<summary>
|
|
1043
|
+
<b>Converted data</b>:
|
|
1044
|
+
</summary>
|
|
1045
|
+
|
|
1046
|
+
Command:
|
|
1047
|
+
|
|
1048
|
+
```bash
|
|
1049
|
+
./dist/cli.js toPPOCR ./tmp --baseImageDir output
|
|
1050
|
+
```
|
|
1051
|
+
|
|
1052
|
+
Output:
|
|
1053
|
+
|
|
1054
|
+
```
|
|
1055
|
+
output/example.jpg [{"transcription":"ACUTE CORONARY SYNDROME","points":[[246,302],[621,302],[621,330],[246,330]],"dt_score":1},{"transcription":"MILD CORONARY ARTERY DISEASE","points":[[245,366],[681,366],[681,391],[245,391]],"dt_score":1},{"transcription":"MEDICAL MANAGEMENT","points":[[246,426],[548,420],[551,446],[251,450]],"dt_score":1}]
|
|
1056
|
+
```
|
|
1057
|
+
|
|
1058
|
+
</details>
|
|
1059
|
+
|
|
1060
|
+
<details>
|
|
1061
|
+
<summary>
|
|
1062
|
+
<b>Converted back to Label Studio</b>:
|
|
1063
|
+
</summary>
|
|
1064
|
+
|
|
1065
|
+
Command:
|
|
1066
|
+
|
|
1067
|
+
```bash
|
|
1068
|
+
./dist/cli.js toLabelStudio ./tmp
|
|
1069
|
+
```
|
|
1070
|
+
|
|
1071
|
+
Output:
|
|
1072
|
+
|
|
1073
|
+
```json
|
|
1074
|
+
[
|
|
1075
|
+
[
|
|
1076
|
+
{
|
|
1077
|
+
"id": 1,
|
|
1078
|
+
"annotations": [
|
|
1079
|
+
{
|
|
1080
|
+
"id": 1,
|
|
1081
|
+
"completed_by": 1,
|
|
1082
|
+
"result": [
|
|
1083
|
+
{
|
|
1084
|
+
"original_width": 889,
|
|
1085
|
+
"original_height": 520,
|
|
1086
|
+
"image_rotation": 0,
|
|
1087
|
+
"value": {
|
|
1088
|
+
"points": [
|
|
1089
|
+
[27.671541057367826, 58.07692307692308],
|
|
1090
|
+
[69.85376827896513, 58.07692307692308],
|
|
1091
|
+
[69.85376827896513, 63.46153846153846],
|
|
1092
|
+
[27.671541057367826, 63.46153846153846]
|
|
1093
|
+
],
|
|
1094
|
+
"closed": true
|
|
1095
|
+
},
|
|
1096
|
+
"id": "fce62949-7",
|
|
1097
|
+
"from_name": "poly",
|
|
1098
|
+
"to_name": "image",
|
|
1099
|
+
"type": "polygon",
|
|
1100
|
+
"origin": "manual"
|
|
1101
|
+
},
|
|
1102
|
+
{
|
|
1103
|
+
"original_width": 889,
|
|
1104
|
+
"original_height": 520,
|
|
1105
|
+
"image_rotation": 0,
|
|
1106
|
+
"value": {
|
|
1107
|
+
"points": [
|
|
1108
|
+
[27.671541057367826, 58.07692307692308],
|
|
1109
|
+
[69.85376827896513, 58.07692307692308],
|
|
1110
|
+
[69.85376827896513, 63.46153846153846],
|
|
1111
|
+
[27.671541057367826, 63.46153846153846]
|
|
1112
|
+
],
|
|
1113
|
+
"closed": true,
|
|
1114
|
+
"labels": ["Text"]
|
|
1115
|
+
},
|
|
1116
|
+
"id": "fce62949-7",
|
|
1117
|
+
"from_name": "label",
|
|
1118
|
+
"to_name": "image",
|
|
1119
|
+
"type": "labels",
|
|
1120
|
+
"origin": "manual"
|
|
1121
|
+
},
|
|
1122
|
+
{
|
|
1123
|
+
"original_width": 889,
|
|
1124
|
+
"original_height": 520,
|
|
1125
|
+
"image_rotation": 0,
|
|
1126
|
+
"value": {
|
|
1127
|
+
"points": [
|
|
1128
|
+
[27.671541057367826, 58.07692307692308],
|
|
1129
|
+
[69.85376827896513, 58.07692307692308],
|
|
1130
|
+
[69.85376827896513, 63.46153846153846],
|
|
1131
|
+
[27.671541057367826, 63.46153846153846]
|
|
1132
|
+
],
|
|
1133
|
+
"closed": true,
|
|
1134
|
+
"text": ["ACUTE CORONARY SYNDROME"]
|
|
1135
|
+
},
|
|
1136
|
+
"id": "fce62949-7",
|
|
1137
|
+
"from_name": "transcription",
|
|
1138
|
+
"to_name": "image",
|
|
1139
|
+
"type": "textarea",
|
|
1140
|
+
"origin": "manual"
|
|
1141
|
+
},
|
|
1142
|
+
{
|
|
1143
|
+
"original_width": 889,
|
|
1144
|
+
"original_height": 520,
|
|
1145
|
+
"image_rotation": 0,
|
|
1146
|
+
"value": {
|
|
1147
|
+
"points": [
|
|
1148
|
+
[27.559055118110237, 70.38461538461539],
|
|
1149
|
+
[76.6029246344207, 70.38461538461539],
|
|
1150
|
+
[76.6029246344207, 75.1923076923077],
|
|
1151
|
+
[27.559055118110237, 75.1923076923077]
|
|
1152
|
+
],
|
|
1153
|
+
"closed": true
|
|
1154
|
+
},
|
|
1155
|
+
"id": "9d9389a6-f",
|
|
1156
|
+
"from_name": "poly",
|
|
1157
|
+
"to_name": "image",
|
|
1158
|
+
"type": "polygon",
|
|
1159
|
+
"origin": "manual"
|
|
1160
|
+
},
|
|
1161
|
+
{
|
|
1162
|
+
"original_width": 889,
|
|
1163
|
+
"original_height": 520,
|
|
1164
|
+
"image_rotation": 0,
|
|
1165
|
+
"value": {
|
|
1166
|
+
"points": [
|
|
1167
|
+
[27.559055118110237, 70.38461538461539],
|
|
1168
|
+
[76.6029246344207, 70.38461538461539],
|
|
1169
|
+
[76.6029246344207, 75.1923076923077],
|
|
1170
|
+
[27.559055118110237, 75.1923076923077]
|
|
1171
|
+
],
|
|
1172
|
+
"closed": true,
|
|
1173
|
+
"labels": ["Text"]
|
|
1174
|
+
},
|
|
1175
|
+
"id": "9d9389a6-f",
|
|
1176
|
+
"from_name": "label",
|
|
1177
|
+
"to_name": "image",
|
|
1178
|
+
"type": "labels",
|
|
1179
|
+
"origin": "manual"
|
|
1180
|
+
},
|
|
1181
|
+
{
|
|
1182
|
+
"original_width": 889,
|
|
1183
|
+
"original_height": 520,
|
|
1184
|
+
"image_rotation": 0,
|
|
1185
|
+
"value": {
|
|
1186
|
+
"points": [
|
|
1187
|
+
[27.559055118110237, 70.38461538461539],
|
|
1188
|
+
[76.6029246344207, 70.38461538461539],
|
|
1189
|
+
[76.6029246344207, 75.1923076923077],
|
|
1190
|
+
[27.559055118110237, 75.1923076923077]
|
|
1191
|
+
],
|
|
1192
|
+
"closed": true,
|
|
1193
|
+
"text": ["MILD CORONARY ARTERY DISEASE"]
|
|
1194
|
+
},
|
|
1195
|
+
"id": "9d9389a6-f",
|
|
1196
|
+
"from_name": "transcription",
|
|
1197
|
+
"to_name": "image",
|
|
1198
|
+
"type": "textarea",
|
|
1199
|
+
"origin": "manual"
|
|
1200
|
+
},
|
|
1201
|
+
{
|
|
1202
|
+
"original_width": 889,
|
|
1203
|
+
"original_height": 520,
|
|
1204
|
+
"image_rotation": 0,
|
|
1205
|
+
"value": {
|
|
1206
|
+
"points": [
|
|
1207
|
+
[27.671541057367826, 81.92307692307692],
|
|
1208
|
+
[61.64229471316085, 80.76923076923077],
|
|
1209
|
+
[61.97975253093363, 85.76923076923076],
|
|
1210
|
+
[28.23397075365579, 86.53846153846155]
|
|
1211
|
+
],
|
|
1212
|
+
"closed": true
|
|
1213
|
+
},
|
|
1214
|
+
"id": "4f2e63fc-b",
|
|
1215
|
+
"from_name": "poly",
|
|
1216
|
+
"to_name": "image",
|
|
1217
|
+
"type": "polygon",
|
|
1218
|
+
"origin": "manual"
|
|
1219
|
+
},
|
|
1220
|
+
{
|
|
1221
|
+
"original_width": 889,
|
|
1222
|
+
"original_height": 520,
|
|
1223
|
+
"image_rotation": 0,
|
|
1224
|
+
"value": {
|
|
1225
|
+
"points": [
|
|
1226
|
+
[27.671541057367826, 81.92307692307692],
|
|
1227
|
+
[61.64229471316085, 80.76923076923077],
|
|
1228
|
+
[61.97975253093363, 85.76923076923076],
|
|
1229
|
+
[28.23397075365579, 86.53846153846155]
|
|
1230
|
+
],
|
|
1231
|
+
"closed": true,
|
|
1232
|
+
"labels": ["Text"]
|
|
1233
|
+
},
|
|
1234
|
+
"id": "4f2e63fc-b",
|
|
1235
|
+
"from_name": "label",
|
|
1236
|
+
"to_name": "image",
|
|
1237
|
+
"type": "labels",
|
|
1238
|
+
"origin": "manual"
|
|
1239
|
+
},
|
|
1240
|
+
{
|
|
1241
|
+
"original_width": 889,
|
|
1242
|
+
"original_height": 520,
|
|
1243
|
+
"image_rotation": 0,
|
|
1244
|
+
"value": {
|
|
1245
|
+
"points": [
|
|
1246
|
+
[27.671541057367826, 81.92307692307692],
|
|
1247
|
+
[61.64229471316085, 80.76923076923077],
|
|
1248
|
+
[61.97975253093363, 85.76923076923076],
|
|
1249
|
+
[28.23397075365579, 86.53846153846155]
|
|
1250
|
+
],
|
|
1251
|
+
"closed": true,
|
|
1252
|
+
"text": ["MEDICAL MANAGEMENT"]
|
|
1253
|
+
},
|
|
1254
|
+
"id": "4f2e63fc-b",
|
|
1255
|
+
"from_name": "transcription",
|
|
1256
|
+
"to_name": "image",
|
|
1257
|
+
"type": "textarea",
|
|
1258
|
+
"origin": "manual"
|
|
1259
|
+
}
|
|
1260
|
+
],
|
|
1261
|
+
"was_cancelled": false,
|
|
1262
|
+
"ground_truth": false,
|
|
1263
|
+
"created_at": "2026-01-10T03:25:05.530Z",
|
|
1264
|
+
"updated_at": "2026-01-10T03:25:05.530Z",
|
|
1265
|
+
"draft_created_at": "2026-01-10T03:25:05.530Z",
|
|
1266
|
+
"lead_time": 0,
|
|
1267
|
+
"prediction": {},
|
|
1268
|
+
"result_count": 9,
|
|
1269
|
+
"unique_id": "e17b1920-022b-4e48-9207-f9904a42e840",
|
|
1270
|
+
"import_id": null,
|
|
1271
|
+
"last_action": null,
|
|
1272
|
+
"bulk_created": false,
|
|
1273
|
+
"task": 1,
|
|
1274
|
+
"project": 1,
|
|
1275
|
+
"updated_by": 1,
|
|
1276
|
+
"parent_prediction": null,
|
|
1277
|
+
"parent_annotation": null,
|
|
1278
|
+
"last_created_by": null
|
|
1279
|
+
}
|
|
1280
|
+
],
|
|
1281
|
+
"file_upload": "5b1e3483-example.jpg",
|
|
1282
|
+
"drafts": [],
|
|
1283
|
+
"predictions": [],
|
|
1284
|
+
"data": {
|
|
1285
|
+
"ocr": "http://localhost:8081/output/5b1e3483-example.jpg"
|
|
1286
|
+
},
|
|
1287
|
+
"meta": {},
|
|
1288
|
+
"created_at": "2026-01-10T03:25:05.530Z",
|
|
1289
|
+
"updated_at": "2026-01-10T03:25:05.530Z",
|
|
1290
|
+
"allow_skip": false,
|
|
1291
|
+
"inner_id": 1,
|
|
1292
|
+
"total_annotations": 1,
|
|
1293
|
+
"cancelled_annotations": 0,
|
|
1294
|
+
"total_predictions": 0,
|
|
1295
|
+
"comment_count": 0,
|
|
1296
|
+
"unresolved_comment_count": 0,
|
|
1297
|
+
"last_comment_updated_at": null,
|
|
1298
|
+
"project": 1,
|
|
1299
|
+
"updated_by": 1,
|
|
1300
|
+
"comment_authors": []
|
|
1301
|
+
}
|
|
1302
|
+
]
|
|
1303
|
+
]
|
|
1304
|
+
```
|
|
1305
|
+
|
|
1306
|
+
</details>
|
|
1307
|
+
|
|
1308
|
+
**Comparison of bounding box positions:**
|
|
1309
|
+
|
|
1310
|
+
| Original Label Studio (polygon) | Label Studio -> PPOCRLabelv2 | PPOCRLabelv2 -> Label Studio (polygon) | Margin (Converted Back − Original) |
|
|
1311
|
+
| :--------------------------------------: | -------------------------- | ---------------------------------------- | --------------------------------------- |
|
|
1312
|
+
| \[27.630018614722168, 81.85610010427528] | \[246,426] | \[27.671541057367826, 81.92307692307692] | \[0.04152244264566, 0.06697681880164] |
|
|
1313
|
+
| \[61.66434617987663, 80.8133472367049] | \[548,420] | \[61.64229471316085, 80.76923076923077] | \[-0.02205146671578, -0.04411646747413] |
|
|
1314
|
+
| \[61.969313272754356, 85.71428571428571] | \[551,446] | \[61.97975253093363, 85.76923076923076] | \[0.01043925817927, 0.05494505494505] |
|
|
1315
|
+
| \[28.239952800477624, 86.44421272158499] | \[251,450] | \[28.23397075365579, 86.53846153846155] | \[-0.00598204682183, 0.09424881687656] |
|
|
1316
|
+
|
|
1317
|
+
> [!IMPORTANT]
|
|
1318
|
+
> As shown above, after converting from Label Studio to PPOCRLabelv2 and then
|
|
1319
|
+
> back to Label Studio, the positions of the bounding boxes have slight
|
|
1320
|
+
> differences due to the conversion process. This may affect the accuracy of the
|
|
1321
|
+
> annotations, especially if precise bounding box locations are critical for your
|
|
1322
|
+
> application.
|
|
1323
|
+
|
|
301
1324
|
<!-- Roadmap -->
|
|
302
1325
|
|
|
303
1326
|
## :compass: Roadmap
|