docslight-lite 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docslight/__init__.py +41 -0
- docslight/cli.py +215 -0
- docslight/client.py +92 -0
- docslight/cloud/__init__.py +5 -0
- docslight/cloud/client.py +622 -0
- docslight/config.py +117 -0
- docslight/exceptions.py +65 -0
- docslight/local/__init__.py +31 -0
- docslight/local/layout_blocks.py +80 -0
- docslight/local/llm_extractor.py +252 -0
- docslight/local/loaders.py +95 -0
- docslight/local/markdown.py +18 -0
- docslight/local/office_loader.py +128 -0
- docslight/local/paddle_parser.py +173 -0
- docslight/local/pipeline.py +213 -0
- docslight/preview.py +46 -0
- docslight/providers/__init__.py +6 -0
- docslight/providers/ollama.py +30 -0
- docslight/providers/openai_compatible.py +64 -0
- docslight/result.py +89 -0
- docslight/schemas/__init__.py +5 -0
- docslight/schemas/fields.py +190 -0
- docslight/standard_json.py +367 -0
- docslight/static/app/common.js +668 -0
- docslight/static/app/docslight-extract.json +307 -0
- docslight/static/app/extract.js +394 -0
- docslight/static/app/i18n.js +405 -0
- docslight/static/app/parse.js +161 -0
- docslight/static/styles.css +878 -0
- docslight/templates/base.html +36 -0
- docslight/templates/extract.html +123 -0
- docslight/templates/parse.html +81 -0
- docslight/web_app.py +372 -0
- docslight_lite-0.1.0.dist-info/METADATA +277 -0
- docslight_lite-0.1.0.dist-info/RECORD +39 -0
- docslight_lite-0.1.0.dist-info/WHEEL +5 -0
- docslight_lite-0.1.0.dist-info/entry_points.txt +2 -0
- docslight_lite-0.1.0.dist-info/licenses/LICENSE +21 -0
- docslight_lite-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
{
|
|
2
|
+
"results": {
|
|
3
|
+
"name": {
|
|
4
|
+
"bboxes": [
|
|
5
|
+
{
|
|
6
|
+
"bbox": [
|
|
7
|
+
499,
|
|
8
|
+
84,
|
|
9
|
+
897,
|
|
10
|
+
122
|
|
11
|
+
],
|
|
12
|
+
"page_id": 1
|
|
13
|
+
}
|
|
14
|
+
],
|
|
15
|
+
"value": "合小合有限公司图书发货清单"
|
|
16
|
+
},
|
|
17
|
+
"仓储地址": {
|
|
18
|
+
"bboxes": [
|
|
19
|
+
{
|
|
20
|
+
"bbox": [
|
|
21
|
+
100,
|
|
22
|
+
405,
|
|
23
|
+
867,
|
|
24
|
+
472
|
|
25
|
+
],
|
|
26
|
+
"page_id": 1
|
|
27
|
+
}
|
|
28
|
+
],
|
|
29
|
+
"value": "广东佛山南海区前进东路与纵向五路交叉口博展物流园一期京东P库"
|
|
30
|
+
},
|
|
31
|
+
"制单日期": {
|
|
32
|
+
"bboxes": [
|
|
33
|
+
{
|
|
34
|
+
"bbox": [
|
|
35
|
+
87,
|
|
36
|
+
1091,
|
|
37
|
+
1054,
|
|
38
|
+
1161
|
|
39
|
+
],
|
|
40
|
+
"page_id": 1
|
|
41
|
+
}
|
|
42
|
+
],
|
|
43
|
+
"value": "2024-05-07"
|
|
44
|
+
},
|
|
45
|
+
"发货方式": {
|
|
46
|
+
"bboxes": [
|
|
47
|
+
{
|
|
48
|
+
"bbox": [
|
|
49
|
+
103,
|
|
50
|
+
162,
|
|
51
|
+
333,
|
|
52
|
+
262
|
|
53
|
+
],
|
|
54
|
+
"page_id": 1
|
|
55
|
+
}
|
|
56
|
+
],
|
|
57
|
+
"value": "汽运"
|
|
58
|
+
},
|
|
59
|
+
"审批日期": {
|
|
60
|
+
"bboxes": [
|
|
61
|
+
{
|
|
62
|
+
"bbox": [
|
|
63
|
+
103,
|
|
64
|
+
162,
|
|
65
|
+
333,
|
|
66
|
+
262
|
|
67
|
+
],
|
|
68
|
+
"page_id": 1
|
|
69
|
+
}
|
|
70
|
+
],
|
|
71
|
+
"value": "2024-05-07"
|
|
72
|
+
},
|
|
73
|
+
"审核日期": {
|
|
74
|
+
"bboxes": [
|
|
75
|
+
{
|
|
76
|
+
"bbox": [
|
|
77
|
+
87,
|
|
78
|
+
1091,
|
|
79
|
+
1054,
|
|
80
|
+
1161
|
|
81
|
+
],
|
|
82
|
+
"page_id": 1
|
|
83
|
+
}
|
|
84
|
+
],
|
|
85
|
+
"value": "2024-05-07"
|
|
86
|
+
},
|
|
87
|
+
"客户单号": {
|
|
88
|
+
"bboxes": [
|
|
89
|
+
{
|
|
90
|
+
"bbox": [
|
|
91
|
+
721,
|
|
92
|
+
158,
|
|
93
|
+
987,
|
|
94
|
+
221
|
|
95
|
+
],
|
|
96
|
+
"page_id": 1
|
|
97
|
+
}
|
|
98
|
+
],
|
|
99
|
+
"value": "5444412/1891133"
|
|
100
|
+
},
|
|
101
|
+
"开户银行": {
|
|
102
|
+
"bboxes": [
|
|
103
|
+
{
|
|
104
|
+
"bbox": [
|
|
105
|
+
99,
|
|
106
|
+
506,
|
|
107
|
+
623,
|
|
108
|
+
686
|
|
109
|
+
],
|
|
110
|
+
"page_id": 1
|
|
111
|
+
}
|
|
112
|
+
],
|
|
113
|
+
"value": "中国建设银行股份有限公司宿迁宿豫支行"
|
|
114
|
+
},
|
|
115
|
+
"总册数": {
|
|
116
|
+
"bboxes": [
|
|
117
|
+
{
|
|
118
|
+
"bbox": [
|
|
119
|
+
720,
|
|
120
|
+
265,
|
|
121
|
+
930,
|
|
122
|
+
398
|
|
123
|
+
],
|
|
124
|
+
"page_id": 1
|
|
125
|
+
}
|
|
126
|
+
],
|
|
127
|
+
"value": "250"
|
|
128
|
+
},
|
|
129
|
+
"批销单号": {
|
|
130
|
+
"bboxes": [
|
|
131
|
+
{
|
|
132
|
+
"bbox": [
|
|
133
|
+
721,
|
|
134
|
+
158,
|
|
135
|
+
987,
|
|
136
|
+
221
|
|
137
|
+
],
|
|
138
|
+
"page_id": 1
|
|
139
|
+
}
|
|
140
|
+
],
|
|
141
|
+
"value": "PXD222085"
|
|
142
|
+
},
|
|
143
|
+
"码洋": {
|
|
144
|
+
"bboxes": [
|
|
145
|
+
{
|
|
146
|
+
"bbox": [
|
|
147
|
+
720,
|
|
148
|
+
265,
|
|
149
|
+
930,
|
|
150
|
+
398
|
|
151
|
+
],
|
|
152
|
+
"page_id": 1
|
|
153
|
+
}
|
|
154
|
+
],
|
|
155
|
+
"value": "17469.00"
|
|
156
|
+
},
|
|
157
|
+
"移动电话": {
|
|
158
|
+
"bboxes": [
|
|
159
|
+
{
|
|
160
|
+
"bbox": [
|
|
161
|
+
99,
|
|
162
|
+
506,
|
|
163
|
+
623,
|
|
164
|
+
686
|
|
165
|
+
],
|
|
166
|
+
"page_id": 1
|
|
167
|
+
}
|
|
168
|
+
],
|
|
169
|
+
"value": "159700XXXX"
|
|
170
|
+
},
|
|
171
|
+
"税号": {
|
|
172
|
+
"bboxes": [
|
|
173
|
+
{
|
|
174
|
+
"bbox": [
|
|
175
|
+
99,
|
|
176
|
+
506,
|
|
177
|
+
623,
|
|
178
|
+
686
|
|
179
|
+
],
|
|
180
|
+
"page_id": 1
|
|
181
|
+
}
|
|
182
|
+
],
|
|
183
|
+
"value": "91321311562910"
|
|
184
|
+
},
|
|
185
|
+
"联系人": {
|
|
186
|
+
"bboxes": [
|
|
187
|
+
{
|
|
188
|
+
"bbox": [
|
|
189
|
+
99,
|
|
190
|
+
506,
|
|
191
|
+
623,
|
|
192
|
+
686
|
|
193
|
+
],
|
|
194
|
+
"page_id": 1
|
|
195
|
+
}
|
|
196
|
+
],
|
|
197
|
+
"value": "合小合"
|
|
198
|
+
},
|
|
199
|
+
"订单号": {
|
|
200
|
+
"bboxes": [
|
|
201
|
+
{
|
|
202
|
+
"bbox": [
|
|
203
|
+
103,
|
|
204
|
+
162,
|
|
205
|
+
333,
|
|
206
|
+
262
|
|
207
|
+
],
|
|
208
|
+
"page_id": 1
|
|
209
|
+
}
|
|
210
|
+
],
|
|
211
|
+
"value": "1750458"
|
|
212
|
+
},
|
|
213
|
+
"铁路到站": {
|
|
214
|
+
"bboxes": [
|
|
215
|
+
{
|
|
216
|
+
"bbox": [
|
|
217
|
+
720,
|
|
218
|
+
265,
|
|
219
|
+
930,
|
|
220
|
+
398
|
|
221
|
+
],
|
|
222
|
+
"page_id": 1
|
|
223
|
+
}
|
|
224
|
+
],
|
|
225
|
+
"value": "佛山市"
|
|
226
|
+
},
|
|
227
|
+
"tables": {
|
|
228
|
+
"Table_1": [
|
|
229
|
+
{
|
|
230
|
+
"ISBN": "978-7-5197-8886-5",
|
|
231
|
+
"包册数": "2+10(14)",
|
|
232
|
+
"单价": "49.00",
|
|
233
|
+
"图书名称": "“张三”身边的法律事 儿",
|
|
234
|
+
"实洋": "1,228.92",
|
|
235
|
+
"序号": "",
|
|
236
|
+
"折扣": "66.00",
|
|
237
|
+
"数量": "98",
|
|
238
|
+
"码洋": "才 862.00",
|
|
239
|
+
"货位号": "01-02-027-005"
|
|
240
|
+
},
|
|
241
|
+
{
|
|
242
|
+
"ISBN": "978-7-5197-9009-7",
|
|
243
|
+
"包册数": "0+3(8)",
|
|
244
|
+
"单价": "85.00",
|
|
245
|
+
"图书名称": "破产审判实务与前沿问 题研究",
|
|
246
|
+
"实洋": "168.30",
|
|
247
|
+
"序号": "2",
|
|
248
|
+
"折扣": "66.00",
|
|
249
|
+
"数量": "",
|
|
250
|
+
"码洋": "255.00",
|
|
251
|
+
"货位号": "01-02-063-002"
|
|
252
|
+
},
|
|
253
|
+
{
|
|
254
|
+
"ISBN": "978-7-5197-8939-8",
|
|
255
|
+
"包册数": "11+2(5)",
|
|
256
|
+
"单价": "108.00",
|
|
257
|
+
"图书名称": "中华人民共和国公司法 及司法解释指导莱例全 书",
|
|
258
|
+
"实洋": "4,062.96",
|
|
259
|
+
"序号": "3",
|
|
260
|
+
"折扣": "",
|
|
261
|
+
"数量": "57",
|
|
262
|
+
"码洋": "",
|
|
263
|
+
"货位号": "01-04-020-004"
|
|
264
|
+
},
|
|
265
|
+
{
|
|
266
|
+
"ISBN": "978-7-5197-8953-4",
|
|
267
|
+
"包册数": "11+2(5)",
|
|
268
|
+
"单价": "108.00",
|
|
269
|
+
"图书名称": "中华人民共和国民法典 及司法解释指导案例全 书",
|
|
270
|
+
"实洋": "4,062.96",
|
|
271
|
+
"序号": "4",
|
|
272
|
+
"折扣": "",
|
|
273
|
+
"数量": "57",
|
|
274
|
+
"码洋": "",
|
|
275
|
+
"货位号": "01-04-029-006"
|
|
276
|
+
},
|
|
277
|
+
{
|
|
278
|
+
"ISBN": "978-7-5197-9057-8",
|
|
279
|
+
"包册数": "7+4(13)",
|
|
280
|
+
"单价": "32.00",
|
|
281
|
+
"图书名称": "民事起诉状、答辩状示 范文本(试行)",
|
|
282
|
+
"实洋": "1,216.00",
|
|
283
|
+
"序号": "1CD",
|
|
284
|
+
"折扣": "40.00",
|
|
285
|
+
"数量": "95",
|
|
286
|
+
"码洋": "3,040.00",
|
|
287
|
+
"货位号": "01-04-288-005"
|
|
288
|
+
}
|
|
289
|
+
]
|
|
290
|
+
},
|
|
291
|
+
"_table_bboxes": {
|
|
292
|
+
"Table_1": {
|
|
293
|
+
"bbox": [
|
|
294
|
+
85,
|
|
295
|
+
694,
|
|
296
|
+
1342,
|
|
297
|
+
1026
|
|
298
|
+
],
|
|
299
|
+
"page_id": 1
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
},
|
|
303
|
+
"metadata": {
|
|
304
|
+
"source_width": 1425,
|
|
305
|
+
"source_height": 1233
|
|
306
|
+
}
|
|
307
|
+
}
|
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
import {
|
|
2
|
+
bindDropzone,
|
|
3
|
+
bindResultTabs,
|
|
4
|
+
downloadText,
|
|
5
|
+
highlightBboxes,
|
|
6
|
+
initHealthBadge,
|
|
7
|
+
loadPreview,
|
|
8
|
+
normalizeExtractPayload,
|
|
9
|
+
postForm,
|
|
10
|
+
renderExtractCards,
|
|
11
|
+
renderJsonView,
|
|
12
|
+
renderPlaceholder,
|
|
13
|
+
renderPreview,
|
|
14
|
+
setFormError,
|
|
15
|
+
} from "./common.js";
|
|
16
|
+
import { initI18n, onLanguageChange, t } from "./i18n.js";
|
|
17
|
+
|
|
18
|
+
/** Returns the first element matching `selector`, or null when the page has none. */
const queryElement = (selector) => document.querySelector(selector);

// Extraction form and the controls that switch between cloud and local runs.
const extractForm = queryElement("#extractForm");
const modeSelect = queryElement("#modeSelect");
const cloudConfig = queryElement("#cloudConfig");
const cloudExtractMode = queryElement("#cloudExtractMode");
const groundingToggle = queryElement("#groundingToggle");
const localLlmBlock = queryElement("#localLlmBlock");

// File picker and document preview elements.
const fileInput = queryElement("#fileInput");
const dropZone = queryElement("#dropZone");
const fileName = queryElement("#fileName");
const previewTitle = queryElement("#previewTitle");
const previewCanvas = queryElement("#previewCanvas");
const officePreviewNotice = queryElement("#officePreviewNotice");
const highlightStatus = queryElement("#highlightStatus");

// Submission feedback and result actions.
const formError = queryElement("#formError");
const submitButton = queryElement("#submitButton");
const downloadButton = queryElement("#downloadButton");
const metadataPreview = queryElement("#metadataPreview");

// Field template builder.
const fieldTemplateName = queryElement("#fieldTemplateName");
const fieldsRows = queryElement("#fieldsRows");
const addFieldButton = queryElement("#addFieldButton");
const addTableButton = queryElement("#addTableButton");

// Result panels, result tabs and the health badge.
const fieldsPanel = queryElement("#fieldsPanel");
const jsonPanel = queryElement("#jsonPanel");
const extractResultTabs = queryElement("#extractResultTabs");
const healthStatus = queryElement("#healthStatus");

// Mutable page state shared by the functions in this module.
const state = {
  // Field and table definitions currently shown in the builder.
  fieldRows: [],
  // Identifier of the result tab reported by the tab binding.
  currentTab: "fields",
  // True once an extraction result has been rendered.
  hasResult: false,
  // Pretty-printed JSON of the latest result; this is what the download button saves.
  latestJson: "",
  // "cloud" or "local", recorded when a result is rendered.
  resultSource: "cloud",
  // Not touched in this module; handed to the preview helpers together with
  // the rest of `state` (presumably to discard stale preview loads - confirm in common.js).
  previewRequestId: 0,
};
|
|
52
|
+
|
|
53
|
+
/**
 * Shows or hides the mode-specific sections of the form.
 *
 * Cloud settings are hidden in local mode, the local LLM block is hidden in
 * every other mode, and the grounding toggle is only visible when a non-local
 * mode is combined with the "integrate" cloud extract mode. Missing elements
 * are skipped.
 */
function syncRuntimeControls() {
  const localSelected = modeSelect?.value === "local";

  if (cloudConfig) {
    cloudConfig.hidden = localSelected;
  }
  if (localLlmBlock) {
    localLlmBlock.hidden = !localSelected;
  }
  if (groundingToggle) {
    groundingToggle.hidden = localSelected || cloudExtractMode?.value !== "integrate";
  }
}
|
|
59
|
+
|
|
60
|
+
/**
 * Extracts the prompt and mapping of a builder row in normalized form.
 *
 * @param {{prompt?: string, mapping?: string}} row - Field or column definition.
 * @returns {{prompt: (string|null), mapping: (string|null)}} Trimmed values;
 *   missing, empty or whitespace-only entries become null.
 */
function normalizedMeta(row) {
  const trimmedOrNull = (text) => text?.trim() || null;
  return {
    prompt: trimmedOrNull(row.prompt),
    mapping: trimmedOrNull(row.mapping),
  };
}
|
|
65
|
+
|
|
66
|
+
/**
 * Computes the number to use for the next auto-generated table name.
 *
 * Only table rows whose trimmed name is exactly `Table_<digits>` are counted.
 * Parsing the remainder with `Number()` would also accept empty strings,
 * decimals, hex and exponent notation (e.g. "Table_1.5" or "Table_1e3"),
 * which leads to odd generated names such as "Table_2.5".
 *
 * @returns {number} One more than the highest numbered table, or 1 when no
 *   table row carries a `Table_<digits>` name.
 */
function nextTableNumber() {
  const usedNumbers = state.fieldRows
    .filter((row) => row.type === "table")
    .map((row) => /^Table_(\d+)$/.exec(String(row.name ?? "").trim()))
    .filter(Boolean)
    .map((match) => Number(match[1]))
    // Absurdly long digit runs overflow to Infinity or lose precision; ignore them.
    .filter(Number.isSafeInteger);

  return usedNumbers.reduce((highest, value) => Math.max(highest, value), 0) + 1;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Creates a text input that reports every edit to a callback.
 *
 * @param {string} value - Initial value; falsy values become an empty string.
 * @param {string} placeholder - Placeholder text of the input.
 * @param {(value: string) => void} onInput - Receives the current value on each "input" event.
 * @returns {HTMLInputElement} The configured input element.
 */
function inputControl(value, placeholder, onInput) {
  const field = Object.assign(document.createElement("input"), {
    type: "text",
    value: value || "",
    placeholder,
  });
  const reportValue = () => onInput(field.value);
  field.addEventListener("input", reportValue);
  return field;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Wraps a control in a `<label class="field-label">` preceded by a caption.
 *
 * @param {string} labelText - Caption shown before the control.
 * @param {Node} control - Element to place inside the label.
 * @returns {HTMLLabelElement} The label containing the caption span and the control.
 */
function labeledCell(labelText, control) {
  const wrapper = document.createElement("label");
  wrapper.className = "field-label";

  const caption = Object.assign(document.createElement("span"), { textContent: labelText });

  wrapper.append(caption, control);
  return wrapper;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Creates a non-submitting "ghost" button.
 *
 * @param {string} text - Button caption.
 * @param {(event: Event) => void} onClick - Click listener.
 * @returns {HTMLButtonElement} The configured button.
 */
function actionButton(text, onClick) {
  const control = Object.assign(document.createElement("button"), {
    // type="button" keeps the click from submitting an enclosing form.
    type: "button",
    className: "ghost-button",
    textContent: text,
  });
  control.addEventListener("click", onClick);
  return control;
}
|
|
100
|
+
|
|
101
|
+
// Inline SVG markup used as the icon of the prompt trigger button.
const PROMPT_ICON_SVG = '<svg viewBox="0 0 16 16" fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"><path d="M11.5 1.5l3 3L5 14H2v-3L11.5 1.5z"/></svg>';

/**
 * Removes every prompt popover from the document: backdrops first, then the
 * popovers themselves.
 */
function closeAllPopovers() {
  for (const selector of [".prompt-popover-backdrop", ".prompt-popover"]) {
    for (const element of document.querySelectorAll(selector)) {
      element.remove();
    }
  }
}
|
|
107
|
+
|
|
108
|
+
/**
 * Builds a builder cell made of a labeled name input and a trigger button that
 * opens a popover for editing the row's extraction prompt.
 *
 * Prompt edits are reported through `updatePrompt`. Previously that parameter
 * was accepted but never called and the popover wrote to `row.prompt`
 * directly; direct assignment is kept as the fallback when no callback is
 * supplied.
 *
 * @param {{name: string, prompt?: string}} row - Field or column definition being edited.
 * @param {string} labelText - Caption of the name input.
 * @param {string} namePlaceholder - Placeholder of the name input.
 * @param {(value: string) => void} updateName - Receives the name on every edit.
 * @param {(value: string) => void} [updatePrompt] - Receives the prompt on every edit.
 * @returns {HTMLDivElement} Wrapper (`.prompt-cell`) holding the label and the trigger.
 */
function nameWithPromptCell(row, labelText, namePlaceholder, updateName, updatePrompt) {
  const wrapper = document.createElement("div");
  wrapper.className = "prompt-cell";

  // Same label markup as every other builder cell.
  const label = labeledCell(labelText, inputControl(row.name, namePlaceholder, updateName));

  const trigger = document.createElement("button");
  trigger.type = "button";
  trigger.className = "prompt-trigger";
  trigger.setAttribute("aria-label", t("fields.prompt"));
  // PROMPT_ICON_SVG is a constant defined in this module, not user input.
  trigger.innerHTML = PROMPT_ICON_SVG;
  if (row.prompt?.trim()) trigger.dataset.active = "true";

  const storePrompt = (text) => {
    if (typeof updatePrompt === "function") {
      updatePrompt(text);
    } else {
      row.prompt = text;
    }
  };

  trigger.addEventListener("click", (event) => {
    event.stopPropagation();

    // The trigger toggles: when a popover is already open, just close it.
    const alreadyOpen = document.body.contains(document.querySelector(".prompt-popover"));
    closeAllPopovers();
    if (alreadyOpen) return;

    // Clicking the backdrop dismisses the popover.
    const backdrop = document.createElement("div");
    backdrop.className = "prompt-popover-backdrop";
    backdrop.addEventListener("click", closeAllPopovers);

    const popover = document.createElement("div");
    popover.className = "prompt-popover";
    // Keep interactions inside the popover from bubbling to outer listeners.
    popover.addEventListener("click", (e) => e.stopPropagation());
    popover.addEventListener("mousedown", (e) => e.stopPropagation());

    const header = document.createElement("div");
    header.className = "prompt-popover-header";
    header.textContent = t("fields.prompt");
    popover.append(header);

    const textarea = document.createElement("textarea");
    textarea.value = row.prompt || "";
    textarea.placeholder = t("fields.promptPlaceholder");
    textarea.addEventListener("input", () => {
      const text = textarea.value;
      storePrompt(text);
      trigger.dataset.active = text.trim() ? "true" : "false";
    });
    popover.append(textarea);

    // Place the popover 8px below the trigger; the left edge is derived from
    // the trigger's right edge minus 260px and never goes below 8px.
    const rect = trigger.getBoundingClientRect();
    popover.style.top = `${rect.bottom + 8}px`;
    popover.style.left = `${Math.max(8, rect.right - 260)}px`;

    document.body.append(backdrop, popover);
    textarea.focus();
  });

  wrapper.append(label, trigger);
  return wrapper;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Renders the builder card of a single (non-table) field.
 *
 * The card holds a title, the name/prompt cell, the mapping input and a remove
 * button that drops the row from `state.fieldRows` and re-renders the builder.
 *
 * @param {{name: string, prompt: string, mapping: string}} row - Field definition; edited in place.
 * @returns {HTMLElement} The `<article class="field-row-card">` for the row.
 */
function renderTextRow(row) {
  const card = document.createElement("article");
  card.className = "field-row-card";

  const heading = document.createElement("strong");
  heading.textContent = t("fields.field");

  const nameCell = nameWithPromptCell(
    row,
    t("fields.name"),
    t("fields.namePlaceholder"),
    (value) => { row.name = value; },
    (value) => { row.prompt = value; },
  );

  const mappingInput = inputControl(row.mapping, t("fields.mappingPlaceholder"), (value) => {
    row.mapping = value;
  });
  const mappingCell = labeledCell(t("fields.mapping"), mappingInput);

  const removeRow = () => {
    state.fieldRows = state.fieldRows.filter((candidate) => candidate !== row);
    renderFieldsBuilder();
  };
  const removeButton = actionButton(t("fields.remove"), removeRow);

  card.append(heading, nameCell, mappingCell, removeButton);
  return card;
}
|
|
185
|
+
|
|
186
|
+
/**
 * Renders the editor row of one table column.
 *
 * @param {{columns: Array<object>}} table - Table definition owning the column;
 *   its `columns` array is replaced when the column is removed.
 * @param {{name: string, prompt: string, mapping: string}} column - Column definition; edited in place.
 * @returns {HTMLDivElement} The `<div class="field-row-card">` for the column.
 */
function renderColumnRow(table, column) {
  const container = document.createElement("div");
  container.className = "field-row-card";

  const nameCell = nameWithPromptCell(
    column,
    t("fields.column"),
    t("fields.columnNamePlaceholder"),
    (value) => { column.name = value; },
    (value) => { column.prompt = value; },
  );

  const mappingInput = inputControl(column.mapping, t("fields.mappingPlaceholder"), (value) => {
    column.mapping = value;
  });
  const mappingCell = labeledCell(t("fields.mapping"), mappingInput);

  const removeColumn = () => {
    table.columns = table.columns.filter((candidate) => candidate !== column);
    renderFieldsBuilder();
  };
  const removeButton = actionButton(t("fields.removeColumn"), removeColumn);

  container.append(nameCell, mappingCell, removeButton);
  return container;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Renders the builder card of a table definition: title, table name input,
 * one editor row per column, and buttons to add a column or remove the table.
 *
 * @param {{name: string, columns: Array<object>}} row - Table definition; edited in place.
 * @returns {HTMLElement} The `<article class="table-field-card">` for the table.
 */
function renderTableRow(row) {
  const card = document.createElement("article");
  card.className = "table-field-card";

  const heading = document.createElement("strong");
  heading.textContent = t("fields.table");

  const columnList = document.createElement("div");
  columnList.className = "table-columns";
  for (const column of row.columns) {
    columnList.append(renderColumnRow(row, column));
  }

  const nameInput = inputControl(row.name, t("fields.tableNamePlaceholder"), (value) => {
    row.name = value;
  });
  const nameCell = labeledCell(t("fields.tableName"), nameInput);

  const addColumn = () => {
    row.columns.push({ name: t("fields.defaultColumnName"), prompt: "", mapping: "" });
    renderFieldsBuilder();
  };
  const removeTable = () => {
    state.fieldRows = state.fieldRows.filter((candidate) => candidate !== row);
    renderFieldsBuilder();
  };

  card.append(
    heading,
    nameCell,
    columnList,
    actionButton(t("fields.addColumn"), addColumn),
    actionButton(t("fields.removeTable"), removeTable),
  );
  return card;
}
|
|
223
|
+
|
|
224
|
+
/**
 * Rebuilds the field builder from `state.fieldRows`.
 *
 * The container is emptied first, then one card per row is appended: table
 * rows get a table card, every other row a text-field card. Does nothing when
 * the container element is missing.
 */
function renderFieldsBuilder() {
  if (!fieldsRows) {
    return;
  }

  fieldsRows.replaceChildren();
  for (const row of state.fieldRows) {
    const renderRow = row.type === "table" ? renderTableRow : renderTextRow;
    fieldsRows.append(renderRow(row));
  }
}
|
|
231
|
+
|
|
232
|
+
/**
 * Appends a new text field with empty prompt and mapping to the builder state
 * and re-renders the builder.
 *
 * @param {string} [name] - Field name; defaults to the localized initial field name.
 */
function addTextField(name = t("fields.initialFieldName")) {
  const fieldRow = { type: "field", name, prompt: "", mapping: "" };
  state.fieldRows.push(fieldRow);
  renderFieldsBuilder();
}
|
|
236
|
+
|
|
237
|
+
/**
 * Appends a new table with a single column to the builder state and
 * re-renders the builder.
 *
 * @param {string} [name] - Table name; defaults to `Table_<next number>`.
 * @param {string} [columnName] - Name of the first column; defaults to the
 *   localized initial column name.
 */
function addTableField(name = `Table_${nextTableNumber()}`, columnName = t("fields.initialColumnName")) {
  const firstColumn = { name: columnName, prompt: "", mapping: "" };
  const tableRow = { type: "table", name, columns: [firstColumn] };
  state.fieldRows.push(tableRow);
  renderFieldsBuilder();
}
|
|
245
|
+
|
|
246
|
+
/**
 * Converts the builder state into the `fields` payload sent with an
 * extraction request.
 *
 * Rows of type "field" become entries of `keys`; every other row is treated as
 * a table and becomes an entry of `tableHeaders` mapping column names to their
 * metadata. Names are trimmed; rows and columns with blank names, and tables
 * without any named column, are skipped. When a name occurs more than once the
 * last definition wins.
 *
 * The maps are built with `Object.fromEntries`, which always creates own
 * properties. Plain assignment (`map[name] = meta`) silently lost an entry
 * named "__proto__" because that assignment hits the prototype setter.
 *
 * @returns {{keys?: Object, tableHeaders?: Object, name: string}} Payload in
 *   the key order keys, tableHeaders, name. `keys` and `tableHeaders` are
 *   omitted when empty; `name` falls back to "Document".
 */
function buildFieldsPayload() {
  // [trimmed name, normalized meta] pairs for every item with a non-blank name.
  const metaEntries = (items) =>
    items
      .map((item) => [item.name.trim(), item])
      .filter(([name]) => name)
      .map(([name, item]) => [name, normalizedMeta(item)]);

  const fieldRows = state.fieldRows.filter((row) => row.type === "field");
  const tableRows = state.fieldRows.filter((row) => row.type !== "field");

  const keys = Object.fromEntries(metaEntries(fieldRows));
  const tableHeaders = Object.fromEntries(
    tableRows
      .map((row) => [row.name.trim(), Object.fromEntries(metaEntries(row.columns))])
      .filter(([tableName, columns]) => tableName && Object.keys(columns).length),
  );

  const payload = {};
  if (Object.keys(keys).length) payload.keys = keys;
  if (Object.keys(tableHeaders).length) payload.tableHeaders = tableHeaders;
  payload.name = fieldTemplateName?.value.trim() || "Document";
  return payload;
}
|
|
273
|
+
|
|
274
|
+
/**
 * Loads the preview for the currently selected file.
 *
 * @returns {*} Whatever `loadPreview` returns for the page's file input,
 *   preview elements and shared state.
 */
function refreshPreview() {
  const previewContext = {
    fileInput,
    previewTitle,
    previewCanvas,
    officePreviewNotice,
    highlightStatus,
    state,
  };
  return loadPreview(previewContext);
}
|
|
284
|
+
|
|
285
|
+
/**
 * Resets the result area to its empty state: both result panels show the
 * localized placeholder, the stored result is cleared, the metadata preview
 * shows its empty text and the download button is disabled.
 */
function renderEmptyResult() {
  const emptyText = t("extract.placeholder");
  for (const panel of [fieldsPanel, jsonPanel]) {
    renderPlaceholder(panel, emptyText);
  }

  Object.assign(state, { hasResult: false, latestJson: "" });

  if (metadataPreview) {
    metadataPreview.textContent = t("extract.metadataEmpty");
  }
  if (downloadButton) {
    downloadButton.disabled = true;
  }
}
|
|
294
|
+
|
|
295
|
+
/**
 * Renders an extraction result into the field cards, the JSON view and the
 * metadata preview, and enables the download button.
 *
 * The payload is normalized before any shared state is touched. Previously
 * `state.hasResult` was set first, so a payload that made
 * `normalizeExtractPayload` throw left the state claiming a result exists
 * although nothing had been rendered.
 *
 * @param {*} result - Raw extraction result as returned by the API.
 */
function renderExtractResult(result) {
  const normalized = normalizeExtractPayload(result);

  state.hasResult = true;
  state.latestJson = JSON.stringify(normalized.full, null, 2);
  state.resultSource = modeSelect?.value === "local" ? "local" : "cloud";

  const highlightTargets = { previewCanvas, highlightStatus };
  renderExtractCards(normalized.results, fieldsPanel, {
    source: state.resultSource,
    // A pick without boxes passes null to highlightBboxes; a pick without its
    // own source falls back to the source of the rendered result.
    onPick: (boxes, source) => {
      highlightBboxes(boxes || null, source || state.resultSource, highlightTargets);
    },
  });
  renderJsonView(normalized.full, jsonPanel);

  if (metadataPreview) {
    metadataPreview.textContent = JSON.stringify(normalized.metadata, null, 2);
  }
  if (downloadButton) downloadButton.disabled = false;
}
|
|
319
|
+
|
|
320
|
+
/**
 * Checks whether the form can be submitted.
 *
 * Requirements, in the order they are checked: a selected file; an API key in
 * every mode other than "local"; provider, model and base URL of the local LLM
 * in "local" mode; at least one field or table definition.
 *
 * @param {{keys?: Object, tableHeaders?: Object}} fields - Payload from `buildFieldsPayload`.
 * @returns {string} Localized message of the first failed check, or "" when the form is valid.
 */
function validateForm(fields) {
  const hasFile = Boolean(fileInput?.files?.length);
  if (!hasFile) {
    return t("error.selectDocument");
  }

  // Trimmed value of a form control; undefined when the form or control is missing.
  const readValue = (selector) => extractForm?.querySelector(selector)?.value?.trim();

  if (modeSelect?.value === "local") {
    const requiredLocalControls = [
      '[name="local_llm_provider"]',
      '[name="local_llm_model"]',
      '[name="local_llm_base_url"]',
    ];
    const localComplete = requiredLocalControls.every((selector) => readValue(selector));
    if (!localComplete) {
      return t("error.localLlmRequired");
    }
  } else if (!readValue('[name="api_key"]')) {
    return t("error.cloudApiKeyRequired");
  }

  if (fields.keys || fields.tableHeaders) {
    return "";
  }
  return t("error.fieldsRequired");
}
|
|
337
|
+
|
|
338
|
+
/**
 * Re-renders the parts of the page whose text is produced by script, so they
 * pick up the active language: the field builder, the empty-result
 * placeholder (only while no result is shown) and the preview.
 */
function refreshLocalizedDynamicCopy() {
  renderFieldsBuilder();

  if (!state.hasResult) {
    renderEmptyResult();
  }

  const previewTargets = { previewTitle, previewCanvas, officePreviewNotice, highlightStatus, state };
  renderPreview(state.preview || null, previewTargets);
}
|
|
343
|
+
|
|
344
|
+
// Changing the run mode or the cloud extract mode changes which sections are visible.
for (const control of [modeSelect, cloudExtractMode]) {
  control?.addEventListener("change", syncRuntimeControls);
}

// Builder buttons: add a text field with the localized default name, or a
// table with an auto-numbered name.
addFieldButton?.addEventListener("click", () => {
  addTextField(t("fields.defaultFieldName"));
});
addTableButton?.addEventListener("click", () => {
  addTableField();
});
|
|
348
|
+
|
|
349
|
+
/**
 * Handles submission of the extraction form.
 *
 * Validates the form, posts file and field definitions to `/api/extract` and
 * renders the response. Validation and request errors are shown in the form
 * error element. The submit button is disabled while the request is running.
 *
 * @param {Event} event - The form's "submit" event; its default action is cancelled.
 * @returns {Promise<void>}
 */
async function handleExtractSubmit(event) {
  event.preventDefault();
  setFormError(formError, "");

  const fields = buildFieldsPayload();
  const problem = validateForm(fields);
  if (problem) {
    setFormError(formError, problem);
    return;
  }

  const body = new FormData(extractForm);
  body.set("file", fileInput.files[0]);
  body.set("fields", JSON.stringify(fields));

  const usesCloud = modeSelect?.value !== "local";
  if (usesCloud) {
    const cloudMode = cloudExtractMode?.value;
    body.set("cloud_extract_mode", cloudMode || "vlm");
    // The grounding flag is only sent with the "integrate" extract mode.
    if (cloudMode !== "integrate") {
      body.delete("enable_grounding");
    }
  }

  if (submitButton) {
    submitButton.disabled = true;
  }
  try {
    const payload = await postForm("/api/extract", body);
    // Use the `result` property of the response when it is present, otherwise
    // the response itself.
    renderExtractResult(payload.result || payload);
  } catch (error) {
    const message = error instanceof Error ? error.message : t("extract.failed");
    setFormError(formError, message);
  } finally {
    if (submitButton) {
      submitButton.disabled = false;
    }
  }
}

extractForm?.addEventListener("submit", handleExtractSubmit);
|
|
378
|
+
|
|
379
|
+
// Save the most recently rendered result as a JSON file.
if (downloadButton) {
  downloadButton.addEventListener("click", () => {
    downloadText(state.latestJson, "docslight-extract.json");
  });
}

// --- Page start-up; the calls below keep their original order. ---
initI18n();
initHealthBadge(healthStatus);
bindDropzone({ dropZone, fileInput, fileName, onFileChange: refreshPreview });

// Remember which result tab is active.
const rememberActiveTab = (tab) => {
  state.currentTab = tab;
};
bindResultTabs(extractResultTabs, rememberActiveTab);

onLanguageChange(refreshLocalizedDynamicCopy);
syncRuntimeControls();

// Start with an empty preview and an empty result area.
const initialPreviewTargets = { previewTitle, previewCanvas, officePreviewNotice, highlightStatus, state };
renderPreview(null, initialPreviewTargets);
renderEmptyResult();

// Seed the builder with one text field and one table.
addTextField(t("fields.initialFieldName"));
addTableField("Table_1", t("fields.initialColumnName"));
|