@josephyan/qingflow-cli 0.2.0-beta.1000

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/README.md +31 -0
  2. package/docs/local-agent-install.md +309 -0
  3. package/entry_point.py +13 -0
  4. package/npm/bin/qingflow.mjs +5 -0
  5. package/npm/lib/runtime.mjs +346 -0
  6. package/npm/scripts/postinstall.mjs +16 -0
  7. package/package.json +34 -0
  8. package/pyproject.toml +67 -0
  9. package/qingflow +15 -0
  10. package/src/qingflow_mcp/__init__.py +37 -0
  11. package/src/qingflow_mcp/__main__.py +5 -0
  12. package/src/qingflow_mcp/backend_client.py +649 -0
  13. package/src/qingflow_mcp/builder_facade/__init__.py +3 -0
  14. package/src/qingflow_mcp/builder_facade/models.py +1846 -0
  15. package/src/qingflow_mcp/builder_facade/service.py +16502 -0
  16. package/src/qingflow_mcp/cli/__init__.py +1 -0
  17. package/src/qingflow_mcp/cli/commands/__init__.py +18 -0
  18. package/src/qingflow_mcp/cli/commands/app.py +40 -0
  19. package/src/qingflow_mcp/cli/commands/auth.py +112 -0
  20. package/src/qingflow_mcp/cli/commands/builder.py +539 -0
  21. package/src/qingflow_mcp/cli/commands/chart.py +18 -0
  22. package/src/qingflow_mcp/cli/commands/common.py +62 -0
  23. package/src/qingflow_mcp/cli/commands/imports.py +96 -0
  24. package/src/qingflow_mcp/cli/commands/portal.py +25 -0
  25. package/src/qingflow_mcp/cli/commands/record.py +331 -0
  26. package/src/qingflow_mcp/cli/commands/repo.py +80 -0
  27. package/src/qingflow_mcp/cli/commands/task.py +141 -0
  28. package/src/qingflow_mcp/cli/commands/view.py +18 -0
  29. package/src/qingflow_mcp/cli/commands/workspace.py +110 -0
  30. package/src/qingflow_mcp/cli/context.py +60 -0
  31. package/src/qingflow_mcp/cli/formatters.py +573 -0
  32. package/src/qingflow_mcp/cli/json_io.py +50 -0
  33. package/src/qingflow_mcp/cli/main.py +186 -0
  34. package/src/qingflow_mcp/cli/qingflow_login.py +116 -0
  35. package/src/qingflow_mcp/cli/terminal_ui.py +173 -0
  36. package/src/qingflow_mcp/config.py +407 -0
  37. package/src/qingflow_mcp/errors.py +66 -0
  38. package/src/qingflow_mcp/id_utils.py +49 -0
  39. package/src/qingflow_mcp/import_store.py +121 -0
  40. package/src/qingflow_mcp/json_types.py +18 -0
  41. package/src/qingflow_mcp/list_type_labels.py +76 -0
  42. package/src/qingflow_mcp/public_surface.py +243 -0
  43. package/src/qingflow_mcp/repository_store.py +71 -0
  44. package/src/qingflow_mcp/response_trim.py +841 -0
  45. package/src/qingflow_mcp/server.py +216 -0
  46. package/src/qingflow_mcp/server_app_builder.py +543 -0
  47. package/src/qingflow_mcp/server_app_user.py +386 -0
  48. package/src/qingflow_mcp/session_store.py +369 -0
  49. package/src/qingflow_mcp/solution/__init__.py +6 -0
  50. package/src/qingflow_mcp/solution/build_assembly_store.py +181 -0
  51. package/src/qingflow_mcp/solution/compiler/__init__.py +282 -0
  52. package/src/qingflow_mcp/solution/compiler/chart_compiler.py +96 -0
  53. package/src/qingflow_mcp/solution/compiler/form_compiler.py +495 -0
  54. package/src/qingflow_mcp/solution/compiler/icon_utils.py +187 -0
  55. package/src/qingflow_mcp/solution/compiler/navigation_compiler.py +57 -0
  56. package/src/qingflow_mcp/solution/compiler/package_compiler.py +19 -0
  57. package/src/qingflow_mcp/solution/compiler/portal_compiler.py +60 -0
  58. package/src/qingflow_mcp/solution/compiler/view_compiler.py +51 -0
  59. package/src/qingflow_mcp/solution/compiler/workflow_compiler.py +173 -0
  60. package/src/qingflow_mcp/solution/design_session.py +222 -0
  61. package/src/qingflow_mcp/solution/design_store.py +100 -0
  62. package/src/qingflow_mcp/solution/executor.py +2398 -0
  63. package/src/qingflow_mcp/solution/normalizer.py +23 -0
  64. package/src/qingflow_mcp/solution/requirements_builder.py +536 -0
  65. package/src/qingflow_mcp/solution/run_store.py +244 -0
  66. package/src/qingflow_mcp/solution/spec_models.py +855 -0
  67. package/src/qingflow_mcp/tools/__init__.py +1 -0
  68. package/src/qingflow_mcp/tools/ai_builder_tools.py +3449 -0
  69. package/src/qingflow_mcp/tools/app_tools.py +926 -0
  70. package/src/qingflow_mcp/tools/approval_tools.py +1062 -0
  71. package/src/qingflow_mcp/tools/auth_tools.py +1133 -0
  72. package/src/qingflow_mcp/tools/base.py +281 -0
  73. package/src/qingflow_mcp/tools/code_block_tools.py +777 -0
  74. package/src/qingflow_mcp/tools/custom_button_tools.py +202 -0
  75. package/src/qingflow_mcp/tools/directory_tools.py +675 -0
  76. package/src/qingflow_mcp/tools/feedback_tools.py +238 -0
  77. package/src/qingflow_mcp/tools/file_tools.py +409 -0
  78. package/src/qingflow_mcp/tools/import_tools.py +2223 -0
  79. package/src/qingflow_mcp/tools/navigation_tools.py +210 -0
  80. package/src/qingflow_mcp/tools/package_tools.py +326 -0
  81. package/src/qingflow_mcp/tools/portal_tools.py +158 -0
  82. package/src/qingflow_mcp/tools/qingbi_report_tools.py +374 -0
  83. package/src/qingflow_mcp/tools/record_tools.py +14291 -0
  84. package/src/qingflow_mcp/tools/repository_dev_tools.py +552 -0
  85. package/src/qingflow_mcp/tools/resource_read_tools.py +503 -0
  86. package/src/qingflow_mcp/tools/role_tools.py +112 -0
  87. package/src/qingflow_mcp/tools/solution_tools.py +4054 -0
  88. package/src/qingflow_mcp/tools/task_context_tools.py +2986 -0
  89. package/src/qingflow_mcp/tools/task_tools.py +889 -0
  90. package/src/qingflow_mcp/tools/view_tools.py +335 -0
  91. package/src/qingflow_mcp/tools/workflow_tools.py +376 -0
  92. package/src/qingflow_mcp/tools/workspace_tools.py +266 -0
@@ -0,0 +1,2223 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import mimetypes
6
+ import re
7
+ import shutil
8
+ import tempfile
9
+ from io import BytesIO
10
+ from copy import deepcopy
11
+ from datetime import datetime, timedelta, timezone
12
+ from pathlib import Path
13
+ from typing import Any
14
+ from uuid import uuid4
15
+
16
+ from mcp.server.fastmcp import FastMCP
17
+ from openpyxl import Workbook, load_workbook
18
+
19
+ from ..config import DEFAULT_PROFILE
20
+ from ..errors import QingflowApiError
21
+ from ..import_store import ImportJobStore, ImportVerificationStore
22
+ from ..json_types import JSONObject
23
+ from .app_tools import _derive_import_capability
24
+ from .base import ToolBase, tool_cn_name
25
+ from .file_tools import FileTools
26
+ from .record_tools import RecordTools, _build_field_index, _normalize_form_schema
27
+
28
+
29
# Spreadsheet extensions recognised for import (presumably enforced by the
# local verification helpers, which are outside this view).
SUPPORTED_IMPORT_EXTENSIONS = {".xls", ".xlsx"}

# Extensions that record_import_repair_local is willing to rewrite.
REPAIRABLE_IMPORT_EXTENSIONS = {".xlsx"}

# Repair names that callers may pass in ``selected_repairs``; anything else is
# rejected by record_import_repair_local before any file is touched.
SAFE_REPAIRS = set(
    (
        "normalize_headers",
        "trim_trailing_blank_rows",
        "normalize_enum_values",
        "normalize_date_formats",
        "normalize_number_formats",
        "normalize_url_cells",
    )
)

# Loose e-mail shape: exactly one "@", no whitespace, and a dot somewhere in
# the part after the "@".
EMAIL_PATTERN = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")
40
+
41
+
42
+ class ImportTools(ToolBase):
43
+ """导入工具(中文名:数据导入与校验)。
44
+
45
+ 类型:批量数据导入工具。
46
+ 主要职责:
47
+ 1. 获取导入模板与导入 schema;
48
+ 2. 执行导入文件校验与本地修复;
49
+ 3. 启动导入任务并查询导入进度与结果。
50
+ """
51
+
52
def __init__(
    self,
    sessions,
    backend,
    *,
    verification_store: ImportVerificationStore | None = None,
    job_store: ImportJobStore | None = None,
) -> None:
    """Create the import tool set and its collaborators.

    Args:
        sessions: Forwarded unchanged to ``ToolBase``, ``RecordTools`` and
            ``FileTools``.
        backend: Forwarded unchanged to ``ToolBase``, ``RecordTools`` and
            ``FileTools``.
        verification_store: Store used for verification records. A new
            ``ImportVerificationStore`` is created only when this is ``None``.
        job_store: Store used for import jobs. A new ``ImportJobStore`` is
            created only when this is ``None``.
    """
    super().__init__(sessions, backend)
    self._record_tools = RecordTools(sessions, backend)
    self._file_tools = FileTools(sessions, backend)
    # Test against None rather than truthiness: an injected store that happens
    # to be falsy (for example one that defines __len__ and is still empty)
    # must be kept, not silently swapped for a fresh default instance.
    self._verification_store = (
        ImportVerificationStore() if verification_store is None else verification_store
    )
    self._job_store = ImportJobStore() if job_store is None else job_store
66
+
67
def register(self, mcp: FastMCP) -> None:
    """Register the record-import tools on the given MCP server.

    Every nested function is a thin adapter decorated with ``mcp.tool`` that
    forwards its arguments to the method of the same name on this instance.
    """

    # NOTE: the nested tools intentionally carry no docstrings of their own so
    # that the text handed to ``mcp.tool`` stays exactly as declared here.

    @mcp.tool()
    def record_import_schema_get(
        app_key: str = "",
        output_profile: str = "normal",
    ) -> dict[str, Any]:
        # This tool exposes no profile argument; it always runs on DEFAULT_PROFILE.
        return self.record_import_schema_get(profile=DEFAULT_PROFILE, app_key=app_key, output_profile=output_profile)

    @mcp.tool(description="Get the official app import template and the expected applicant import columns.")
    def record_import_template_get(
        profile: str = DEFAULT_PROFILE,
        app_key: str = "",
        download_to_path: str | None = None,
    ) -> dict[str, Any]:
        return self.record_import_template_get(profile=profile, app_key=app_key, download_to_path=download_to_path)

    @mcp.tool(description="Verify a local Excel import file and produce the only verification_id allowed for import start.")
    def record_import_verify(
        profile: str = DEFAULT_PROFILE,
        app_key: str = "",
        file_path: str = "",
    ) -> dict[str, Any]:
        return self.record_import_verify(profile=profile, app_key=app_key, file_path=file_path)

    @mcp.tool(description="Repair a local .xlsx import file after explicit user authorization, then re-verify it.")
    def record_import_repair_local(
        profile: str = DEFAULT_PROFILE,
        verification_id: str = "",
        authorized_file_modification: bool = False,
        output_path: str | None = None,
        selected_repairs: list[str] | None = None,
    ) -> dict[str, Any]:
        forwarded = {
            "profile": profile,
            "verification_id": verification_id,
            "authorized_file_modification": authorized_file_modification,
            "output_path": output_path,
            "selected_repairs": selected_repairs,
        }
        return self.record_import_repair_local(**forwarded)

    @mcp.tool(description="Start import from a successful verification_id. being_enter_auditing must be passed explicitly.")
    def record_import_start(
        profile: str = DEFAULT_PROFILE,
        app_key: str = "",
        verification_id: str = "",
        being_enter_auditing: bool | None = None,
        view_key: str | None = None,
    ) -> dict[str, Any]:
        forwarded = {
            "profile": profile,
            "app_key": app_key,
            "verification_id": verification_id,
            "being_enter_auditing": being_enter_auditing,
            "view_key": view_key,
        }
        return self.record_import_start(**forwarded)

    @mcp.tool(description="Get import status by process_id_str, import_id, or the latest remembered import in the current app.")
    def record_import_status_get(
        profile: str = DEFAULT_PROFILE,
        app_key: str = "",
        import_id: str | None = None,
        process_id_str: str | None = None,
    ) -> dict[str, Any]:
        # Exactly one of the three selectors may be supplied; blank or
        # whitespace-only values count as "not supplied".
        supplied = (
            bool(_normalize_optional_text(process_id_str)),
            bool(_normalize_optional_text(import_id)),
            bool(str(app_key or "").strip()),
        )
        if supplied.count(True) == 1:
            return self.record_import_status_get(
                profile=profile,
                app_key=app_key,
                import_id=import_id,
                process_id_str=process_id_str,
            )
        fix_hint = "Use `process_id_str` or `import_id` for a known import, or use only `app_key` to inspect the latest import in that app."
        return self._failed_status_result(
            error_code="CONFIG_ERROR",
            message="record_import_status_get accepts exactly one selector: process_id_str, import_id, or app_key",
            extra={"details": {"fix_hint": fix_hint}},
        )
168
+
169
@tool_cn_name("导入 Schema")
def record_import_schema_get(
    self,
    *,
    profile: str = DEFAULT_PROFILE,
    app_key: str,
    output_profile: str = "normal",
) -> dict[str, Any]:
    """Return the import-ready column schema of an app.

    Args:
        profile: Profile name handed to ``self._run``.
        app_key: Key of the target app; a blank value yields a blocked result.
        output_profile: When equal to ``"verbose"`` the raw expected columns
            and the import capability are added to the response.

    Returns:
        A result dict. On success it carries a condensed ``columns`` list and
        the ``schema_fingerprint``; with a blank ``app_key`` it is a
        ``blocked`` result with ``IMPORT_SCHEMA_UNAVAILABLE``.
    """
    if app_key.strip() == "":
        return {
            "ok": False,
            "status": "blocked",
            "app_key": app_key,
            "error_code": "IMPORT_SCHEMA_UNAVAILABLE",
            "message": "app_key is required",
        }

    def summarize(column) -> JSONObject:
        # Condensed, caller-facing view of one expected import column.
        summary: JSONObject = {
            "title": column["title"],
            "kind": column["write_kind"],
            "required": bool(column.get("required")),
        }
        options = column.get("options")
        if isinstance(options, list) and options:
            summary["options"] = options
        if column.get("requires_lookup"):
            summary["accepts_natural_input"] = True
        if column.get("requires_upload"):
            summary["requires_upload"] = True
        for text_key in ("target_app_key", "target_app_name"):
            text_value = column.get(text_key)
            if isinstance(text_value, str):
                summary[text_key] = text_value
        searchable = column.get("searchable_fields")
        if isinstance(searchable, list) and searchable:
            summary["searchable_fields"] = searchable
        return summary

    def runner(session_profile, context):
        capability, capability_warnings = self._fetch_import_capability(context, app_key)
        # The field index (first element) is not needed for the schema view.
        _unused_index, expected_columns, fingerprint = self._resolve_import_schema_bundle(
            profile,
            context,
            app_key,
            import_capability=capability,
        )
        condensed = [summarize(column) for column in expected_columns]
        result: dict[str, Any] = {
            "ok": True,
            "status": "success",
            "app_key": app_key,
            "ws_id": session_profile.selected_ws_id,
            "request_route": self.backend.describe_route(context),
            "warnings": capability_warnings,
            "schema_scope": "import_ready",
            "columns": condensed,
            "schema_fingerprint": fingerprint,
        }
        if output_profile == "verbose":
            result.update(expected_columns=expected_columns, import_capability=capability)
        return result

    return self._run(profile, runner)
232
+
233
@tool_cn_name("导入模板")
def record_import_template_get(
    self,
    *,
    profile: str,
    app_key: str,
    download_to_path: str | None = None,
) -> dict[str, Any]:
    """Resolve the official import template, optionally saving it locally.

    Args:
        profile: Profile name handed to ``self._run``.
        app_key: Key of the target app; a blank value yields a failed result.
        download_to_path: Optional destination hint. When given, the official
            template is downloaded there; it is also passed as the destination
            hint for the locally generated fallback template.

    Returns:
        A result dict. ``status`` is ``"success"`` when the official template
        URL was resolved, ``"partial_success"`` when the template request
        failed but the capability's ``auth_source`` is ``"apply_auth"`` and a
        local template was generated instead, and a failed template result
        otherwise.
    """
    if not app_key.strip():
        return self._failed_template_result(
            app_key=app_key,
            error_code="IMPORT_TEMPLATE_UNAUTHORIZED",
            message="app_key is required",
        )

    def runner(session_profile, context):
        import_capability, import_warnings = self._fetch_import_capability(context, app_key)
        # Only the columns and the fingerprint are used here.
        _field_index, expected_columns, schema_fingerprint = self._resolve_import_schema_bundle(
            profile,
            context,
            app_key,
            import_capability=import_capability,
        )
        try:
            payload = self.backend.request("GET", context, f"/app/{app_key}/apply/excelTemplate")
        except QingflowApiError as exc:
            if import_capability.get("auth_source") != "apply_auth":
                return self._failed_template_result(
                    app_key=app_key,
                    error_code="IMPORT_TEMPLATE_UNAUTHORIZED",
                    message=exc.message,
                    request_route=self.backend.describe_route(context),
                )
            # The official download failed for an apply_auth user: fall back
            # to a template generated from the expected columns.
            generated_path = self._write_local_template(
                expected_columns=expected_columns,
                destination_hint=download_to_path,
                app_key=app_key,
            )
            fallback_warning = {
                "code": "IMPORT_TEMPLATE_LOCAL_FALLBACK",
                "message": "Official template download requires data management permission; MCP generated a local applicant-import template instead.",
            }
            return {
                "ok": True,
                "status": "partial_success",
                "app_key": app_key,
                "ws_id": session_profile.selected_ws_id,
                "request_route": self.backend.describe_route(context),
                "template_url": None,
                "downloaded_to_path": generated_path,
                "expected_columns": expected_columns,
                "schema_fingerprint": schema_fingerprint,
                "warnings": import_warnings + [fallback_warning],
                "verification": {
                    "schema_fingerprint": schema_fingerprint,
                    "template_url_resolved": False,
                    "template_downloaded": True,
                    "template_source": "local_generated",
                },
            }

        template_url = _pick_template_url(payload)
        if not template_url:
            return self._failed_template_result(
                app_key=app_key,
                error_code="IMPORT_TEMPLATE_UNAUTHORIZED",
                message="template endpoint did not return excelUrl",
                request_route=self.backend.describe_route(context),
            )

        collected_warnings: list[JSONObject] = list(import_warnings)
        saved_path = None
        if download_to_path:
            target = _resolve_template_download_path(download_to_path, app_key=app_key)
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_bytes(self.backend.download_binary(template_url))
            saved_path = str(target)
        return {
            "ok": True,
            "status": "success",
            "app_key": app_key,
            "ws_id": session_profile.selected_ws_id,
            "request_route": self.backend.describe_route(context),
            "template_url": template_url,
            "downloaded_to_path": saved_path,
            "expected_columns": expected_columns,
            "schema_fingerprint": schema_fingerprint,
            "warnings": collected_warnings,
            "verification": {
                "schema_fingerprint": schema_fingerprint,
                "template_url_resolved": True,
                "template_downloaded": saved_path is not None,
                "template_source": "official",
            },
        }

    try:
        return self._run(profile, runner)
    except RuntimeError as exc:
        return self._runtime_error_as_result(exc, error_code="IMPORT_TEMPLATE_UNAUTHORIZED")
333
+
334
@tool_cn_name("导入校验")
def record_import_verify(
    self,
    *,
    profile: str,
    app_key: str,
    file_path: str,
) -> dict[str, Any]:
    """Verify a local import file and record the outcome under a new id.

    The steps are: import-permission precheck, local check of the file,
    optional automatic normalization into a separate file (followed by a
    second local check of that file), and finally backend verification when
    the local check allows import. The outcome is written to the verification
    store under a freshly generated ``verification_id``.

    Args:
        profile: Profile name handed to ``self._run``.
        app_key: Key of the target app; a blank value yields a failed result.
        file_path: Path of the local file; ``~`` is expanded and the path must
            point to an existing file.

    Returns:
        A result dict. Once the pipeline has run, ``ok`` is ``True`` and
        ``status`` is ``"success"`` or ``"failed"`` depending on
        ``can_import``; early validation and permission failures return a
        failed verify result instead.
    """
    if not app_key.strip():
        return self._failed_verify_result(app_key=app_key, file_path=file_path, error_code="IMPORT_VERIFICATION_FAILED", message="app_key is required")
    path = Path(file_path).expanduser()
    if not path.is_file():
        return self._failed_verify_result(app_key=app_key, file_path=file_path, error_code="IMPORT_VERIFICATION_FAILED", message="file_path must point to an existing file")

    def runner(session_profile, context):
        import_capability, import_warnings = self._fetch_import_capability(context, app_key)
        precheck_known = import_capability.get("auth_source") != "unknown"
        if not import_capability.get("can_import"):
            if precheck_known:
                # Permission is known and denied: stop before touching the file.
                return self._failed_verify_result(
                    app_key=app_key,
                    file_path=file_path,
                    error_code="IMPORT_AUTH_PRECHECK_FAILED",
                    message="the current user does not have import permission for this app",
                    extra={
                        "warnings": import_warnings,
                        "verification": {
                            "import_auth_prechecked": True,
                            "import_auth_precheck_passed": False,
                            "backend_verification_passed": False,
                        },
                        "import_capability": import_capability,
                    },
                )
            # Permission could not be determined: continue, but say so.
            import_warnings = [
                *import_warnings,
                {
                    "code": "IMPORT_AUTH_PRECHECK_SKIPPED",
                    "message": "record_import_verify could not determine import permission from app metadata; continuing with file verification only.",
                },
            ]

        field_index, expected_columns, schema_fingerprint = self._resolve_import_schema_bundle(
            profile,
            context,
            app_key,
            import_capability=import_capability,
        )
        template_header_profile, header_warnings = self._load_template_header_profile(
            context,
            app_key,
            import_capability=import_capability,
            expected_columns=expected_columns,
        )
        # Arguments shared by both local verification passes.
        shared_verify_args = {
            "profile": profile,
            "context": context,
            "app_key": app_key,
            "field_index": field_index,
            "expected_columns": expected_columns,
            "schema_fingerprint": schema_fingerprint,
        }
        local_check = self._local_verify(
            path=path,
            allowed_header_titles=template_header_profile.get("allowed_titles"),
            **shared_verify_args,
        )

        effective_path = path
        effective_local_check = local_check
        try:
            auto_normalization = self._maybe_auto_normalize_file(
                source_path=path,
                expected_columns=expected_columns,
                template_header_profile=template_header_profile,
                local_check=local_check,
            )
        except Exception as exc:
            # Deliberately broad: a normalization crash is reported as a
            # structured verification failure on a copy of the local check.
            auto_normalization = None
            effective_local_check = deepcopy(local_check)
            effective_local_check["issues"].append(
                _issue(
                    "IMPORT_AUTO_NORMALIZATION_FAILED",
                    f"Workbook compatibility normalization failed before backend verification: {exc}",
                    severity="error",
                )
            )
            effective_local_check["warnings"].append(
                {
                    "code": "IMPORT_AUTO_NORMALIZATION_FAILED",
                    "message": "Workbook compatibility normalization failed during local precheck; returning a structured verification failure instead of crashing.",
                }
            )
            effective_local_check["local_precheck_passed"] = False
            effective_local_check["can_import"] = False
            effective_local_check["error_code"] = "IMPORT_VERIFICATION_FAILED"
        else:
            if auto_normalization is not None:
                # A normalized copy was produced: verify that file instead.
                effective_path = Path(str(auto_normalization["verified_file_path"]))
                effective_local_check = self._local_verify(
                    path=effective_path,
                    allowed_header_titles=list(auto_normalization["header_titles"]),
                    **shared_verify_args,
                )

        warnings = import_warnings + deepcopy(effective_local_check["warnings"]) + header_warnings
        if auto_normalization is not None:
            warnings.extend(deepcopy(auto_normalization["warnings"]))
        issues = deepcopy(effective_local_check["issues"])
        can_import = bool(effective_local_check["can_import"])

        backend_verification = None
        if can_import:
            try:
                upload = (
                    effective_path.name,
                    effective_path.read_bytes(),
                    mimetypes.guess_type(effective_path.name)[0] or "application/octet-stream",
                )
                payload = self.backend.request_multipart(
                    "POST",
                    context,
                    f"/app/{app_key}/upload/verification",
                    files={"file": upload},
                )
                backend_verification = payload if isinstance(payload, dict) else {}
                # A missing "beingValidated" key is treated as accepted.
                if backend_verification.get("beingValidated", True) is False:
                    can_import = False
                    issues.append(
                        _issue(
                            "BACKEND_IMPORT_VERIFICATION_REJECTED",
                            "Backend verification rejected the file for import.",
                            severity="error",
                        )
                    )
            except QingflowApiError as exc:
                can_import = False
                issues.append(
                    _issue(
                        "BACKEND_IMPORT_VERIFICATION_FAILED",
                        exc.message or "Backend import verification failed.",
                        severity="error",
                    )
                )
                warnings.append(
                    {
                        "code": "IMPORT_VERIFICATION_FAILED",
                        "message": "Backend verification failed; the file cannot be imported until verification succeeds.",
                    }
                )

        # Values reported both in the stored record and in the response.
        was_normalized = effective_path != path
        source_path_text = str(path.resolve())
        verified_path_text = str(effective_path.resolve()) if was_normalized else None
        source_sha256 = local_check["file_sha256"]
        verified_sha256 = effective_local_check["file_sha256"] if was_normalized else None
        got_backend_reply = isinstance(backend_verification, dict)
        apply_rows = backend_verification.get("applyRows") if got_backend_reply else None

        verification_id = str(uuid4())
        self._verification_store.put(
            verification_id,
            {
                "id": verification_id,
                "created_at": _utc_now().isoformat(),
                "profile": profile,
                "app_key": app_key,
                "file_path": source_path_text,
                "source_file_path": source_path_text,
                "verified_file_path": verified_path_text,
                "file_name": path.name,
                "file_sha256": source_sha256,
                "verified_file_sha256": verified_sha256,
                "file_size": local_check["file_size"],
                "schema_fingerprint": schema_fingerprint,
                "can_import": can_import,
                "issues": issues,
                "warnings": warnings,
                "import_capability": import_capability,
                "apply_rows": apply_rows,
                "backend_verification": backend_verification,
                "local_precheck": effective_local_check,
                "source_local_precheck": local_check,
                "auto_normalization": auto_normalization,
            },
        )

        if can_import:
            status, error_code = "success", None
        else:
            status = "failed"
            error_code = effective_local_check.get("error_code") or local_check.get("error_code") or "IMPORT_VERIFICATION_FAILED"
        return {
            "ok": True,
            "status": status,
            "error_code": error_code,
            "can_import": can_import,
            "verification_id": verification_id,
            "file_path": source_path_text,
            "verified_file_path": verified_path_text,
            "file_name": path.name,
            "file_sha256": source_sha256,
            "verified_file_sha256": verified_sha256,
            "file_size": local_check["file_size"],
            "schema_fingerprint": schema_fingerprint,
            "apply_rows": apply_rows,
            "issues": issues,
            "repair_suggestions": local_check["repair_suggestions"],
            "warnings": warnings,
            "import_capability": import_capability,
            "verification": {
                "import_auth_prechecked": precheck_known,
                "import_auth_precheck_passed": True if precheck_known else None,
                "import_auth_source": import_capability.get("auth_source"),
                "local_precheck_passed": bool(effective_local_check["local_precheck_passed"]),
                "backend_verification_passed": got_backend_reply
                and backend_verification.get("beingValidated", True) is not False,
                "schema_fingerprint": schema_fingerprint,
                "file_sha256": source_sha256,
                "verified_file_sha256": verified_sha256,
                "file_format": local_check["extension"],
                "local_precheck_limited": bool(effective_local_check["local_precheck_limited"]),
                "auto_normalized": was_normalized,
            },
        }

    try:
        return self._run(profile, runner)
    except RuntimeError as exc:
        return self._runtime_error_as_result(exc, error_code="IMPORT_VERIFICATION_FAILED", extra={"can_import": False})
550
+
551
@tool_cn_name("导入修复")
def record_import_repair_local(
    self,
    *,
    profile: str,
    verification_id: str,
    authorized_file_modification: bool,
    output_path: str | None = None,
    selected_repairs: list[str] | None = None,
) -> dict[str, Any]:
    """Apply safe repairs to a copy of a verified .xlsx file and re-verify it.

    The source file recorded for ``verification_id`` is copied to the
    destination chosen by ``_resolve_repaired_output_path``; the selected
    repairs are applied to the first worksheet of that copy, the copy is
    saved, and ``record_import_verify`` is run on it.

    Args:
        profile: Profile name handed to ``self._run`` and to the re-verify.
        verification_id: Id of an earlier verification; must not be blank.
        authorized_file_modification: Must be truthy, otherwise nothing is
            done and ``IMPORT_REPAIR_NOT_AUTHORIZED`` is returned.
        output_path: Optional destination hint for the repaired copy.
        selected_repairs: Repair names to apply, each a member of
            ``SAFE_REPAIRS``. ``None`` or an empty list selects all of them.

    Returns:
        A result dict describing applied/skipped repairs and the outcome of
        the re-verification (including ``new_verification_id``), or a failed
        repair result when validation fails, the verification record is
        missing, the file is not .xlsx, or the source file no longer exists.
    """
    if not verification_id.strip():
        return self._failed_repair_result(error_code="IMPORT_VERIFICATION_FAILED", message="verification_id is required")
    if not authorized_file_modification:
        return self._failed_repair_result(
            error_code="IMPORT_REPAIR_NOT_AUTHORIZED",
            message="record_import_repair_local requires authorized_file_modification=true",
        )
    unknown_repairs = sorted({item for item in (selected_repairs or []) if item not in SAFE_REPAIRS})
    if unknown_repairs:
        return self._failed_repair_result(
            error_code="IMPORT_REPAIR_FORMAT_UNSUPPORTED",
            message=f"unknown selected_repairs: {', '.join(unknown_repairs)}",
        )

    def runner(_session_profile, context):
        stored = self._verification_store.get(verification_id)
        if stored is None:
            return self._failed_repair_result(error_code="IMPORT_VERIFICATION_STALE", message="verification_id is missing or expired")
        source_path = Path(str(stored.get("source_file_path") or stored["file_path"]))
        if source_path.suffix.lower() not in REPAIRABLE_IMPORT_EXTENSIONS:
            return self._failed_repair_result(
                error_code="IMPORT_REPAIR_FORMAT_UNSUPPORTED",
                message="record_import_repair_local v1 only supports .xlsx files",
                extra={"source_file_path": str(source_path)},
            )
        # The file may have been moved or deleted since verification. Report
        # that as a stale verification instead of letting shutil.copy2 raise
        # FileNotFoundError, which the RuntimeError handler below would not
        # catch. This mirrors the existence guard in record_import_start.
        if not source_path.is_file():
            return self._failed_repair_result(
                error_code="IMPORT_VERIFICATION_STALE",
                message="source file no longer exists; run record_import_verify again",
                extra={"source_file_path": str(source_path)},
            )

        app_key = str(stored["app_key"])
        expected_columns, _ = self._expected_import_columns(profile, context, app_key)
        normalized_repairs = set(selected_repairs or SAFE_REPAIRS)

        # Work on a copy so the original file stays untouched.
        destination = _resolve_repaired_output_path(source_path, output_path=output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(source_path, destination)

        workbook = load_workbook(destination)
        sheet = workbook[workbook.sheetnames[0]]

        # Repairs in their fixed application order. Each callable returns a
        # truthy value when it changed the sheet.
        repair_steps = (
            ("normalize_headers", lambda: _repair_headers(sheet, expected_columns)),
            ("trim_trailing_blank_rows", lambda: _trim_trailing_blank_rows(sheet)),
            ("normalize_enum_values", lambda: _normalize_enum_values(sheet, expected_columns)),
            ("normalize_date_formats", lambda: _normalize_date_formats(sheet)),
            ("normalize_number_formats", lambda: _normalize_number_formats(sheet)),
            ("normalize_url_cells", lambda: _normalize_url_cells(sheet)),
        )
        applied_repairs: list[str] = []
        skipped_repairs: list[str] = []
        for repair_name, apply_repair in repair_steps:
            if repair_name not in normalized_repairs:
                continue
            bucket = applied_repairs if apply_repair() else skipped_repairs
            bucket.append(repair_name)
        workbook.save(destination)

        verification_result = self.record_import_verify(
            profile=profile,
            app_key=app_key,
            file_path=str(destination),
        )
        return {
            "ok": bool(verification_result.get("ok")),
            "status": verification_result.get("status"),
            "error_code": verification_result.get("error_code"),
            "source_file_path": str(source_path),
            "repaired_file_path": str(destination),
            "applied_repairs": applied_repairs,
            "skipped_repairs": skipped_repairs,
            "new_verification_id": verification_result.get("verification_id"),
            "can_import_after_repair": bool(verification_result.get("can_import")),
            "post_repair_issues": verification_result.get("issues", []),
            "warnings": verification_result.get("warnings", []),
            "verification": {
                "source_preserved": True,
                "repair_authorized": True,
                "reverified": True,
                "selected_repairs": sorted(normalized_repairs),
            },
        }

    try:
        return self._run(profile, runner)
    except RuntimeError as exc:
        return self._runtime_error_as_result(exc, error_code="IMPORT_REPAIR_FORMAT_UNSUPPORTED")
660
+
661
@tool_cn_name("开始导入")
def record_import_start(
    self,
    *,
    profile: str,
    app_key: str,
    verification_id: str,
    being_enter_auditing: bool | None,
    view_key: str | None = None,
) -> dict[str, Any]:
    """Start a backend import from a previously stored verification.

    The stored verification is re-checked before anything is sent: it must
    exist, belong to ``app_key``, be marked importable, point at a file that
    still exists with the recorded SHA-256, and match the current schema
    fingerprint. Only then is the file uploaded and the socket import started.
    The started job is recorded in the job store under its import id.

    Args:
        profile: Profile name forwarded to ``_run`` and the upload helper.
        app_key: App the import targets; must match the stored verification.
        verification_id: Key of the stored verification to start from.
        being_enter_auditing: Must be passed explicitly (``None`` is rejected).
        view_key: Optional view key forwarded to the socket import call.

    Returns:
        The "accepted" payload on success, otherwise a failure payload built by
        ``_failed_start_result`` or ``_runtime_error_as_result``.
    """
    reject = self._failed_start_result
    if being_enter_auditing is None:
        return reject(
            error_code="IMPORT_VERIFICATION_FAILED",
            message="being_enter_auditing must be passed explicitly",
        )

    def runner(session_profile, context):
        stored = self._verification_store.get(verification_id)
        if stored is None:
            return reject(
                error_code="IMPORT_VERIFICATION_STALE",
                message="verification_id is missing or expired",
            )
        if str(stored.get("app_key")) != app_key:
            return reject(
                error_code="IMPORT_VERIFICATION_STALE",
                message="verification_id does not belong to the requested app",
            )
        if not bool(stored.get("can_import")):
            return reject(
                error_code="IMPORT_VERIFICATION_FAILED",
                message="verification_id is not importable",
                extra={"accepted": False},
            )

        # Prefer the verified (possibly normalized) file over the raw source.
        current_path = Path(str(stored.get("verified_file_path") or stored["file_path"]))
        if not current_path.is_file():
            return reject(
                error_code="IMPORT_VERIFICATION_STALE",
                message="verified file no longer exists",
            )
        actual_digest = _sha256_file(current_path)
        recorded_digest = stored.get("verified_file_sha256") or stored.get("file_sha256")
        if actual_digest != recorded_digest:
            return reject(
                error_code="IMPORT_FILE_CHANGED_AFTER_VERIFY",
                message="the file changed after verification; run record_import_verify again",
                extra={"accepted": False},
            )

        capability = stored.get("import_capability")
        if not isinstance(capability, dict):
            capability = None
        _columns, live_fingerprint = self._expected_import_columns(
            profile,
            context,
            app_key,
            import_capability=capability,
        )
        if live_fingerprint != stored.get("schema_fingerprint"):
            return reject(
                error_code="IMPORT_SCHEMA_CHANGED_AFTER_VERIFY",
                message="the applicant schema changed after verification; run record_import_verify again",
                extra={"accepted": False},
            )

        upload_result = self._file_tools.file_upload_local(
            profile=profile,
            upload_kind="login",
            file_path=str(current_path),
        )
        file_url = upload_result.get("download_url")
        if not isinstance(file_url, str) or not file_url.strip():
            return reject(
                error_code="IMPORT_VERIFICATION_FAILED",
                message="file upload did not return download_url",
            )

        source_file_name = str(stored.get("file_name") or current_path.name)
        try:
            socket_result = self.backend.start_socket_data_import(
                context,
                app_key=app_key,
                being_enter_auditing=bool(being_enter_auditing),
                view_key=view_key,
                excel_url=file_url,
                excel_name=source_file_name,
            )
        except QingflowApiError as exc:
            # Surface the dedicated ACK-timeout code when the backend reports it.
            if exc.details and exc.details.get("error_code") == "IMPORT_SOCKET_ACK_TIMEOUT":
                error_code = "IMPORT_SOCKET_ACK_TIMEOUT"
            else:
                error_code = "IMPORT_VERIFICATION_FAILED"
            return reject(
                error_code=error_code,
                message=exc.message,
                extra={"accepted": False, "file_url": file_url},
            )

        import_id = str(socket_result.get("import_id") or "")
        process_id_str = _normalize_optional_text(socket_result.get("process_id_str"))
        started_at = _utc_now().isoformat()
        job_record = {
            "created_at": started_at,
            "profile": profile,
            "app_key": app_key,
            "import_id": import_id,
            "process_id_str": process_id_str,
            "source_file_name": source_file_name,
            "started_at": started_at,
            "file_url": file_url,
            "verification_id": verification_id,
        }
        self._job_store.put(import_id, job_record)

        warnings = deepcopy(socket_result.get("warnings", []))
        checks = {
            "verification_id_valid": True,
            "file_hash_verified": True,
            "schema_fingerprint_verified": True,
            "upload_staged": True,
            "import_acknowledged": bool(import_id),
        }
        return {
            "ok": True,
            "status": "accepted",
            "accepted": True,
            "import_id": import_id,
            "process_id_str": process_id_str,
            "source_file_name": source_file_name,
            "file_url": file_url,
            "warnings": warnings,
            "verification": checks,
        }

    try:
        return self._run(profile, runner)
    except RuntimeError as exc:
        return self._runtime_error_as_result(
            exc,
            error_code="IMPORT_VERIFICATION_FAILED",
            extra={"accepted": False},
        )
767
+
768
@tool_cn_name("导入状态")
def record_import_status_get(
    self,
    *,
    profile: str,
    app_key: str = "",
    import_id: str | None = None,
    process_id_str: str | None = None,
) -> dict[str, Any]:
    """Look up the status of an import job.

    Exactly one of ``import_id`` / ``process_id_str`` may be given; with
    neither, ``app_key`` alone selects the most recent locally recorded job
    for that app. The local job store is consulted first to fill in a missing
    app key or process id, then the backend import-record page is fetched and
    matched through ``_match_import_record``.

    Args:
        profile: Profile name forwarded to ``_run``.
        app_key: Optional app key; required when it cannot be derived from a
            locally recorded job.
        import_id: Import id returned when the import was started.
        process_id_str: Backend process id of the import.

    Returns:
        A status payload with row counters and progress on success, otherwise
        a payload built by ``_failed_status_result`` or
        ``_runtime_error_as_result``.
    """
    normalized_app_key = (app_key or "").strip()
    normalized_import_id = _normalize_optional_text(import_id)
    normalized_process_id = _normalize_optional_text(process_id_str)
    if normalized_import_id and normalized_process_id:
        return self._failed_status_result(
            error_code="CONFIG_ERROR",
            message="record_import_status_get accepts import_id or process_id_str, but not both at the same time",
            extra={
                "import_id": normalized_import_id,
                "process_id_str": normalized_process_id,
                "details": {
                    "fix_hint": "Use only one of `import_id` or `process_id_str`. You may pass `app_key` as an optional routing hint for direct method compatibility.",
                }
            },
        )
    if not normalized_process_id and not normalized_import_id and not normalized_app_key:
        return self._failed_status_result(
            error_code="CONFIG_ERROR",
            message="record_import_status_get requires at least one selector: process_id_str, import_id, or app_key",
            extra={
                "import_id": normalized_import_id,
                "process_id_str": normalized_process_id,
                "details": {
                    "fix_hint": "Use `process_id_str` or `import_id` for a known import, or use only `app_key` to inspect the latest import in that app.",
                }
            },
        )

    def runner(_session_profile, context):
        # Resolve the locally recorded job, if any, from the given selector.
        local_job = None
        if normalized_import_id:
            local_job = self._job_store.get(normalized_import_id)
        if local_job is None and normalized_process_id:
            matches = [
                item
                for item in self._job_store.list()
                if _normalize_optional_text(item.get("process_id_str")) == normalized_process_id
            ]
            # Only trust a process-id match when it is unambiguous.
            local_job = matches[0] if len(matches) == 1 else None
        effective_process_id = normalized_process_id
        if effective_process_id is None and isinstance(local_job, dict):
            effective_process_id = _normalize_optional_text(local_job.get("process_id_str"))
        resolved_app_key = normalized_app_key
        if not resolved_app_key and isinstance(local_job, dict):
            resolved_app_key = str(local_job.get("app_key") or "").strip()
        if not resolved_app_key:
            return self._failed_status_result(
                error_code="CONFIG_ERROR",
                message="record_import_status_get could not determine app_key from the provided selector",
                extra={
                    "import_id": normalized_import_id,
                    "process_id_str": effective_process_id,
                    "details": {
                        "fix_hint": "Use the original `app_key`, or call import status with the latest-import mode: only `app_key`.",
                    }
                },
            )
        if local_job is None and not normalized_import_id and not normalized_process_id:
            # app_key-only mode: fall back to the first recorded job of that app.
            recent = [item for item in self._job_store.list() if str(item.get("app_key")) == resolved_app_key]
            local_job = recent[0] if recent else None
        page = self.backend.request(
            "GET",
            context,
            "/app/apply/dataImport/record",
            params={"appKey": resolved_app_key, "pageNum": 1, "pageSize": 100},
        )
        records = _extract_import_records(page)
        matched_record, matched_by = _match_import_record(
            records,
            local_job=local_job,
            import_id=normalized_import_id,
            process_id_str=effective_process_id,
        )
        if matched_record is None:
            return self._failed_status_result(
                error_code="IMPORT_STATUS_AMBIGUOUS",
                message="could not uniquely resolve an import record from the provided identifiers",
                extra={
                    "import_id": normalized_import_id,
                    "process_id_str": effective_process_id,
                    "matched_by": matched_by,
                },
            )

        def first_present(*keys):
            # An `a or b` chain would discard a legitimate 0 (for example zero
            # failed rows or 0% progress); only skip missing/empty values.
            for key in keys:
                value = matched_record.get(key)
                if value is not None and value != "":
                    return value
            return None

        normalized_process = _normalize_optional_text(
            matched_record.get("processIdStr") or matched_record.get("processId") or matched_record.get("process_id_str")
        )
        if local_job is not None and normalized_import_id:
            # Refresh the stored job with the process id reported by the backend.
            self._job_store.put(
                normalized_import_id,
                {
                    **local_job,
                    "created_at": local_job.get("created_at") or _utc_now().isoformat(),
                    "process_id_str": normalized_process,
                },
            )
        total_rows = _coerce_int(first_present("totalNumber", "total_rows"))
        success_rows = _coerce_int(first_present("successNum", "success_rows"))
        failed_rows = _coerce_int(first_present("errorNum", "failed_rows"))
        progress = _coerce_int(first_present("importPercentage", "progress"))
        return {
            "ok": True,
            "status": _normalize_optional_text(matched_record.get("processStatus")) or "unknown",
            "app_key": resolved_app_key,
            "import_id": normalized_import_id or (local_job.get("import_id") if isinstance(local_job, dict) else None),
            "process_id_str": normalized_process,
            "matched_by": matched_by,
            "source_file_name": matched_record.get("sourceFileName") or matched_record.get("source_file_name"),
            "total_rows": total_rows,
            "success_rows": success_rows,
            "failed_rows": failed_rows,
            "progress": progress,
            "error_file_urls": _normalize_error_file_urls(matched_record.get("errorFileUrls")),
            "operate_time": matched_record.get("operateTime"),
            "operate_user": matched_record.get("operateUser"),
            "warnings": [],
            "verification": {
                "status_lookup_completed": True,
                "matched_by": matched_by,
                "process_id_verified": bool(normalized_process),
            },
        }

    try:
        return self._run(profile, runner)
    except RuntimeError as exc:
        return self._runtime_error_as_result(exc, error_code="IMPORT_STATUS_AMBIGUOUS")
900
+
901
def _resolve_import_schema_bundle(
    self,
    profile: str,
    context,
    app_key: str,
    *,
    import_capability: JSONObject | None = None,
) -> tuple[Any, list[JSONObject], str]:  # type: ignore[no-untyped-def]
    """Resolve the field index, writable import columns and schema fingerprint.

    When the import capability reports ``auth_source == "data_manage_auth"``
    the form schema is fetched directly from the backend; otherwise the record
    tools' field index is used. Each field is described through the record
    tools in ``"applicant"`` schema mode and only writable fields are kept.

    Returns:
        ``(index, expected_columns, schema_fingerprint)`` where
        ``expected_columns`` is sorted by integer ``field_id``.
    """
    capability = import_capability or {}
    auth_source = _normalize_optional_text(capability.get("auth_source")) or "unknown"
    if auth_source != "data_manage_auth":
        index = self._record_tools._get_field_index(profile, context, app_key, force_refresh=False)
    else:
        schema = self.backend.request("GET", context, f"/app/{app_key}/form", params={"type": 1})
        index = _build_field_index(_normalize_form_schema(schema))
    ws_id = self.sessions.get_profile(profile).selected_ws_id

    def describe(field):
        # Returns the import column description, or None for read-only fields.
        payload = self._record_tools._schema_field_payload(
            profile,
            context,
            field,
            workflow_node_id=None,
            ws_id=ws_id,
            schema_mode="applicant",
        )
        if not bool(payload.get("writable")):
            return None
        return {
            "field_id": payload["field_id"],
            "title": payload["title"],
            "que_type": payload["que_type"],
            "required": bool(field.required),
            "write_kind": payload["write_kind"],
            "options": payload.get("options", []),
            "requires_lookup": bool(payload.get("requires_lookup")),
            "requires_upload": bool(payload.get("requires_upload")),
            "target_app_key": payload.get("target_app_key"),
            "target_app_name": payload.get("target_app_name"),
            "searchable_fields": payload.get("searchable_fields", []),
        }

    described = (describe(field) for field in index.by_id.values())
    expected_columns: list[JSONObject] = sorted(
        (entry for entry in described if entry is not None),
        key=lambda item: int(item["field_id"]),
    )
    schema_fingerprint = _stable_import_schema_fingerprint(expected_columns)
    return index, expected_columns, schema_fingerprint
947
+
948
+ def _expected_import_columns(
949
+ self,
950
+ profile: str,
951
+ context,
952
+ app_key: str,
953
+ *,
954
+ import_capability: JSONObject | None = None,
955
+ ) -> tuple[list[JSONObject], str]: # type: ignore[no-untyped-def]
956
+ """执行内部辅助逻辑。"""
957
+ _, expected_columns, schema_fingerprint = self._resolve_import_schema_bundle(
958
+ profile,
959
+ context,
960
+ app_key,
961
+ import_capability=import_capability,
962
+ )
963
+ return expected_columns, schema_fingerprint
964
+
965
def _local_verify(
    self,
    *,
    profile: str,
    context,
    path: Path,
    app_key: str,
    field_index: Any,
    expected_columns: list[JSONObject],
    allowed_header_titles: list[str] | None,
    schema_fingerprint: str,
) -> dict[str, Any]:
    """Run the local (pre-backend) checks on an import file.

    Unsupported extensions fail immediately. ``.xls`` files are accepted with
    a "limited precheck" warning and no further inspection. For other
    supported files the first sheet's header row is analysed, semantic cell
    checks run when the headers produced no error, and trailing blank rows and
    enum normalisation opportunities are reported as repair suggestions.

    Unexpected errors while inspecting the workbook are converted into an
    ``IMPORT_LOCAL_PRECHECK_FAILED`` issue instead of propagating.

    Returns:
        The result dict (file metadata, ``issues``, ``warnings``,
        ``repair_suggestions``, ``can_import``, ``error_code`` ...).
    """
    extension = path.suffix.lower()
    file_sha256 = _sha256_file(path)
    base_result: dict[str, Any] = {
        "app_key": app_key,
        "file_path": str(path.resolve()),
        "file_size": path.stat().st_size,
        "file_sha256": file_sha256,
        "schema_fingerprint": schema_fingerprint,
        "issues": [],
        "warnings": [],
        "repair_suggestions": [],
        "local_precheck_passed": True,
        "local_precheck_limited": False,
        "can_import": True,
        "extension": extension,
        "error_code": None,
    }

    def mark_failed(error_code: str) -> dict[str, Any]:
        # Flip the result into its failed state and hand it back.
        base_result["local_precheck_passed"] = False
        base_result["can_import"] = False
        base_result["error_code"] = error_code
        return base_result

    if extension not in SUPPORTED_IMPORT_EXTENSIONS:
        base_result["issues"].append(_issue("UNSUPPORTED_FILE_FORMAT", "Only .xlsx and .xls files are supported in import v1.", severity="error"))
        return mark_failed("IMPORT_FILE_FORMAT_UNSUPPORTED")
    if extension == ".xls":
        base_result["warnings"].append(
            {
                "code": "IMPORT_LOCAL_PRECHECK_LIMITED",
                "message": ".xls files are allowed for verify/start, but v1 local precheck is limited and repair is unsupported.",
            }
        )
        base_result["local_precheck_limited"] = True
        return base_result

    try:
        workbook = load_workbook(path, read_only=True, data_only=False)
    except Exception as exc:
        base_result["issues"].append(_issue("WORKBOOK_OPEN_FAILED", f"Workbook could not be opened: {exc}", severity="error"))
        return mark_failed("IMPORT_VERIFICATION_FAILED")

    # A read-only workbook keeps the underlying file open until it is closed
    # explicitly, so every exit path below must go through the finally clause.
    try:
        if not workbook.sheetnames:
            base_result["issues"].append(_issue("SHEET_MISSING", "Workbook does not contain any sheets.", severity="error"))
            return mark_failed("IMPORT_VERIFICATION_FAILED")
        try:
            sheet = workbook[workbook.sheetnames[0]]
            header_row = [cell.value for cell in next(sheet.iter_rows(min_row=1, max_row=1), [])]
            header_analysis = _analyze_headers(
                header_row,
                expected_columns,
                allowed_titles=allowed_header_titles,
            )
            base_result["issues"].extend(header_analysis["issues"])
            base_result["repair_suggestions"].extend(header_analysis["repair_suggestions"])
            # Cell-level checks rely on header positions, so skip them once the
            # header analysis has already reported an error.
            if not any(issue.get("severity") == "error" for issue in base_result["issues"]):
                semantic_issues, semantic_warnings = self._inspect_semantic_cells(
                    profile=profile,
                    context=context,
                    sheet=sheet,
                    expected_columns=expected_columns,
                    field_index=field_index,
                )
                base_result["issues"].extend(semantic_issues)
                base_result["warnings"].extend(semantic_warnings)
            trailing_blank_rows = _count_trailing_blank_rows(sheet)
            if trailing_blank_rows > 0:
                base_result["warnings"].append(
                    {
                        "code": "TRAILING_BLANK_ROWS",
                        "message": f"Workbook contains {trailing_blank_rows} trailing blank rows that can be safely removed.",
                    }
                )
                base_result["repair_suggestions"].append("trim_trailing_blank_rows")
            enum_suggestions = _find_enum_repairs(sheet, expected_columns)
            if enum_suggestions:
                base_result["warnings"].append(
                    {
                        "code": "ENUM_VALUE_NORMALIZATION_AVAILABLE",
                        "message": "Some enum-like cells can be normalized to exact template values without changing meaning.",
                    }
                )
                base_result["repair_suggestions"].append("normalize_enum_values")
            base_result["repair_suggestions"] = sorted(set(base_result["repair_suggestions"]))
        except Exception as exc:
            # Deliberate best-effort: report a structured failure, never crash.
            base_result["issues"].append(
                _issue(
                    "IMPORT_LOCAL_PRECHECK_FAILED",
                    f"Workbook content could not be fully inspected during local precheck: {exc}",
                    severity="error",
                )
            )
            base_result["warnings"].append(
                {
                    "code": "IMPORT_LOCAL_PRECHECK_FAILED",
                    "message": "Workbook local precheck encountered an unexpected compatibility problem; returning a structured verification failure instead of crashing.",
                }
            )
    finally:
        workbook.close()

    if any(issue.get("severity") == "error" for issue in base_result["issues"]):
        mark_failed("IMPORT_VERIFICATION_FAILED")
    return base_result
1084
+
1085
def _inspect_semantic_cells(
    self,
    *,
    profile: str,
    context,
    sheet,
    expected_columns: list[JSONObject],
    field_index: Any,
) -> tuple[list[JSONObject], list[JSONObject]]:  # type: ignore[no-untyped-def]
    """Run per-column value checks on the sheet and collect issues/warnings.

    Expected columns are grouped by normalized header title. A column is only
    inspected when its title maps to exactly one expected column AND exactly
    one header position in the sheet. Depending on the column, the check is
    delegated to the enum, relation, member or department inspector.

    Returns:
        ``(issues, warnings)`` accumulated over all inspected columns.
    """
    issues: list[JSONObject] = []
    warnings: list[JSONObject] = []
    header_positions = _sheet_header_positions(sheet)
    # Group expected columns by normalized title so duplicates can be detected.
    expected_by_key: dict[str, list[JSONObject]] = {}
    for column in expected_columns:
        key = _normalize_header_key(column.get("title"))
        if key:
            expected_by_key.setdefault(key, []).append(column)
    for key, columns in expected_by_key.items():
        positions = header_positions.get(key, [])
        # Skip titles that are ambiguous or absent on either side.
        if len(columns) != 1 or len(positions) != 1:
            continue
        column = columns[0]
        column_index = positions[0]
        write_kind = _normalize_optional_text(column.get("write_kind")) or "scalar"
        # NOTE(review): the nesting of the `continue` statements below was
        # reconstructed from a rendering that lost indentation — confirm
        # against the original file.
        if column.get("options"):
            issue = _inspect_enum_column(sheet, column_index=column_index, column=column)
            if issue is not None:
                issues.append(issue)
            continue
        if write_kind == "relation":
            issue = _inspect_relation_column(sheet, column_index=column_index, column=column)
            if issue is not None:
                issues.append(issue)
            continue
        field = field_index.by_id.get(str(column.get("field_id"))) if field_index is not None else None
        # Member columns are only validated when the field restricts its scope.
        if (
            write_kind == "member"
            and field is not None
            and (
                field.member_select_scope_type is not None
                or field.member_select_scope is not None
            )
        ):
            member_issue, member_warning = self._inspect_member_column(
                context=context,
                sheet=sheet,
                column_index=column_index,
                column=column,
                field=field,
            )
            if member_issue is not None:
                issues.append(member_issue)
                continue
            if member_warning is not None:
                warnings.append(member_warning)
            continue
        # Department columns follow the same rule as member columns.
        if (
            write_kind == "department"
            and field is not None
            and (
                field.dept_select_scope_type is not None
                or field.dept_select_scope is not None
            )
        ):
            department_issue, department_warning = self._inspect_department_column(
                context=context,
                sheet=sheet,
                column_index=column_index,
                column=column,
                field=field,
            )
            if department_issue is not None:
                issues.append(department_issue)
                continue
            if department_warning is not None:
                warnings.append(department_warning)
            continue
    return issues, warnings
1164
+
1165
def _inspect_member_column(
    self,
    *,
    context,
    sheet,
    column_index: int,
    column: JSONObject,
    field,
) -> tuple[JSONObject | None, JSONObject | None]:  # type: ignore[no-untyped-def]
    """Validate the values of a member column against the candidate scope.

    Each distinct non-empty value from row 2 onwards must look like an email
    (``EMAIL_PATTERN``) and resolve to exactly one member candidate. Scanning
    stops once three offending samples of a kind have been collected.

    Rows are streamed with ``iter_rows`` rather than ``range(2, max_row + 1)``
    plus ``cell()``: on read-only worksheets ``max_row`` can be ``None`` when
    the file carries no dimension metadata, and random cell access re-parses
    the sheet for every call.

    Returns:
        ``(issue, warning)``; at most one of them is not ``None``. A warning
        is returned when the candidate scope could not be resolved.

    Raises:
        QingflowApiError: When candidate resolution fails for a reason other
            than the ``"not_supported"`` category.
    """

    def validation_skipped():
        # Built lazily so the column title is only read when it is needed.
        return {
            "code": "MEMBER_CANDIDATE_VALIDATION_SKIPPED",
            "message": f"Member candidate scope for column '{column['title']}' could not be resolved safely during local precheck.",
        }

    invalid_email_samples: list[str] = []
    scope_miss_samples: list[str] = []
    checked_values: set[str] = set()
    data_rows = sheet.iter_rows(
        min_row=2,
        min_col=column_index,
        max_col=column_index,
        values_only=True,
    )
    for row_index, row in enumerate(data_rows, start=2):
        text = _normalize_optional_text(row[0] if row else None)
        if text is None:
            continue
        normalized = text.strip()
        # Each distinct value is only validated once.
        if normalized in checked_values:
            continue
        checked_values.add(normalized)
        if not EMAIL_PATTERN.fullmatch(normalized):
            invalid_email_samples.append(f"row {row_index}: {normalized}")
            if len(invalid_email_samples) >= 3:
                break
            continue
        try:
            candidates = self._record_tools._resolve_member_candidates(context, field, keyword=normalized)
            matches = self._record_tools._match_member_candidates(candidates, normalized)
        except QingflowApiError as exc:
            if exc.category == "not_supported":
                return None, validation_skipped()
            raise
        except RuntimeError:
            return None, validation_skipped()
        if len(matches) != 1:
            scope_miss_samples.append(f"row {row_index}: {normalized}")
            if len(scope_miss_samples) >= 3:
                break
    if invalid_email_samples:
        return _issue(
            "MEMBER_IMPORT_REQUIRES_EMAIL",
            f"Column '{column['title']}' must use member email values in import files. Samples: {', '.join(invalid_email_samples)}",
            severity="error",
        ), None
    if scope_miss_samples:
        return _issue(
            "MEMBER_NOT_IN_CANDIDATE_SCOPE",
            f"Column '{column['title']}' contains members outside the current candidate scope. Samples: {', '.join(scope_miss_samples)}",
            severity="error",
        ), None
    return None, None
1223
+
1224
def _inspect_department_column(
    self,
    *,
    context,
    sheet,
    column_index: int,
    column: JSONObject,
    field,
) -> tuple[JSONObject | None, JSONObject | None]:  # type: ignore[no-untyped-def]
    """Validate the values of a department column against the candidate scope.

    Each distinct non-empty value from row 2 onwards must resolve to exactly
    one department candidate. Scanning stops once three offending samples have
    been collected.

    Rows are streamed with ``iter_rows`` rather than ``range(2, max_row + 1)``
    plus ``cell()``: on read-only worksheets ``max_row`` can be ``None`` when
    the file carries no dimension metadata, and random cell access re-parses
    the sheet for every call.

    Returns:
        ``(issue, warning)``; at most one of them is not ``None``. A warning
        is returned when the candidate scope could not be resolved.

    Raises:
        QingflowApiError: When candidate resolution fails for a reason other
            than the ``"not_supported"`` category.
    """

    def validation_skipped():
        # Built lazily so the column title is only read when it is needed.
        return {
            "code": "DEPARTMENT_CANDIDATE_VALIDATION_SKIPPED",
            "message": f"Department candidate scope for column '{column['title']}' could not be resolved safely during local precheck.",
        }

    scope_miss_samples: list[str] = []
    checked_values: set[str] = set()
    data_rows = sheet.iter_rows(
        min_row=2,
        min_col=column_index,
        max_col=column_index,
        values_only=True,
    )
    for row_index, row in enumerate(data_rows, start=2):
        text = _normalize_optional_text(row[0] if row else None)
        if text is None:
            continue
        normalized = text.strip()
        # Each distinct value is only validated once.
        if normalized in checked_values:
            continue
        checked_values.add(normalized)
        try:
            candidates = self._record_tools._resolve_department_candidates(context, field, keyword=normalized)
            matches = self._record_tools._match_department_candidates(candidates, normalized)
        except QingflowApiError as exc:
            if exc.category == "not_supported":
                return None, validation_skipped()
            raise
        except RuntimeError:
            return None, validation_skipped()
        if len(matches) != 1:
            scope_miss_samples.append(f"row {row_index}: {normalized}")
            if len(scope_miss_samples) >= 3:
                break
    if scope_miss_samples:
        return _issue(
            "DEPARTMENT_NOT_IN_CANDIDATE_SCOPE",
            f"Column '{column['title']}' contains departments outside the current candidate scope. Samples: {', '.join(scope_miss_samples)}",
            severity="error",
        ), None
    return None, None
1271
+
1272
def _load_template_header_profile(
    self,
    context,
    app_key: str,
    *,
    import_capability: JSONObject | None = None,
    expected_columns: list[JSONObject] | None = None,
) -> tuple[dict[str, Any], list[JSONObject]]:  # type: ignore[no-untyped-def]
    """Load header information from the app's official Excel template.

    The template URL is requested from the backend, the workbook is downloaded
    and its first sheet is read for first-row titles, header depth and leaf
    titles. Any failure is swallowed and reported as a warning instead; with
    ``auth_source == "apply_auth"`` and known expected columns, their titles
    are used as the fallback header profile.

    Returns:
        ``(profile, warnings)`` where ``profile`` has the keys
        ``allowed_titles``, ``leaf_titles`` and ``header_depth``.
    """

    def empty_profile() -> dict[str, Any]:
        # Fresh dict per call so callers never share a mutable result.
        return {"allowed_titles": None, "leaf_titles": None, "header_depth": 1}

    warnings: list[JSONObject] = []
    try:
        payload = self.backend.request("GET", context, f"/app/{app_key}/apply/excelTemplate")
        template_url = _pick_template_url(payload)
        if not template_url:
            return empty_profile(), warnings
        content = self.backend.download_binary(template_url)
        workbook = load_workbook(BytesIO(content), read_only=False, data_only=False)
        if not workbook.sheetnames:
            return empty_profile(), warnings
        sheet = workbook[workbook.sheetnames[0]]
        first_row = next(sheet.iter_rows(min_row=1, max_row=1), [])
        titles = [_normalize_optional_text(cell.value) for cell in first_row]
        normalized_titles = [title for title in titles if title]
        header_depth = _infer_header_depth(sheet)
        leaf_titles = [title for title in _extract_leaf_header_titles(sheet, header_depth) if title]
        profile = {
            "allowed_titles": normalized_titles or None,
            "leaf_titles": leaf_titles or None,
            "header_depth": header_depth,
        }
        return profile, warnings
    except Exception:
        # Best-effort: a missing template must not abort the local precheck.
        auth_source = _normalize_optional_text((import_capability or {}).get("auth_source"))
        if auth_source == "apply_auth" and expected_columns:
            warnings.append(
                {
                    "code": "IMPORT_TEMPLATE_HEADER_LOCAL_FALLBACK",
                    "message": "Official template headers require data management permission; local precheck fell back to applicant import columns.",
                }
            )
            fallback_titles = [str(item["title"]) for item in expected_columns]
            fallback = {"allowed_titles": fallback_titles, "leaf_titles": fallback_titles, "header_depth": 1}
            return fallback, warnings
        warnings.append(
            {
                "code": "IMPORT_TEMPLATE_HEADER_UNAVAILABLE",
                "message": "Official template headers could not be loaded during local precheck; falling back to applicant writable columns only.",
            }
        )
        return empty_profile(), warnings
1322
+
1323
def _maybe_auto_normalize_file(
    self,
    *,
    source_path: Path,
    expected_columns: list[JSONObject],
    template_header_profile: dict[str, Any],
    local_check: dict[str, Any],
) -> dict[str, Any] | None:
    """Try to build an auto-normalized copy of an ``.xlsx`` import file.

    The workbook is first loaded in normal mode; if anything in that attempt
    raises, it is retried in read-only mode with the header depth inferred
    from raw rows, and a compatibility-fallback warning is prepended to the
    result. ``expected_columns`` is accepted for interface compatibility but
    not used here.

    Returns:
        The result of ``_build_auto_normalized_file``, or ``None`` for
        non-``.xlsx`` files and workbooks without sheets.
    """
    if source_path.suffix.lower() != ".xlsx":
        return None
    try:
        workbook = load_workbook(source_path, read_only=False, data_only=False)
        if not workbook.sheetnames:
            return None
        sheet = workbook[workbook.sheetnames[0]]
        rows = [list(row) for row in sheet.iter_rows(values_only=True)]
        header_depth = _infer_header_depth(sheet)
        return _build_auto_normalized_file(
            source_path=source_path,
            sheet_title=sheet.title,
            rows=rows,
            header_depth=header_depth,
            template_leaf_titles=template_header_profile.get("leaf_titles"),
            local_check=local_check,
        )
    except Exception as exc:
        workbook = load_workbook(source_path, read_only=True, data_only=False)
        # A read-only workbook keeps the source file open until it is closed
        # explicitly; release it on every exit path.
        try:
            if not workbook.sheetnames:
                return None
            sheet = workbook[workbook.sheetnames[0]]
            rows = [list(row) for row in sheet.iter_rows(values_only=True)]
            header_depth = _infer_header_depth_from_rows(
                rows,
                template_header_profile=template_header_profile,
                local_check=local_check,
            )
            normalized = _build_auto_normalized_file(
                source_path=source_path,
                sheet_title=sheet.title,
                rows=rows,
                header_depth=header_depth,
                template_leaf_titles=template_header_profile.get("leaf_titles"),
                local_check=local_check,
            )
        finally:
            workbook.close()
        if normalized is not None:
            normalized["warnings"].insert(
                0,
                {
                    "code": "IMPORT_AUTO_NORMALIZATION_COMPATIBILITY_FALLBACK",
                    "message": f"Workbook compatibility normalization retried in compatibility mode after a workbook parsing error: {exc}",
                },
            )
        return normalized
1377
+
1378
def _fetch_import_capability(self, context, app_key: str) -> tuple[JSONObject, list[JSONObject]]:  # type: ignore[no-untyped-def]
    """Derive the import capability of an app from its base info.

    The app's ``baseInfo`` is requested from the backend; when that request
    raises ``QingflowApiError`` the capability is derived from ``None``.
    """
    try:
        base_info = self.backend.request("GET", context, f"/app/{app_key}/baseInfo")
    except QingflowApiError:
        return _derive_import_capability(None)
    return _derive_import_capability(base_info)
1385
+
1386
def _write_local_template(
    self,
    *,
    expected_columns: list[JSONObject],
    destination_hint: str | None,
    app_key: str,
) -> str:
    """Write a one-row Excel template containing the expected column titles.

    With a ``destination_hint`` the target path comes from
    ``_resolve_template_download_path``; otherwise a uniquely named file in
    the system temp directory is used. Missing parent directories are created.

    Returns:
        The path of the written workbook, as a string.
    """
    if not destination_hint:
        file_name = f"qingflow-import-template-{app_key}-{uuid4().hex[:8]}.xlsx"
        destination = Path(tempfile.gettempdir()) / file_name
    else:
        destination = _resolve_template_download_path(destination_hint, app_key=app_key)
    destination.parent.mkdir(parents=True, exist_ok=True)

    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "导入模板"
    header_titles = [str(column["title"]) for column in expected_columns]
    sheet.append(header_titles)
    workbook.save(destination)
    return str(destination)
1405
+
1406
+ def _failed_template_result(
1407
+ self,
1408
+ *,
1409
+ app_key: str,
1410
+ error_code: str,
1411
+ message: str,
1412
+ request_route: JSONObject | None = None,
1413
+ ) -> dict[str, Any]:
1414
+ """执行内部辅助逻辑。"""
1415
+ return {
1416
+ "ok": False,
1417
+ "status": "failed",
1418
+ "error_code": error_code,
1419
+ "app_key": app_key,
1420
+ "template_url": None,
1421
+ "downloaded_to_path": None,
1422
+ "expected_columns": [],
1423
+ "schema_fingerprint": None,
1424
+ "request_route": request_route,
1425
+ "warnings": [],
1426
+ "verification": {"template_url_resolved": False},
1427
+ "message": message,
1428
+ }
1429
+
1430
def _failed_verify_result(
    self,
    *,
    app_key: str,
    file_path: str,
    error_code: str,
    message: str,
    extra: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the failure payload for an import verification.

    The payload carries a single error issue built from ``error_code`` and
    ``message``; entries in ``extra`` override the defaults.

    NOTE(review): unlike the other failure builders in this class, this one
    reports ``"ok": True`` together with ``"status": "failed"`` — confirm that
    callers rely on this before changing it.
    """
    if file_path:
        source = Path(file_path)
        shown_path = str(source.expanduser())
        file_name = source.name
    else:
        # Keep the caller's falsy value (e.g. "") as-is in the payload.
        shown_path = file_path
        file_name = None
    checks = {
        "import_auth_prechecked": False,
        "import_auth_precheck_passed": False,
        "local_precheck_passed": False,
        "backend_verification_passed": False,
    }
    payload: dict[str, Any] = {
        "ok": True,
        "status": "failed",
        "error_code": error_code,
        "app_key": app_key,
        "can_import": False,
        "verification_id": None,
        "file_path": shown_path,
        "verified_file_path": None,
        "file_name": file_name,
        "file_sha256": None,
        "verified_file_sha256": None,
        "file_size": None,
        "schema_fingerprint": None,
        "apply_rows": None,
        "issues": [_issue(error_code, message, severity="error")],
        "repair_suggestions": [],
        "warnings": [],
        "verification": checks,
        "import_capability": None,
        "message": message,
    }
    payload.update(extra or {})
    return payload
1470
+
1471
def _failed_repair_result(self, *, error_code: str, message: str, extra: dict[str, Any] | None = None) -> dict[str, Any]:
    """Build the failure payload for an import repair.

    The payload carries a single error issue under ``post_repair_issues``;
    entries in ``extra`` override the defaults.
    """
    checks = {
        "repair_authorized": False,
        "reverified": False,
    }
    payload: dict[str, Any] = {
        "ok": False,
        "status": "failed",
        "error_code": error_code,
        "source_file_path": None,
        "repaired_file_path": None,
        "applied_repairs": [],
        "skipped_repairs": [],
        "new_verification_id": None,
        "can_import_after_repair": False,
        "post_repair_issues": [_issue(error_code, message, severity="error")],
        "warnings": [],
        "verification": checks,
        "message": message,
    }
    payload.update(extra or {})
    return payload
1494
+
1495
+ def _failed_start_result(self, *, error_code: str, message: str, extra: dict[str, Any] | None = None) -> dict[str, Any]:
1496
+ """执行内部辅助逻辑。"""
1497
+ payload = {
1498
+ "ok": False,
1499
+ "status": "failed",
1500
+ "error_code": error_code,
1501
+ "accepted": False,
1502
+ "import_id": None,
1503
+ "process_id_str": None,
1504
+ "source_file_name": None,
1505
+ "file_url": None,
1506
+ "warnings": [],
1507
+ "verification": {
1508
+ "verification_id_valid": False,
1509
+ "file_hash_verified": False,
1510
+ "schema_fingerprint_verified": False,
1511
+ "upload_staged": False,
1512
+ "import_acknowledged": False,
1513
+ },
1514
+ "message": message,
1515
+ }
1516
+ if extra:
1517
+ payload.update(extra)
1518
+ return payload
1519
+
1520
+ def _failed_status_result(self, *, error_code: str, message: str, extra: dict[str, Any] | None = None) -> dict[str, Any]:
1521
+ """执行内部辅助逻辑。"""
1522
+ payload = {
1523
+ "ok": False,
1524
+ "status": "failed",
1525
+ "error_code": error_code,
1526
+ "import_id": None,
1527
+ "process_id_str": None,
1528
+ "matched_by": None,
1529
+ "source_file_name": None,
1530
+ "total_rows": None,
1531
+ "success_rows": None,
1532
+ "failed_rows": None,
1533
+ "progress": None,
1534
+ "error_file_urls": [],
1535
+ "operate_time": None,
1536
+ "operate_user": None,
1537
+ "warnings": [],
1538
+ "verification": {
1539
+ "status_lookup_completed": False,
1540
+ "process_id_verified": False,
1541
+ },
1542
+ "message": message,
1543
+ }
1544
+ if extra:
1545
+ payload.update(extra)
1546
+ return payload
1547
+
1548
+ def _runtime_error_as_result(
1549
+ self,
1550
+ error: RuntimeError,
1551
+ *,
1552
+ error_code: str,
1553
+ extra: dict[str, Any] | None = None,
1554
+ ) -> dict[str, Any]:
1555
+ """执行内部辅助逻辑。"""
1556
+ try:
1557
+ payload = json.loads(str(error))
1558
+ except json.JSONDecodeError:
1559
+ payload = {"message": str(error)}
1560
+ response = {
1561
+ "ok": False,
1562
+ "status": "failed",
1563
+ "error_code": ((payload.get("details") or {}) if isinstance(payload.get("details"), dict) else {}).get("error_code") or error_code,
1564
+ "warnings": [],
1565
+ "verification": {},
1566
+ "message": payload.get("message") or str(error),
1567
+ }
1568
+ if extra:
1569
+ response.update(extra)
1570
+ return response
1571
+
1572
+
1573
+ def _pick_template_url(payload: Any) -> str | None:
1574
+ if isinstance(payload, dict):
1575
+ for key in ("excelUrl", "url", "downloadUrl"):
1576
+ value = payload.get(key)
1577
+ if isinstance(value, str) and value.strip():
1578
+ return value.strip()
1579
+ return None
1580
+
1581
+
1582
def _resolve_template_download_path(raw_path: str, *, app_key: str) -> Path:
    """Decide where a downloaded import template should be written.

    ``raw_path`` (with ``~`` expanded) is used as the file path when it has
    a suffix and is not an existing directory. Otherwise it is treated as a
    directory and ``<app_key>_import_template.xlsx`` is appended.
    """
    target = Path(raw_path).expanduser()
    is_existing_dir = target.exists() and target.is_dir()
    if target.suffix and not is_existing_dir:
        return target
    return target / f"{app_key}_import_template.xlsx"
1589
+
1590
+
1591
+ def _resolve_repaired_output_path(source_path: Path, *, output_path: str | None) -> Path:
1592
+ if output_path:
1593
+ path = Path(output_path).expanduser()
1594
+ if path.exists() and path.is_dir():
1595
+ return path / f"{source_path.stem}.repaired{source_path.suffix}"
1596
+ if path.suffix:
1597
+ return path
1598
+ return path / f"{source_path.stem}.repaired{source_path.suffix}"
1599
+ return source_path.with_name(f"{source_path.stem}.repaired{source_path.suffix}")
1600
+
1601
+
1602
def _resolve_verified_output_path(source_path: Path) -> Path:
    """Return a fresh path in the system temp directory for a verified copy.

    The file name keeps the source stem and suffix and embeds the first
    eight hex characters of a random UUID.
    """
    token = uuid4().hex[:8]
    file_name = f"qingflow-import-verified-{source_path.stem}-{token}{source_path.suffix}"
    return Path(tempfile.gettempdir()) / file_name
1604
+
1605
+
1606
def _utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC ``datetime``."""
    return datetime.now(tz=timezone.utc)
1608
+
1609
+
1610
def _sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at ``path``.

    The file is read in 1 MiB pieces so it never has to fit in memory.
    """
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while piece := stream.read(1024 * 1024):
            hasher.update(piece)
    return hasher.hexdigest()
1616
+
1617
+
1618
+ def _normalize_optional_text(value: Any) -> str | None:
1619
+ if value is None:
1620
+ return None
1621
+ normalized = str(value).strip()
1622
+ return normalized or None
1623
+
1624
+
1625
def _normalize_header_key(value: Any) -> str:
    """Return the case-folded, stripped text of ``value`` for header matching.

    Values that normalize to nothing yield the empty string.
    """
    normalized = _normalize_optional_text(value)
    if normalized is None:
        return ""
    return normalized.casefold()
1628
+
1629
+
1630
def _issue(code: str, message: str, *, severity: str, repairable: bool = False, repair_code: str | None = None) -> JSONObject:
    """Build an issue record with ``code``, ``message``, ``severity`` and ``repairable``.

    A ``repair_code`` entry is added only when a truthy value is supplied.
    """
    entry: JSONObject = dict(
        code=code,
        message=message,
        severity=severity,
        repairable=repairable,
    )
    if not repair_code:
        return entry
    entry["repair_code"] = repair_code
    return entry
1640
+
1641
+
1642
def _analyze_headers(
    header_row: list[Any],
    expected_columns: list[JSONObject],
    *,
    allowed_titles: list[str] | None = None,
) -> dict[str, Any]:
    """Compare a header row against the allowed column titles.

    Titles are matched after stripping and case-folding. The allowed titles
    are ``allowed_titles`` when that list is non-empty, otherwise the
    ``title`` of every expected column. Three problems are reported, each as
    a repairable error issue with repair code ``normalize_headers``:
    titles present fewer times than allowed titles require, headers that
    match no allowed title, and headers occurring more often than allowed.

    Returns:
        A dict with ``issues`` and ``repair_suggestions``. The latter holds
        ``"normalize_headers"`` when any problem was found or any header
        differs from its canonical spelling.
    """
    expected_titles = [str(column["title"]) for column in expected_columns]
    reference_titles = allowed_titles if allowed_titles else expected_titles
    required_counts = _header_title_counts(reference_titles)

    # Canonical spelling for each normalized key; a later duplicate wins.
    canonical_by_key: dict[str, str] = {}
    for reference in reference_titles:
        reference_key = _normalize_header_key(reference)
        reference_title = _normalize_optional_text(reference)
        if reference_key and reference_title:
            canonical_by_key[reference_key] = reference_title

    # Blank cells are kept as "" so the list mirrors the row, but they are
    # not counted as occurrences.
    actual_headers: list[str] = [_normalize_optional_text(cell) or "" for cell in header_row]
    occurrences: dict[str, int] = {}
    for header in actual_headers:
        if not header:
            continue
        header_key = _normalize_header_key(header)
        occurrences[header_key] = occurrences.get(header_key, 0) + 1

    missing: list[str] = []
    for key, needed in required_counts.items():
        present = occurrences.get(key, 0)
        if present >= needed:
            continue
        label = canonical_by_key.get(key) or key
        missing.append(label if needed <= 1 else f"{label} (need {needed}, got {present})")

    extra = [
        header
        for header in actual_headers
        if header and _normalize_header_key(header) not in canonical_by_key
    ]

    # An unknown header may appear once; a known one as often as it is allowed.
    duplicates = [
        canonical_by_key.get(key) or key
        for key, count in occurrences.items()
        if key and count > max(required_counts.get(key, 0), 1)
    ]

    def _header_issue(code: str, text: str) -> JSONObject:
        return _issue(
            code,
            text,
            severity="error",
            repairable=True,
            repair_code="normalize_headers",
        )

    issues: list[JSONObject] = []
    if missing:
        issues.append(_header_issue("MISSING_COLUMNS", f"Missing expected columns: {', '.join(missing)}"))
    if extra:
        issues.append(_header_issue("EXTRA_COLUMNS", f"Unexpected columns: {', '.join(extra)}"))
    if duplicates:
        issues.append(
            _header_issue("DUPLICATE_COLUMNS", f"Duplicate columns: {', '.join(sorted(set(duplicates)))}")
        )

    needs_respelling = any(
        (canonical := canonical_by_key.get(_normalize_header_key(header))) and canonical != header
        for header in actual_headers
        if header
    )
    repair_suggestions: list[str] = []
    if missing or extra or duplicates or needs_respelling:
        repair_suggestions.append("normalize_headers")
    return {"issues": issues, "repair_suggestions": repair_suggestions}
1729
+
1730
+
1731
def _header_title_counts(titles: list[str]) -> dict[str, int]:
    """Count how often each normalized header key occurs in ``titles``.

    Titles that normalize to an empty key are ignored.
    """
    tally: dict[str, int] = {}
    for key in map(_normalize_header_key, titles):
        if key:
            tally[key] = tally.get(key, 0) + 1
    return tally
1739
+
1740
+
1741
def _sheet_header_positions(sheet) -> dict[str, list[int]]:  # type: ignore[no-untyped-def]
    """Map each normalized first-row header to all of its 1-based column indexes.

    Cells whose header normalizes to an empty key are skipped.
    """
    positions: dict[str, list[int]] = {}
    first_row = next(sheet.iter_rows(min_row=1, max_row=1), [])
    for column_number, cell in enumerate(first_row, start=1):
        header_key = _normalize_header_key(cell.value)
        if header_key:
            positions.setdefault(header_key, []).append(column_number)
    return positions
1749
+
1750
+
1751
def _inspect_enum_column(sheet, *, column_index: int, column: JSONObject) -> JSONObject | None:  # type: ignore[no-untyped-def]
    """Report cells in an option column whose value is not an allowed option.

    Rows from 2 to ``sheet.max_row`` are scanned and values are compared
    after stripping and case-folding. Scanning stops after three offending
    cells. Returns ``None`` when the column declares no options or every
    non-blank cell matches, otherwise an ``INVALID_ENUM_VALUES`` error issue
    listing the samples.
    """
    allowed = [str(option).strip() for option in column.get("options", []) if str(option).strip()]
    if not allowed:
        return None
    allowed_keys = {_normalize_header_key(option): option for option in allowed}

    samples: list[str] = []
    for row_number in range(2, sheet.max_row + 1):
        cell_text = _normalize_optional_text(sheet.cell(row=row_number, column=column_index).value)
        if cell_text is None or _normalize_header_key(cell_text) in allowed_keys:
            continue
        samples.append(f"row {row_number}: {cell_text}")
        if len(samples) >= 3:
            break

    if samples:
        return _issue(
            "INVALID_ENUM_VALUES",
            f"Column '{column['title']}' contains values outside the allowed options. Samples: {', '.join(samples)}",
            severity="error",
        )
    return None
1773
+
1774
+
1775
def _inspect_relation_column(sheet, *, column_index: int, column: JSONObject) -> JSONObject | None:  # type: ignore[no-untyped-def]
    """Report cells in a relation column that are not positive integer ids.

    Rows from 2 to ``sheet.max_row`` are scanned, blank cells are ignored
    and scanning stops after three offending cells. Returns ``None`` when
    every non-blank cell is acceptable, otherwise a
    ``RELATION_IMPORT_REQUIRES_APPLY_ID`` error issue listing the samples.
    """
    samples: list[str] = []
    for row_number in range(2, sheet.max_row + 1):
        raw_value = sheet.cell(row=row_number, column=column_index).value
        cell_text = _normalize_optional_text(raw_value)
        if cell_text is None or _coerce_positive_relation_id(raw_value) is not None:
            continue
        samples.append(f"row {row_number}: {cell_text}")
        if len(samples) >= 3:
            break

    if samples:
        return _issue(
            "RELATION_IMPORT_REQUIRES_APPLY_ID",
            f"Column '{column['title']}' must use target record apply_id values during import. Samples: {', '.join(samples)}",
            severity="error",
        )
    return None
1795
+
1796
+
1797
def _stable_import_schema_fingerprint(expected_columns: list[JSONObject]) -> str:
    """Return a SHA-256 hex digest over selected fields of the import schema.

    Only ``field_id``, ``title``, ``que_type``, ``required``, ``write_kind``,
    ``options``, ``requires_lookup``, ``requires_upload`` and
    ``target_app_key`` are hashed, so other keys on a column do not change
    the fingerprint. The first five keys are mandatory (a missing one raises
    ``KeyError``); the rest have defaults. Column order is significant.
    """
    reduced = [
        {
            "field_id": column["field_id"],
            "title": column["title"],
            "que_type": column["que_type"],
            "required": column["required"],
            "write_kind": column["write_kind"],
            "options": column.get("options", []),
            "requires_lookup": bool(column.get("requires_lookup")),
            "requires_upload": bool(column.get("requires_upload")),
            "target_app_key": column.get("target_app_key"),
        }
        for column in expected_columns
    ]
    serialized = json.dumps(reduced, ensure_ascii=False, sort_keys=True)
    return hashlib.sha256(serialized.encode("utf-8")).hexdigest()
1816
+
1817
+
1818
def _coerce_positive_relation_id(value: Any) -> int | None:
    """Return ``value`` as a positive integer id, or ``None`` if it is not one.

    Accepted inputs are positive ints, floats holding a positive whole
    number, and text consisting solely of decimal digits that parses to a
    positive integer. Booleans are always rejected.
    """
    # bool is a subclass of int; True/False must not be read as ids 1/0.
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value if value > 0 else None
    if isinstance(value, float):
        if value.is_integer() and value > 0:
            return int(value)
        return None
    text = _normalize_optional_text(value)
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as "²" or "①" that int() rejects with ValueError.
    if text is None or not text.isdecimal():
        return None
    try:
        parsed = int(text)
    except ValueError:
        # e.g. a digit string longer than the interpreter's int-parsing limit
        return None
    return parsed if parsed > 0 else None
1834
+
1835
+
1836
def _infer_header_depth(sheet) -> int:  # type: ignore[no-untyped-def]
    """Estimate how many leading rows of ``sheet`` make up the header.

    Starts from a depth of 1 and extends it to the last row of any merged
    range that begins in row 1. When row 1 contains a merge and row 2 has at
    least one non-blank cell, the depth is at least 2. The result never
    exceeds the sheet's row count and is never below 1.
    """
    header_depth = 1
    merged_cells = getattr(sheet, "merged_cells", None)
    # Use the ``ranges`` attribute when the merged-cells object has one,
    # otherwise iterate the object itself.
    merged_ranges = getattr(merged_cells, "ranges", merged_cells) if merged_cells is not None else []
    row_one_has_merge = False
    for merged_range in merged_ranges or []:
        # A range lacking min_row/max_row is treated as covering row 1 only.
        min_row = int(getattr(merged_range, "min_row", 1))
        max_row = int(getattr(merged_range, "max_row", 1))
        if min_row == 1:
            row_one_has_merge = True
            header_depth = max(header_depth, max_row)
    if row_one_has_merge and sheet.max_row >= 2:
        row_two_values = [cell.value for cell in sheet[2]]
        if any(_normalize_optional_text(value) for value in row_two_values):
            header_depth = max(header_depth, 2)
    return min(header_depth, max(1, int(sheet.max_row)))
1852
+
1853
+
1854
def _extract_leaf_header_titles(sheet, header_depth: int) -> list[str]:  # type: ignore[no-untyped-def]
    """Return one title per sheet column, taken from the lowest filled header row.

    For each column the header rows are searched from row ``header_depth``
    (clamped to between 1 and the sheet's row count) up to row 1, and the
    first non-blank text wins. Columns with no header text yield ``""``.
    """
    column_count = max(1, int(sheet.max_column))
    depth = max(1, min(header_depth, int(sheet.max_row)))

    def _deepest_title(column_number: int) -> str:
        row_number = depth
        while row_number >= 1:
            found = _normalize_optional_text(sheet.cell(row=row_number, column=column_number).value)
            if found:
                return found
            row_number -= 1
        return ""

    return [_deepest_title(column_number) for column_number in range(1, column_count + 1)]
1867
+
1868
+
1869
def _overlay_header_titles(actual_titles: list[str], template_leaf_titles: Any) -> list[str]:
    """Return a copy of ``actual_titles`` with template titles laid over it.

    Each non-blank template title replaces the title at the same position.
    Template entries beyond the length of ``actual_titles`` are ignored, and
    a ``template_leaf_titles`` that is not a list leaves the copy untouched.
    """
    merged = list(actual_titles)
    if isinstance(template_leaf_titles, list):
        for position, template_title in enumerate(template_leaf_titles):
            replacement = _normalize_optional_text(template_title)
            if replacement is not None and position < len(merged):
                merged[position] = replacement
    return merged
1880
+
1881
+
1882
def _infer_header_depth_from_rows(
    rows: list[list[Any]],
    *,
    template_header_profile: dict[str, Any],
    local_check: dict[str, Any],
) -> int:
    """Infer the header depth of ``rows`` from the template profile and local check.

    The template's ``header_depth`` (at least 1), capped at the number of
    rows, is returned when it exceeds 1. Otherwise the depth is 2 if the
    local check suggested ``normalize_headers`` and the second row has a
    non-blank cell, else 1.
    """
    declared_depth = max(1, int(template_header_profile.get("header_depth") or 1))
    capped_depth = min(declared_depth, max(1, len(rows)))
    if capped_depth > 1:
        return capped_depth

    suggestions = local_check.get("repair_suggestions") or []
    headers_flagged = "normalize_headers" in suggestions and len(rows) >= 2
    if headers_flagged and any(_normalize_optional_text(cell) for cell in rows[1]):
        return 2
    return 1
1896
+
1897
+
1898
def _extract_leaf_header_titles_from_rows(rows: list[list[Any]], header_depth: int) -> list[str]:
    """Return one title per column, taken from the lowest filled header row.

    The column count is the widest of the first ``header_depth`` rows (at
    least the first row). For each column the header rows are searched from
    the deepest one upwards and the first non-blank text wins; columns with
    no header text yield ``""``.
    """
    header_rows = rows[: max(1, header_depth)]
    column_count = max((len(row) for row in header_rows), default=0)
    depth = max(1, min(header_depth, len(rows)))

    def _deepest_title(column_position: int) -> str:
        for row_position in reversed(range(depth)):
            row = rows[row_position]
            cell = row[column_position] if column_position < len(row) else None
            found = _normalize_optional_text(cell)
            if found:
                return found
        return ""

    return [_deepest_title(column_position) for column_position in range(column_count)]
1912
+
1913
+
1914
def _count_trailing_blank_rows_from_rows(rows: list[list[Any]], *, min_data_index: int = 1) -> int:
    """Count the blank rows at the end of ``rows``.

    Only rows from index ``min_data_index`` onwards are considered. A row is
    blank when every cell equals ``None`` or ``""`` (an empty row is blank).
    """
    data_rows = rows[min_data_index:]
    blank_total = 0
    while blank_total < len(data_rows):
        candidate = data_rows[-1 - blank_total]
        if not all(cell in (None, "") for cell in candidate):
            break
        blank_total += 1
    return blank_total
1921
+
1922
+
1923
def _build_auto_normalized_file(
    *,
    source_path: Path,
    sheet_title: str,
    rows: list[list[Any]],
    header_depth: int,
    template_leaf_titles: Any,
    local_check: dict[str, Any],
) -> dict[str, Any] | None:
    """Write a normalized copy of the workbook rows when normalization is needed.

    Normalization collapses a multi-row header into a single row of leaf
    titles and drops trailing blank data rows. The copy is saved to a new
    file in the system temp directory.

    Returns:
        ``None`` when ``rows`` is empty, when there is nothing to normalize
        (single header row and no trailing blank rows), or when the rows
        have no columns. Otherwise a dict describing the written file, the
        header titles used, the warnings and repairs applied, and the
        ``local_check`` passed in.
    """
    if not rows:
        return None
    # Clamp the header depth to between 1 and the number of rows.
    normalized_header_depth = max(1, min(header_depth, len(rows)))
    trailing_blank_rows = _count_trailing_blank_rows_from_rows(rows, min_data_index=normalized_header_depth)
    if normalized_header_depth <= 1 and trailing_blank_rows <= 0:
        return None
    extracted_headers = _extract_leaf_header_titles_from_rows(rows, normalized_header_depth)
    target_headers = _overlay_header_titles(extracted_headers, template_leaf_titles)
    # Every written row is padded to the widest of the header and all rows.
    row_width = max(len(target_headers), max((len(row) for row in rows), default=0))
    if row_width <= 0:
        return None
    padded_headers = list(target_headers) + [""] * max(0, row_width - len(target_headers))
    verified_path = _resolve_verified_output_path(source_path)
    normalized_workbook = Workbook()
    normalized_sheet = normalized_workbook.active
    normalized_sheet.title = sheet_title
    normalized_sheet.append(padded_headers)
    # Exclusive end index of the data rows to copy; never below the header depth.
    last_nonblank_row = max(normalized_header_depth, len(rows) - trailing_blank_rows)
    for row in rows[normalized_header_depth:last_nonblank_row]:
        normalized_sheet.append(list(row) + [None] * max(0, row_width - len(row)))
    verified_path.parent.mkdir(parents=True, exist_ok=True)
    normalized_workbook.save(verified_path)
    warnings: list[JSONObject] = []
    applied_repairs: list[str] = []
    if normalized_header_depth > 1:
        applied_repairs.append("normalize_headers")
        warnings.append(
            {
                "code": "IMPORT_HEADERS_AUTO_NORMALIZED",
                "message": f"Workbook used {normalized_header_depth} header rows; record_import_verify normalized it to a single leaf-header row automatically.",
            }
        )
    if trailing_blank_rows > 0:
        applied_repairs.append("trim_trailing_blank_rows")
        warnings.append(
            {
                "code": "TRAILING_BLANK_ROWS_AUTO_TRIMMED",
                "message": f"Removed {trailing_blank_rows} trailing blank rows before backend verification.",
            }
        )
    return {
        "verified_file_path": str(verified_path.resolve()),
        # Falls back to the padded headers when no header titles were extracted.
        "header_titles": target_headers or padded_headers,
        "warnings": warnings,
        "applied_repairs": applied_repairs,
        "header_depth": normalized_header_depth,
        "trailing_blank_rows": trailing_blank_rows,
        "source_local_check": local_check,
    }
1981
+
1982
+
1983
def _count_trailing_blank_rows(sheet) -> int:  # type: ignore[no-untyped-def]
    """Count the blank rows at the bottom of ``sheet``, never counting row 1.

    A row is blank when every cell value equals ``None`` or ``""``.
    """
    blank_total = 0
    row_number = sheet.max_row
    while row_number > 1:
        if any(cell.value not in (None, "") for cell in sheet[row_number]):
            break
        blank_total += 1
        row_number -= 1
    return blank_total
1991
+
1992
+
1993
def _find_enum_repairs(sheet, expected_columns: list[JSONObject]) -> list[str]:  # type: ignore[no-untyped-def]
    """List titles of option columns holding values that only differ in spelling.

    A column is listed when one of its cells in rows 2 to 50 (or the last
    row, if earlier) matches an allowed option after stripping and
    case-folding but is not written exactly like that option. Columns
    without options or without a matching header are skipped.
    """
    columns_by_header = _sheet_header_map(sheet)
    needing_repair: list[str] = []
    for column in expected_columns:
        choices = [str(option).strip() for option in column.get("options", []) if str(option).strip()]
        if not choices:
            continue
        position = columns_by_header.get(_normalize_header_key(column["title"]))
        if position is None:
            continue
        canonical_by_key = {_normalize_header_key(option): option for option in choices}
        # Only a sample of the leading data rows is inspected.
        for row_number in range(2, min(sheet.max_row, 50) + 1):
            cell_text = _normalize_optional_text(sheet.cell(row=row_number, column=position).value)
            if cell_text is None:
                continue
            canonical = canonical_by_key.get(_normalize_header_key(cell_text))
            if canonical and canonical != cell_text:
                needing_repair.append(column["title"])
                break
    return needing_repair
2014
+
2015
+
2016
def _sheet_header_map(sheet) -> dict[str, int]:  # type: ignore[no-untyped-def]
    """Map each normalized first-row header to its 1-based column index.

    When a header occurs more than once the first column wins. Cells whose
    header normalizes to an empty key are skipped.
    """
    first_seen: dict[str, int] = {}
    first_row = next(sheet.iter_rows(min_row=1, max_row=1), [])
    for column_number, cell in enumerate(first_row, start=1):
        header_key = _normalize_header_key(cell.value)
        if header_key:
            first_seen.setdefault(header_key, column_number)
    return first_seen
2023
+
2024
+
2025
def _repair_headers(sheet, expected_columns: list[JSONObject]) -> bool:  # type: ignore[no-untyped-def]
    """Rewrite first-row headers to the expected titles; return whether any changed.

    Pass one respells headers that match an expected title after stripping
    and case-folding. If that changed anything the function stops there.
    Otherwise pass two overwrites headers by position with the expected
    titles, leaving any headers beyond the expected columns untouched.
    """
    canonical_titles = {_normalize_header_key(column["title"]): column["title"] for column in expected_columns}
    first_row = list(next(sheet.iter_rows(min_row=1, max_row=1), []))

    renamed = False
    for cell in first_row:
        current = _normalize_optional_text(cell.value)
        if current is None:
            continue
        wanted = canonical_titles.get(_normalize_header_key(current))
        if wanted and wanted != current:
            cell.value = wanted
            renamed = True
    if renamed:
        return True

    # Fallback for template-based files whose headers were edited into
    # non-canonical values while the column order stayed intact.
    # NOTE(review): this pass also runs when the headers are already
    # canonical but in a different order - confirm callers rule that out.
    for cell, column in zip(first_row, expected_columns):
        wanted = str(column["title"]).strip()
        if _normalize_optional_text(cell.value) != wanted:
            cell.value = wanted
            renamed = True
    return renamed
2051
+
2052
+
2053
def _trim_trailing_blank_rows(sheet) -> bool:  # type: ignore[no-untyped-def]
    """Delete blank rows from the bottom of ``sheet``; return whether any were removed.

    Row 1 is never deleted. A row is blank when every cell value equals
    ``None`` or ``""``.
    """
    trimmed_any = False
    while sheet.max_row > 1:
        last_row = sheet.max_row
        if not all(cell.value in (None, "") for cell in sheet[last_row]):
            break
        sheet.delete_rows(last_row, 1)
        trimmed_any = True
    return trimmed_any
2062
+
2063
+
2064
def _normalize_enum_values(sheet, expected_columns: list[JSONObject]) -> bool:  # type: ignore[no-untyped-def]
    """Respell option-column cells to the exact allowed option; return whether any changed.

    For every expected column that declares options and has a matching
    header, each cell from row 2 down is replaced by the allowed option it
    matches after stripping and case-folding, when the spelling differs.
    """
    columns_by_header = _sheet_header_map(sheet)
    rewrote_any = False
    for column in expected_columns:
        choices = [str(option).strip() for option in column.get("options", []) if str(option).strip()]
        if not choices:
            continue
        position = columns_by_header.get(_normalize_header_key(column["title"]))
        if position is None:
            continue
        canonical_by_key = {_normalize_header_key(option): option for option in choices}
        for row_number in range(2, sheet.max_row + 1):
            cell = sheet.cell(row=row_number, column=position)
            cell_text = _normalize_optional_text(cell.value)
            if cell_text is None:
                continue
            canonical = canonical_by_key.get(_normalize_header_key(cell_text))
            if canonical and canonical != cell_text:
                cell.value = canonical
                rewrote_any = True
    return rewrote_any
2085
+
2086
+
2087
def _normalize_date_formats(sheet) -> bool:  # type: ignore[no-untyped-def]
    """Set ``yyyy-mm-dd hh:mm:ss`` on every date cell below row 1.

    Returns whether any cell's number format was changed.
    """
    reformatted_any = False
    for cells in sheet.iter_rows(min_row=2):
        for cell in cells:
            if not getattr(cell, "is_date", False):
                continue
            if cell.number_format == "yyyy-mm-dd hh:mm:ss":
                continue
            cell.number_format = "yyyy-mm-dd hh:mm:ss"
            reformatted_any = True
    return reformatted_any
2096
+
2097
+
2098
def _normalize_number_formats(sheet) -> bool:  # type: ignore[no-untyped-def]
    """Give numeric cells below row 1 an explicit number format.

    Cells holding an int or float whose format is still ``General`` get
    ``0`` (ints) or ``0.00`` (floats). Date cells and boolean cells are left
    alone.

    Returns:
        Whether any cell's number format was changed.
    """
    changed = False
    for row in sheet.iter_rows(min_row=2):
        for cell in row:
            value = cell.value
            # bool is a subclass of int; TRUE/FALSE cells are not numbers
            # and must not be reported as a number-format repair.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                continue
            if getattr(cell, "is_date", False):
                continue
            if cell.number_format == "General":
                cell.number_format = "0.00" if isinstance(value, float) else "0"
                changed = True
    return changed
2107
+
2108
+
2109
def _normalize_url_cells(sheet) -> bool:  # type: ignore[no-untyped-def]
    """Strip surrounding whitespace from URL cells below row 1.

    A cell is rewritten when its stripped text starts with ``http://`` or
    ``https://`` and differs from the stored value. Returns whether any
    cell was rewritten.
    """
    rewrote_any = False
    for cells in sheet.iter_rows(min_row=2):
        for cell in cells:
            cleaned = _normalize_optional_text(cell.value)
            if not cleaned or not cleaned.startswith(("http://", "https://")):
                continue
            if cleaned != cell.value:
                cell.value = cleaned
                rewrote_any = True
    return rewrote_any
2118
+
2119
+
2120
def _extract_import_records(payload: Any) -> list[JSONObject]:
    """Pull the list of import records out of a response payload.

    For a dict, the first of ``list``, ``records`` and ``items`` whose value
    is a list is used; a list payload is used as is. Only dict entries are
    kept. Any other payload, or a dict without a list under those keys,
    yields an empty list.
    """
    container: Any = payload
    if isinstance(payload, dict):
        container = next(
            (payload[field] for field in ("list", "records", "items") if isinstance(payload.get(field), list)),
            None,
        )
    if not isinstance(container, list):
        return []
    return [entry for entry in container if isinstance(entry, dict)]
2129
+
2130
+
2131
def _match_import_record(
    records: list[JSONObject],
    *,
    local_job: dict[str, Any] | None,
    import_id: str | None,
    process_id_str: str | None,
) -> tuple[JSONObject | None, str | None]:
    """Find the record in ``records`` that corresponds to one import job.

    Strategies are tried in order: process id, import id, then the locally
    stored job (source file name, narrowed by a time window around its
    start). A strategy that finds no candidate falls through to the next.

    Returns:
        ``(record, strategy)`` on a unique match, ``(None, strategy)`` when
        a strategy matched more than one record, and ``(None, None)`` when
        nothing matched.
    """
    if process_id_str:
        exact = [
            item
            for item in records
            if _normalize_optional_text(item.get("processIdStr") or item.get("processId") or item.get("process_id_str")) == process_id_str
        ]
        if len(exact) == 1:
            return exact[0], "process_id_str"
        if len(exact) > 1:
            # Ambiguous: report the strategy but no record.
            return None, "process_id_str"
    if import_id:
        exact = [
            item
            for item in records
            if import_id in _extract_import_record_ids(item)
        ]
        if len(exact) == 1:
            return exact[0], "import_id"
        if len(exact) > 1:
            return None, "import_id"
    if isinstance(local_job, dict):
        source_file_name = _normalize_optional_text(local_job.get("source_file_name"))
        started_at = _parse_utc(local_job.get("started_at"))
        # Without a source file name every record stays a candidate.
        candidates = records
        if source_file_name:
            candidates = [
                item
                for item in candidates
                if _normalize_optional_text(item.get("sourceFileName") or item.get("source_file_name")) == source_file_name
            ]
        if started_at is not None:
            # Accept records operated on from one minute before the local
            # start time up to ten minutes after it.
            window_end = started_at + timedelta(minutes=10)
            timed = []
            for item in candidates:
                operate_time = _parse_utc(item.get("operateTime"))
                if operate_time is None:
                    continue
                if started_at - timedelta(minutes=1) <= operate_time <= window_end:
                    timed.append(item)
            if len(timed) == 1:
                return timed[0], "local_job_window"
            if len(timed) > 1:
                return None, "local_job_window"
        # No record fell inside the window (or no start time is known):
        # fall back to the candidate list itself.
        if len(candidates) == 1:
            return candidates[0], "source_file_name"
        if len(candidates) > 1:
            return None, "source_file_name"
    return None, None
2186
+
2187
+
2188
def _extract_import_record_ids(record: JSONObject) -> set[str]:
    """Collect the non-blank identifiers stored on an import record.

    The keys ``importId``, ``import_id``, ``dataImportId`` and
    ``data_import_id`` are read and each value is normalized to stripped text.
    """
    return {
        identifier
        for field in ("importId", "import_id", "dataImportId", "data_import_id")
        if (identifier := _normalize_optional_text(record.get(field)))
    }
2195
+
2196
+
2197
def _parse_utc(value: Any) -> datetime | None:
    """Parse an ISO-8601 timestamp into a timezone-aware UTC ``datetime``.

    ``Z`` is rewritten to ``+00:00`` before parsing. A timestamp without an
    offset is taken to be UTC already; one with an offset is converted.
    Blank or unparseable input yields ``None``.
    """
    text = _normalize_optional_text(value)
    if text is None:
        return None
    try:
        moment = datetime.fromisoformat(text.replace("Z", "+00:00"))
    except ValueError:
        return None
    if moment.tzinfo is not None:
        return moment.astimezone(timezone.utc)
    return moment.replace(tzinfo=timezone.utc)
2209
+
2210
+
2211
+ def _coerce_int(value: Any) -> int | None:
2212
+ if value is None or value == "":
2213
+ return None
2214
+ try:
2215
+ return int(value)
2216
+ except (TypeError, ValueError):
2217
+ return None
2218
+
2219
+
2220
def _normalize_error_file_urls(value: Any) -> list[str]:
    """Return the non-blank entries of a URL list as stripped strings.

    ``None`` entries and entries whose text is blank are dropped. Anything
    that is not a list yields an empty list.
    """
    if not isinstance(value, list):
        return []
    urls: list[str] = []
    for item in value:
        # str(None) is the literal "None", which must not be returned as a URL.
        if item is None:
            continue
        text = str(item).strip()
        if text:
            urls.append(text)
    return urls