@josephyan/qingflow-cli 0.2.0-beta.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. package/README.md +30 -0
  2. package/docs/local-agent-install.md +235 -0
  3. package/entry_point.py +13 -0
  4. package/npm/bin/qingflow.mjs +5 -0
  5. package/npm/lib/runtime.mjs +204 -0
  6. package/npm/scripts/postinstall.mjs +16 -0
  7. package/package.json +34 -0
  8. package/pyproject.toml +67 -0
  9. package/qingflow +15 -0
  10. package/src/qingflow_mcp/__init__.py +5 -0
  11. package/src/qingflow_mcp/__main__.py +5 -0
  12. package/src/qingflow_mcp/backend_client.py +547 -0
  13. package/src/qingflow_mcp/builder_facade/__init__.py +3 -0
  14. package/src/qingflow_mcp/builder_facade/models.py +985 -0
  15. package/src/qingflow_mcp/builder_facade/service.py +8243 -0
  16. package/src/qingflow_mcp/cli/__init__.py +1 -0
  17. package/src/qingflow_mcp/cli/commands/__init__.py +15 -0
  18. package/src/qingflow_mcp/cli/commands/app.py +40 -0
  19. package/src/qingflow_mcp/cli/commands/auth.py +78 -0
  20. package/src/qingflow_mcp/cli/commands/builder.py +184 -0
  21. package/src/qingflow_mcp/cli/commands/common.py +47 -0
  22. package/src/qingflow_mcp/cli/commands/imports.py +86 -0
  23. package/src/qingflow_mcp/cli/commands/record.py +202 -0
  24. package/src/qingflow_mcp/cli/commands/task.py +87 -0
  25. package/src/qingflow_mcp/cli/commands/workspace.py +33 -0
  26. package/src/qingflow_mcp/cli/context.py +48 -0
  27. package/src/qingflow_mcp/cli/formatters.py +269 -0
  28. package/src/qingflow_mcp/cli/json_io.py +50 -0
  29. package/src/qingflow_mcp/cli/main.py +147 -0
  30. package/src/qingflow_mcp/config.py +221 -0
  31. package/src/qingflow_mcp/errors.py +66 -0
  32. package/src/qingflow_mcp/import_store.py +121 -0
  33. package/src/qingflow_mcp/json_types.py +18 -0
  34. package/src/qingflow_mcp/list_type_labels.py +76 -0
  35. package/src/qingflow_mcp/server.py +211 -0
  36. package/src/qingflow_mcp/server_app_builder.py +387 -0
  37. package/src/qingflow_mcp/server_app_user.py +317 -0
  38. package/src/qingflow_mcp/session_store.py +289 -0
  39. package/src/qingflow_mcp/solution/__init__.py +6 -0
  40. package/src/qingflow_mcp/solution/build_assembly_store.py +181 -0
  41. package/src/qingflow_mcp/solution/compiler/__init__.py +282 -0
  42. package/src/qingflow_mcp/solution/compiler/chart_compiler.py +96 -0
  43. package/src/qingflow_mcp/solution/compiler/form_compiler.py +466 -0
  44. package/src/qingflow_mcp/solution/compiler/icon_utils.py +113 -0
  45. package/src/qingflow_mcp/solution/compiler/navigation_compiler.py +57 -0
  46. package/src/qingflow_mcp/solution/compiler/package_compiler.py +19 -0
  47. package/src/qingflow_mcp/solution/compiler/portal_compiler.py +60 -0
  48. package/src/qingflow_mcp/solution/compiler/view_compiler.py +51 -0
  49. package/src/qingflow_mcp/solution/compiler/workflow_compiler.py +173 -0
  50. package/src/qingflow_mcp/solution/design_session.py +222 -0
  51. package/src/qingflow_mcp/solution/design_store.py +100 -0
  52. package/src/qingflow_mcp/solution/executor.py +2339 -0
  53. package/src/qingflow_mcp/solution/normalizer.py +23 -0
  54. package/src/qingflow_mcp/solution/requirements_builder.py +536 -0
  55. package/src/qingflow_mcp/solution/run_store.py +244 -0
  56. package/src/qingflow_mcp/solution/spec_models.py +853 -0
  57. package/src/qingflow_mcp/tools/__init__.py +1 -0
  58. package/src/qingflow_mcp/tools/ai_builder_tools.py +2063 -0
  59. package/src/qingflow_mcp/tools/app_tools.py +850 -0
  60. package/src/qingflow_mcp/tools/approval_tools.py +833 -0
  61. package/src/qingflow_mcp/tools/auth_tools.py +697 -0
  62. package/src/qingflow_mcp/tools/base.py +81 -0
  63. package/src/qingflow_mcp/tools/code_block_tools.py +679 -0
  64. package/src/qingflow_mcp/tools/directory_tools.py +648 -0
  65. package/src/qingflow_mcp/tools/feedback_tools.py +230 -0
  66. package/src/qingflow_mcp/tools/file_tools.py +385 -0
  67. package/src/qingflow_mcp/tools/import_tools.py +1971 -0
  68. package/src/qingflow_mcp/tools/navigation_tools.py +177 -0
  69. package/src/qingflow_mcp/tools/package_tools.py +240 -0
  70. package/src/qingflow_mcp/tools/portal_tools.py +131 -0
  71. package/src/qingflow_mcp/tools/qingbi_report_tools.py +269 -0
  72. package/src/qingflow_mcp/tools/record_tools.py +12739 -0
  73. package/src/qingflow_mcp/tools/role_tools.py +94 -0
  74. package/src/qingflow_mcp/tools/solution_tools.py +3887 -0
  75. package/src/qingflow_mcp/tools/task_context_tools.py +1423 -0
  76. package/src/qingflow_mcp/tools/task_tools.py +843 -0
  77. package/src/qingflow_mcp/tools/view_tools.py +280 -0
  78. package/src/qingflow_mcp/tools/workflow_tools.py +312 -0
  79. package/src/qingflow_mcp/tools/workspace_tools.py +219 -0
@@ -0,0 +1,1971 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import mimetypes
6
+ import re
7
+ import shutil
8
+ import tempfile
9
+ from io import BytesIO
10
+ from copy import deepcopy
11
+ from datetime import datetime, timedelta, timezone
12
+ from pathlib import Path
13
+ from typing import Any
14
+ from uuid import uuid4
15
+
16
+ from mcp.server.fastmcp import FastMCP
17
+ from openpyxl import Workbook, load_workbook
18
+
19
+ from ..config import DEFAULT_PROFILE
20
+ from ..errors import QingflowApiError
21
+ from ..import_store import ImportJobStore, ImportVerificationStore
22
+ from ..json_types import JSONObject
23
+ from .app_tools import _derive_import_capability
24
+ from .base import ToolBase
25
+ from .file_tools import FileTools
26
+ from .record_tools import RecordTools, _build_field_index, _normalize_form_schema
27
+
28
+
29
# Excel extensions the import pipeline accepts — presumably enforced by the
# local verifier (_local_verify, defined elsewhere in this file); confirm.
SUPPORTED_IMPORT_EXTENSIONS = {".xlsx", ".xls"}
# record_import_repair_local only rewrites .xlsx workbooks; other extensions
# are rejected with IMPORT_REPAIR_FORMAT_UNSUPPORTED.
REPAIRABLE_IMPORT_EXTENSIONS = {".xlsx"}
# Whitelist of repair names accepted via selected_repairs. Unknown names are
# rejected up front; when selected_repairs is omitted, the full set is applied.
SAFE_REPAIRS = {
    "normalize_headers",
    "trim_trailing_blank_rows",
    "normalize_enum_values",
    "normalize_date_formats",
    "normalize_number_formats",
    "normalize_url_cells",
}
# Loose email shape check: non-empty local part, single "@", and at least one
# "." in the domain; no whitespace anywhere.
EMAIL_PATTERN = re.compile(r"^[^@\s]+@[^@\s]+\.[^@\s]+$")
40
+
41
+
42
+ class ImportTools(ToolBase):
43
+ def __init__(
44
+ self,
45
+ sessions,
46
+ backend,
47
+ *,
48
+ verification_store: ImportVerificationStore | None = None,
49
+ job_store: ImportJobStore | None = None,
50
+ ) -> None:
51
+ super().__init__(sessions, backend)
52
+ self._record_tools = RecordTools(sessions, backend)
53
+ self._file_tools = FileTools(sessions, backend)
54
+ self._verification_store = verification_store or ImportVerificationStore()
55
+ self._job_store = job_store or ImportJobStore()
56
+
57
+ def register(self, mcp: FastMCP) -> None:
58
+ @mcp.tool()
59
+ def record_import_schema_get(
60
+ app_key: str = "",
61
+ output_profile: str = "normal",
62
+ ) -> dict[str, Any]:
63
+ return self.record_import_schema_get(
64
+ profile=DEFAULT_PROFILE,
65
+ app_key=app_key,
66
+ output_profile=output_profile,
67
+ )
68
+
69
+ @mcp.tool(description="Get the official app import template and the expected applicant import columns.")
70
+ def record_import_template_get(
71
+ profile: str = DEFAULT_PROFILE,
72
+ app_key: str = "",
73
+ download_to_path: str | None = None,
74
+ ) -> dict[str, Any]:
75
+ return self.record_import_template_get(
76
+ profile=profile,
77
+ app_key=app_key,
78
+ download_to_path=download_to_path,
79
+ )
80
+
81
+ @mcp.tool(description="Verify a local Excel import file and produce the only verification_id allowed for import start.")
82
+ def record_import_verify(
83
+ profile: str = DEFAULT_PROFILE,
84
+ app_key: str = "",
85
+ file_path: str = "",
86
+ ) -> dict[str, Any]:
87
+ return self.record_import_verify(
88
+ profile=profile,
89
+ app_key=app_key,
90
+ file_path=file_path,
91
+ )
92
+
93
+ @mcp.tool(description="Repair a local .xlsx import file after explicit user authorization, then re-verify it.")
94
+ def record_import_repair_local(
95
+ profile: str = DEFAULT_PROFILE,
96
+ verification_id: str = "",
97
+ authorized_file_modification: bool = False,
98
+ output_path: str | None = None,
99
+ selected_repairs: list[str] | None = None,
100
+ ) -> dict[str, Any]:
101
+ return self.record_import_repair_local(
102
+ profile=profile,
103
+ verification_id=verification_id,
104
+ authorized_file_modification=authorized_file_modification,
105
+ output_path=output_path,
106
+ selected_repairs=selected_repairs,
107
+ )
108
+
109
+ @mcp.tool(description="Start import from a successful verification_id. being_enter_auditing must be passed explicitly.")
110
+ def record_import_start(
111
+ profile: str = DEFAULT_PROFILE,
112
+ app_key: str = "",
113
+ verification_id: str = "",
114
+ being_enter_auditing: bool | None = None,
115
+ view_key: str | None = None,
116
+ ) -> dict[str, Any]:
117
+ return self.record_import_start(
118
+ profile=profile,
119
+ app_key=app_key,
120
+ verification_id=verification_id,
121
+ being_enter_auditing=being_enter_auditing,
122
+ view_key=view_key,
123
+ )
124
+
125
+ @mcp.tool(description="Get import status by process_id_str, import_id, or the latest remembered import in the current app.")
126
+ def record_import_status_get(
127
+ profile: str = DEFAULT_PROFILE,
128
+ app_key: str = "",
129
+ import_id: str | None = None,
130
+ process_id_str: str | None = None,
131
+ ) -> dict[str, Any]:
132
+ return self.record_import_status_get(
133
+ profile=profile,
134
+ app_key=app_key,
135
+ import_id=import_id,
136
+ process_id_str=process_id_str,
137
+ )
138
+
139
+ def record_import_schema_get(
140
+ self,
141
+ *,
142
+ profile: str = DEFAULT_PROFILE,
143
+ app_key: str,
144
+ output_profile: str = "normal",
145
+ ) -> dict[str, Any]:
146
+ if not app_key.strip():
147
+ return {
148
+ "ok": False,
149
+ "status": "blocked",
150
+ "app_key": app_key,
151
+ "error_code": "IMPORT_SCHEMA_UNAVAILABLE",
152
+ "message": "app_key is required",
153
+ }
154
+
155
+ def runner(session_profile, context):
156
+ import_capability, import_warnings = self._fetch_import_capability(context, app_key)
157
+ _index, expected_columns, schema_fingerprint = self._resolve_import_schema_bundle(
158
+ profile,
159
+ context,
160
+ app_key,
161
+ import_capability=import_capability,
162
+ )
163
+ columns: list[JSONObject] = []
164
+ for column in expected_columns:
165
+ payload: JSONObject = {
166
+ "title": column["title"],
167
+ "kind": column["write_kind"],
168
+ "required": bool(column.get("required")),
169
+ }
170
+ if isinstance(column.get("options"), list) and column.get("options"):
171
+ payload["options"] = column["options"]
172
+ if bool(column.get("requires_lookup")):
173
+ payload["accepts_natural_input"] = True
174
+ if bool(column.get("requires_upload")):
175
+ payload["requires_upload"] = True
176
+ if isinstance(column.get("target_app_key"), str):
177
+ payload["target_app_key"] = column["target_app_key"]
178
+ if isinstance(column.get("target_app_name"), str):
179
+ payload["target_app_name"] = column["target_app_name"]
180
+ if isinstance(column.get("searchable_fields"), list) and column.get("searchable_fields"):
181
+ payload["searchable_fields"] = column["searchable_fields"]
182
+ columns.append(payload)
183
+ response: dict[str, Any] = {
184
+ "ok": True,
185
+ "status": "success",
186
+ "app_key": app_key,
187
+ "ws_id": session_profile.selected_ws_id,
188
+ "request_route": self.backend.describe_route(context),
189
+ "warnings": import_warnings,
190
+ "schema_scope": "import_ready",
191
+ "columns": columns,
192
+ "schema_fingerprint": schema_fingerprint,
193
+ }
194
+ if output_profile == "verbose":
195
+ response["expected_columns"] = expected_columns
196
+ response["import_capability"] = import_capability
197
+ return response
198
+
199
+ return self._run(profile, runner)
200
+
201
+ def record_import_template_get(
202
+ self,
203
+ *,
204
+ profile: str,
205
+ app_key: str,
206
+ download_to_path: str | None = None,
207
+ ) -> dict[str, Any]:
208
+ if not app_key.strip():
209
+ return self._failed_template_result(app_key=app_key, error_code="IMPORT_TEMPLATE_UNAUTHORIZED", message="app_key is required")
210
+
211
+ def runner(session_profile, context):
212
+ import_capability, import_warnings = self._fetch_import_capability(context, app_key)
213
+ field_index, expected_columns, schema_fingerprint = self._resolve_import_schema_bundle(
214
+ profile,
215
+ context,
216
+ app_key,
217
+ import_capability=import_capability,
218
+ )
219
+ try:
220
+ payload = self.backend.request("GET", context, f"/app/{app_key}/apply/excelTemplate")
221
+ except QingflowApiError as exc:
222
+ if import_capability.get("auth_source") == "apply_auth":
223
+ downloaded_to_path = self._write_local_template(
224
+ expected_columns=expected_columns,
225
+ destination_hint=download_to_path,
226
+ app_key=app_key,
227
+ )
228
+ return {
229
+ "ok": True,
230
+ "status": "partial_success",
231
+ "app_key": app_key,
232
+ "ws_id": session_profile.selected_ws_id,
233
+ "request_route": self.backend.describe_route(context),
234
+ "template_url": None,
235
+ "downloaded_to_path": downloaded_to_path,
236
+ "expected_columns": expected_columns,
237
+ "schema_fingerprint": schema_fingerprint,
238
+ "warnings": import_warnings
239
+ + [
240
+ {
241
+ "code": "IMPORT_TEMPLATE_LOCAL_FALLBACK",
242
+ "message": "Official template download requires data management permission; MCP generated a local applicant-import template instead.",
243
+ }
244
+ ],
245
+ "verification": {
246
+ "schema_fingerprint": schema_fingerprint,
247
+ "template_url_resolved": False,
248
+ "template_downloaded": True,
249
+ "template_source": "local_generated",
250
+ },
251
+ }
252
+ return self._failed_template_result(
253
+ app_key=app_key,
254
+ error_code="IMPORT_TEMPLATE_UNAUTHORIZED",
255
+ message=exc.message,
256
+ request_route=self.backend.describe_route(context),
257
+ )
258
+ template_url = _pick_template_url(payload)
259
+ if not template_url:
260
+ return self._failed_template_result(
261
+ app_key=app_key,
262
+ error_code="IMPORT_TEMPLATE_UNAUTHORIZED",
263
+ message="template endpoint did not return excelUrl",
264
+ request_route=self.backend.describe_route(context),
265
+ )
266
+ downloaded_to_path = None
267
+ warnings: list[JSONObject] = list(import_warnings)
268
+ verification = {
269
+ "schema_fingerprint": schema_fingerprint,
270
+ "template_url_resolved": True,
271
+ "template_downloaded": False,
272
+ "template_source": "official",
273
+ }
274
+ if download_to_path:
275
+ destination = _resolve_template_download_path(download_to_path, app_key=app_key)
276
+ destination.parent.mkdir(parents=True, exist_ok=True)
277
+ content = self.backend.download_binary(template_url)
278
+ destination.write_bytes(content)
279
+ downloaded_to_path = str(destination)
280
+ verification["template_downloaded"] = True
281
+ return {
282
+ "ok": True,
283
+ "status": "success",
284
+ "app_key": app_key,
285
+ "ws_id": session_profile.selected_ws_id,
286
+ "request_route": self.backend.describe_route(context),
287
+ "template_url": template_url,
288
+ "downloaded_to_path": downloaded_to_path,
289
+ "expected_columns": expected_columns,
290
+ "schema_fingerprint": schema_fingerprint,
291
+ "warnings": warnings,
292
+ "verification": verification,
293
+ }
294
+
295
+ try:
296
+ return self._run(profile, runner)
297
+ except RuntimeError as exc:
298
+ return self._runtime_error_as_result(exc, error_code="IMPORT_TEMPLATE_UNAUTHORIZED")
299
+
300
    def record_import_verify(
        self,
        *,
        profile: str,
        app_key: str,
        file_path: str,
    ) -> dict[str, Any]:
        """Verify a local Excel file against an app's import schema.

        Pipeline: import-permission precheck -> local schema/header check ->
        optional auto-normalization (with re-check of the normalized copy) ->
        backend multipart verification -> persist a verification record whose
        id is the only token record_import_start accepts.
        """
        if not app_key.strip():
            return self._failed_verify_result(app_key=app_key, file_path=file_path, error_code="IMPORT_VERIFICATION_FAILED", message="app_key is required")
        path = Path(file_path).expanduser()
        if not path.is_file():
            return self._failed_verify_result(app_key=app_key, file_path=file_path, error_code="IMPORT_VERIFICATION_FAILED", message="file_path must point to an existing file")

        def runner(session_profile, context):
            import_capability, import_warnings = self._fetch_import_capability(context, app_key)
            # "unknown" means the capability probe could not decide either way.
            precheck_known = import_capability.get("auth_source") != "unknown"
            if not bool(import_capability.get("can_import")):
                if import_capability.get("auth_source") != "unknown":
                    # Permission definitively denied: stop before touching the file.
                    return self._failed_verify_result(
                        app_key=app_key,
                        file_path=file_path,
                        error_code="IMPORT_AUTH_PRECHECK_FAILED",
                        message="the current user does not have import permission for this app",
                        extra={
                            "warnings": import_warnings,
                            "verification": {
                                "import_auth_prechecked": True,
                                "import_auth_precheck_passed": False,
                                "backend_verification_passed": False,
                            },
                            "import_capability": import_capability,
                        },
                    )
                # Permission undecidable: continue, but warn the caller.
                import_warnings = list(import_warnings) + [
                    {
                        "code": "IMPORT_AUTH_PRECHECK_SKIPPED",
                        "message": "record_import_verify could not determine import permission from app metadata; continuing with file verification only.",
                    }
                ]
            field_index, expected_columns, schema_fingerprint = self._resolve_import_schema_bundle(
                profile,
                context,
                app_key,
                import_capability=import_capability,
            )
            template_header_profile, header_warnings = self._load_template_header_profile(
                context,
                app_key,
                import_capability=import_capability,
                expected_columns=expected_columns,
            )
            template_header_titles = template_header_profile.get("allowed_titles")
            # First local check runs against the caller's original file.
            local_check = self._local_verify(
                profile=profile,
                context=context,
                path=path,
                app_key=app_key,
                field_index=field_index,
                expected_columns=expected_columns,
                allowed_header_titles=template_header_titles,
                schema_fingerprint=schema_fingerprint,
            )
            effective_path = path
            effective_local_check = local_check
            auto_normalization = self._maybe_auto_normalize_file(
                source_path=path,
                expected_columns=expected_columns,
                template_header_profile=template_header_profile,
                local_check=local_check,
            )
            if auto_normalization is not None:
                # A normalized copy was produced; re-run the local check on it
                # so issues/warnings reflect the file that will be uploaded.
                effective_path = Path(str(auto_normalization["verified_file_path"]))
                effective_local_check = self._local_verify(
                    profile=profile,
                    context=context,
                    path=effective_path,
                    app_key=app_key,
                    field_index=field_index,
                    expected_columns=expected_columns,
                    allowed_header_titles=list(auto_normalization["header_titles"]),
                    schema_fingerprint=schema_fingerprint,
                )
            warnings = import_warnings + deepcopy(effective_local_check["warnings"]) + header_warnings
            if auto_normalization is not None:
                warnings.extend(deepcopy(auto_normalization["warnings"]))
            issues = deepcopy(effective_local_check["issues"])
            can_import = bool(effective_local_check["can_import"])
            backend_verification = None
            if can_import:
                # Only consult the backend once the local check passed.
                try:
                    payload = self.backend.request_multipart(
                        "POST",
                        context,
                        f"/app/{app_key}/upload/verification",
                        files={
                            "file": (
                                effective_path.name,
                                effective_path.read_bytes(),
                                mimetypes.guess_type(effective_path.name)[0] or "application/octet-stream",
                            )
                        },
                    )
                    if isinstance(payload, dict):
                        backend_verification = payload
                    else:
                        backend_verification = {}
                    # Missing "beingValidated" is treated as acceptance; only
                    # an explicit False rejects the file.
                    being_validated = backend_verification.get("beingValidated", True)
                    if being_validated is False:
                        can_import = False
                        issues.append(
                            _issue(
                                "BACKEND_IMPORT_VERIFICATION_REJECTED",
                                "Backend verification rejected the file for import.",
                                severity="error",
                            )
                        )
                except QingflowApiError as exc:
                    can_import = False
                    issues.append(
                        _issue(
                            "BACKEND_IMPORT_VERIFICATION_FAILED",
                            exc.message or "Backend import verification failed.",
                            severity="error",
                        )
                    )
                    warnings.append(
                        {
                            "code": "IMPORT_VERIFICATION_FAILED",
                            "message": "Backend verification failed; the file cannot be imported until verification succeeds.",
                        }
                    )
            verification_id = str(uuid4())
            # Persisted record consumed later by record_import_start; fields
            # prefixed "verified_" describe the auto-normalized copy (None when
            # the original file was used as-is).
            verification_payload = {
                "id": verification_id,
                "created_at": _utc_now().isoformat(),
                "profile": profile,
                "app_key": app_key,
                "file_path": str(path.resolve()),
                "source_file_path": str(path.resolve()),
                "verified_file_path": str(effective_path.resolve()) if effective_path != path else None,
                "file_name": path.name,
                "file_sha256": local_check["file_sha256"],
                "verified_file_sha256": effective_local_check["file_sha256"] if effective_path != path else None,
                "file_size": local_check["file_size"],
                "schema_fingerprint": schema_fingerprint,
                "can_import": can_import,
                "issues": issues,
                "warnings": warnings,
                "import_capability": import_capability,
                "apply_rows": backend_verification.get("applyRows") if isinstance(backend_verification, dict) else None,
                "backend_verification": backend_verification,
                "local_precheck": effective_local_check,
                "source_local_precheck": local_check,
                "auto_normalization": auto_normalization,
            }
            self._verification_store.put(verification_id, verification_payload)
            # NOTE(review): error_code and repair_suggestions below come from
            # local_check (the original file's check), not effective_local_check
            # — confirm this is intentional when auto-normalization ran.
            return {
                "ok": True,
                "status": "success" if can_import else "failed",
                "error_code": None if can_import else (local_check.get("error_code") or "IMPORT_VERIFICATION_FAILED"),
                "can_import": can_import,
                "verification_id": verification_id,
                "file_path": str(path.resolve()),
                "verified_file_path": str(effective_path.resolve()) if effective_path != path else None,
                "file_name": path.name,
                "file_sha256": local_check["file_sha256"],
                "verified_file_sha256": effective_local_check["file_sha256"] if effective_path != path else None,
                "file_size": local_check["file_size"],
                "schema_fingerprint": schema_fingerprint,
                "apply_rows": backend_verification.get("applyRows") if isinstance(backend_verification, dict) else None,
                "issues": issues,
                "repair_suggestions": local_check["repair_suggestions"],
                "warnings": warnings,
                "import_capability": import_capability,
                "verification": {
                    "import_auth_prechecked": precheck_known,
                    "import_auth_precheck_passed": True if precheck_known else None,
                    "import_auth_source": import_capability.get("auth_source"),
                    "local_precheck_passed": bool(local_check["local_precheck_passed"]),
                    "backend_verification_passed": isinstance(backend_verification, dict)
                    and backend_verification.get("beingValidated", True) is not False,
                    "schema_fingerprint": schema_fingerprint,
                    "file_sha256": local_check["file_sha256"],
                    "verified_file_sha256": effective_local_check["file_sha256"] if effective_path != path else None,
                    "file_format": local_check["extension"],
                    "local_precheck_limited": bool(local_check["local_precheck_limited"]),
                    "auto_normalized": effective_path != path,
                },
            }

        try:
            return self._run(profile, runner)
        except RuntimeError as exc:
            return self._runtime_error_as_result(exc, error_code="IMPORT_VERIFICATION_FAILED", extra={"can_import": False})
494
+
495
+ def record_import_repair_local(
496
+ self,
497
+ *,
498
+ profile: str,
499
+ verification_id: str,
500
+ authorized_file_modification: bool,
501
+ output_path: str | None = None,
502
+ selected_repairs: list[str] | None = None,
503
+ ) -> dict[str, Any]:
504
+ if not verification_id.strip():
505
+ return self._failed_repair_result(error_code="IMPORT_VERIFICATION_FAILED", message="verification_id is required")
506
+ if not authorized_file_modification:
507
+ return self._failed_repair_result(
508
+ error_code="IMPORT_REPAIR_NOT_AUTHORIZED",
509
+ message="record_import_repair_local requires authorized_file_modification=true",
510
+ )
511
+ unknown_repairs = sorted({item for item in (selected_repairs or []) if item not in SAFE_REPAIRS})
512
+ if unknown_repairs:
513
+ return self._failed_repair_result(
514
+ error_code="IMPORT_REPAIR_FORMAT_UNSUPPORTED",
515
+ message=f"unknown selected_repairs: {', '.join(unknown_repairs)}",
516
+ )
517
+
518
+ def runner(_session_profile, context):
519
+ stored = self._verification_store.get(verification_id)
520
+ if stored is None:
521
+ return self._failed_repair_result(error_code="IMPORT_VERIFICATION_STALE", message="verification_id is missing or expired")
522
+ source_path = Path(str(stored.get("source_file_path") or stored["file_path"]))
523
+ extension = source_path.suffix.lower()
524
+ if extension not in REPAIRABLE_IMPORT_EXTENSIONS:
525
+ return self._failed_repair_result(
526
+ error_code="IMPORT_REPAIR_FORMAT_UNSUPPORTED",
527
+ message="record_import_repair_local v1 only supports .xlsx files",
528
+ extra={"source_file_path": str(source_path)},
529
+ )
530
+ expected_columns, _ = self._expected_import_columns(profile, context, str(stored["app_key"]))
531
+ normalized_repairs = set(selected_repairs or SAFE_REPAIRS)
532
+ destination = _resolve_repaired_output_path(source_path, output_path=output_path)
533
+ destination.parent.mkdir(parents=True, exist_ok=True)
534
+ shutil.copy2(source_path, destination)
535
+
536
+ workbook = load_workbook(destination)
537
+ sheet = workbook[workbook.sheetnames[0]]
538
+ applied_repairs: list[str] = []
539
+ skipped_repairs: list[str] = []
540
+ if "normalize_headers" in normalized_repairs:
541
+ if _repair_headers(sheet, expected_columns):
542
+ applied_repairs.append("normalize_headers")
543
+ else:
544
+ skipped_repairs.append("normalize_headers")
545
+ if "trim_trailing_blank_rows" in normalized_repairs:
546
+ if _trim_trailing_blank_rows(sheet):
547
+ applied_repairs.append("trim_trailing_blank_rows")
548
+ else:
549
+ skipped_repairs.append("trim_trailing_blank_rows")
550
+ if "normalize_enum_values" in normalized_repairs:
551
+ if _normalize_enum_values(sheet, expected_columns):
552
+ applied_repairs.append("normalize_enum_values")
553
+ else:
554
+ skipped_repairs.append("normalize_enum_values")
555
+ if "normalize_date_formats" in normalized_repairs:
556
+ if _normalize_date_formats(sheet):
557
+ applied_repairs.append("normalize_date_formats")
558
+ else:
559
+ skipped_repairs.append("normalize_date_formats")
560
+ if "normalize_number_formats" in normalized_repairs:
561
+ if _normalize_number_formats(sheet):
562
+ applied_repairs.append("normalize_number_formats")
563
+ else:
564
+ skipped_repairs.append("normalize_number_formats")
565
+ if "normalize_url_cells" in normalized_repairs:
566
+ if _normalize_url_cells(sheet):
567
+ applied_repairs.append("normalize_url_cells")
568
+ else:
569
+ skipped_repairs.append("normalize_url_cells")
570
+ workbook.save(destination)
571
+
572
+ verification_result = self.record_import_verify(
573
+ profile=profile,
574
+ app_key=str(stored["app_key"]),
575
+ file_path=str(destination),
576
+ )
577
+ new_verification_id = verification_result.get("verification_id")
578
+ return {
579
+ "ok": bool(verification_result.get("ok")),
580
+ "status": verification_result.get("status"),
581
+ "error_code": verification_result.get("error_code"),
582
+ "source_file_path": str(source_path),
583
+ "repaired_file_path": str(destination),
584
+ "applied_repairs": applied_repairs,
585
+ "skipped_repairs": skipped_repairs,
586
+ "new_verification_id": new_verification_id,
587
+ "can_import_after_repair": bool(verification_result.get("can_import")),
588
+ "post_repair_issues": verification_result.get("issues", []),
589
+ "warnings": verification_result.get("warnings", []),
590
+ "verification": {
591
+ "source_preserved": True,
592
+ "repair_authorized": True,
593
+ "reverified": True,
594
+ "selected_repairs": sorted(normalized_repairs),
595
+ },
596
+ }
597
+
598
+ try:
599
+ return self._run(profile, runner)
600
+ except RuntimeError as exc:
601
+ return self._runtime_error_as_result(exc, error_code="IMPORT_REPAIR_FORMAT_UNSUPPORTED")
602
+
603
    def record_import_start(
        self,
        *,
        profile: str,
        app_key: str,
        verification_id: str,
        being_enter_auditing: bool | None,
        view_key: str | None = None,
    ) -> dict[str, Any]:
        """Start a record import from a successful verification.

        Revalidates the stored verification (app match, importability, file
        hash, schema fingerprint), stages the file upload, then kicks off the
        import over the backend socket channel and remembers the job in the
        job store. ``being_enter_auditing`` must be passed explicitly (no
        implicit default) — None is rejected.
        """
        if being_enter_auditing is None:
            return self._failed_start_result(error_code="IMPORT_VERIFICATION_FAILED", message="being_enter_auditing must be passed explicitly")

        def runner(session_profile, context):
            stored = self._verification_store.get(verification_id)
            if stored is None:
                return self._failed_start_result(error_code="IMPORT_VERIFICATION_STALE", message="verification_id is missing or expired")
            if str(stored.get("app_key")) != app_key:
                return self._failed_start_result(error_code="IMPORT_VERIFICATION_STALE", message="verification_id does not belong to the requested app")
            if not bool(stored.get("can_import")):
                return self._failed_start_result(error_code="IMPORT_VERIFICATION_FAILED", message="verification_id is not importable", extra={"accepted": False})
            # Prefer the auto-normalized copy recorded by record_import_verify.
            current_path = Path(str(stored.get("verified_file_path") or stored["file_path"]))
            if not current_path.is_file():
                return self._failed_start_result(error_code="IMPORT_VERIFICATION_STALE", message="verified file no longer exists")
            # Guard 1: the bytes on disk must be exactly what was verified.
            current_sha256 = _sha256_file(current_path)
            expected_sha256 = stored.get("verified_file_sha256") or stored.get("file_sha256")
            if current_sha256 != expected_sha256:
                return self._failed_start_result(
                    error_code="IMPORT_FILE_CHANGED_AFTER_VERIFY",
                    message="the file changed after verification; run record_import_verify again",
                    extra={"accepted": False},
                )
            # Guard 2: the app's schema must not have drifted since verify.
            stored_import_capability = stored.get("import_capability")
            _, current_schema_fingerprint = self._expected_import_columns(
                profile,
                context,
                app_key,
                import_capability=stored_import_capability if isinstance(stored_import_capability, dict) else None,
            )
            if current_schema_fingerprint != stored.get("schema_fingerprint"):
                return self._failed_start_result(
                    error_code="IMPORT_SCHEMA_CHANGED_AFTER_VERIFY",
                    message="the applicant schema changed after verification; run record_import_verify again",
                    extra={"accepted": False},
                )
            # Stage the file: the socket import consumes a download URL.
            upload_result = self._file_tools.file_upload_local(
                profile=profile,
                upload_kind="login",
                file_path=str(current_path),
            )
            file_url = upload_result.get("download_url")
            if not isinstance(file_url, str) or not file_url.strip():
                return self._failed_start_result(error_code="IMPORT_VERIFICATION_FAILED", message="file upload did not return download_url")
            try:
                socket_result = self.backend.start_socket_data_import(
                    context,
                    app_key=app_key,
                    being_enter_auditing=bool(being_enter_auditing),
                    view_key=view_key,
                    excel_url=file_url,
                    excel_name=str(stored.get("file_name") or current_path.name),
                )
            except QingflowApiError as exc:
                # Surface socket-ack timeouts distinctly; everything else maps
                # to the generic verification-failed code.
                error_code = "IMPORT_SOCKET_ACK_TIMEOUT" if exc.details and exc.details.get("error_code") == "IMPORT_SOCKET_ACK_TIMEOUT" else "IMPORT_VERIFICATION_FAILED"
                return self._failed_start_result(error_code=error_code, message=exc.message, extra={"accepted": False, "file_url": file_url})
            import_id = str(socket_result.get("import_id") or "")
            process_id_str = _normalize_optional_text(socket_result.get("process_id_str"))
            started_at = _utc_now().isoformat()
            # Remember the job so record_import_status_get can resolve it later.
            self._job_store.put(
                import_id,
                {
                    "created_at": started_at,
                    "profile": profile,
                    "app_key": app_key,
                    "import_id": import_id,
                    "process_id_str": process_id_str,
                    "source_file_name": str(stored.get("file_name") or current_path.name),
                    "started_at": started_at,
                    "file_url": file_url,
                    "verification_id": verification_id,
                },
            )
            warnings = deepcopy(socket_result.get("warnings", []))
            return {
                "ok": True,
                "status": "accepted",
                "accepted": True,
                "import_id": import_id,
                "process_id_str": process_id_str,
                "source_file_name": str(stored.get("file_name") or current_path.name),
                "file_url": file_url,
                "warnings": warnings,
                "verification": {
                    "verification_id_valid": True,
                    "file_hash_verified": True,
                    "schema_fingerprint_verified": True,
                    "upload_staged": True,
                    "import_acknowledged": bool(import_id),
                },
            }

        try:
            return self._run(profile, runner)
        except RuntimeError as exc:
            return self._runtime_error_as_result(exc, error_code="IMPORT_VERIFICATION_FAILED", extra={"accepted": False})
707
+
708
    def record_import_status_get(
        self,
        *,
        profile: str,
        app_key: str,
        import_id: str | None = None,
        process_id_str: str | None = None,
    ) -> dict[str, Any]:
        """Resolve the current backend status of a data-import job.

        Fetches the backend import-record listing for ``app_key`` and
        correlates it with locally stored job metadata, matching in order by
        ``import_id``, then a *unique* ``process_id_str`` match, then (when
        neither id was given) the most recent local job for the app.

        Returns a status dict with row counts, progress and error-file URLs,
        or a failed result with ``IMPORT_STATUS_AMBIGUOUS`` when no single
        record can be resolved.
        """
        if not app_key.strip():
            return self._failed_status_result(error_code="IMPORT_STATUS_AMBIGUOUS", message="app_key is required")

        def runner(_session_profile, context):
            # --- locate the locally tracked job, if any -------------------
            local_job = None
            normalized_import_id = _normalize_optional_text(import_id)
            normalized_process_id = _normalize_optional_text(process_id_str)
            if normalized_import_id:
                local_job = self._job_store.get(normalized_import_id)
            if local_job is None and normalized_process_id:
                matches = [item for item in self._job_store.list() if _normalize_optional_text(item.get("process_id_str")) == normalized_process_id]
                # Ambiguity (0 or >1 matches) deliberately resolves to None.
                local_job = matches[0] if len(matches) == 1 else None
            if local_job is None and not normalized_import_id and not normalized_process_id:
                recent = [item for item in self._job_store.list() if str(item.get("app_key")) == app_key]
                local_job = recent[0] if recent else None
            # --- fetch the first page (up to 100) of backend records ------
            page = self.backend.request(
                "GET",
                context,
                "/app/apply/dataImport/record",
                params={"appKey": app_key, "pageNum": 1, "pageSize": 100},
            )
            records = _extract_import_records(page)
            matched_record, matched_by = _match_import_record(
                records,
                local_job=local_job,
                process_id_str=normalized_process_id,
            )
            if matched_record is None:
                return self._failed_status_result(
                    error_code="IMPORT_STATUS_AMBIGUOUS",
                    message="could not uniquely resolve an import record from the provided identifiers",
                    extra={"matched_by": matched_by},
                )
            # Backend payloads use several key spellings for the process id.
            normalized_process = _normalize_optional_text(
                matched_record.get("processIdStr") or matched_record.get("processId") or matched_record.get("process_id_str")
            )
            if local_job is not None and normalized_import_id:
                # Persist the freshly learned process id back onto the job.
                self._job_store.put(
                    normalized_import_id,
                    {
                        **local_job,
                        "created_at": local_job.get("created_at") or _utc_now().isoformat(),
                        "process_id_str": normalized_process,
                    },
                )
            # Both camelCase (backend) and snake_case spellings are accepted.
            total_rows = _coerce_int(matched_record.get("totalNumber") or matched_record.get("total_rows"))
            success_rows = _coerce_int(matched_record.get("successNum") or matched_record.get("success_rows"))
            failed_rows = _coerce_int(matched_record.get("errorNum") or matched_record.get("failed_rows"))
            progress = _coerce_int(matched_record.get("importPercentage") or matched_record.get("progress"))
            return {
                "ok": True,
                "status": _normalize_optional_text(matched_record.get("processStatus")) or "unknown",
                "import_id": normalized_import_id or (local_job.get("import_id") if isinstance(local_job, dict) else None),
                "process_id_str": normalized_process,
                "matched_by": matched_by,
                "source_file_name": matched_record.get("sourceFileName") or matched_record.get("source_file_name"),
                "total_rows": total_rows,
                "success_rows": success_rows,
                "failed_rows": failed_rows,
                "progress": progress,
                "error_file_urls": _normalize_error_file_urls(matched_record.get("errorFileUrls")),
                "operate_time": matched_record.get("operateTime"),
                "operate_user": matched_record.get("operateUser"),
                "warnings": [],
                "verification": {
                    "status_lookup_completed": True,
                    "matched_by": matched_by,
                    "process_id_verified": bool(normalized_process),
                },
            }

        try:
            return self._run(profile, runner)
        except RuntimeError as exc:
            # _run signals session/transport problems via RuntimeError with a
            # JSON message; convert to the tool's structured failure shape.
            return self._runtime_error_as_result(exc, error_code="IMPORT_STATUS_AMBIGUOUS")
791
+
792
    def _resolve_import_schema_bundle(
        self,
        profile: str,
        context,
        app_key: str,
        *,
        import_capability: JSONObject | None = None,
    ) -> tuple[Any, list[JSONObject], str]:  # type: ignore[no-untyped-def]
        """Build the import schema for *app_key*.

        Returns ``(field_index, expected_columns, schema_fingerprint)``.
        The schema source depends on the capability's ``auth_source``:
        with ``data_manage_auth`` the raw form schema is fetched directly,
        otherwise the record tools' cached applicant field index is reused.
        Only writable applicant fields become expected import columns.
        """
        auth_source = _normalize_optional_text((import_capability or {}).get("auth_source")) or "unknown"
        if auth_source == "data_manage_auth":
            # Data-management permission: fetch the full form schema (type=1).
            schema = self.backend.request("GET", context, f"/app/{app_key}/form", params={"type": 1})
            index = _build_field_index(_normalize_form_schema(schema))
        else:
            index = self._record_tools._get_field_index(profile, context, app_key, force_refresh=False)
        ws_id = self.sessions.get_profile(profile).selected_ws_id
        expected_columns: list[JSONObject] = []
        for field in index.by_id.values():
            payload = self._record_tools._schema_field_payload(
                profile,
                context,
                field,
                workflow_node_id=None,
                ws_id=ws_id,
                schema_mode="applicant",
            )
            if not bool(payload.get("writable")):
                # Read-only fields cannot appear in an import file.
                continue
            expected_columns.append(
                {
                    "field_id": payload["field_id"],
                    "title": payload["title"],
                    "que_type": payload["que_type"],
                    "required": bool(field.required),
                    "write_kind": payload["write_kind"],
                    "options": payload.get("options", []),
                    "requires_lookup": bool(payload.get("requires_lookup")),
                    "requires_upload": bool(payload.get("requires_upload")),
                    "target_app_key": payload.get("target_app_key"),
                    "target_app_name": payload.get("target_app_name"),
                    "searchable_fields": payload.get("searchable_fields", []),
                }
            )
        # Stable ordering by numeric field id keeps the fingerprint deterministic.
        expected_columns.sort(key=lambda item: int(item["field_id"]))
        schema_fingerprint = _stable_import_schema_fingerprint(expected_columns)
        return index, expected_columns, schema_fingerprint
837
+
838
+ def _expected_import_columns(
839
+ self,
840
+ profile: str,
841
+ context,
842
+ app_key: str,
843
+ *,
844
+ import_capability: JSONObject | None = None,
845
+ ) -> tuple[list[JSONObject], str]: # type: ignore[no-untyped-def]
846
+ _, expected_columns, schema_fingerprint = self._resolve_import_schema_bundle(
847
+ profile,
848
+ context,
849
+ app_key,
850
+ import_capability=import_capability,
851
+ )
852
+ return expected_columns, schema_fingerprint
853
+
854
    def _local_verify(
        self,
        *,
        profile: str,
        context,
        path: Path,
        app_key: str,
        field_index: Any,
        expected_columns: list[JSONObject],
        allowed_header_titles: list[str] | None,
        schema_fingerprint: str,
    ) -> dict[str, Any]:
        """Run the local (offline) precheck of an import file.

        Validates file format, header layout and semantic cell contents,
        collecting issues/warnings/repair suggestions into one result dict.
        Validation problems never raise — they are encoded in the result via
        ``local_precheck_passed`` / ``can_import`` / ``error_code``.
        """
        extension = path.suffix.lower()
        file_sha256 = _sha256_file(path)
        # Optimistic defaults; individual checks below flip them on failure.
        base_result = {
            "app_key": app_key,
            "file_path": str(path.resolve()),
            "file_size": path.stat().st_size,
            "file_sha256": file_sha256,
            "schema_fingerprint": schema_fingerprint,
            "issues": [],
            "warnings": [],
            "repair_suggestions": [],
            "local_precheck_passed": True,
            "local_precheck_limited": False,
            "can_import": True,
            "extension": extension,
            "error_code": None,
        }
        if extension not in SUPPORTED_IMPORT_EXTENSIONS:
            base_result["issues"].append(_issue("UNSUPPORTED_FILE_FORMAT", "Only .xlsx and .xls files are supported in import v1.", severity="error"))
            base_result["local_precheck_passed"] = False
            base_result["can_import"] = False
            base_result["error_code"] = "IMPORT_FILE_FORMAT_UNSUPPORTED"
            return base_result
        if extension == ".xls":
            # Legacy .xls cannot be parsed here; let it through flagged as a
            # "limited precheck" instead of rejecting it outright.
            base_result["warnings"].append(
                {
                    "code": "IMPORT_LOCAL_PRECHECK_LIMITED",
                    "message": ".xls files are allowed for verify/start, but v1 local precheck is limited and repair is unsupported.",
                }
            )
            base_result["local_precheck_limited"] = True
            return base_result

        try:
            # NOTE(review): the read-only workbook is never explicitly
            # closed; consider close() to release the file handle promptly.
            workbook = load_workbook(path, read_only=True, data_only=False)
        except Exception as exc:
            base_result["issues"].append(_issue("WORKBOOK_OPEN_FAILED", f"Workbook could not be opened: {exc}", severity="error"))
            base_result["local_precheck_passed"] = False
            base_result["can_import"] = False
            base_result["error_code"] = "IMPORT_VERIFICATION_FAILED"
            return base_result

        if not workbook.sheetnames:
            base_result["issues"].append(_issue("SHEET_MISSING", "Workbook does not contain any sheets.", severity="error"))
            base_result["local_precheck_passed"] = False
            base_result["can_import"] = False
            base_result["error_code"] = "IMPORT_VERIFICATION_FAILED"
            return base_result
        # Only the first sheet participates in import v1.
        sheet = workbook[workbook.sheetnames[0]]
        header_row = [cell.value for cell in next(sheet.iter_rows(min_row=1, max_row=1), [])]
        header_analysis = _analyze_headers(
            header_row,
            expected_columns,
            allowed_titles=allowed_header_titles,
        )
        base_result["issues"].extend(header_analysis["issues"])
        base_result["repair_suggestions"].extend(header_analysis["repair_suggestions"])
        # Semantic cell inspection only runs when headers are structurally OK.
        if not any(issue.get("severity") == "error" for issue in base_result["issues"]):
            semantic_issues, semantic_warnings = self._inspect_semantic_cells(
                profile=profile,
                context=context,
                sheet=sheet,
                expected_columns=expected_columns,
                field_index=field_index,
            )
            base_result["issues"].extend(semantic_issues)
            base_result["warnings"].extend(semantic_warnings)
        trailing_blank_rows = _count_trailing_blank_rows(sheet)
        if trailing_blank_rows > 0:
            base_result["warnings"].append(
                {
                    "code": "TRAILING_BLANK_ROWS",
                    "message": f"Workbook contains {trailing_blank_rows} trailing blank rows that can be safely removed.",
                }
            )
            base_result["repair_suggestions"].append("trim_trailing_blank_rows")
        enum_suggestions = _find_enum_repairs(sheet, expected_columns)
        if enum_suggestions:
            base_result["warnings"].append(
                {
                    "code": "ENUM_VALUE_NORMALIZATION_AVAILABLE",
                    "message": "Some enum-like cells can be normalized to exact template values without changing meaning.",
                }
            )
            base_result["repair_suggestions"].append("normalize_enum_values")
        # De-duplicate and stabilize repair-suggestion ordering.
        base_result["repair_suggestions"] = sorted(set(base_result["repair_suggestions"]))
        if any(issue.get("severity") == "error" for issue in base_result["issues"]):
            base_result["local_precheck_passed"] = False
            base_result["can_import"] = False
            base_result["error_code"] = "IMPORT_VERIFICATION_FAILED"
        return base_result
957
+
958
    def _inspect_semantic_cells(
        self,
        *,
        profile: str,
        context,
        sheet,
        expected_columns: list[JSONObject],
        field_index: Any,
    ) -> tuple[list[JSONObject], list[JSONObject]]:  # type: ignore[no-untyped-def]
        """Inspect sheet cells column-by-column for semantic problems.

        Covers enum, relation, member and department columns. Only columns
        whose normalized header title maps 1:1 between the sheet and the
        expected schema are inspected; ambiguous (duplicated) headers are
        skipped silently. Returns ``(issues, warnings)``.
        """
        issues: list[JSONObject] = []
        warnings: list[JSONObject] = []
        header_positions = _sheet_header_positions(sheet)
        # Group expected columns by normalized header key to detect ambiguity.
        expected_by_key: dict[str, list[JSONObject]] = {}
        for column in expected_columns:
            key = _normalize_header_key(column.get("title"))
            if key:
                expected_by_key.setdefault(key, []).append(column)
        for key, columns in expected_by_key.items():
            positions = header_positions.get(key, [])
            if len(columns) != 1 or len(positions) != 1:
                # No unique column <-> header mapping; skip inspection.
                continue
            column = columns[0]
            column_index = positions[0]
            write_kind = _normalize_optional_text(column.get("write_kind")) or "scalar"
            # Enum-like columns (option lists) take precedence over write_kind.
            if column.get("options"):
                issue = _inspect_enum_column(sheet, column_index=column_index, column=column)
                if issue is not None:
                    issues.append(issue)
                continue
            if write_kind == "relation":
                issue = _inspect_relation_column(sheet, column_index=column_index, column=column)
                if issue is not None:
                    issues.append(issue)
                continue
            field = field_index.by_id.get(str(column.get("field_id"))) if field_index is not None else None
            # Member/department columns are only checked when the field
            # declares an explicit candidate-select scope.
            if (
                write_kind == "member"
                and field is not None
                and (
                    field.member_select_scope_type is not None
                    or field.member_select_scope is not None
                )
            ):
                member_issue, member_warning = self._inspect_member_column(
                    context=context,
                    sheet=sheet,
                    column_index=column_index,
                    column=column,
                    field=field,
                )
                if member_issue is not None:
                    issues.append(member_issue)
                    continue
                if member_warning is not None:
                    warnings.append(member_warning)
                continue
            if (
                write_kind == "department"
                and field is not None
                and (
                    field.dept_select_scope_type is not None
                    or field.dept_select_scope is not None
                )
            ):
                department_issue, department_warning = self._inspect_department_column(
                    context=context,
                    sheet=sheet,
                    column_index=column_index,
                    column=column,
                    field=field,
                )
                if department_issue is not None:
                    issues.append(department_issue)
                    continue
                if department_warning is not None:
                    warnings.append(department_warning)
                continue
        return issues, warnings
1036
+
1037
    def _inspect_member_column(
        self,
        *,
        context,
        sheet,
        column_index: int,
        column: JSONObject,
        field,
    ) -> tuple[JSONObject | None, JSONObject | None]:  # type: ignore[no-untyped-def]
        """Validate member-column cells against the field's candidate scope.

        Cells must be member emails that resolve to exactly one candidate in
        the field's select scope. Collects at most 3 sample offenders per
        problem kind, resolving each distinct cell value only once.

        Returns ``(issue, warning)`` — at most one of which is non-None.
        """
        invalid_email_samples: list[str] = []
        scope_miss_samples: list[str] = []
        checked_values: set[str] = set()
        # Row 1 is the header; data starts at row 2.
        for row_index in range(2, sheet.max_row + 1):
            text = _normalize_optional_text(sheet.cell(row=row_index, column=column_index).value)
            if text is None:
                continue
            normalized = text.strip()
            if normalized in checked_values:
                # Each distinct value is validated against the backend once.
                continue
            checked_values.add(normalized)
            if not EMAIL_PATTERN.fullmatch(normalized):
                invalid_email_samples.append(f"row {row_index}: {normalized}")
                if len(invalid_email_samples) >= 3:
                    break
                continue
            try:
                candidates = self._record_tools._resolve_member_candidates(context, field, keyword=normalized)
                matches = self._record_tools._match_member_candidates(candidates, normalized)
            except QingflowApiError as exc:
                if exc.category == "not_supported":
                    # Scope resolution unsupported here: downgrade to warning.
                    return None, {
                        "code": "MEMBER_CANDIDATE_VALIDATION_SKIPPED",
                        "message": f"Member candidate scope for column '{column['title']}' could not be resolved safely during local precheck.",
                    }
                raise
            except RuntimeError:
                return None, {
                    "code": "MEMBER_CANDIDATE_VALIDATION_SKIPPED",
                    "message": f"Member candidate scope for column '{column['title']}' could not be resolved safely during local precheck.",
                }
            if len(matches) != 1:
                # Zero or ambiguous matches both count as out-of-scope.
                scope_miss_samples.append(f"row {row_index}: {normalized}")
                if len(scope_miss_samples) >= 3:
                    break
        if invalid_email_samples:
            return _issue(
                "MEMBER_IMPORT_REQUIRES_EMAIL",
                f"Column '{column['title']}' must use member email values in import files. Samples: {', '.join(invalid_email_samples)}",
                severity="error",
            ), None
        if scope_miss_samples:
            return _issue(
                "MEMBER_NOT_IN_CANDIDATE_SCOPE",
                f"Column '{column['title']}' contains members outside the current candidate scope. Samples: {', '.join(scope_miss_samples)}",
                severity="error",
            ), None
        return None, None
1094
+
1095
    def _inspect_department_column(
        self,
        *,
        context,
        sheet,
        column_index: int,
        column: JSONObject,
        field,
    ) -> tuple[JSONObject | None, JSONObject | None]:  # type: ignore[no-untyped-def]
        """Validate department-column cells against the field's candidate scope.

        Mirrors :meth:`_inspect_member_column` without the email-format check:
        each distinct cell value must resolve to exactly one department in the
        field's select scope. Collects at most 3 sample offenders.

        Returns ``(issue, warning)`` — at most one of which is non-None.
        """
        scope_miss_samples: list[str] = []
        checked_values: set[str] = set()
        # Row 1 is the header; data starts at row 2.
        for row_index in range(2, sheet.max_row + 1):
            value = sheet.cell(row=row_index, column=column_index).value
            text = _normalize_optional_text(value)
            if text is None:
                continue
            normalized = text.strip()
            if normalized in checked_values:
                # Each distinct value is validated against the backend once.
                continue
            checked_values.add(normalized)
            try:
                candidates = self._record_tools._resolve_department_candidates(context, field, keyword=normalized)
                matches = self._record_tools._match_department_candidates(candidates, normalized)
            except QingflowApiError as exc:
                if exc.category == "not_supported":
                    # Scope resolution unsupported here: downgrade to warning.
                    return None, {
                        "code": "DEPARTMENT_CANDIDATE_VALIDATION_SKIPPED",
                        "message": f"Department candidate scope for column '{column['title']}' could not be resolved safely during local precheck.",
                    }
                raise
            except RuntimeError:
                return None, {
                    "code": "DEPARTMENT_CANDIDATE_VALIDATION_SKIPPED",
                    "message": f"Department candidate scope for column '{column['title']}' could not be resolved safely during local precheck.",
                }
            if len(matches) != 1:
                # Zero or ambiguous matches both count as out-of-scope.
                scope_miss_samples.append(f"row {row_index}: {normalized}")
                if len(scope_miss_samples) >= 3:
                    break
        if scope_miss_samples:
            return _issue(
                "DEPARTMENT_NOT_IN_CANDIDATE_SCOPE",
                f"Column '{column['title']}' contains departments outside the current candidate scope. Samples: {', '.join(scope_miss_samples)}",
                severity="error",
            ), None
        return None, None
1141
+
1142
    def _load_template_header_profile(
        self,
        context,
        app_key: str,
        *,
        import_capability: JSONObject | None = None,
        expected_columns: list[JSONObject] | None = None,
    ) -> tuple[dict[str, Any], list[JSONObject]]:  # type: ignore[no-untyped-def]
        """Download the official import template and extract its header layout.

        Returns ``(profile, warnings)`` where the profile carries
        ``allowed_titles`` (first header row), ``leaf_titles`` (deepest header
        row for merged/multi-row headers) and ``header_depth``. Every failure
        mode degrades to a permissive profile (None titles, depth 1) plus a
        warning instead of raising.
        """
        warnings: list[JSONObject] = []
        try:
            payload = self.backend.request("GET", context, f"/app/{app_key}/apply/excelTemplate")
            template_url = _pick_template_url(payload)
            if not template_url:
                return {"allowed_titles": None, "leaf_titles": None, "header_depth": 1}, warnings
            content = self.backend.download_binary(template_url)
            workbook = load_workbook(BytesIO(content), read_only=False, data_only=False)
            if not workbook.sheetnames:
                return {"allowed_titles": None, "leaf_titles": None, "header_depth": 1}, warnings
            sheet = workbook[workbook.sheetnames[0]]
            header_row = [cell.value for cell in next(sheet.iter_rows(min_row=1, max_row=1), [])]
            titles = [_normalize_optional_text(value) for value in header_row]
            normalized_titles = [title for title in titles if title]
            header_depth = _infer_header_depth(sheet)
            leaf_titles = [title for title in _extract_leaf_header_titles(sheet, header_depth) if title]
            return {
                "allowed_titles": normalized_titles or None,
                "leaf_titles": leaf_titles or None,
                "header_depth": header_depth,
            }, warnings
        except Exception:
            # Deliberately broad: template download/parsing is best-effort and
            # must never fail the verify flow.
            if (
                _normalize_optional_text((import_capability or {}).get("auth_source")) == "apply_auth"
                and expected_columns
            ):
                # Applicant-only auth cannot read the official template; fall
                # back to the applicant-visible import columns instead.
                warnings.append(
                    {
                        "code": "IMPORT_TEMPLATE_HEADER_LOCAL_FALLBACK",
                        "message": "Official template headers require data management permission; local precheck fell back to applicant import columns.",
                    }
                )
                fallback_titles = [str(item["title"]) for item in expected_columns]
                return {"allowed_titles": fallback_titles, "leaf_titles": fallback_titles, "header_depth": 1}, warnings
            warnings.append(
                {
                    "code": "IMPORT_TEMPLATE_HEADER_UNAVAILABLE",
                    "message": "Official template headers could not be loaded during local precheck; falling back to applicant writable columns only.",
                }
            )
            return {"allowed_titles": None, "leaf_titles": None, "header_depth": 1}, warnings
1191
+
1192
    def _maybe_auto_normalize_file(
        self,
        *,
        source_path: Path,
        expected_columns: list[JSONObject],
        template_header_profile: dict[str, Any],
        local_check: dict[str, Any],
    ) -> dict[str, Any] | None:
        """Rewrite the workbook into backend-friendly shape when needed.

        Flattens multi-row (merged) headers into a single leaf-header row and
        trims trailing blank rows, writing the result to a fresh temp file.
        Returns a descriptor for the normalized copy, or ``None`` when the
        file is not .xlsx, has no sheets, or needs no normalization.
        """
        if source_path.suffix.lower() != ".xlsx":
            # Only .xlsx can be rewritten here.
            return None
        workbook = load_workbook(source_path, read_only=False, data_only=False)
        if not workbook.sheetnames:
            return None
        sheet = workbook[workbook.sheetnames[0]]
        header_depth = _infer_header_depth(sheet)
        trailing_blank_rows = _count_trailing_blank_rows(sheet)
        if header_depth <= 1 and trailing_blank_rows <= 0:
            # Already a single header row with no blank tail: nothing to do.
            return None
        extracted_headers = _extract_leaf_header_titles(sheet, header_depth)
        # Prefer official template titles where they are known.
        target_headers = _overlay_header_titles(
            extracted_headers,
            template_header_profile.get("leaf_titles"),
        )
        verified_path = _resolve_verified_output_path(source_path)
        normalized_workbook = Workbook()
        normalized_sheet = normalized_workbook.active
        normalized_sheet.title = sheet.title
        normalized_sheet.append(target_headers)
        # Copy data rows: those after the header band, minus the blank tail.
        last_nonblank_row = max(header_depth, sheet.max_row - trailing_blank_rows)
        for row_index in range(header_depth + 1, last_nonblank_row + 1):
            normalized_sheet.append(
                [sheet.cell(row=row_index, column=column_index).value for column_index in range(1, sheet.max_column + 1)]
            )
        verified_path.parent.mkdir(parents=True, exist_ok=True)
        normalized_workbook.save(verified_path)
        warnings: list[JSONObject] = []
        applied_repairs: list[str] = []
        if header_depth > 1:
            applied_repairs.append("normalize_headers")
            warnings.append(
                {
                    "code": "IMPORT_HEADERS_AUTO_NORMALIZED",
                    "message": f"Workbook used {header_depth} header rows; record_import_verify normalized it to a single leaf-header row automatically.",
                }
            )
        if trailing_blank_rows > 0:
            applied_repairs.append("trim_trailing_blank_rows")
            warnings.append(
                {
                    "code": "TRAILING_BLANK_ROWS_AUTO_TRIMMED",
                    "message": f"Removed {trailing_blank_rows} trailing blank rows before backend verification.",
                }
            )
        return {
            "verified_file_path": str(verified_path.resolve()),
            "header_titles": target_headers,
            "warnings": warnings,
            "applied_repairs": applied_repairs,
            "header_depth": header_depth,
            "trailing_blank_rows": trailing_blank_rows,
            "source_local_check": local_check,
        }
1254
+
1255
+ def _fetch_import_capability(self, context, app_key: str) -> tuple[JSONObject, list[JSONObject]]: # type: ignore[no-untyped-def]
1256
+ try:
1257
+ payload = self.backend.request("GET", context, f"/app/{app_key}/baseInfo")
1258
+ except QingflowApiError:
1259
+ payload = None
1260
+ return _derive_import_capability(payload)
1261
+
1262
+ def _write_local_template(
1263
+ self,
1264
+ *,
1265
+ expected_columns: list[JSONObject],
1266
+ destination_hint: str | None,
1267
+ app_key: str,
1268
+ ) -> str:
1269
+ if destination_hint:
1270
+ destination = _resolve_template_download_path(destination_hint, app_key=app_key)
1271
+ else:
1272
+ destination = Path(tempfile.gettempdir()) / f"qingflow-import-template-{app_key}-{uuid4().hex[:8]}.xlsx"
1273
+ destination.parent.mkdir(parents=True, exist_ok=True)
1274
+ workbook = Workbook()
1275
+ sheet = workbook.active
1276
+ sheet.title = "导入模板"
1277
+ sheet.append([str(item["title"]) for item in expected_columns])
1278
+ workbook.save(destination)
1279
+ return str(destination)
1280
+
1281
+ def _failed_template_result(
1282
+ self,
1283
+ *,
1284
+ app_key: str,
1285
+ error_code: str,
1286
+ message: str,
1287
+ request_route: JSONObject | None = None,
1288
+ ) -> dict[str, Any]:
1289
+ return {
1290
+ "ok": False,
1291
+ "status": "failed",
1292
+ "error_code": error_code,
1293
+ "app_key": app_key,
1294
+ "template_url": None,
1295
+ "downloaded_to_path": None,
1296
+ "expected_columns": [],
1297
+ "schema_fingerprint": None,
1298
+ "request_route": request_route,
1299
+ "warnings": [],
1300
+ "verification": {"template_url_resolved": False},
1301
+ "message": message,
1302
+ }
1303
+
1304
+ def _failed_verify_result(
1305
+ self,
1306
+ *,
1307
+ app_key: str,
1308
+ file_path: str,
1309
+ error_code: str,
1310
+ message: str,
1311
+ extra: dict[str, Any] | None = None,
1312
+ ) -> dict[str, Any]:
1313
+ payload = {
1314
+ "ok": True,
1315
+ "status": "failed",
1316
+ "error_code": error_code,
1317
+ "app_key": app_key,
1318
+ "can_import": False,
1319
+ "verification_id": None,
1320
+ "file_path": str(Path(file_path).expanduser()) if file_path else file_path,
1321
+ "verified_file_path": None,
1322
+ "file_name": Path(file_path).name if file_path else None,
1323
+ "file_sha256": None,
1324
+ "verified_file_sha256": None,
1325
+ "file_size": None,
1326
+ "schema_fingerprint": None,
1327
+ "apply_rows": None,
1328
+ "issues": [_issue(error_code, message, severity="error")],
1329
+ "repair_suggestions": [],
1330
+ "warnings": [],
1331
+ "verification": {
1332
+ "import_auth_prechecked": False,
1333
+ "import_auth_precheck_passed": False,
1334
+ "local_precheck_passed": False,
1335
+ "backend_verification_passed": False,
1336
+ },
1337
+ "import_capability": None,
1338
+ "message": message,
1339
+ }
1340
+ if extra:
1341
+ payload.update(extra)
1342
+ return payload
1343
+
1344
+ def _failed_repair_result(self, *, error_code: str, message: str, extra: dict[str, Any] | None = None) -> dict[str, Any]:
1345
+ payload = {
1346
+ "ok": False,
1347
+ "status": "failed",
1348
+ "error_code": error_code,
1349
+ "source_file_path": None,
1350
+ "repaired_file_path": None,
1351
+ "applied_repairs": [],
1352
+ "skipped_repairs": [],
1353
+ "new_verification_id": None,
1354
+ "can_import_after_repair": False,
1355
+ "post_repair_issues": [_issue(error_code, message, severity="error")],
1356
+ "warnings": [],
1357
+ "verification": {
1358
+ "repair_authorized": False,
1359
+ "reverified": False,
1360
+ },
1361
+ "message": message,
1362
+ }
1363
+ if extra:
1364
+ payload.update(extra)
1365
+ return payload
1366
+
1367
+ def _failed_start_result(self, *, error_code: str, message: str, extra: dict[str, Any] | None = None) -> dict[str, Any]:
1368
+ payload = {
1369
+ "ok": False,
1370
+ "status": "failed",
1371
+ "error_code": error_code,
1372
+ "accepted": False,
1373
+ "import_id": None,
1374
+ "process_id_str": None,
1375
+ "source_file_name": None,
1376
+ "file_url": None,
1377
+ "warnings": [],
1378
+ "verification": {
1379
+ "verification_id_valid": False,
1380
+ "file_hash_verified": False,
1381
+ "schema_fingerprint_verified": False,
1382
+ "upload_staged": False,
1383
+ "import_acknowledged": False,
1384
+ },
1385
+ "message": message,
1386
+ }
1387
+ if extra:
1388
+ payload.update(extra)
1389
+ return payload
1390
+
1391
+ def _failed_status_result(self, *, error_code: str, message: str, extra: dict[str, Any] | None = None) -> dict[str, Any]:
1392
+ payload = {
1393
+ "ok": False,
1394
+ "status": "failed",
1395
+ "error_code": error_code,
1396
+ "import_id": None,
1397
+ "process_id_str": None,
1398
+ "matched_by": None,
1399
+ "source_file_name": None,
1400
+ "total_rows": None,
1401
+ "success_rows": None,
1402
+ "failed_rows": None,
1403
+ "progress": None,
1404
+ "error_file_urls": [],
1405
+ "operate_time": None,
1406
+ "operate_user": None,
1407
+ "warnings": [],
1408
+ "verification": {
1409
+ "status_lookup_completed": False,
1410
+ "process_id_verified": False,
1411
+ },
1412
+ "message": message,
1413
+ }
1414
+ if extra:
1415
+ payload.update(extra)
1416
+ return payload
1417
+
1418
+ def _runtime_error_as_result(
1419
+ self,
1420
+ error: RuntimeError,
1421
+ *,
1422
+ error_code: str,
1423
+ extra: dict[str, Any] | None = None,
1424
+ ) -> dict[str, Any]:
1425
+ try:
1426
+ payload = json.loads(str(error))
1427
+ except json.JSONDecodeError:
1428
+ payload = {"message": str(error)}
1429
+ response = {
1430
+ "ok": False,
1431
+ "status": "failed",
1432
+ "error_code": ((payload.get("details") or {}) if isinstance(payload.get("details"), dict) else {}).get("error_code") or error_code,
1433
+ "warnings": [],
1434
+ "verification": {},
1435
+ "message": payload.get("message") or str(error),
1436
+ }
1437
+ if extra:
1438
+ response.update(extra)
1439
+ return response
1440
+
1441
+
1442
+ def _pick_template_url(payload: Any) -> str | None:
1443
+ if isinstance(payload, dict):
1444
+ for key in ("excelUrl", "url", "downloadUrl"):
1445
+ value = payload.get(key)
1446
+ if isinstance(value, str) and value.strip():
1447
+ return value.strip()
1448
+ return None
1449
+
1450
+
1451
+ def _resolve_template_download_path(raw_path: str, *, app_key: str) -> Path:
1452
+ path = Path(raw_path).expanduser()
1453
+ if path.exists() and path.is_dir():
1454
+ return path / f"{app_key}_import_template.xlsx"
1455
+ if path.suffix:
1456
+ return path
1457
+ return path / f"{app_key}_import_template.xlsx"
1458
+
1459
+
1460
+ def _resolve_repaired_output_path(source_path: Path, *, output_path: str | None) -> Path:
1461
+ if output_path:
1462
+ path = Path(output_path).expanduser()
1463
+ if path.exists() and path.is_dir():
1464
+ return path / f"{source_path.stem}.repaired{source_path.suffix}"
1465
+ if path.suffix:
1466
+ return path
1467
+ return path / f"{source_path.stem}.repaired{source_path.suffix}"
1468
+ return source_path.with_name(f"{source_path.stem}.repaired{source_path.suffix}")
1469
+
1470
+
1471
+ def _resolve_verified_output_path(source_path: Path) -> Path:
1472
+ return Path(tempfile.gettempdir()) / f"qingflow-import-verified-{source_path.stem}-{uuid4().hex[:8]}{source_path.suffix}"
1473
+
1474
+
1475
+ def _utc_now() -> datetime:
1476
+ return datetime.now(timezone.utc)
1477
+
1478
+
1479
+ def _sha256_file(path: Path) -> str:
1480
+ digest = hashlib.sha256()
1481
+ with path.open("rb") as handle:
1482
+ for chunk in iter(lambda: handle.read(1024 * 1024), b""):
1483
+ digest.update(chunk)
1484
+ return digest.hexdigest()
1485
+
1486
+
1487
+ def _normalize_optional_text(value: Any) -> str | None:
1488
+ if value is None:
1489
+ return None
1490
+ normalized = str(value).strip()
1491
+ return normalized or None
1492
+
1493
+
1494
+ def _normalize_header_key(value: Any) -> str:
1495
+ text = _normalize_optional_text(value)
1496
+ return (text or "").casefold()
1497
+
1498
+
1499
+ def _issue(code: str, message: str, *, severity: str, repairable: bool = False, repair_code: str | None = None) -> JSONObject:
1500
+ payload: JSONObject = {
1501
+ "code": code,
1502
+ "message": message,
1503
+ "severity": severity,
1504
+ "repairable": repairable,
1505
+ }
1506
+ if repair_code:
1507
+ payload["repair_code"] = repair_code
1508
+ return payload
1509
+
1510
+
1511
def _analyze_headers(
    header_row: list[Any],
    expected_columns: list[JSONObject],
    *,
    allowed_titles: list[str] | None = None,
) -> dict[str, Any]:
    """Validate a sheet's first-row headers against the expected column titles.

    Args:
        header_row: Raw cell values from row 1 of the sheet.
        expected_columns: Column descriptors; each must carry a "title" key.
        allowed_titles: Optional wider set of acceptable titles (e.g. template
            system columns). Defaults to the expected column titles.

    Returns:
        ``{"issues": [...], "repair_suggestions": [...]}`` where issues are
        structured records for missing / extra / duplicate columns and
        repair_suggestions contains "normalize_headers" when any header
        deviation (including pure case/whitespace drift) was found.

    All title comparison is case- and whitespace-insensitive via
    ``_normalize_header_key``.
    """
    expected_titles = [str(item["title"]) for item in expected_columns]
    allowed_title_list = allowed_titles if allowed_titles else expected_titles
    # How many occurrences of each normalized title are permitted; duplicates
    # in the allowed list raise the permitted count.
    allowed_counts = _header_title_counts(allowed_title_list)
    # normalized key -> canonical (stripped) title spelling
    allowed_by_key = {
        key: title
        for key, title in (
            (_normalize_header_key(title), _normalize_optional_text(title))
            for title in allowed_title_list
        )
        if key and title
    }
    # Tally the actual headers; blank cells become "" placeholders so column
    # positions are preserved in actual_headers.
    seen: dict[str, int] = {}
    actual_headers: list[str] = []
    for item in header_row:
        text = _normalize_optional_text(item)
        if text is None:
            actual_headers.append("")
            continue
        actual_headers.append(text)
        key = _normalize_header_key(text)
        seen[key] = seen.get(key, 0) + 1
    # Titles that appear fewer times than required.
    missing: list[str] = []
    for key, expected_count in allowed_counts.items():
        actual_count = seen.get(key, 0)
        if actual_count >= expected_count:
            continue
        title = allowed_by_key.get(key) or key
        if expected_count <= 1:
            missing.append(title)
        else:
            # Multi-occurrence titles report how many were expected vs found.
            missing.append(f"{title} (need {expected_count}, got {actual_count})")
    # Headers present in the sheet but absent from the allowed set entirely.
    extra = [text for text in actual_headers if text and _normalize_header_key(text) not in allowed_by_key]
    # Headers seen more often than permitted (at least once for unknown keys).
    duplicates = []
    for key, count in seen.items():
        if not key:
            continue
        allowed_count = allowed_counts.get(key, 0)
        # Effectively: count > max(allowed_count, 1) — unknown keys tolerate one
        # occurrence (already flagged as extra), known keys their allowed count.
        if count > max(allowed_count, 1 if allowed_count == 0 else allowed_count):
            duplicates.append(allowed_by_key.get(key) or key)
    issues: list[JSONObject] = []
    repair_suggestions: list[str] = []
    if missing:
        issues.append(
            _issue(
                "MISSING_COLUMNS",
                f"Missing expected columns: {', '.join(missing)}",
                severity="error",
                repairable=True,
                repair_code="normalize_headers",
            )
        )
    if extra:
        issues.append(
            _issue(
                "EXTRA_COLUMNS",
                f"Unexpected columns: {', '.join(extra)}",
                severity="error",
                repairable=True,
                repair_code="normalize_headers",
            )
        )
    if duplicates:
        issues.append(
            _issue(
                "DUPLICATE_COLUMNS",
                f"Duplicate columns: {', '.join(sorted(set(duplicates)))}",
                severity="error",
                repairable=True,
                repair_code="normalize_headers",
            )
        )
    # Headers whose normalized key matches an allowed title but whose spelling
    # differs; these alone still justify suggesting a normalize_headers repair.
    normalized_changes = []
    for text in actual_headers:
        if not text:
            continue
        canonical = allowed_by_key.get(_normalize_header_key(text))
        if canonical and canonical != text:
            normalized_changes.append((text, canonical))
    if missing or extra or duplicates or normalized_changes:
        repair_suggestions.append("normalize_headers")
    return {"issues": issues, "repair_suggestions": repair_suggestions}
1598
+
1599
+
1600
def _header_title_counts(titles: list[str]) -> dict[str, int]:
    """Tally occurrences of each normalized header key, skipping blank titles."""
    tally: dict[str, int] = {}
    normalized_keys = (_normalize_header_key(title) for title in titles)
    for key in normalized_keys:
        if not key:
            continue
        tally[key] = tally.get(key, 0) + 1
    return tally
1608
+
1609
+
1610
def _sheet_header_positions(sheet) -> dict[str, list[int]]:  # type: ignore[no-untyped-def]
    """Map each normalized first-row header key to its 1-based column indexes."""
    positions: dict[str, list[int]] = {}
    first_row = next(sheet.iter_rows(min_row=1, max_row=1), [])
    for column_index, cell in enumerate(first_row, start=1):
        normalized = _normalize_header_key(cell.value)
        if normalized:
            positions.setdefault(normalized, []).append(column_index)
    return positions
1618
+
1619
+
1620
def _inspect_enum_column(sheet, *, column_index: int, column: JSONObject) -> JSONObject | None:  # type: ignore[no-untyped-def]
    """Report cell values outside the column's allowed options (up to 3 samples).

    Returns None when the column has no options or every value matches one
    (comparison is case/whitespace-insensitive).
    """
    options: list[str] = []
    for raw in column.get("options", []):
        candidate = str(raw).strip()
        if candidate:
            options.append(candidate)
    if not options:
        return None
    allowed_keys = {_normalize_header_key(option) for option in options}
    samples: list[str] = []
    row_index = 2
    # Stop scanning once three offending samples are collected.
    while row_index <= sheet.max_row and len(samples) < 3:
        text = _normalize_optional_text(sheet.cell(row=row_index, column=column_index).value)
        if text is not None and _normalize_header_key(text) not in allowed_keys:
            samples.append(f"row {row_index}: {text}")
        row_index += 1
    if not samples:
        return None
    return _issue(
        "INVALID_ENUM_VALUES",
        f"Column '{column['title']}' contains values outside the allowed options. Samples: {', '.join(samples)}",
        severity="error",
    )
1642
+
1643
+
1644
def _inspect_relation_column(sheet, *, column_index: int, column: JSONObject) -> JSONObject | None:  # type: ignore[no-untyped-def]
    """Flag non-empty cells that are not positive-integer apply_id values (max 3 samples)."""
    samples: list[str] = []
    for row_index in range(2, sheet.max_row + 1):
        raw = sheet.cell(row=row_index, column=column_index).value
        text = _normalize_optional_text(raw)
        # Empty cells and valid positive ids are both acceptable.
        if text is None or _coerce_positive_relation_id(raw) is not None:
            continue
        samples.append(f"row {row_index}: {text}")
        if len(samples) == 3:
            break
    if not samples:
        return None
    return _issue(
        "RELATION_IMPORT_REQUIRES_APPLY_ID",
        f"Column '{column['title']}' must use target record apply_id values during import. Samples: {', '.join(samples)}",
        severity="error",
    )
1664
+
1665
+
1666
+ def _stable_import_schema_fingerprint(expected_columns: list[JSONObject]) -> str:
1667
+ stable_columns = []
1668
+ for item in expected_columns:
1669
+ stable_columns.append(
1670
+ {
1671
+ "field_id": item["field_id"],
1672
+ "title": item["title"],
1673
+ "que_type": item["que_type"],
1674
+ "required": item["required"],
1675
+ "write_kind": item["write_kind"],
1676
+ "options": item.get("options", []),
1677
+ "requires_lookup": bool(item.get("requires_lookup")),
1678
+ "requires_upload": bool(item.get("requires_upload")),
1679
+ "target_app_key": item.get("target_app_key"),
1680
+ }
1681
+ )
1682
+ return hashlib.sha256(
1683
+ json.dumps(stable_columns, ensure_ascii=False, sort_keys=True).encode("utf-8")
1684
+ ).hexdigest()
1685
+
1686
+
1687
+ def _coerce_positive_relation_id(value: Any) -> int | None:
1688
+ if isinstance(value, bool):
1689
+ return None
1690
+ if isinstance(value, int):
1691
+ return value if value > 0 else None
1692
+ if isinstance(value, float):
1693
+ if value.is_integer() and value > 0:
1694
+ return int(value)
1695
+ return None
1696
+ text = _normalize_optional_text(value)
1697
+ if text is None:
1698
+ return None
1699
+ if text.isdigit():
1700
+ parsed = int(text)
1701
+ return parsed if parsed > 0 else None
1702
+ return None
1703
+
1704
+
1705
def _infer_header_depth(sheet) -> int:  # type: ignore[no-untyped-def]
    """Guess how many rows at the top of *sheet* form the header band.

    Heuristic: merged cell ranges that start on row 1 indicate a grouped,
    multi-row header; the deepest such range sets the depth. If row 1 has any
    merge and row 2 contains text, depth is at least 2 (leaf titles live on
    row 2). The result is clamped to the sheet's actual row count.
    """
    header_depth = 1
    # merged_cells may be an openpyxl MultiCellRange (exposing .ranges) or
    # already a plain iterable of ranges — support both duck-typed shapes.
    merged_cells = getattr(sheet, "merged_cells", None)
    merged_ranges = getattr(merged_cells, "ranges", merged_cells) if merged_cells is not None else []
    row_one_has_merge = False
    for merged_range in merged_ranges or []:
        min_row = int(getattr(merged_range, "min_row", 1))
        max_row = int(getattr(merged_range, "max_row", 1))
        if min_row == 1:
            row_one_has_merge = True
            # Only merges anchored on row 1 can deepen the header band.
            header_depth = max(header_depth, max_row)
    if row_one_has_merge and sheet.max_row >= 2:
        row_two_values = [cell.value for cell in sheet[2]]
        if any(_normalize_optional_text(value) for value in row_two_values):
            header_depth = max(header_depth, 2)
    # Never report a depth beyond the rows the sheet actually has.
    return min(header_depth, max(1, int(sheet.max_row)))
1721
+
1722
+
1723
def _extract_leaf_header_titles(sheet, header_depth: int) -> list[str]:  # type: ignore[no-untyped-def]
    """For each column, take the deepest non-blank header cell within *header_depth* rows.

    Columns whose header band is entirely blank yield "".
    """
    column_count = max(1, int(sheet.max_column))
    depth = max(1, min(header_depth, int(sheet.max_row)))
    titles: list[str] = []
    for column_index in range(1, column_count + 1):
        title = ""
        # Walk upward from the deepest header row; the first non-blank wins.
        row_index = depth
        while row_index >= 1:
            text = _normalize_optional_text(sheet.cell(row=row_index, column=column_index).value)
            if text:
                title = text
                break
            row_index -= 1
        titles.append(title)
    return titles
1736
+
1737
+
1738
+ def _overlay_header_titles(actual_titles: list[str], template_leaf_titles: Any) -> list[str]:
1739
+ normalized = list(actual_titles)
1740
+ if not isinstance(template_leaf_titles, list):
1741
+ return normalized
1742
+ for index, title in enumerate(template_leaf_titles):
1743
+ normalized_title = _normalize_optional_text(title)
1744
+ if normalized_title is None:
1745
+ continue
1746
+ if index < len(normalized):
1747
+ normalized[index] = normalized_title
1748
+ return normalized
1749
+
1750
+
1751
+ def _count_trailing_blank_rows(sheet) -> int: # type: ignore[no-untyped-def]
1752
+ count = 0
1753
+ for row_index in range(sheet.max_row, 1, -1):
1754
+ values = [cell.value for cell in sheet[row_index]]
1755
+ if any(value not in (None, "") for value in values):
1756
+ break
1757
+ count += 1
1758
+ return count
1759
+
1760
+
1761
def _find_enum_repairs(sheet, expected_columns: list[JSONObject]) -> list[str]:  # type: ignore[no-untyped-def]
    """List enum-column titles whose first 50 data rows hold values that differ
    from a canonical option only by case/whitespace (i.e. auto-repairable)."""
    header_map = _sheet_header_map(sheet)
    repairable_titles: list[str] = []
    # Sampling is capped at 50 data rows to keep the scan cheap.
    last_row = min(sheet.max_row, 50)
    for column in expected_columns:
        options = [str(raw).strip() for raw in column.get("options", []) if str(raw).strip()]
        if not options:
            continue
        column_index = header_map.get(_normalize_header_key(column["title"]))
        if column_index is None:
            continue
        canonical_by_key = {_normalize_header_key(option): option for option in options}
        for row_index in range(2, last_row + 1):
            text = _normalize_optional_text(sheet.cell(row=row_index, column=column_index).value)
            if text is None:
                continue
            canonical = canonical_by_key.get(_normalize_header_key(text))
            if canonical and canonical != text:
                repairable_titles.append(column["title"])
                break
    return repairable_titles
1782
+
1783
+
1784
def _sheet_header_map(sheet) -> dict[str, int]:  # type: ignore[no-untyped-def]
    """Map each normalized first-row header key to its first 1-based column index."""
    first_row = next(sheet.iter_rows(min_row=1, max_row=1), [])
    header_map: dict[str, int] = {}
    for position, cell in enumerate(first_row, start=1):
        normalized = _normalize_header_key(cell.value)
        # First occurrence wins; later duplicates are ignored.
        if normalized and normalized not in header_map:
            header_map[normalized] = position
    return header_map
1791
+
1792
+
1793
def _repair_headers(sheet, expected_columns: list[JSONObject]) -> bool:  # type: ignore[no-untyped-def]
    """Rewrite row-1 header cells to their canonical expected spellings.

    Pass 1 fixes case/whitespace drift: any header whose normalized key
    matches an expected title is rewritten to that title. If this pass
    changed anything, the function stops there.

    Pass 2 is a positional fallback for files whose headers no longer match
    any expected title but whose column order still follows the expected
    schema: the first N header cells are overwritten with the N expected
    titles, leaving any extra trailing (system) columns untouched.

    Returns True when at least one header cell was modified.
    """
    changed = False
    expected_by_key = {_normalize_header_key(item["title"]): item["title"] for item in expected_columns}
    header_cells = list(next(sheet.iter_rows(min_row=1, max_row=1), []))
    for cell in header_cells:
        text = _normalize_optional_text(cell.value)
        if text is None:
            continue
        canonical = expected_by_key.get(_normalize_header_key(text))
        if canonical and canonical != text:
            cell.value = canonical
            changed = True
    if changed:
        return True

    # Fallback for template-based files where headers were edited into non-canonical
    # values but column order is still intact. Keep any extra trailing system columns.
    for index, column in enumerate(expected_columns, start=1):
        if index > len(header_cells):
            break
        expected_title = str(column["title"]).strip()
        current_title = _normalize_optional_text(header_cells[index - 1].value)
        if current_title != expected_title:
            header_cells[index - 1].value = expected_title
            changed = True
    return changed
1819
+
1820
+
1821
+ def _trim_trailing_blank_rows(sheet) -> bool: # type: ignore[no-untyped-def]
1822
+ removed = 0
1823
+ while sheet.max_row > 1:
1824
+ values = [cell.value for cell in sheet[sheet.max_row]]
1825
+ if any(value not in (None, "") for value in values):
1826
+ break
1827
+ sheet.delete_rows(sheet.max_row, 1)
1828
+ removed += 1
1829
+ return removed > 0
1830
+
1831
+
1832
def _normalize_enum_values(sheet, expected_columns: list[JSONObject]) -> bool:  # type: ignore[no-untyped-def]
    """Rewrite enum cells to their canonical option spelling; True when any cell changed."""
    header_map = _sheet_header_map(sheet)
    changed_any = False
    for column in expected_columns:
        options = [str(raw).strip() for raw in column.get("options", []) if str(raw).strip()]
        if not options:
            continue
        column_index = header_map.get(_normalize_header_key(column["title"]))
        if column_index is None:
            continue
        canonical_by_key = {_normalize_header_key(option): option for option in options}
        for row_index in range(2, sheet.max_row + 1):
            cell = sheet.cell(row=row_index, column=column_index)
            text = _normalize_optional_text(cell.value)
            if text is None:
                continue
            canonical = canonical_by_key.get(_normalize_header_key(text))
            if canonical and canonical != text:
                cell.value = canonical
                changed_any = True
    return changed_any
1853
+
1854
+
1855
+ def _normalize_date_formats(sheet) -> bool: # type: ignore[no-untyped-def]
1856
+ changed = False
1857
+ for row in sheet.iter_rows(min_row=2):
1858
+ for cell in row:
1859
+ if getattr(cell, "is_date", False):
1860
+ if cell.number_format != "yyyy-mm-dd hh:mm:ss":
1861
+ cell.number_format = "yyyy-mm-dd hh:mm:ss"
1862
+ changed = True
1863
+ return changed
1864
+
1865
+
1866
+ def _normalize_number_formats(sheet) -> bool: # type: ignore[no-untyped-def]
1867
+ changed = False
1868
+ for row in sheet.iter_rows(min_row=2):
1869
+ for cell in row:
1870
+ if isinstance(cell.value, (int, float)) and not getattr(cell, "is_date", False):
1871
+ if cell.number_format == "General":
1872
+ cell.number_format = "0.00" if isinstance(cell.value, float) else "0"
1873
+ changed = True
1874
+ return changed
1875
+
1876
+
1877
def _normalize_url_cells(sheet) -> bool:  # type: ignore[no-untyped-def]
    """Strip surrounding whitespace from http(s) URL cells; True when any cell changed."""
    changed_any = False
    for row in sheet.iter_rows(min_row=2):
        for cell in row:
            text = _normalize_optional_text(cell.value)
            if not text:
                continue
            if text.startswith(("http://", "https://")) and text != cell.value:
                cell.value = text
                changed_any = True
    return changed_any
1886
+
1887
+
1888
+ def _extract_import_records(payload: Any) -> list[JSONObject]:
1889
+ if isinstance(payload, dict):
1890
+ for key in ("list", "records", "items"):
1891
+ value = payload.get(key)
1892
+ if isinstance(value, list):
1893
+ return [item for item in value if isinstance(item, dict)]
1894
+ if isinstance(payload, list):
1895
+ return [item for item in payload if isinstance(item, dict)]
1896
+ return []
1897
+
1898
+
1899
def _match_import_record(
    records: list[JSONObject],
    *,
    local_job: dict[str, Any] | None,
    process_id_str: str | None,
) -> tuple[JSONObject | None, str | None]:
    """Pick the backend import record that corresponds to a local import job.

    Matching strategies, in decreasing precedence:
      1. "process_id_str" — exact process id match.
      2. "local_job_window" — candidates (optionally narrowed by source file
         name) whose operateTime falls within [started_at - 1min,
         started_at + 10min].
      3. "source_file_name" — candidates narrowed by file name alone.

    Returns (record, strategy). A strategy that yields multiple candidates is
    treated as ambiguous and returns (None, strategy); no match at all
    returns (None, None).
    """
    if process_id_str:
        # Tolerate multiple backend field spellings for the process id.
        exact = [
            item
            for item in records
            if _normalize_optional_text(item.get("processIdStr") or item.get("processId") or item.get("process_id_str")) == process_id_str
        ]
        if len(exact) == 1:
            return exact[0], "process_id_str"
        if len(exact) > 1:
            # Ambiguous: several records claim the same process id.
            return None, "process_id_str"
    if isinstance(local_job, dict):
        source_file_name = _normalize_optional_text(local_job.get("source_file_name"))
        started_at = _parse_utc(local_job.get("started_at"))
        candidates = records
        if source_file_name:
            candidates = [
                item
                for item in candidates
                if _normalize_optional_text(item.get("sourceFileName") or item.get("source_file_name")) == source_file_name
            ]
        if started_at is not None:
            # Accept records operated shortly before (clock skew) or within
            # ten minutes after the local job started.
            window_end = started_at + timedelta(minutes=10)
            timed = []
            for item in candidates:
                operate_time = _parse_utc(item.get("operateTime"))
                if operate_time is None:
                    continue
                if started_at - timedelta(minutes=1) <= operate_time <= window_end:
                    timed.append(item)
            if len(timed) == 1:
                return timed[0], "local_job_window"
            if len(timed) > 1:
                return None, "local_job_window"
        if len(candidates) == 1:
            return candidates[0], "source_file_name"
        if len(candidates) > 1:
            return None, "source_file_name"
    return None, None
1943
+
1944
+
1945
def _parse_utc(value: Any) -> datetime | None:
    """Parse *value* as an ISO-8601 timestamp and return it as aware UTC, or None.

    A trailing "Z" designator is mapped to "+00:00" for ``fromisoformat``;
    naive timestamps are assumed to already be UTC.
    """
    text = _normalize_optional_text(value)
    if text is None:
        return None
    try:
        parsed = datetime.fromisoformat(text.replace("Z", "+00:00"))
    except ValueError:
        return None
    if parsed.tzinfo is not None:
        return parsed.astimezone(timezone.utc)
    return parsed.replace(tzinfo=timezone.utc)
1957
+
1958
+
1959
+ def _coerce_int(value: Any) -> int | None:
1960
+ if value is None or value == "":
1961
+ return None
1962
+ try:
1963
+ return int(value)
1964
+ except (TypeError, ValueError):
1965
+ return None
1966
+
1967
+
1968
+ def _normalize_error_file_urls(value: Any) -> list[str]:
1969
+ if isinstance(value, list):
1970
+ return [str(item).strip() for item in value if str(item).strip()]
1971
+ return []