@josephyan/qingflow-cli 1.0.10 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/package.json +1 -1
- package/pyproject.toml +2 -1
- package/src/qingflow_mcp/cli/commands/record.py +0 -2
- package/src/qingflow_mcp/cli/formatters.py +21 -14
- package/src/qingflow_mcp/response_trim.py +42 -32
- package/src/qingflow_mcp/server.py +3 -3
- package/src/qingflow_mcp/server_app_user.py +3 -3
- package/src/qingflow_mcp/tools/record_tools.py +1236 -346
|
@@ -2,18 +2,22 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import csv
|
|
4
4
|
import html
|
|
5
|
+
import mimetypes
|
|
5
6
|
import json
|
|
6
7
|
import os
|
|
7
8
|
import re
|
|
8
9
|
import time
|
|
10
|
+
import zipfile
|
|
9
11
|
from copy import deepcopy
|
|
10
12
|
from dataclasses import dataclass
|
|
11
13
|
from datetime import UTC, datetime, timedelta
|
|
12
14
|
from decimal import Decimal, InvalidOperation
|
|
15
|
+
from io import BytesIO
|
|
13
16
|
from pathlib import Path
|
|
14
17
|
from typing import Any, cast
|
|
15
18
|
from urllib.parse import parse_qs, unquote, urlsplit
|
|
16
19
|
from uuid import uuid4
|
|
20
|
+
from xml.etree import ElementTree
|
|
17
21
|
|
|
18
22
|
from mcp.server.fastmcp import FastMCP
|
|
19
23
|
|
|
@@ -34,6 +38,7 @@ from .directory_tools import _directory_has_more, _directory_items
|
|
|
34
38
|
|
|
35
39
|
DEFAULT_QUERY_PAGE_SIZE = 50
|
|
36
40
|
DEFAULT_LIST_PAGE_SIZE = 200
|
|
41
|
+
DEFAULT_RECORD_LIST_RETURN_LIMIT = 10
|
|
37
42
|
BACKEND_RECORD_ACCESS_PAGE_SIZE = 1000
|
|
38
43
|
DEFAULT_RECORD_ACCESS_SHARD_ROWS = 20_000
|
|
39
44
|
RECORD_ACCESS_UNBOUNDED_ROW_THRESHOLD = 50_000
|
|
@@ -43,6 +48,14 @@ RECORD_GET_DETAIL_LOG_PAGE_SIZE = 10
|
|
|
43
48
|
RECORD_GET_MEDIA_MAX_IMAGES = 30
|
|
44
49
|
RECORD_GET_MEDIA_MAX_IMAGE_BYTES = 20 * 1024 * 1024
|
|
45
50
|
RECORD_GET_MEDIA_MAX_TOTAL_BYTES = 100 * 1024 * 1024
|
|
51
|
+
RECORD_GET_FILE_MAX_FILES = 50
|
|
52
|
+
RECORD_GET_FILE_MAX_BYTES = 50 * 1024 * 1024
|
|
53
|
+
RECORD_GET_FILE_MAX_TOTAL_BYTES = 200 * 1024 * 1024
|
|
54
|
+
RECORD_GET_FILE_TIME_BUDGET_SECONDS = 55.0
|
|
55
|
+
RECORD_GET_FILE_MIN_REMAINING_SECONDS = 8.0
|
|
56
|
+
RECORD_GET_FILE_EXTRACT_PREVIEW_CHARS = 20_000
|
|
57
|
+
RECORD_GET_FILE_EXTRACT_XLSX_MAX_ROWS_PER_SHEET = 200
|
|
58
|
+
RECORD_GET_FILE_EXTRACT_PDF_MAX_PAGES = 50
|
|
46
59
|
DEFAULT_ANALYSIS_PAGE_SIZE = 1000
|
|
47
60
|
DEFAULT_SCAN_MAX_PAGES = 10
|
|
48
61
|
DEFAULT_ANALYSIS_SCAN_MAX_PAGES = 100
|
|
@@ -355,7 +368,7 @@ class RecordTools(ToolBase):
|
|
|
355
368
|
description=(
|
|
356
369
|
"Browse Qingflow records with a schema-first list DSL. "
|
|
357
370
|
"Use record_browse_schema_get first, then pass field_id-only columns, where, and order_by clauses. "
|
|
358
|
-
"This route
|
|
371
|
+
"This route returns up to 10 rows plus total counts for browse, sample inspection, and fuzzy record lookup; it is not for analysis."
|
|
359
372
|
)
|
|
360
373
|
)
|
|
361
374
|
def record_list(
|
|
@@ -366,7 +379,6 @@ class RecordTools(ToolBase):
|
|
|
366
379
|
query_fields: list[JSONObject | int] | None = None,
|
|
367
380
|
where: list[JSONObject] | None = None,
|
|
368
381
|
order_by: list[JSONObject] | None = None,
|
|
369
|
-
limit: int = 50,
|
|
370
382
|
page: int = 1,
|
|
371
383
|
view_id: str | None = None,
|
|
372
384
|
output_profile: str = "normal",
|
|
@@ -379,7 +391,6 @@ class RecordTools(ToolBase):
|
|
|
379
391
|
query_fields=query_fields or [],
|
|
380
392
|
where=where or [],
|
|
381
393
|
order_by=order_by or [],
|
|
382
|
-
limit=limit,
|
|
383
394
|
page=page,
|
|
384
395
|
view_id=view_id,
|
|
385
396
|
list_type=None,
|
|
@@ -1618,8 +1629,8 @@ class RecordTools(ToolBase):
|
|
|
1618
1629
|
query_fields: list[JSONObject | int] | None = None,
|
|
1619
1630
|
where: list[JSONObject],
|
|
1620
1631
|
order_by: list[JSONObject],
|
|
1621
|
-
limit: int,
|
|
1622
|
-
page: int,
|
|
1632
|
+
limit: int = DEFAULT_RECORD_LIST_RETURN_LIMIT,
|
|
1633
|
+
page: int = 1,
|
|
1623
1634
|
view_id: str | None = None,
|
|
1624
1635
|
list_type: int | None = None,
|
|
1625
1636
|
view_key: str | None = None,
|
|
@@ -1664,127 +1675,133 @@ class RecordTools(ToolBase):
|
|
|
1664
1675
|
},
|
|
1665
1676
|
)
|
|
1666
1677
|
)
|
|
1667
|
-
|
|
1668
|
-
|
|
1669
|
-
app_key=app_key,
|
|
1670
|
-
resolved_view=view_route,
|
|
1671
|
-
)
|
|
1672
|
-
resolved_query_fields = self._resolve_record_list_query_fields_for_public(
|
|
1673
|
-
profile=profile,
|
|
1674
|
-
app_key=app_key,
|
|
1675
|
-
resolved_view=view_route,
|
|
1676
|
-
selectors=normalized_query_field_selectors,
|
|
1677
|
-
)
|
|
1678
|
+
filters = self._normalize_record_list_where(where)
|
|
1679
|
+
sorts = self._normalize_record_list_order_by(order_by)
|
|
1678
1680
|
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
requested_pages=1,
|
|
1687
|
-
scan_max_pages=1,
|
|
1688
|
-
auto_expand_pages=False,
|
|
1689
|
-
query_key=normalized_query,
|
|
1690
|
-
search_que_ids=resolved_query_fields,
|
|
1691
|
-
filters=self._normalize_record_list_where(where),
|
|
1692
|
-
sorts=self._normalize_record_list_order_by(order_by),
|
|
1693
|
-
max_rows=limit,
|
|
1694
|
-
max_columns=len(resolved_columns),
|
|
1695
|
-
select_columns=resolved_columns,
|
|
1696
|
-
amount_column=None,
|
|
1697
|
-
time_range={},
|
|
1698
|
-
stat_policy={},
|
|
1699
|
-
strict_full=False,
|
|
1700
|
-
output_profile="verbose" if normalized_output_profile in {"verbose", "normalized"} else DEFAULT_OUTPUT_PROFILE,
|
|
1701
|
-
list_type=view_route.list_type if view_route.list_type is not None else DEFAULT_RECORD_LIST_TYPE,
|
|
1702
|
-
view_key=view_route.view_selection.view_key if view_route.view_selection is not None else None,
|
|
1703
|
-
view_name=view_route.view_selection.view_name if view_route.view_selection is not None else None,
|
|
1704
|
-
)
|
|
1705
|
-
list_data = cast(JSONObject, cast(JSONObject, raw["data"])["list"])
|
|
1706
|
-
pagination = cast(JSONObject, list_data["pagination"])
|
|
1707
|
-
warnings: list[JSONObject] = []
|
|
1708
|
-
warnings.extend(legacy_warnings)
|
|
1709
|
-
warnings.extend(compatibility_warnings)
|
|
1710
|
-
warnings.extend(_view_filter_trust_warnings(view_route))
|
|
1711
|
-
warning = _normalize_optional_text(list_data.get("analysis_warning"))
|
|
1712
|
-
if warning:
|
|
1713
|
-
warnings.append({"code": "BROWSE_ONLY", "message": warning})
|
|
1714
|
-
list_type_used = _coerce_count(pagination.get("list_type_used"))
|
|
1715
|
-
if list_type_used is not None and list_type_used != DEFAULT_RECORD_LIST_TYPE:
|
|
1716
|
-
warnings.append(
|
|
1717
|
-
{
|
|
1718
|
-
"code": "LIST_TYPE_FALLBACK",
|
|
1719
|
-
"message": (
|
|
1720
|
-
f"record_list not accessible via listType={DEFAULT_RECORD_LIST_TYPE}; "
|
|
1721
|
-
f"fell back to listType={list_type_used} ({get_record_list_type_label(list_type_used)})."
|
|
1722
|
-
),
|
|
1723
|
-
}
|
|
1681
|
+
def runner(session_profile, context):
|
|
1682
|
+
browse_scope = self._build_browse_read_scope(
|
|
1683
|
+
profile,
|
|
1684
|
+
context,
|
|
1685
|
+
app_key,
|
|
1686
|
+
view_route,
|
|
1687
|
+
force_refresh=False,
|
|
1724
1688
|
)
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
|
|
1754
|
-
|
|
1689
|
+
index = cast(FieldIndex, browse_scope["index"])
|
|
1690
|
+
selected_fields = (
|
|
1691
|
+
self._resolve_record_list_columns(normalized_columns, index, view_route=view_route)
|
|
1692
|
+
if normalized_columns
|
|
1693
|
+
else self._derive_record_list_fields_from_index(index)
|
|
1694
|
+
)
|
|
1695
|
+
resolved_columns = [field.que_id for field in selected_fields]
|
|
1696
|
+
resolved_query_fields = self._resolve_record_list_query_fields(
|
|
1697
|
+
normalized_query_field_selectors,
|
|
1698
|
+
index,
|
|
1699
|
+
view_route=view_route,
|
|
1700
|
+
)
|
|
1701
|
+
match_rules = self._resolve_record_list_match_rules(context, filters, index, view_route=view_route)
|
|
1702
|
+
sort_rules = self._resolve_record_list_sort_rules(sorts, index, view_route=view_route)
|
|
1703
|
+
raw = self._record_list_query_view_fields(
|
|
1704
|
+
session_profile=session_profile,
|
|
1705
|
+
context=context,
|
|
1706
|
+
app_key=app_key,
|
|
1707
|
+
view_route=view_route,
|
|
1708
|
+
page_num=page,
|
|
1709
|
+
page_size=DEFAULT_LIST_PAGE_SIZE,
|
|
1710
|
+
query_key=normalized_query,
|
|
1711
|
+
search_que_ids=resolved_query_fields or None,
|
|
1712
|
+
match_rules=match_rules,
|
|
1713
|
+
sort_rules=sort_rules,
|
|
1714
|
+
max_rows=limit,
|
|
1715
|
+
selected_fields=selected_fields,
|
|
1716
|
+
output_profile="verbose" if normalized_output_profile in {"verbose", "normalized"} else DEFAULT_OUTPUT_PROFILE,
|
|
1717
|
+
)
|
|
1718
|
+
list_data = cast(JSONObject, cast(JSONObject, raw["data"])["list"])
|
|
1719
|
+
pagination = cast(JSONObject, list_data["pagination"])
|
|
1720
|
+
warnings: list[JSONObject] = []
|
|
1721
|
+
warnings.extend(legacy_warnings)
|
|
1722
|
+
warnings.extend(compatibility_warnings)
|
|
1723
|
+
warnings.extend(_view_filter_trust_warnings(view_route))
|
|
1724
|
+
warning = _normalize_optional_text(list_data.get("analysis_warning"))
|
|
1725
|
+
if warning:
|
|
1726
|
+
warnings.append({"code": "BROWSE_ONLY", "message": warning})
|
|
1727
|
+
list_type_used = _coerce_count(pagination.get("list_type_used"))
|
|
1728
|
+
if list_type_used is not None and list_type_used != DEFAULT_RECORD_LIST_TYPE:
|
|
1729
|
+
warnings.append(
|
|
1730
|
+
{
|
|
1731
|
+
"code": "LIST_TYPE_FALLBACK",
|
|
1732
|
+
"message": (
|
|
1733
|
+
f"record_list not accessible via listType={DEFAULT_RECORD_LIST_TYPE}; "
|
|
1734
|
+
f"fell back to listType={list_type_used} ({get_record_list_type_label(list_type_used)})."
|
|
1735
|
+
),
|
|
1736
|
+
}
|
|
1737
|
+
)
|
|
1738
|
+
rows = list_data.get("rows", [])
|
|
1739
|
+
normalized_public_rows = _normalize_public_record_rows(rows if isinstance(rows, list) else [])
|
|
1740
|
+
lookup_payload = _build_record_list_lookup_payload(
|
|
1741
|
+
query=normalized_query,
|
|
1742
|
+
items=normalized_public_rows,
|
|
1743
|
+
pagination=pagination,
|
|
1744
|
+
)
|
|
1745
|
+
total_count = _coerce_count(pagination.get("result_amount"))
|
|
1746
|
+
returned_count = _coerce_count(pagination.get("returned_items"))
|
|
1747
|
+
if returned_count is None:
|
|
1748
|
+
returned_count = len(normalized_public_rows)
|
|
1749
|
+
truncated = bool(total_count is not None and total_count > returned_count)
|
|
1750
|
+
response: JSONObject = {
|
|
1751
|
+
"profile": profile,
|
|
1752
|
+
"ws_id": raw.get("ws_id"),
|
|
1753
|
+
"ok": bool(raw.get("ok", True)),
|
|
1754
|
+
"request_route": raw.get("request_route"),
|
|
1755
|
+
"warnings": warnings,
|
|
1756
|
+
"verification": _view_filter_verification_payload(view_route),
|
|
1757
|
+
"output_profile": normalized_output_profile,
|
|
1758
|
+
"data": {
|
|
1759
|
+
"app_key": app_key,
|
|
1760
|
+
"items": normalized_public_rows,
|
|
1761
|
+
"pagination": {
|
|
1762
|
+
"returned_count": returned_count,
|
|
1763
|
+
"total_count": total_count,
|
|
1764
|
+
"truncated": truncated,
|
|
1765
|
+
},
|
|
1766
|
+
"selection": {
|
|
1767
|
+
"columns": [_column_selector_payload(field_id) for field_id in resolved_columns],
|
|
1768
|
+
"query_fields": [_column_selector_payload(field_id) for field_id in resolved_query_fields],
|
|
1769
|
+
"view": _accessible_view_payload(view_route),
|
|
1770
|
+
},
|
|
1755
1771
|
},
|
|
1756
|
-
},
|
|
1757
|
-
}
|
|
1758
|
-
if lookup_payload is not None:
|
|
1759
|
-
response["lookup"] = lookup_payload
|
|
1760
|
-
if normalized_output_profile == "normalized":
|
|
1761
|
-
normalized_rows = list_data.get("normalized_rows")
|
|
1762
|
-
if isinstance(normalized_rows, list):
|
|
1763
|
-
item_by_apply_id = {
|
|
1764
|
-
_coerce_count(item.get("apply_id")): item
|
|
1765
|
-
for item in cast(list[JSONObject], response["data"]["items"])
|
|
1766
|
-
if isinstance(item, dict) and _coerce_count(item.get("apply_id")) is not None
|
|
1767
|
-
}
|
|
1768
|
-
for entry in normalized_rows:
|
|
1769
|
-
if not isinstance(entry, dict):
|
|
1770
|
-
continue
|
|
1771
|
-
apply_id = _coerce_count(entry.get("apply_id"))
|
|
1772
|
-
if apply_id is None:
|
|
1773
|
-
continue
|
|
1774
|
-
target = item_by_apply_id.get(apply_id)
|
|
1775
|
-
if target is None:
|
|
1776
|
-
continue
|
|
1777
|
-
target["normalized_record"] = cast(JSONObject, entry.get("normalized_record") or {})
|
|
1778
|
-
target["normalized_ambiguous_fields"] = cast(JSONObject, entry.get("normalized_ambiguous_fields") or {})
|
|
1779
|
-
if normalized_output_profile == "verbose":
|
|
1780
|
-
response["data"]["debug"] = {
|
|
1781
|
-
"completeness": raw.get("completeness"),
|
|
1782
|
-
"evidence": raw.get("evidence"),
|
|
1783
|
-
"resolved_mappings": raw.get("resolved_mappings"),
|
|
1784
|
-
"row_cap_hit": list_data.get("row_cap_hit"),
|
|
1785
|
-
"sample_only": list_data.get("sample_only"),
|
|
1786
1772
|
}
|
|
1787
|
-
|
|
1773
|
+
if lookup_payload is not None:
|
|
1774
|
+
response["lookup"] = lookup_payload
|
|
1775
|
+
if normalized_output_profile == "normalized":
|
|
1776
|
+
normalized_rows = list_data.get("normalized_rows")
|
|
1777
|
+
if isinstance(normalized_rows, list):
|
|
1778
|
+
item_by_apply_id = {
|
|
1779
|
+
_coerce_count(item.get("apply_id")): item
|
|
1780
|
+
for item in cast(list[JSONObject], response["data"]["items"])
|
|
1781
|
+
if isinstance(item, dict) and _coerce_count(item.get("apply_id")) is not None
|
|
1782
|
+
}
|
|
1783
|
+
for entry in normalized_rows:
|
|
1784
|
+
if not isinstance(entry, dict):
|
|
1785
|
+
continue
|
|
1786
|
+
apply_id = _coerce_count(entry.get("apply_id"))
|
|
1787
|
+
if apply_id is None:
|
|
1788
|
+
continue
|
|
1789
|
+
target = item_by_apply_id.get(apply_id)
|
|
1790
|
+
if target is None:
|
|
1791
|
+
continue
|
|
1792
|
+
target["normalized_record"] = cast(JSONObject, entry.get("normalized_record") or {})
|
|
1793
|
+
target["normalized_ambiguous_fields"] = cast(JSONObject, entry.get("normalized_ambiguous_fields") or {})
|
|
1794
|
+
if normalized_output_profile == "verbose":
|
|
1795
|
+
response["data"]["debug"] = {
|
|
1796
|
+
"completeness": raw.get("completeness"),
|
|
1797
|
+
"evidence": raw.get("evidence"),
|
|
1798
|
+
"resolved_mappings": raw.get("resolved_mappings"),
|
|
1799
|
+
"row_cap_hit": list_data.get("row_cap_hit"),
|
|
1800
|
+
"sample_only": list_data.get("sample_only"),
|
|
1801
|
+
}
|
|
1802
|
+
return response
|
|
1803
|
+
|
|
1804
|
+
return self._run_record_tool(profile, runner)
|
|
1788
1805
|
|
|
1789
1806
|
@tool_cn_name("记录访问")
|
|
1790
1807
|
def record_access(
|
|
@@ -2304,12 +2321,23 @@ class RecordTools(ToolBase):
|
|
|
2304
2321
|
fields=fields,
|
|
2305
2322
|
references=references,
|
|
2306
2323
|
)
|
|
2324
|
+
file_assets = self._record_get_file_assets(
|
|
2325
|
+
context,
|
|
2326
|
+
app_key=app_key,
|
|
2327
|
+
record_id=record_id_int,
|
|
2328
|
+
resolved_view=resolved_view,
|
|
2329
|
+
audit_node_id=cast(int | None, audit_context.get("audit_node_id")),
|
|
2330
|
+
fields=fields,
|
|
2331
|
+
references=references,
|
|
2332
|
+
media_assets=media_assets,
|
|
2333
|
+
)
|
|
2307
2334
|
context_integrity = _record_detail_context_integrity(
|
|
2308
2335
|
references=references,
|
|
2309
2336
|
data_logs=data_logs,
|
|
2310
2337
|
workflow_logs=workflow_logs,
|
|
2311
2338
|
associated_resources=associated_resources,
|
|
2312
2339
|
media_assets=media_assets,
|
|
2340
|
+
file_assets=file_assets,
|
|
2313
2341
|
unavailable_context=unavailable_context,
|
|
2314
2342
|
)
|
|
2315
2343
|
payload: JSONObject = {
|
|
@@ -2332,6 +2360,7 @@ class RecordTools(ToolBase):
|
|
|
2332
2360
|
"requested_focus_fields": [_column_selector_payload(field_id) for field_id in requested_focus_field_ids],
|
|
2333
2361
|
"references": references,
|
|
2334
2362
|
"media_assets": media_assets,
|
|
2363
|
+
"file_assets": file_assets,
|
|
2335
2364
|
"data_logs": data_logs,
|
|
2336
2365
|
"workflow_logs": workflow_logs,
|
|
2337
2366
|
"associated_resources": associated_resources,
|
|
@@ -2867,6 +2896,53 @@ class RecordTools(ToolBase):
|
|
|
2867
2896
|
],
|
|
2868
2897
|
}
|
|
2869
2898
|
|
|
2899
|
+
def _record_get_file_assets(
|
|
2900
|
+
self,
|
|
2901
|
+
context, # type: ignore[no-untyped-def]
|
|
2902
|
+
*,
|
|
2903
|
+
app_key: str,
|
|
2904
|
+
record_id: int,
|
|
2905
|
+
resolved_view: AccessibleViewRoute,
|
|
2906
|
+
audit_node_id: int | None,
|
|
2907
|
+
fields: list[JSONObject],
|
|
2908
|
+
references: list[JSONObject],
|
|
2909
|
+
media_assets: JSONObject,
|
|
2910
|
+
) -> JSONObject:
|
|
2911
|
+
"""Collect and localize file assets from the frontend detail context."""
|
|
2912
|
+
try:
|
|
2913
|
+
def refresh_source_url(candidate: JSONObject) -> str | None:
|
|
2914
|
+
return self._record_get_refreshed_media_source_url(
|
|
2915
|
+
context,
|
|
2916
|
+
app_key=app_key,
|
|
2917
|
+
record_id=record_id,
|
|
2918
|
+
resolved_view=resolved_view,
|
|
2919
|
+
audit_node_id=audit_node_id,
|
|
2920
|
+
candidate=candidate,
|
|
2921
|
+
)
|
|
2922
|
+
|
|
2923
|
+
return _record_detail_file_assets_payload(
|
|
2924
|
+
backend=self.backend,
|
|
2925
|
+
context=context,
|
|
2926
|
+
app_key=app_key,
|
|
2927
|
+
record_id=record_id,
|
|
2928
|
+
fields=fields,
|
|
2929
|
+
references=references,
|
|
2930
|
+
media_assets=media_assets,
|
|
2931
|
+
refresh_source_url=refresh_source_url,
|
|
2932
|
+
)
|
|
2933
|
+
except Exception as exc: # defensive: file assets should never break the core record detail.
|
|
2934
|
+
return {
|
|
2935
|
+
"status": "unavailable",
|
|
2936
|
+
"local_dir": None,
|
|
2937
|
+
"items": [],
|
|
2938
|
+
"warnings": [
|
|
2939
|
+
{
|
|
2940
|
+
"code": "FILE_ASSETS_UNAVAILABLE",
|
|
2941
|
+
"message": f"record_get could not collect file assets: {exc}",
|
|
2942
|
+
}
|
|
2943
|
+
],
|
|
2944
|
+
}
|
|
2945
|
+
|
|
2870
2946
|
def _record_get_refreshed_media_source_url(
|
|
2871
2947
|
self,
|
|
2872
2948
|
context, # type: ignore[no-untyped-def]
|
|
@@ -7896,6 +7972,172 @@ class RecordTools(ToolBase):
|
|
|
7896
7972
|
|
|
7897
7973
|
return self._run_record_tool(profile, runner)
|
|
7898
7974
|
|
|
7975
|
+
def _record_list_query_view_fields(
|
|
7976
|
+
self,
|
|
7977
|
+
*,
|
|
7978
|
+
session_profile,
|
|
7979
|
+
context,
|
|
7980
|
+
app_key: str,
|
|
7981
|
+
view_route: AccessibleViewRoute,
|
|
7982
|
+
page_num: int,
|
|
7983
|
+
page_size: int,
|
|
7984
|
+
query_key: str | None,
|
|
7985
|
+
search_que_ids: list[int] | None,
|
|
7986
|
+
match_rules: list[JSONObject],
|
|
7987
|
+
sort_rules: list[JSONObject],
|
|
7988
|
+
max_rows: int,
|
|
7989
|
+
selected_fields: list[FormField],
|
|
7990
|
+
output_profile: str,
|
|
7991
|
+
) -> JSONObject:
|
|
7992
|
+
"""Run public record_list with fields already resolved from the selected view schema."""
|
|
7993
|
+
view_selection = view_route.view_selection
|
|
7994
|
+
current_page = max(page_num, 1)
|
|
7995
|
+
used_list_type: int | None = None
|
|
7996
|
+
if view_selection is not None:
|
|
7997
|
+
fallback_list_types = [view_route.list_type if view_route.list_type is not None else DEFAULT_RECORD_LIST_TYPE]
|
|
7998
|
+
elif view_route.list_type is not None and view_route.list_type != DEFAULT_RECORD_LIST_TYPE:
|
|
7999
|
+
fallback_list_types = [view_route.list_type]
|
|
8000
|
+
else:
|
|
8001
|
+
fallback_list_types = [DEFAULT_RECORD_LIST_TYPE, 14, 1, 2, 12]
|
|
8002
|
+
last_error: QingflowApiError | None = None
|
|
8003
|
+
page: JSONObject | None = None
|
|
8004
|
+
for candidate_list_type in fallback_list_types:
|
|
8005
|
+
try:
|
|
8006
|
+
page = self._search_page(
|
|
8007
|
+
context,
|
|
8008
|
+
app_key=app_key,
|
|
8009
|
+
view_selection=view_selection,
|
|
8010
|
+
page_num=current_page,
|
|
8011
|
+
page_size=page_size,
|
|
8012
|
+
query_key=query_key,
|
|
8013
|
+
match_rules=match_rules,
|
|
8014
|
+
sorts=sort_rules,
|
|
8015
|
+
search_que_ids=search_que_ids,
|
|
8016
|
+
list_type=candidate_list_type,
|
|
8017
|
+
)
|
|
8018
|
+
used_list_type = None if view_selection is not None else candidate_list_type
|
|
8019
|
+
break
|
|
8020
|
+
except QingflowApiError as exc:
|
|
8021
|
+
last_error = exc
|
|
8022
|
+
if self._should_retry_list_type_fallback(exc) and candidate_list_type != fallback_list_types[-1]:
|
|
8023
|
+
continue
|
|
8024
|
+
raise
|
|
8025
|
+
if page is None:
|
|
8026
|
+
if last_error is not None:
|
|
8027
|
+
raise last_error
|
|
8028
|
+
raise_tool_error(QingflowApiError.config_error("record_list failed: no accessible listType"))
|
|
8029
|
+
|
|
8030
|
+
page_rows = page.get("list")
|
|
8031
|
+
items = page_rows if isinstance(page_rows, list) else []
|
|
8032
|
+
reported_total = _coerce_count(page.get("total"))
|
|
8033
|
+
if reported_total is None:
|
|
8034
|
+
reported_total = _coerce_count(page.get("count"))
|
|
8035
|
+
result_amount = _effective_total(page, page_size)
|
|
8036
|
+
has_more = _page_has_more(page, current_page, page_size, len(items))
|
|
8037
|
+
rows: list[JSONObject] = []
|
|
8038
|
+
normalized_rows: list[JSONObject] = []
|
|
8039
|
+
page_apply_order: list[int] = []
|
|
8040
|
+
page_answer_map: dict[int, list[JSONValue]] = {}
|
|
8041
|
+
for item in items:
|
|
8042
|
+
if not isinstance(item, dict):
|
|
8043
|
+
continue
|
|
8044
|
+
answers = item.get("answers")
|
|
8045
|
+
answer_list = answers if isinstance(answers, list) else []
|
|
8046
|
+
apply_id = _coerce_count(item.get("applyId")) or _coerce_count(item.get("id"))
|
|
8047
|
+
row = _build_flat_row(answer_list, selected_fields, apply_id=apply_id)
|
|
8048
|
+
rows.append(row)
|
|
8049
|
+
if apply_id is not None:
|
|
8050
|
+
page_apply_order.append(apply_id)
|
|
8051
|
+
page_answer_map[apply_id] = cast(list[JSONValue], answer_list)
|
|
8052
|
+
if len(rows) >= max_rows:
|
|
8053
|
+
break
|
|
8054
|
+
if output_profile == "verbose" and page_apply_order:
|
|
8055
|
+
for apply_id in page_apply_order:
|
|
8056
|
+
normalized_record, normalized_ambiguous_fields = _build_normalized_row_from_answers(
|
|
8057
|
+
page_answer_map.get(apply_id, []),
|
|
8058
|
+
selected_fields,
|
|
8059
|
+
)
|
|
8060
|
+
normalized_rows.append(
|
|
8061
|
+
{
|
|
8062
|
+
"apply_id": apply_id,
|
|
8063
|
+
"normalized_record": normalized_record,
|
|
8064
|
+
"normalized_ambiguous_fields": normalized_ambiguous_fields,
|
|
8065
|
+
}
|
|
8066
|
+
)
|
|
8067
|
+
effective_result_amount = result_amount if result_amount is not None else len(rows)
|
|
8068
|
+
completeness = _build_completeness(
|
|
8069
|
+
result_amount=effective_result_amount,
|
|
8070
|
+
returned_items=len(rows),
|
|
8071
|
+
fetched_pages=1,
|
|
8072
|
+
requested_pages=1,
|
|
8073
|
+
has_more=has_more,
|
|
8074
|
+
next_page_token=None,
|
|
8075
|
+
is_complete=not has_more and len(rows) < max_rows,
|
|
8076
|
+
omitted_items=max(0, effective_result_amount - len(rows)),
|
|
8077
|
+
extra={},
|
|
8078
|
+
)
|
|
8079
|
+
evidence = {
|
|
8080
|
+
"query_id": _query_id(),
|
|
8081
|
+
"app_key": app_key,
|
|
8082
|
+
"filters": _echo_filters(match_rules),
|
|
8083
|
+
"selected_columns": [field.que_title for field in selected_fields],
|
|
8084
|
+
"time_range": None,
|
|
8085
|
+
"source_pages": [current_page],
|
|
8086
|
+
"view": _view_selection_payload(view_selection),
|
|
8087
|
+
"backend_reported_total": reported_total,
|
|
8088
|
+
}
|
|
8089
|
+
response: JSONObject = {
|
|
8090
|
+
"profile": session_profile.profile,
|
|
8091
|
+
"ws_id": session_profile.selected_ws_id,
|
|
8092
|
+
"ok": True,
|
|
8093
|
+
"request_route": self._request_route_payload(context),
|
|
8094
|
+
"data": {
|
|
8095
|
+
"mode": "list",
|
|
8096
|
+
"source_tool": "record_list",
|
|
8097
|
+
"view": _view_selection_payload(view_selection),
|
|
8098
|
+
"list": {
|
|
8099
|
+
"rows": rows,
|
|
8100
|
+
"row_cap_hit": _list_row_cap_hit(returned_items=len(rows), row_cap=max_rows),
|
|
8101
|
+
"sample_only": _list_sample_only(
|
|
8102
|
+
returned_items=len(rows),
|
|
8103
|
+
row_cap=max_rows,
|
|
8104
|
+
result_amount=effective_result_amount,
|
|
8105
|
+
),
|
|
8106
|
+
"safe_for_final_conclusion": False,
|
|
8107
|
+
"analysis_warning": _list_sample_warning(
|
|
8108
|
+
returned_items=len(rows),
|
|
8109
|
+
row_cap=max_rows,
|
|
8110
|
+
result_amount=effective_result_amount,
|
|
8111
|
+
),
|
|
8112
|
+
"pagination": {
|
|
8113
|
+
"page_num": current_page,
|
|
8114
|
+
"page_size": page_size,
|
|
8115
|
+
"requested_pages": 1,
|
|
8116
|
+
"result_amount": effective_result_amount,
|
|
8117
|
+
"returned_items": len(rows),
|
|
8118
|
+
"list_type_used": used_list_type,
|
|
8119
|
+
},
|
|
8120
|
+
"applied_limits": {
|
|
8121
|
+
"row_cap": max_rows,
|
|
8122
|
+
"column_cap": len(selected_fields),
|
|
8123
|
+
"selected_columns": [field.que_title for field in selected_fields],
|
|
8124
|
+
},
|
|
8125
|
+
},
|
|
8126
|
+
},
|
|
8127
|
+
"output_profile": output_profile,
|
|
8128
|
+
"next_page_token": None,
|
|
8129
|
+
}
|
|
8130
|
+
if output_profile == "verbose":
|
|
8131
|
+
cast(JSONObject, cast(JSONObject, response["data"])["list"])["normalized_rows"] = normalized_rows
|
|
8132
|
+
response["completeness"] = completeness
|
|
8133
|
+
response["evidence"] = evidence
|
|
8134
|
+
response["resolved_mappings"] = {
|
|
8135
|
+
"select_columns": [_field_mapping_entry("row", field, requested=field.que_title) for field in selected_fields],
|
|
8136
|
+
"filters": [],
|
|
8137
|
+
"time_range": None,
|
|
8138
|
+
}
|
|
8139
|
+
return response
|
|
8140
|
+
|
|
7899
8141
|
def _get_form_schema(self, profile: str, context, app_key: str, *, force_refresh: bool) -> JSONObject: # type: ignore[no-untyped-def]
|
|
7900
8142
|
"""执行内部辅助逻辑。"""
|
|
7901
8143
|
cache_key = (profile, app_key, "applicant_node", None)
|
|
@@ -8392,63 +8634,7 @@ class RecordTools(ToolBase):
|
|
|
8392
8634
|
force_refresh=False,
|
|
8393
8635
|
)
|
|
8394
8636
|
index = cast(FieldIndex, browse_scope["index"])
|
|
8395
|
-
|
|
8396
|
-
resolved: list[int] = []
|
|
8397
|
-
seen: set[int] = set()
|
|
8398
|
-
for selector in selectors:
|
|
8399
|
-
try:
|
|
8400
|
-
field = self._resolve_field_selector(selector, index, location="record_list.query_fields")
|
|
8401
|
-
except RecordInputError as exc:
|
|
8402
|
-
if exc.error_code == "FIELD_NOT_FOUND":
|
|
8403
|
-
raise RecordInputError(
|
|
8404
|
-
message=(
|
|
8405
|
-
f"record_list query field_id '{selector}' is not in the selected view schema "
|
|
8406
|
-
f"({resolved_view.view_id})."
|
|
8407
|
-
),
|
|
8408
|
-
error_code="QUERY_FIELD_NOT_IN_VIEW_SCHEMA",
|
|
8409
|
-
fix_hint="Call record_browse_schema_get for this exact view_id and pass only field_id values from its fields[].",
|
|
8410
|
-
details={
|
|
8411
|
-
"location": "record_list.query_fields",
|
|
8412
|
-
"requested": selector,
|
|
8413
|
-
"view_id": resolved_view.view_id,
|
|
8414
|
-
"view_name": resolved_view.name,
|
|
8415
|
-
},
|
|
8416
|
-
) from exc
|
|
8417
|
-
raise
|
|
8418
|
-
if field.que_id not in visible_question_ids:
|
|
8419
|
-
raise RecordInputError(
|
|
8420
|
-
message=(
|
|
8421
|
-
f"record_list query field_id '{field.que_id}' is not readable in the selected view "
|
|
8422
|
-
f"({resolved_view.view_id})."
|
|
8423
|
-
),
|
|
8424
|
-
error_code="QUERY_FIELD_NOT_IN_VIEW_SCHEMA",
|
|
8425
|
-
fix_hint="Call record_browse_schema_get for this exact view_id and pass only field_id values from its fields[].",
|
|
8426
|
-
details={
|
|
8427
|
-
"location": "record_list.query_fields",
|
|
8428
|
-
"requested": selector,
|
|
8429
|
-
"field_id": field.que_id,
|
|
8430
|
-
"view_id": resolved_view.view_id,
|
|
8431
|
-
"view_name": resolved_view.name,
|
|
8432
|
-
},
|
|
8433
|
-
)
|
|
8434
|
-
if field.que_id in seen:
|
|
8435
|
-
continue
|
|
8436
|
-
resolved.append(field.que_id)
|
|
8437
|
-
seen.add(field.que_id)
|
|
8438
|
-
if len(resolved) > BACKEND_LIST_SEARCH_FIELD_LIMIT:
|
|
8439
|
-
raise RecordInputError(
|
|
8440
|
-
message=(
|
|
8441
|
-
f"record_list query_fields supports at most {BACKEND_LIST_SEARCH_FIELD_LIMIT} fields."
|
|
8442
|
-
),
|
|
8443
|
-
error_code="QUERY_FIELDS_TOO_MANY",
|
|
8444
|
-
fix_hint="Narrow query_fields to the most likely title/name/customer/number fields, or omit query_fields to use the backend default search scope.",
|
|
8445
|
-
details={
|
|
8446
|
-
"location": "record_list.query_fields",
|
|
8447
|
-
"max_fields": BACKEND_LIST_SEARCH_FIELD_LIMIT,
|
|
8448
|
-
"received": len(resolved),
|
|
8449
|
-
},
|
|
8450
|
-
)
|
|
8451
|
-
return resolved
|
|
8637
|
+
return self._resolve_record_list_query_fields(selectors, index, view_route=resolved_view)
|
|
8452
8638
|
|
|
8453
8639
|
return cast(list[int], self._run_record_tool(profile, runner))
|
|
8454
8640
|
|
|
@@ -8460,7 +8646,7 @@ class RecordTools(ToolBase):
|
|
|
8460
8646
|
resolved_view: AccessibleViewRoute,
|
|
8461
8647
|
) -> list[int]:
|
|
8462
8648
|
"""执行内部辅助逻辑。"""
|
|
8463
|
-
browse_scope = self.
|
|
8649
|
+
browse_scope = self._build_browse_read_scope(
|
|
8464
8650
|
profile,
|
|
8465
8651
|
context,
|
|
8466
8652
|
app_key,
|
|
@@ -8468,33 +8654,7 @@ class RecordTools(ToolBase):
|
|
|
8468
8654
|
force_refresh=False,
|
|
8469
8655
|
)
|
|
8470
8656
|
index = cast(FieldIndex, browse_scope["index"])
|
|
8471
|
-
|
|
8472
|
-
ordered_visible_fields = [
|
|
8473
|
-
field
|
|
8474
|
-
for field in self._schema_fields_for_mode(
|
|
8475
|
-
profile,
|
|
8476
|
-
context,
|
|
8477
|
-
app_key,
|
|
8478
|
-
index,
|
|
8479
|
-
schema_mode="browse",
|
|
8480
|
-
resolved_view=resolved_view,
|
|
8481
|
-
)
|
|
8482
|
-
if field.que_id in visible_question_ids and field.que_type not in LAYOUT_ONLY_QUE_TYPES
|
|
8483
|
-
]
|
|
8484
|
-
field_ids = [field.que_id for field in ordered_visible_fields[:MAX_LIST_COLUMN_LIMIT]]
|
|
8485
|
-
if not field_ids:
|
|
8486
|
-
field_ids = [
|
|
8487
|
-
field.que_id
|
|
8488
|
-
for field in index.by_id.values()
|
|
8489
|
-
if field.que_type not in LAYOUT_ONLY_QUE_TYPES
|
|
8490
|
-
][:MAX_LIST_COLUMN_LIMIT]
|
|
8491
|
-
if not field_ids:
|
|
8492
|
-
raise_tool_error(
|
|
8493
|
-
QingflowApiError.config_error(
|
|
8494
|
-
"record_list could not determine readable columns for the selected view"
|
|
8495
|
-
)
|
|
8496
|
-
)
|
|
8497
|
-
return field_ids
|
|
8657
|
+
return [field.que_id for field in self._derive_record_list_fields_from_index(index)]
|
|
8498
8658
|
|
|
8499
8659
|
def _get_view_question_ids(self, profile: str, context, view_key: str) -> set[int]: # type: ignore[no-untyped-def]
|
|
8500
8660
|
"""执行内部辅助逻辑。"""
|
|
@@ -10446,14 +10606,161 @@ class RecordTools(ToolBase):
|
|
|
10446
10606
|
seen.add(field.que_id)
|
|
10447
10607
|
return fields
|
|
10448
10608
|
|
|
10449
|
-
def
|
|
10609
|
+
def _derive_record_list_fields_from_index(self, index: FieldIndex) -> list[FormField]:
|
|
10610
|
+
fields = [
|
|
10611
|
+
field
|
|
10612
|
+
for field in index.by_id.values()
|
|
10613
|
+
if field.que_type not in LAYOUT_ONLY_QUE_TYPES
|
|
10614
|
+
][:MAX_LIST_COLUMN_LIMIT]
|
|
10615
|
+
if not fields:
|
|
10616
|
+
raise_tool_error(
|
|
10617
|
+
QingflowApiError.config_error(
|
|
10618
|
+
"record_list could not determine readable columns for the selected view"
|
|
10619
|
+
)
|
|
10620
|
+
)
|
|
10621
|
+
return fields
|
|
10622
|
+
|
|
10623
|
+
def _resolve_record_list_columns(
|
|
10450
10624
|
self,
|
|
10451
|
-
selectors: list[
|
|
10625
|
+
selectors: list[int],
|
|
10452
10626
|
index: FieldIndex,
|
|
10453
|
-
amount_field: FormField | None,
|
|
10454
|
-
time_field: FormField | None,
|
|
10455
10627
|
*,
|
|
10456
|
-
|
|
10628
|
+
view_route: AccessibleViewRoute,
|
|
10629
|
+
) -> list[FormField]:
|
|
10630
|
+
if not selectors:
|
|
10631
|
+
raise_tool_error(QingflowApiError.config_error("columns is required"))
|
|
10632
|
+
fields: list[FormField] = []
|
|
10633
|
+
seen: set[int] = set()
|
|
10634
|
+
for selector in selectors:
|
|
10635
|
+
try:
|
|
10636
|
+
field = self._resolve_field_selector(selector, index, location="record_list.columns")
|
|
10637
|
+
except RecordInputError as exc:
|
|
10638
|
+
if exc.error_code == "FIELD_NOT_FOUND":
|
|
10639
|
+
raise self._record_list_field_not_in_view_error(
|
|
10640
|
+
exc,
|
|
10641
|
+
location="record_list.columns",
|
|
10642
|
+
error_code="FIELD_NOT_IN_VIEW_SCHEMA",
|
|
10643
|
+
view_route=view_route,
|
|
10644
|
+
) from exc
|
|
10645
|
+
raise
|
|
10646
|
+
if field.que_id in seen:
|
|
10647
|
+
continue
|
|
10648
|
+
fields.append(field)
|
|
10649
|
+
seen.add(field.que_id)
|
|
10650
|
+
return fields
|
|
10651
|
+
|
|
10652
|
+
def _resolve_record_list_query_fields(
    self,
    selectors: list[int],
    index: FieldIndex,
    *,
    view_route: AccessibleViewRoute,
) -> list[int]:
    """Resolve record_list ``query_fields`` selectors to unique field ids.

    A ``FIELD_NOT_FOUND`` input error becomes a
    ``QUERY_FIELD_NOT_IN_VIEW_SCHEMA`` error for the selected view; other
    input errors propagate.  After de-duplication the number of ids must not
    exceed ``BACKEND_LIST_SEARCH_FIELD_LIMIT``, otherwise a
    ``QUERY_FIELDS_TOO_MANY`` input error is raised.
    """
    location = "record_list.query_fields"
    # Insertion-ordered dict used as an ordered set of que_id values.
    ordered_ids: dict[int, None] = {}
    for selector in selectors:
        try:
            resolved_field = self._resolve_field_selector(selector, index, location=location)
        except RecordInputError as exc:
            if exc.error_code != "FIELD_NOT_FOUND":
                raise
            raise self._record_list_field_not_in_view_error(
                exc,
                location=location,
                error_code="QUERY_FIELD_NOT_IN_VIEW_SCHEMA",
                view_route=view_route,
            ) from exc
        ordered_ids.setdefault(resolved_field.que_id, None)
    resolved = list(ordered_ids)
    if len(resolved) <= BACKEND_LIST_SEARCH_FIELD_LIMIT:
        return resolved
    raise RecordInputError(
        message=(
            f"record_list query_fields supports at most {BACKEND_LIST_SEARCH_FIELD_LIMIT} fields."
        ),
        error_code="QUERY_FIELDS_TOO_MANY",
        fix_hint="Narrow query_fields to the most likely title/name/customer/number fields, or omit query_fields to use the backend default search scope.",
        details={
            "location": location,
            "max_fields": BACKEND_LIST_SEARCH_FIELD_LIMIT,
            "received": len(resolved),
        },
    )
|
|
10691
|
+
|
|
10692
|
+
def _resolve_record_list_match_rules(
|
|
10693
|
+
self,
|
|
10694
|
+
context, # type: ignore[no-untyped-def]
|
|
10695
|
+
filters: list[JSONObject],
|
|
10696
|
+
index: FieldIndex,
|
|
10697
|
+
*,
|
|
10698
|
+
view_route: AccessibleViewRoute,
|
|
10699
|
+
) -> list[JSONObject]:
|
|
10700
|
+
try:
|
|
10701
|
+
return self._resolve_match_rules(context, filters, index)
|
|
10702
|
+
except RecordInputError as exc:
|
|
10703
|
+
if exc.error_code == "FIELD_NOT_FOUND":
|
|
10704
|
+
raise self._record_list_field_not_in_view_error(
|
|
10705
|
+
exc,
|
|
10706
|
+
location="record_list.where",
|
|
10707
|
+
error_code="FILTER_FIELD_NOT_IN_VIEW_SCHEMA",
|
|
10708
|
+
view_route=view_route,
|
|
10709
|
+
) from exc
|
|
10710
|
+
raise
|
|
10711
|
+
|
|
10712
|
+
def _resolve_record_list_sort_rules(
|
|
10713
|
+
self,
|
|
10714
|
+
sorts: list[JSONObject],
|
|
10715
|
+
index: FieldIndex,
|
|
10716
|
+
*,
|
|
10717
|
+
view_route: AccessibleViewRoute,
|
|
10718
|
+
) -> list[JSONObject]:
|
|
10719
|
+
try:
|
|
10720
|
+
return self._resolve_sorts(sorts, index)
|
|
10721
|
+
except RecordInputError as exc:
|
|
10722
|
+
if exc.error_code == "FIELD_NOT_FOUND":
|
|
10723
|
+
raise self._record_list_field_not_in_view_error(
|
|
10724
|
+
exc,
|
|
10725
|
+
location="record_list.order_by",
|
|
10726
|
+
error_code="SORT_FIELD_NOT_IN_VIEW_SCHEMA",
|
|
10727
|
+
view_route=view_route,
|
|
10728
|
+
) from exc
|
|
10729
|
+
raise
|
|
10730
|
+
|
|
10731
|
+
def _record_list_field_not_in_view_error(
    self,
    exc: RecordInputError,
    *,
    location: str,
    error_code: str,
    view_route: AccessibleViewRoute,
) -> RecordInputError:
    """Build (not raise) the "field is not in the selected view" input error.

    The requested selector is read from ``exc.details["requested"]`` when
    ``exc.details`` is a dict, otherwise it is ``None``.  The returned error
    carries the caller-supplied ``location`` and ``error_code`` plus the
    view id and name from ``view_route``.
    """
    source_details = exc.details
    requested = source_details.get("requested") if isinstance(source_details, dict) else None
    view_id = view_route.view_id
    message = f"{location} field_id '{requested}' is not in the selected view schema ({view_id})."
    return RecordInputError(
        message=message,
        error_code=error_code,
        fix_hint="Call record_browse_schema_get for this exact view_id and pass only field_id values from its fields[].",
        details={
            "location": location,
            "requested": requested,
            "view_id": view_id,
            "view_name": view_route.name,
        },
    )
|
|
10755
|
+
|
|
10756
|
+
def _resolve_summary_preview_fields(
|
|
10757
|
+
self,
|
|
10758
|
+
selectors: list[str | int],
|
|
10759
|
+
index: FieldIndex,
|
|
10760
|
+
amount_field: FormField | None,
|
|
10761
|
+
time_field: FormField | None,
|
|
10762
|
+
*,
|
|
10763
|
+
max_columns: int | None,
|
|
10457
10764
|
) -> list[FormField]:
|
|
10458
10765
|
"""执行内部辅助逻辑。"""
|
|
10459
10766
|
if selectors:
|
|
@@ -13412,6 +13719,19 @@ _RECORD_MEDIA_IMG_SRC_RE = re.compile(r"""<img\b[^>]*\bsrc\s*=\s*["']?([^"'\s>]+
|
|
|
13412
13719
|
_RECORD_MEDIA_MD_IMAGE_RE = re.compile(r"""!\[[^\]]*]\(([^)\s]+)(?:\s+["'][^"']*["'])?\)""")
|
|
13413
13720
|
_RECORD_MEDIA_URL_RE = re.compile(r"""https?://[^\s<>"')\]]+""", re.IGNORECASE)
|
|
13414
13721
|
_RECORD_MEDIA_IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".svg"}
|
|
13722
|
+
_RECORD_FILE_EXTENSIONS = _RECORD_MEDIA_IMAGE_EXTENSIONS | {
|
|
13723
|
+
".csv",
|
|
13724
|
+
".doc",
|
|
13725
|
+
".docx",
|
|
13726
|
+
".json",
|
|
13727
|
+
".md",
|
|
13728
|
+
".pdf",
|
|
13729
|
+
".text",
|
|
13730
|
+
".txt",
|
|
13731
|
+
".xls",
|
|
13732
|
+
".xlsm",
|
|
13733
|
+
".xlsx",
|
|
13734
|
+
}
|
|
13415
13735
|
_RECORD_MEDIA_IMAGE_URL_KEYS = {
|
|
13416
13736
|
"image",
|
|
13417
13737
|
"imageurl",
|
|
@@ -13431,6 +13751,15 @@ _RECORD_MEDIA_IMAGE_URL_KEYS = {
|
|
|
13431
13751
|
"url",
|
|
13432
13752
|
"value",
|
|
13433
13753
|
}
|
|
13754
|
+
# Normalized mapping keys whose value may hold a file URL: every image URL
# key plus the download/link style keys listed here.
_RECORD_FILE_URL_KEYS = _RECORD_MEDIA_IMAGE_URL_KEYS.union(
    {
        "downloadurl",
        "download_url",
        "file",
        "href",
        "link",
        "path",
    }
)
# Normalized mapping keys whose value may hold a display name for the file.
_RECORD_FILE_NAME_KEYS = {"name", "otherinfo", "filename", "file_name", "title"}
|
|
13434
13763
|
|
|
13435
13764
|
|
|
13436
13765
|
def _record_detail_media_assets_payload(
|
|
@@ -13656,6 +13985,286 @@ def _record_detail_media_assets_payload(
|
|
|
13656
13985
|
return {"status": status, "local_dir": str(local_dir) if items else None, "items": items, "warnings": warnings}
|
|
13657
13986
|
|
|
13658
13987
|
|
|
13988
|
+
def _record_detail_file_assets_payload(
    *,
    backend: Any,
    context: BackendRequestContext,
    app_key: str,
    record_id: int,
    fields: list[JSONObject],
    references: list[JSONObject],
    media_assets: JSONObject,
    refresh_source_url: Any | None = None,
) -> JSONObject:
    """Collect, download and describe the file assets referenced by a record.

    File candidates are gathered from ``fields`` and from the
    ``target_fields`` of every entry in ``references``.  Each distinct
    candidate URL yields one entry in the returned ``items`` list and a
    ``file_NNNN`` id that is also attached to the owning field dict under
    ``file_asset_ids`` (the field dicts are mutated in place).

    Args:
        backend: Passed through to ``_record_detail_download_media_content``.
        context: Request context passed through to the downloader.
        app_key: App key recorded as the source of the record's own fields
            and used as the fallback for references without a
            ``target_app_key``.
        record_id: Record id; converted with ``_public_record_id_text``.
        fields: Record field payloads to scan (non-dict entries are ignored).
        references: Reference payloads whose ``target_fields`` are scanned
            with the forced source ``"reference_target"``.
        media_assets: Media payload; its ``items`` are indexed by URL and
            asset id so already-downloaded media can be reused.
        refresh_source_url: Optional callable invoked with the candidate
            after a 401/403 download failure (unless the strategy is
            ``"referer_acl"``); its result is normalized to text and, when
            different from the current URL, retried once.

    Returns:
        A dict with ``status`` (``"none"``, ``"partial"`` or ``"ok"``),
        ``local_dir`` (string path, or ``None`` when there are no items),
        ``items`` and ``warnings``.
    """
    candidates: list[JSONObject] = []
    source_record_id = _public_record_id_text(record_id)
    # Candidates from the record's own fields.
    for field in fields:
        if isinstance(field, dict):
            candidates.extend(
                _record_detail_file_candidates_from_field(
                    field,
                    source_app_key=app_key,
                    source_record_id=source_record_id,
                    forced_source=None,
                )
            )
    # Candidates from the target fields of each reference.
    for reference in references:
        if not isinstance(reference, dict):
            continue
        target_fields = reference.get("target_fields") if isinstance(reference.get("target_fields"), list) else []
        target_app_key = _normalize_optional_text(reference.get("target_app_key")) or app_key
        target_record_id = _normalize_optional_text(reference.get("target_record_id"))
        for field in target_fields:
            if isinstance(field, dict):
                candidates.extend(
                    _record_detail_file_candidates_from_field(
                        field,
                        source_app_key=target_app_key,
                        source_record_id=target_record_id,
                        forced_source="reference_target",
                    )
                )
    if not candidates:
        return {"status": "none", "local_dir": None, "items": [], "warnings": []}

    # A fresh per-call directory name; the directory itself is only created
    # by ensure_local_dir() right before the first file is written.
    local_dir = _record_detail_file_assets_dir(uuid4().hex)
    local_dir_created = False
    items: list[JSONObject] = []
    warnings: list[JSONObject] = []
    file_by_url: dict[str, str] = {}  # source_url -> file asset id already assigned
    media_by_url = _record_detail_media_assets_by_url(media_assets)
    media_by_asset_id = _record_detail_media_assets_by_asset_id(media_assets)
    total_bytes = 0  # bytes written by this call (reused media files are not counted)
    downloaded_count = 0  # files written or reused so far
    deadline = time.monotonic() + RECORD_GET_FILE_TIME_BUDGET_SECONDS
    stopped_for_time_budget = False
    environment_prefix_cache: dict[str, str] = {}

    def ensure_local_dir() -> None:
        """Create ``local_dir`` on first use."""
        nonlocal local_dir_created
        if not local_dir_created:
            local_dir.mkdir(parents=True, exist_ok=True)
            local_dir_created = True

    for candidate in candidates:
        # The time budget is only enforced once at least one item exists, so
        # the first candidate is always attempted.
        if items and time.monotonic() + RECORD_GET_FILE_MIN_REMAINING_SECONDS >= deadline:
            stopped_for_time_budget = True
            warnings.append(
                {
                    "code": "FILE_ASSET_TIME_BUDGET_EXCEEDED",
                    "message": "record_get stopped downloading additional file assets to stay within the internal time budget.",
                    "time_budget_seconds": RECORD_GET_FILE_TIME_BUDGET_SECONDS,
                }
            )
            break
        source_url = _normalize_optional_text(candidate.get("source_url"))
        owner = candidate.get("_owner")
        if not source_url or not isinstance(owner, dict):
            continue
        # Same URL seen before: link the existing asset id to this owner too.
        existing_asset_id = file_by_url.get(source_url)
        if existing_asset_id:
            _record_detail_attach_file_asset_id(owner, existing_asset_id)
            continue
        # Every path below appends exactly one item, so len(items) + 1 gives
        # the next sequential id.
        file_asset_id = f"file_{len(items) + 1:04d}"
        file_by_url[source_url] = file_asset_id
        _record_detail_attach_file_asset_id(owner, file_asset_id)
        base_item = _record_detail_file_asset_base_item(candidate, file_asset_id=file_asset_id)

        # Look for a matching media asset: first by URL, then via the owner's
        # single attached media asset id.
        media_item = media_by_url.get(source_url)
        if media_item is None:
            media_item = _record_detail_media_item_from_owner_asset_ids(owner, media_by_asset_id, candidate)
        if isinstance(media_item, dict) and media_item.get("asset_id") not in (None, ""):
            base_item["media_asset_id"] = media_item.get("asset_id")
        # File-count limit reached: record the asset without downloading it.
        if downloaded_count >= RECORD_GET_FILE_MAX_FILES:
            items.append(
                {
                    **base_item,
                    "local_path": None,
                    "mime_type": None,
                    "size_bytes": None,
                    "access_status": "too_large",
                    "download_strategy": "skipped_limit",
                    "readable_by_agent": False,
                    "extraction": {"status": "skipped_too_large", "text_path": None, "preview": None},
                }
            )
            warnings.append(
                {
                    "code": "FILE_ASSET_LIMIT_EXCEEDED",
                    "message": f"record_get stopped downloading files after {RECORD_GET_FILE_MAX_FILES} assets.",
                }
            )
            continue

        # Reuse the media asset's local file when it was already downloaded.
        reused_media_path = _normalize_optional_text(media_item.get("local_path")) if isinstance(media_item, dict) else None
        if reused_media_path and media_item.get("access_status") == "downloaded":
            file_name = _record_detail_file_name_from_candidate(candidate, source_url=source_url, fallback_id=file_asset_id)
            mime_type = _normalize_optional_text(media_item.get("mime_type")) or _record_detail_mime_from_url(source_url)
            items.append(
                {
                    **base_item,
                    "download_strategy": media_item.get("download_strategy"),
                    "storage_auth_type": media_item.get("storage_auth_type"),
                    "storage_cookie_prefix": media_item.get("storage_cookie_prefix"),
                    "redirected": media_item.get("redirected"),
                    "file_name": file_name,
                    "local_path": reused_media_path,
                    "mime_type": mime_type,
                    "size_bytes": media_item.get("size_bytes"),
                    "access_status": "downloaded",
                    "readable_by_agent": True,
                    "extraction": {"status": "unsupported", "text_path": None, "preview": None},
                }
            )
            downloaded_count += 1
            continue

        download_strategy = _record_detail_media_download_strategy(source_url)
        download_succeeded = False
        content: bytes = b""
        download_meta: JSONObject = {}
        try:
            content, download_meta = _record_detail_download_media_content(
                backend=backend,
                context=context,
                source_url=source_url,
                warnings=warnings,
                environment_prefix_cache=environment_prefix_cache,
                requested_strategy=download_strategy,
            )
            download_succeeded = True
        except QingflowApiError as exc:
            blocked = exc.http_status in {401, 403}
            # On an auth failure, ask the caller for a fresh URL and retry once.
            if blocked and download_strategy != "referer_acl" and callable(refresh_source_url):
                refreshed_url = _normalize_optional_text(refresh_source_url(candidate))
                if refreshed_url and refreshed_url != source_url:
                    refreshed_strategy = _record_detail_media_download_strategy(refreshed_url)
                    try:
                        content, download_meta = _record_detail_download_media_content(
                            backend=backend,
                            context=context,
                            source_url=refreshed_url,
                            warnings=warnings,
                            environment_prefix_cache=environment_prefix_cache,
                            requested_strategy=(
                                download_strategy
                                if download_strategy == "decrypted_file_url_then_storage_cookie_redirect"
                                else refreshed_strategy
                            ),
                        )
                        source_url = refreshed_url
                        base_item["source_url"] = refreshed_url
                        download_succeeded = True
                    except QingflowApiError as refreshed_exc:
                        # Report the retry's failure rather than the first one.
                        exc = refreshed_exc
                        blocked = exc.http_status in {401, 403}
                    else:
                        warnings.append(
                            {
                                "code": "FILE_ASSET_STORAGE_URL_REFRESHED",
                                "file_asset_id": file_asset_id,
                                "message": "record_get refreshed the record detail once before downloading this file asset.",
                            }
                        )
            # Kept inside the except block because it reads ``exc`` and
            # ``blocked``, which only exist here.
            if not download_succeeded:
                warning_code = "STORAGE_COOKIE_AUTH_FAILED" if blocked and download_strategy != "referer_acl" else "FILE_ASSET_DOWNLOAD_FAILED"
                items.append(
                    {
                        **base_item,
                        "storage_auth_type": _record_detail_storage_auth_type(source_url),
                        "storage_cookie_prefix": environment_prefix_cache.get("value"),
                        "redirected": False,
                        "local_path": None,
                        "mime_type": _record_detail_mime_from_url(source_url),
                        "size_bytes": None,
                        "access_status": "blocked_private_url" if blocked else "download_failed",
                        "download_strategy": download_strategy,
                        "readable_by_agent": False,
                        "extraction": {"status": "failed", "text_path": None, "preview": None},
                    }
                )
                warnings.append(
                    {
                        "code": warning_code,
                        "file_asset_id": file_asset_id,
                        "message": f"record_get could not download file asset {file_asset_id}: {exc.message}",
                        "http_status": exc.http_status,
                    }
                )
                continue

        if not isinstance(content, bytes):
            content = bytes(content or b"")
        file_name = _record_detail_file_name_from_candidate(candidate, source_url=source_url, fallback_id=file_asset_id)
        mime_type = _record_detail_file_mime_from_content_or_name(content, source_url=source_url, file_name=file_name)
        size_bytes = len(content)
        # Size limits are checked after the download, against the actual
        # content length (per file and cumulative).
        if size_bytes > RECORD_GET_FILE_MAX_BYTES or total_bytes + size_bytes > RECORD_GET_FILE_MAX_TOTAL_BYTES:
            items.append(
                {
                    **base_item,
                    **download_meta,
                    "file_name": file_name,
                    "local_path": None,
                    "mime_type": mime_type,
                    "size_bytes": size_bytes,
                    "access_status": "too_large",
                    "readable_by_agent": False,
                    "extraction": {"status": "skipped_too_large", "text_path": None, "preview": None},
                }
            )
            warnings.append(
                {
                    "code": "FILE_ASSET_SIZE_LIMIT_EXCEEDED",
                    "file_asset_id": file_asset_id,
                    "message": "record_get skipped a file asset because it exceeded the internal file size budget.",
                }
            )
            continue

        # Persist the file, then attempt text extraction.
        ensure_local_dir()
        extension = _record_detail_file_extension(mime_type, source_url=source_url, file_name=file_name)
        local_path = local_dir / f"{file_asset_id}{extension}"
        local_path.write_bytes(content)
        extraction = _record_detail_extract_file_asset_text(
            content,
            mime_type=mime_type,
            file_name=file_name,
            local_dir=local_dir,
            file_asset_id=file_asset_id,
        )
        if extraction.get("status") == "failed":
            warnings.append(
                {
                    "code": "FILE_ASSET_EXTRACTION_FAILED",
                    "file_asset_id": file_asset_id,
                    "message": f"record_get downloaded file asset {file_asset_id}, but text extraction failed.",
                }
            )
        total_bytes += size_bytes
        downloaded_count += 1
        items.append(
            {
                **base_item,
                **download_meta,
                "file_name": file_name,
                "local_path": str(local_path),
                "mime_type": mime_type,
                "size_bytes": size_bytes,
                "access_status": "downloaded",
                "readable_by_agent": extraction.get("status") == "ok" or _record_detail_image_mime_from_bytes(content) is not None,
                "extraction": extraction,
            }
        )

    # "partial" when the loop stopped early, any item was not downloaded, or
    # any extraction failed.
    if not items:
        status = "none"
    elif stopped_for_time_budget or any(item.get("access_status") != "downloaded" or cast(JSONObject, item.get("extraction", {})).get("status") == "failed" for item in items):
        status = "partial"
    else:
        status = "ok"
    return {"status": status, "local_dir": str(local_dir) if items else None, "items": items, "warnings": warnings}
|
|
14266
|
+
|
|
14267
|
+
|
|
13659
14268
|
def _record_detail_media_candidates_from_field(
|
|
13660
14269
|
field: JSONObject,
|
|
13661
14270
|
*,
|
|
@@ -13736,6 +14345,104 @@ def _record_detail_media_candidates_from_field(
|
|
|
13736
14345
|
return candidates
|
|
13737
14346
|
|
|
13738
14347
|
|
|
14348
|
+
def _record_detail_file_candidates_from_field(
    field: JSONObject,
    *,
    source_app_key: str | None,
    source_record_id: str | None,
    forced_source: str | None,
) -> list[JSONObject]:
    """Scan one field payload and return the file-download candidates it references.

    The field's ``value`` and ``display_value`` are walked recursively
    (strings, lists and dicts).  Each accepted URL produces one candidate
    dict holding the owning field (``_owner``), the source label, the path
    inside the payload, field/app/record identifiers, the normalized URL and
    an optional file name.  URLs are de-duplicated within the field.

    Args:
        field: Field payload; ``field_id``, ``title``, ``type``, ``value``
            and ``display_value`` are read from it.
        source_app_key: Stored on every candidate as ``source_app_key``.
        source_record_id: Stored on every candidate as ``source_record_id``.
        forced_source: When truthy, replaces the per-candidate source label.

    Returns:
        The list of candidate dicts, in discovery order.
    """
    field_id = _coerce_count(field.get("field_id"))
    field_title = _normalize_optional_text(field.get("title"))
    field_type = _normalize_optional_text(field.get("type"))
    candidates: list[JSONObject] = []
    seen_urls: set[str] = set()

    def add_candidate(url: str | None, *, source: str, path: str, name: str | None = None, file_hint: bool = False) -> None:
        """Record ``url`` as a candidate unless it is empty, a duplicate or rejected."""
        normalized_url = _record_detail_normalize_media_url(url)
        if not normalized_url or normalized_url in seen_urls:
            return
        if not _record_detail_supported_file_url(normalized_url):
            return
        # Without a file hint the URL or name must itself look like a file.
        if not file_hint and not _record_detail_url_or_name_looks_like_file(normalized_url, name):
            return
        seen_urls.add(normalized_url)
        candidates.append(
            {
                "_owner": field,
                "kind": "file",
                "source": forced_source or source,
                "source_path": path,
                "field_id": field_id,
                "field_title": field_title,
                "source_app_key": source_app_key,
                "source_record_id": source_record_id,
                "source_url": normalized_url,
                "file_name": name,
            }
        )

    def candidate_name_from_mapping(value: dict[Any, Any]) -> str | None:
        """Return the first non-empty scalar stored under a file-name key."""
        for key, item in value.items():
            if _record_detail_media_key(key) in _RECORD_FILE_NAME_KEYS:
                text = _normalize_optional_text(item) if not isinstance(item, (dict, list)) else None
                if text:
                    return text
        return None

    def scan_text(value: str, *, path: str, source: str, file_hint: bool = False) -> None:
        """Extract candidates from <img src>, markdown images and bare URLs in text."""
        # Image-tag and markdown-image matches are always labelled
        # "rich_text" and accepted with a file hint.
        for match in _RECORD_MEDIA_IMG_SRC_RE.finditer(value):
            add_candidate(match.group(1), source="rich_text", path=path, file_hint=True)
        for match in _RECORD_MEDIA_MD_IMAGE_RE.finditer(value):
            add_candidate(match.group(1), source="rich_text", path=path, file_hint=True)
        for match in _RECORD_MEDIA_URL_RE.finditer(value):
            add_candidate(match.group(0), source=source, path=path, file_hint=file_hint)

    def scan_value(value: JSONValue, *, path: str, source: str, file_hint: bool = False) -> None:
        """Recursively scan a JSON value; ``path`` tracks the location for reporting."""
        if isinstance(value, str):
            scan_text(value, path=path, source=source, file_hint=file_hint)
            return
        if isinstance(value, list):
            for index, item in enumerate(value):
                scan_value(cast(JSONValue, item), path=f"{path}[{index}]", source=source, file_hint=file_hint)
            return
        if not isinstance(value, dict):
            return

        # A dict recognised by _extract_attachment_item contributes its
        # "value" as the URL and its "name" as the file name.
        attachment = _extract_attachment_item(cast(JSONValue, value))
        if attachment:
            add_candidate(
                _normalize_optional_text(attachment.get("value")),
                source="attachment" if source == "attachment" else source,
                path=path,
                name=_normalize_optional_text(attachment.get("name")),
                file_hint=True,
            )
        candidate_name = candidate_name_from_mapping(value)
        for key, item in value.items():
            normalized_key = _record_detail_media_key(key)
            item_text = _normalize_optional_text(item) if not isinstance(item, (dict, list)) else None
            key_source = source
            key_file_hint = file_hint
            if normalized_key in _RECORD_FILE_URL_KEYS:
                key_source = "attachment" if source == "attachment" else ("image_field" if source != "subtable" else "subtable")
                # Generic "value"/"url" keys only count as a file hint inside
                # attachment fields; the other URL keys always do.
                key_file_hint = source == "attachment" or normalized_key not in {"value", "url"}
                # NOTE(review): nesting of this `if` under the URL-key check is
                # inferred; confirm against the source file.
                if item_text:
                    add_candidate(item_text, source=key_source, path=f"{path}.{key}", name=candidate_name, file_hint=key_file_hint)
            scan_value(cast(JSONValue, item), path=f"{path}.{key}", source=key_source, file_hint=key_file_hint)

    value = cast(JSONValue, field.get("value"))
    display_value = cast(JSONValue, field.get("display_value"))
    if field_type == "attachment":
        scan_value(value, path="value", source="attachment", file_hint=True)
    elif field_type == "subtable":
        scan_value(value, path="value", source="subtable", file_hint=True)
    else:
        scan_value(value, path="value", source="image_field", file_hint=False)
    # NOTE(review): the display_value scan is assumed to run for every field
    # type (not only the `else` branch); confirm against the source file.
    scan_value(display_value, path="display_value", source="rich_text", file_hint=False)
    return candidates
|
|
14444
|
+
|
|
14445
|
+
|
|
13739
14446
|
def _record_detail_attach_asset_id(field: JSONObject, asset_id: str) -> None:
|
|
13740
14447
|
asset_ids = field.get("asset_ids")
|
|
13741
14448
|
if not isinstance(asset_ids, list):
|
|
@@ -13745,6 +14452,15 @@ def _record_detail_attach_asset_id(field: JSONObject, asset_id: str) -> None:
|
|
|
13745
14452
|
asset_ids.append(asset_id)
|
|
13746
14453
|
|
|
13747
14454
|
|
|
14455
|
+
def _record_detail_attach_file_asset_id(field: JSONObject, file_asset_id: str) -> None:
|
|
14456
|
+
asset_ids = field.get("file_asset_ids")
|
|
14457
|
+
if not isinstance(asset_ids, list):
|
|
14458
|
+
asset_ids = []
|
|
14459
|
+
field["file_asset_ids"] = asset_ids
|
|
14460
|
+
if file_asset_id not in asset_ids:
|
|
14461
|
+
asset_ids.append(file_asset_id)
|
|
14462
|
+
|
|
14463
|
+
|
|
13748
14464
|
def _record_detail_media_asset_base_item(candidate: JSONObject, *, asset_id: str) -> JSONObject:
|
|
13749
14465
|
payload: JSONObject = {
|
|
13750
14466
|
"asset_id": asset_id,
|
|
@@ -13760,12 +14476,77 @@ def _record_detail_media_asset_base_item(candidate: JSONObject, *, asset_id: str
|
|
|
13760
14476
|
return payload
|
|
13761
14477
|
|
|
13762
14478
|
|
|
14479
|
+
def _record_detail_file_asset_base_item(candidate: JSONObject, *, file_asset_id: str) -> JSONObject:
|
|
14480
|
+
payload: JSONObject = {
|
|
14481
|
+
"file_asset_id": file_asset_id,
|
|
14482
|
+
"kind": candidate.get("kind") or "file",
|
|
14483
|
+
"source": candidate.get("source") or "attachment",
|
|
14484
|
+
"field_id": candidate.get("field_id"),
|
|
14485
|
+
"field_title": candidate.get("field_title"),
|
|
14486
|
+
"source_url": candidate.get("source_url"),
|
|
14487
|
+
}
|
|
14488
|
+
for key in ("source_path", "source_app_key", "source_record_id", "file_name"):
|
|
14489
|
+
if candidate.get(key) not in (None, ""):
|
|
14490
|
+
payload[key] = candidate.get(key)
|
|
14491
|
+
return payload
|
|
14492
|
+
|
|
14493
|
+
|
|
14494
|
+
def _record_detail_media_assets_by_url(media_assets: JSONObject) -> dict[str, JSONObject]:
    """Index the media payload's ``items`` by their normalized ``source_url``.

    Non-dict items and items without a usable ``source_url`` are skipped; when
    several items share a URL the first one wins.
    """
    raw_items = media_assets.get("items")
    if not isinstance(raw_items, list):
        raw_items = []
    by_url: dict[str, JSONObject] = {}
    for entry in raw_items:
        if isinstance(entry, dict):
            url = _normalize_optional_text(entry.get("source_url"))
            if url:
                # setdefault keeps the first item seen for a URL.
                by_url.setdefault(url, cast(JSONObject, entry))
    return by_url
|
|
14504
|
+
|
|
14505
|
+
|
|
14506
|
+
def _record_detail_media_assets_by_asset_id(media_assets: JSONObject) -> dict[str, JSONObject]:
    """Index the media payload's ``items`` by their normalized ``asset_id``.

    Non-dict items and items without a usable ``asset_id`` are skipped; when
    several items share an id the first one wins.
    """
    raw_items = media_assets.get("items")
    if not isinstance(raw_items, list):
        raw_items = []
    by_asset_id: dict[str, JSONObject] = {}
    for entry in raw_items:
        if isinstance(entry, dict):
            identifier = _normalize_optional_text(entry.get("asset_id"))
            if identifier:
                # setdefault keeps the first item seen for an id.
                by_asset_id.setdefault(identifier, cast(JSONObject, entry))
    return by_asset_id
|
|
14516
|
+
|
|
14517
|
+
|
|
14518
|
+
def _record_detail_media_item_from_owner_asset_ids(
|
|
14519
|
+
owner: JSONObject,
|
|
14520
|
+
media_by_asset_id: dict[str, JSONObject],
|
|
14521
|
+
candidate: JSONObject,
|
|
14522
|
+
) -> JSONObject | None:
|
|
14523
|
+
asset_ids = owner.get("asset_ids") if isinstance(owner.get("asset_ids"), list) else []
|
|
14524
|
+
if len(asset_ids) != 1:
|
|
14525
|
+
return None
|
|
14526
|
+
media_item = media_by_asset_id.get(str(asset_ids[0]))
|
|
14527
|
+
if not isinstance(media_item, dict):
|
|
14528
|
+
return None
|
|
14529
|
+
candidate_name = _normalize_optional_text(candidate.get("file_name"))
|
|
14530
|
+
media_name = _normalize_optional_text(media_item.get("file_name"))
|
|
14531
|
+
if candidate_name and media_name and candidate_name != media_name:
|
|
14532
|
+
return None
|
|
14533
|
+
if candidate.get("field_id") not in (None, media_item.get("field_id")):
|
|
14534
|
+
return None
|
|
14535
|
+
return media_item
|
|
14536
|
+
|
|
14537
|
+
|
|
13763
14538
|
def _record_detail_media_assets_dir(run_id: str) -> Path:
|
|
13764
14539
|
custom_home = os.environ.get("QINGFLOW_MCP_RECORD_ASSETS_HOME")
|
|
13765
14540
|
base_dir = Path(custom_home).expanduser() if custom_home else get_mcp_home() / "record-assets"
|
|
13766
14541
|
return base_dir / run_id
|
|
13767
14542
|
|
|
13768
14543
|
|
|
14544
|
+
def _record_detail_file_assets_dir(run_id: str) -> Path:
|
|
14545
|
+
custom_home = os.environ.get("QINGFLOW_MCP_RECORD_FILES_HOME")
|
|
14546
|
+
base_dir = Path(custom_home).expanduser() if custom_home else get_mcp_home() / "record-files"
|
|
14547
|
+
return base_dir / run_id
|
|
14548
|
+
|
|
14549
|
+
|
|
13769
14550
|
def _record_detail_media_download_headers(context: BackendRequestContext) -> dict[str, str]:
    """Build the request headers sent when downloading record media.

    The origin comes from ``_record_detail_context_origin(context)``; it is
    used verbatim for ``Origin`` and with a trailing slash for ``Referer``.
    """
    site_origin = _record_detail_context_origin(context)
    headers = {"User-Agent": DEFAULT_USER_AGENT}
    headers["Referer"] = f"{site_origin}/"
    headers["Origin"] = site_origin
    return headers
|
|
@@ -13970,6 +14751,11 @@ def _record_detail_supported_media_url(url: str) -> bool:
|
|
|
13970
14751
|
return parsed.scheme.lower() in {"http", "https"} or _record_detail_is_download_file_url(url)
|
|
13971
14752
|
|
|
13972
14753
|
|
|
14754
|
+
def _record_detail_supported_file_url(url: str) -> bool:
|
|
14755
|
+
parsed = urlsplit(url)
|
|
14756
|
+
return parsed.scheme.lower() in {"http", "https"} or _record_detail_is_download_file_url(url)
|
|
14757
|
+
|
|
14758
|
+
|
|
13973
14759
|
def _record_detail_media_key(key: Any) -> str:
|
|
13974
14760
|
return str(key or "").strip().replace("-", "_").lower()
|
|
13975
14761
|
|
|
@@ -13982,6 +14768,18 @@ def _record_detail_url_or_name_looks_like_image(url: str, name: str | None = Non
|
|
|
13982
14768
|
return False
|
|
13983
14769
|
|
|
13984
14770
|
|
|
14771
|
+
def _record_detail_url_or_name_looks_like_file(url: str, name: str | None = None) -> bool:
    """Guess whether a URL (and optional display name) refers to a file attachment.

    Returns ``True`` when the URL is recognised by
    ``_record_detail_is_download_file_url`` or
    ``_record_detail_is_qingflow_storage_url``, or when the URL path or the
    name ends with one of ``_RECORD_FILE_EXTENSIONS``.

    Args:
        url: Source URL of the candidate.
        name: Optional file name reported alongside the URL.
    """
    if _record_detail_is_download_file_url(url) or _record_detail_is_qingflow_storage_url(url):
        return True
    extensions = tuple(_RECORD_FILE_EXTENSIONS)
    candidates: list[str] = []
    if url:
        # Compare against the decoded URL path so query strings and fragments
        # do not hide (or fake) an extension.
        candidates.append(unquote(urlsplit(url).path).lower() or url.lower())
    if name:
        lowered_name = name.lower()
        candidates.append(unquote(urlsplit(name).path).lower() or lowered_name)
        # A plain file name is not a URL: characters such as "#" or "?" are
        # legal in names but would be split off as fragment/query above, so
        # also test the raw name (e.g. "report#1.pdf").
        candidates.append(lowered_name)
    return any(candidate.endswith(extensions) for candidate in candidates)
|
|
14781
|
+
|
|
14782
|
+
|
|
13985
14783
|
def _record_detail_mime_from_url(url: str) -> str | None:
|
|
13986
14784
|
path = unquote(urlsplit(url).path).lower()
|
|
13987
14785
|
if path.endswith(".png"):
|
|
@@ -13996,9 +14794,210 @@ def _record_detail_mime_from_url(url: str) -> str | None:
|
|
|
13996
14794
|
return "image/bmp"
|
|
13997
14795
|
if path.endswith(".svg"):
|
|
13998
14796
|
return "image/svg+xml"
|
|
14797
|
+
if path.endswith(".pdf"):
|
|
14798
|
+
return "application/pdf"
|
|
14799
|
+
if path.endswith(".docx"):
|
|
14800
|
+
return "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
|
14801
|
+
if path.endswith(".xlsx"):
|
|
14802
|
+
return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
14803
|
+
if path.endswith(".xlsm"):
|
|
14804
|
+
return "application/vnd.ms-excel.sheet.macroEnabled.12"
|
|
14805
|
+
if path.endswith(".csv"):
|
|
14806
|
+
return "text/csv"
|
|
14807
|
+
if path.endswith(".txt") or path.endswith(".text"):
|
|
14808
|
+
return "text/plain"
|
|
14809
|
+
if path.endswith(".md"):
|
|
14810
|
+
return "text/markdown"
|
|
14811
|
+
if path.endswith(".json"):
|
|
14812
|
+
return "application/json"
|
|
14813
|
+
return None
|
|
14814
|
+
|
|
14815
|
+
|
|
14816
|
+
def _record_detail_file_name_from_candidate(candidate: JSONObject, *, source_url: str, fallback_id: str) -> str:
    """Pick a display file name for a file candidate.

    Preference order: the candidate's normalised ``file_name`` value, then the
    last path segment of the URL-decoded ``source_url``, then ``fallback_id``.
    """
    chosen = _normalize_optional_text(candidate.get("file_name"))
    if not chosen:
        # Only parse the URL when the candidate carries no usable name.
        chosen = Path(unquote(urlsplit(source_url).path)).name
    return chosen or fallback_id
|
|
14824
|
+
|
|
14825
|
+
|
|
14826
|
+
def _record_detail_file_mime_from_content_or_name(content: bytes, *, source_url: str, file_name: str) -> str | None:
    """Determine a MIME type from file bytes, then from its name or URL.

    Checks run in this order: image signature sniffing, the ``%PDF`` magic
    prefix, ``mimetypes`` on the file name (or the URL when the name is
    empty), ``_record_detail_mime_from_url``, a small suffix table, and
    finally a text heuristic. Returns ``None`` when nothing matches.
    """
    sniffed = _record_detail_image_mime_from_bytes(content)
    if sniffed:
        return sniffed
    if content.startswith(b"%PDF"):
        return "application/pdf"

    label = file_name or source_url
    by_name = mimetypes.guess_type(label)[0]
    if not by_name:
        by_name = _record_detail_mime_from_url(source_url)
    if by_name:
        return by_name

    # Suffix fallback for the case where neither lookup above produced a type.
    suffix_table = (
        (".docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
        (".xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
        (".xlsm", "application/vnd.ms-excel.sheet.macroEnabled.12"),
        (".csv", "text/csv"),
        (".json", "application/json"),
    )
    lowered = label.lower()
    for suffix, mime in suffix_table:
        if lowered.endswith(suffix):
            return mime
    return "text/plain" if _record_detail_bytes_look_like_text(content) else None
|
|
14000
14849
|
|
|
14001
14850
|
|
|
14851
|
+
def _record_detail_file_extension(mime_type: str | None, *, source_url: str, file_name: str) -> str:
    """Choose a file extension (with leading dot) for a downloaded file.

    The lower-cased suffix of ``file_name`` wins, then the suffix of the
    URL-decoded ``source_url`` path; a suffix is accepted only if it is a dot
    followed by 1-10 characters from ``a-z0-9``. Otherwise the extension is
    guessed from ``mime_type`` (``.jpe`` is mapped to ``.jpg``), and ``.bin``
    is the last resort.
    """
    url_path = unquote(urlsplit(source_url).path)
    suffixes = (Path(candidate).suffix.lower() for candidate in (file_name, url_path))
    named = next(
        (suffix for suffix in suffixes if suffix and re.fullmatch(r"\.[a-z0-9]{1,10}", suffix)),
        None,
    )
    if named is not None:
        return named

    guessed = mimetypes.guess_extension(mime_type) if mime_type else None
    if not guessed:
        return ".bin"
    if guessed == ".jpe":
        return ".jpg"
    return guessed
|
|
14861
|
+
|
|
14862
|
+
|
|
14863
|
+
def _record_detail_bytes_look_like_text(content: bytes) -> bool:
    """Heuristically decide whether ``content`` is text.

    Only the first 4096 bytes are inspected. Empty content counts as text, a
    NUL byte in the sample means binary, and otherwise the sample must decode
    as UTF-8 or, failing that, as GB18030.

    Args:
        content: Raw bytes of the file.

    Returns:
        ``True`` when the sample looks like text, ``False`` otherwise.
    """
    import codecs

    if not content:
        return True
    sample = content[:4096]
    if b"\x00" in sample:
        return False
    # When the sample is a prefix of a longer payload, the cut may fall in the
    # middle of a multi-byte character. Decoding incrementally with
    # final=False tolerates such an incomplete trailing sequence while still
    # rejecting genuinely invalid bytes.
    sample_is_whole_content = len(content) <= len(sample)
    for encoding in ("utf-8", "gb18030"):
        decoder = codecs.getincrementaldecoder(encoding)()
        try:
            decoder.decode(sample, final=sample_is_whole_content)
        except UnicodeDecodeError:
            continue
        return True
    return False
|
|
14878
|
+
|
|
14879
|
+
|
|
14880
|
+
def _record_detail_extract_file_asset_text(
    content: bytes,
    *,
    mime_type: str | None,
    file_name: str,
    local_dir: Path,
    file_asset_id: str,
) -> JSONObject:
    """Extract readable text from a downloaded file and store it next to the asset.

    The extractor is chosen from the lower-cased file name suffix or the MIME
    type (docx, xlsx/xlsm, pdf, json, then generic text). The result is a dict
    with ``status``:

    * ``"failed"`` - choosing or running the extractor raised; ``error`` holds
      the exception text.
    * ``"unsupported"`` - no extractor applies to this file.
    * ``"ok"`` - the text was written to ``<local_dir>/<file_asset_id>.txt``
      (UTF-8) and a preview of at most
      ``RECORD_GET_FILE_EXTRACT_PREVIEW_CHARS`` characters is returned.

    Errors raised while writing the text file are not caught here.
    """
    lowered_name = file_name.lower()
    try:
        if lowered_name.endswith(".docx") or mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            extractor = _record_detail_extract_docx_text
        elif lowered_name.endswith((".xlsx", ".xlsm")) or mime_type in {
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "application/vnd.ms-excel.sheet.macroEnabled.12",
        }:
            extractor = _record_detail_extract_xlsx_text
        elif lowered_name.endswith(".pdf") or mime_type == "application/pdf":
            extractor = _record_detail_extract_pdf_text
        elif lowered_name.endswith(".json") or mime_type == "application/json":
            extractor = _record_detail_decode_json_text
        elif lowered_name.endswith((".csv", ".txt", ".text", ".md")) or (mime_type or "").startswith("text/"):
            extractor = _record_detail_decode_text
        else:
            extractor = None
        extracted: str | None = extractor(content) if extractor is not None else None
    except Exception as exc:
        # Extraction is best effort: report the failure instead of raising.
        return {"status": "failed", "text_path": None, "preview": None, "error": str(exc)}

    if extracted is None:
        return {"status": "unsupported", "text_path": None, "preview": None}

    destination = local_dir / f"{file_asset_id}.txt"
    destination.write_text(extracted, encoding="utf-8")
    limit = RECORD_GET_FILE_EXTRACT_PREVIEW_CHARS
    return {
        "status": "ok",
        "text_path": str(destination),
        "preview": extracted[:limit],
        "preview_truncated": len(extracted) > limit,
    }
|
|
14919
|
+
|
|
14920
|
+
|
|
14921
|
+
def _record_detail_decode_text(content: bytes) -> str:
    """Decode bytes to text, trying several encodings before giving up.

    ``utf-8-sig``, ``utf-8`` and ``gb18030`` are tried strictly in that order;
    if all fail, the bytes are decoded as UTF-8 with undecodable bytes
    replaced.
    """
    for codec in ("utf-8-sig", "utf-8", "gb18030"):
        try:
            decoded = content.decode(codec)
        except UnicodeDecodeError:
            continue
        else:
            return decoded
    return str(content, "utf-8", "replace")
|
|
14928
|
+
|
|
14929
|
+
|
|
14930
|
+
def _record_detail_decode_json_text(content: bytes) -> str:
    """Decode JSON bytes and pretty-print them when they parse.

    The bytes are decoded with ``_record_detail_decode_text``. Valid JSON is
    re-serialised with two-space indentation and non-ASCII characters kept
    as-is; text that does not parse is returned unchanged.
    """
    raw = _record_detail_decode_text(content)
    try:
        parsed = json.loads(raw)
        pretty = json.dumps(parsed, ensure_ascii=False, indent=2)
    except ValueError:
        return raw
    return pretty
|
|
14936
|
+
|
|
14937
|
+
|
|
14938
|
+
def _record_detail_extract_docx_text(content: bytes) -> str:
    """Extract plain text from the main document part of a ``.docx`` file.

    Top-level paragraphs become one line each. Each row of a top-level table
    becomes one line with its non-empty cells joined by ``" | "``. Text from a
    table nested inside a cell is kept as part of that cell instead of being
    emitted again as extra cells and rows.

    Args:
        content: Raw bytes of the ``.docx`` (zip) archive.

    Returns:
        The extracted lines joined by newlines, or ``""`` when the document
        has no body element.

    Raises:
        zipfile.BadZipFile: ``content`` is not a zip archive.
        KeyError: the archive has no ``word/document.xml`` member.
        xml.etree.ElementTree.ParseError: the document XML is malformed.
    """
    with zipfile.ZipFile(BytesIO(content)) as archive:
        document_xml = archive.read("word/document.xml")
    # NOTE(review): attachments are externally supplied; the stdlib XML parser
    # is documented as not secure against maliciously constructed data.
    root = ElementTree.fromstring(document_xml)
    ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
    body = root.find(f"{ns}body")
    if body is None:
        return ""

    paragraph_tag = f"{ns}p"
    table_tag = f"{ns}tbl"
    row_tag = f"{ns}tr"
    cell_tag = f"{ns}tc"
    text_tag = f"{ns}t"

    def node_text(node: ElementTree.Element) -> str:
        # Concatenate every text run below the node, including nested tables.
        return "".join(text_node.text or "" for text_node in node.iter(text_tag)).strip()

    def own_descendants(node: ElementTree.Element, wanted: str):
        # Yield the nearest descendants tagged `wanted`, looking through
        # wrapper elements but never descending into a nested table, so rows
        # and cells of inner tables are not attributed to the outer one.
        for sub in node:
            if sub.tag == wanted:
                yield sub
            elif sub.tag != table_tag:
                yield from own_descendants(sub, wanted)

    lines: list[str] = []
    for child in body:
        if child.tag == paragraph_tag:
            line = node_text(child)
            if line:
                lines.append(line)
        elif child.tag == table_tag:
            for row in own_descendants(child, row_tag):
                cell_texts = [node_text(cell) for cell in own_descendants(row, cell_tag)]
                cell_texts = [text for text in cell_texts if text]
                if cell_texts:
                    lines.append(" | ".join(cell_texts))
    return "\n".join(lines)
|
|
14963
|
+
|
|
14964
|
+
|
|
14965
|
+
def _record_detail_extract_xlsx_text(content: bytes) -> str:
    """Render an Excel workbook as tab-separated text.

    Each worksheet starts with a ``# <title>`` line followed by its non-empty
    rows, with cells joined by tabs and trailing whitespace stripped. At most
    ``RECORD_GET_FILE_EXTRACT_XLSX_MAX_ROWS_PER_SHEET`` rows are read per
    sheet; a marker line is added when rows are skipped. Cells are read with
    ``data_only=True``. The workbook is always closed.
    """
    from openpyxl import load_workbook

    workbook = load_workbook(BytesIO(content), read_only=True, data_only=True)
    try:
        chunks: list[str] = []
        for sheet in workbook.worksheets:
            chunks.append(f"# {sheet.title}")
            for position, values in enumerate(sheet.iter_rows(values_only=True), start=1):
                if position > RECORD_GET_FILE_EXTRACT_XLSX_MAX_ROWS_PER_SHEET:
                    chunks.append(f"... skipped rows after {RECORD_GET_FILE_EXTRACT_XLSX_MAX_ROWS_PER_SHEET}")
                    break
                rendered = [str(value) if value is not None else "" for value in values]
                if any(rendered):
                    chunks.append("\t".join(rendered).rstrip())
        return "\n".join(chunks)
    finally:
        workbook.close()
|
|
14985
|
+
|
|
14986
|
+
|
|
14987
|
+
def _record_detail_extract_pdf_text(content: bytes) -> str:
    """Extract text from the leading pages of a PDF.

    Up to ``RECORD_GET_FILE_EXTRACT_PDF_MAX_PAGES`` pages are read. Every page
    with non-blank text contributes a ``# Page <n>`` section; a marker is
    appended when the document has more pages than the limit. Sections are
    separated by a blank line.
    """
    from pypdf import PdfReader

    reader = PdfReader(BytesIO(content))
    sections: list[str] = []
    leading_pages = reader.pages[:RECORD_GET_FILE_EXTRACT_PDF_MAX_PAGES]
    for number, page in enumerate(leading_pages, start=1):
        stripped = (page.extract_text() or "").strip()
        if stripped:
            sections.append(f"# Page {number}\n{stripped}")
    if len(reader.pages) > RECORD_GET_FILE_EXTRACT_PDF_MAX_PAGES:
        sections.append(f"... skipped pages after {RECORD_GET_FILE_EXTRACT_PDF_MAX_PAGES}")
    return "\n\n".join(sections)
|
|
14999
|
+
|
|
15000
|
+
|
|
14002
15001
|
def _record_detail_image_mime_from_bytes(content: bytes) -> str | None:
|
|
14003
15002
|
if content.startswith(b"\x89PNG\r\n\x1a\n"):
|
|
14004
15003
|
return "image/png"
|
|
@@ -14038,6 +15037,7 @@ def _record_detail_context_integrity(
|
|
|
14038
15037
|
workflow_logs: JSONObject,
|
|
14039
15038
|
associated_resources: list[JSONObject],
|
|
14040
15039
|
media_assets: JSONObject,
|
|
15040
|
+
file_assets: JSONObject,
|
|
14041
15041
|
unavailable_context: list[JSONObject],
|
|
14042
15042
|
) -> JSONObject:
|
|
14043
15043
|
reference_unavailable = any(item.get("target_detail_completeness") != "full" for item in references)
|
|
@@ -14049,6 +15049,7 @@ def _record_detail_context_integrity(
|
|
|
14049
15049
|
"workflow_logs": workflow_logs.get("status") or "unknown",
|
|
14050
15050
|
"associated_resources": "full" if associated_resources or not any(item.get("section") == "associated_resources" for item in unavailable_context) else "unavailable",
|
|
14051
15051
|
"media_assets": media_assets.get("status") or "unknown",
|
|
15052
|
+
"file_assets": file_assets.get("status") or "unknown",
|
|
14052
15053
|
"unavailable_count": len(unavailable_context),
|
|
14053
15054
|
"safe_for_record_fact_conclusion": True,
|
|
14054
15055
|
"safe_for_full_log_conclusion": False,
|
|
@@ -14063,6 +15064,7 @@ def _record_detail_semantic_context(payload: JSONObject) -> str:
|
|
|
14063
15064
|
fields = payload.get("fields") if isinstance(payload.get("fields"), list) else []
|
|
14064
15065
|
references = payload.get("references") if isinstance(payload.get("references"), list) else []
|
|
14065
15066
|
media_assets = payload.get("media_assets") if isinstance(payload.get("media_assets"), dict) else {}
|
|
15067
|
+
file_assets = payload.get("file_assets") if isinstance(payload.get("file_assets"), dict) else {}
|
|
14066
15068
|
data_logs = payload.get("data_logs") if isinstance(payload.get("data_logs"), dict) else {}
|
|
14067
15069
|
workflow_logs = payload.get("workflow_logs") if isinstance(payload.get("workflow_logs"), dict) else {}
|
|
14068
15070
|
associated_resources = payload.get("associated_resources") if isinstance(payload.get("associated_resources"), list) else []
|
|
@@ -14110,6 +15112,20 @@ def _record_detail_semantic_context(payload: JSONObject) -> str:
|
|
|
14110
15112
|
f"(fieldId={_semantic_escape(item.get('field_id'))}),"
|
|
14111
15113
|
f"本地路径:{_semantic_escape(item.get('local_path')) or '无'},{_semantic_escape(readable_text)}。"
|
|
14112
15114
|
)
|
|
15115
|
+
file_items = file_assets.get("items") if isinstance(file_assets.get("items"), list) else []
|
|
15116
|
+
if file_items:
|
|
15117
|
+
lines.extend(["", "文件附件:"])
|
|
15118
|
+
for item in file_items:
|
|
15119
|
+
if not isinstance(item, dict):
|
|
15120
|
+
continue
|
|
15121
|
+
extraction = item.get("extraction") if isinstance(item.get("extraction"), dict) else {}
|
|
15122
|
+
readable_text = "可由智能体读取" if item.get("readable_by_agent") else f"不可直接读取({item.get('access_status') or 'unknown'})"
|
|
15123
|
+
lines.append(
|
|
15124
|
+
f"- 文件 {_semantic_escape(item.get('file_asset_id'))}「{_semantic_escape(item.get('file_name'))}」"
|
|
15125
|
+
f"来自字段「{_semantic_escape(item.get('field_title'))}」(fieldId={_semantic_escape(item.get('field_id'))}),"
|
|
15126
|
+
f"本地路径:{_semantic_escape(item.get('local_path')) or '无'},"
|
|
15127
|
+
f"提取文本:{_semantic_escape(extraction.get('text_path')) or '无'},{_semantic_escape(readable_text)}。"
|
|
15128
|
+
)
|
|
14113
15129
|
lines.extend(["", "最近数据日志:"])
|
|
14114
15130
|
_append_semantic_log_lines(lines, data_logs)
|
|
14115
15131
|
lines.extend(["", "最近流程日志:"])
|
|
@@ -14227,7 +15243,6 @@ def _build_record_list_lookup_payload(
|
|
|
14227
15243
|
query: str | None,
|
|
14228
15244
|
items: list[JSONObject],
|
|
14229
15245
|
pagination: JSONObject,
|
|
14230
|
-
limit: int,
|
|
14231
15246
|
) -> JSONObject | None:
|
|
14232
15247
|
if not query:
|
|
14233
15248
|
return None
|
|
@@ -14236,17 +15251,7 @@ def _build_record_list_lookup_payload(
|
|
|
14236
15251
|
if returned_items is None:
|
|
14237
15252
|
returned_items = len(items)
|
|
14238
15253
|
truncated = bool(reported_total is not None and reported_total > returned_items)
|
|
14239
|
-
|
|
14240
|
-
for index, item in enumerate(items):
|
|
14241
|
-
candidate = _record_list_candidate_payload(item, query=query)
|
|
14242
|
-
score = _coerce_count(candidate.get("score")) or 0
|
|
14243
|
-
if score <= 0:
|
|
14244
|
-
candidate["score"] = 40
|
|
14245
|
-
candidate["match_reason"] = "backend_match_without_selected_field_evidence"
|
|
14246
|
-
scored.append((int(candidate["score"]), index, candidate))
|
|
14247
|
-
scored.sort(key=lambda entry: (-entry[0], entry[1]))
|
|
14248
|
-
candidates = [entry[2] for entry in scored[: min(limit, LOOKUP_CONFIRMATION_CANDIDATE_LIMIT)]]
|
|
14249
|
-
confidence = _record_list_lookup_confidence(candidates, truncated=truncated)
|
|
15254
|
+
confidence = _record_list_lookup_confidence(returned_items=returned_items, reported_total=reported_total, truncated=truncated)
|
|
14250
15255
|
next_action = {
|
|
14251
15256
|
"single_high": "record_get",
|
|
14252
15257
|
"multiple": "ask_user",
|
|
@@ -14256,140 +15261,25 @@ def _build_record_list_lookup_payload(
|
|
|
14256
15261
|
return {
|
|
14257
15262
|
"mode": "candidate_locator",
|
|
14258
15263
|
"query": query,
|
|
14259
|
-
"
|
|
14260
|
-
"
|
|
15264
|
+
"total_count": reported_total,
|
|
15265
|
+
"returned_count": returned_items,
|
|
15266
|
+
"truncated": truncated,
|
|
14261
15267
|
"confidence": confidence,
|
|
14262
15268
|
"next_action": next_action,
|
|
14263
|
-
"candidates": candidates,
|
|
14264
15269
|
}
|
|
14265
15270
|
|
|
14266
15271
|
|
|
14267
|
-
def _record_list_lookup_confidence(
|
|
14268
|
-
if
|
|
15272
|
+
def _record_list_lookup_confidence(*, returned_items: int, reported_total: int | None, truncated: bool) -> str:
    """Classify a lookup result by how many records it matched.

    Returns ``"none"`` when nothing was returned, ``"truncated"`` when the
    caller flagged the result as truncated, ``"single_high"`` when the total
    (``reported_total``, or ``returned_items`` if the total is unknown) is
    exactly one, and ``"multiple"`` otherwise.
    """
    if returned_items <= 0:
        return "none"
    if truncated:
        return "truncated"
    total = returned_items if reported_total is None else reported_total
    return "single_high" if total == 1 else "multiple"
|
|
14279
15281
|
|
|
14280
15282
|
|
|
14281
|
-
def _record_list_candidate_payload(item: JSONObject, *, query: str) -> JSONObject:
    """Score one list row against a lookup query and describe the match.

    Every field except ``normalized_record`` and
    ``normalized_ambiguous_fields`` is scored with
    ``_record_list_field_match_score``. The candidate score is the best field
    score plus 3 points per additional matching field (bonus capped at 10,
    total capped at 100). The six highest-scoring matched fields are reported.
    """
    normalized_query = _normalize_lookup_query_text(query)
    ignored_keys = {"normalized_record", "normalized_ambiguous_fields"}
    hits: list[JSONObject] = []
    top_score = 0
    for field_title, raw_value in item.items():
        if field_title in ignored_keys:
            continue
        text = _record_list_value_text(raw_value)
        if not text:
            continue
        field_score, match_type = _record_list_field_match_score(field_title, text, normalized_query)
        if field_score > 0:
            if field_score > top_score:
                top_score = field_score
            hits.append(
                {
                    "title": field_title,
                    "value": _truncate_text(text, 120),
                    "match_type": match_type,
                    "score": field_score,
                }
            )
    if hits:
        bonus = min(10, (len(hits) - 1) * 3)
        top_score = min(100, top_score + bonus)

    record_id = _normalize_optional_text(item.get("record_id")) or _normalize_optional_text(item.get("apply_id"))
    display_fields = _record_list_display_fields(item)
    title = _record_list_candidate_title(item) or record_id
    ranked_hits = sorted(hits, key=lambda entry: int(entry.get("score") or 0), reverse=True)
    return {
        "record_id": record_id,
        "title": title,
        "score": top_score,
        "matched_fields": ranked_hits[:6],
        "display_fields": display_fields,
    }
|
|
14314
|
-
|
|
14315
|
-
|
|
14316
|
-
def _record_list_field_match_score(field_title: str, value_text: str, query_norm: str) -> tuple[int, str]:
    """Score how well one field value matches an already-normalised query.

    Returns ``(score, match_type)``. Exact matches outrank containment, and
    within each tier identifier-like fields outrank title-like fields, which
    outrank the rest. A title-like value of at least two characters that is
    contained in the query scores as ``reverse_contains_title``. No match is
    ``(0, "none")``.
    """
    no_match = (0, "none")
    if not query_norm:
        return no_match
    value_norm = _normalize_lookup_query_text(value_text)
    if not value_norm:
        return no_match

    title_norm = _normalize_lookup_query_text(field_title)
    title_like = _record_list_title_like(field_title)
    id_like = field_title in {"record_id", "apply_id"} or "编号" in field_title or "id" == title_norm

    # Each tier is ordered (identifier field, title field, any other field).
    if value_norm == query_norm:
        tier = ((100, "exact_identifier"), (96, "exact_title"), (86, "exact"))
    elif query_norm in value_norm:
        tier = ((92, "contains_identifier"), (88, "contains_title"), (68, "contains"))
    else:
        tier = None

    if tier is not None:
        if id_like:
            return tier[0]
        if title_like:
            return tier[1]
        return tier[2]
    if title_like and value_norm in query_norm and len(value_norm) >= 2:
        return 72, "reverse_contains_title"
    return no_match
|
|
14340
|
-
|
|
14341
|
-
|
|
14342
|
-
def _record_list_candidate_title(item: JSONObject) -> str | None:
    """Pick a short title (at most 80 characters) for a list row.

    The first non-empty value of a title-like field is preferred; failing
    that, the first non-empty value of any field that is not an identifier or
    a ``normalized_*`` helper key. Returns ``None`` when the row has no usable
    text.
    """
    identifier_keys = {"record_id", "apply_id"}
    for field_title, raw_value in item.items():
        if field_title not in identifier_keys and _record_list_title_like(field_title):
            text = _record_list_value_text(raw_value)
            if text:
                return _truncate_text(text, 80)

    skipped_keys = identifier_keys | {"normalized_record", "normalized_ambiguous_fields"}
    for field_title, raw_value in item.items():
        if field_title in skipped_keys:
            continue
        text = _record_list_value_text(raw_value)
        if text:
            return _truncate_text(text, 80)
    return None
|
|
14357
|
-
|
|
14358
|
-
|
|
14359
|
-
def _record_list_display_fields(item: JSONObject) -> list[JSONObject]:
    """Return up to six ``{"title", "value"}`` pairs for showing a list row.

    Identifier keys and ``normalized_*`` helper keys are skipped, as are
    fields whose text is empty. Values are truncated to 120 characters.
    """
    hidden_keys = {"record_id", "apply_id", "normalized_record", "normalized_ambiguous_fields"}
    rows: list[JSONObject] = []
    for field_title, raw_value in item.items():
        if field_title in hidden_keys:
            continue
        text = _record_list_value_text(raw_value)
        if text:
            rows.append({"title": field_title, "value": _truncate_text(text, 120)})
            if len(rows) >= 6:
                break
    return rows
|
|
14371
|
-
|
|
14372
|
-
|
|
14373
|
-
def _record_list_title_like(field_title: str) -> bool:
    """Tell whether a field title contains one of the known title keywords.

    Each keyword is looked for in the title as given and in its lower-cased
    form, so the English keywords match case-insensitively.
    """
    keywords = ("标题", "名称", "名字", "客户", "公司", "项目", "商机", "线索", "主题", "编号", "name", "title", "customer", "company")
    lowered = field_title.lower()
    for token in keywords:
        if token in field_title or token in lowered:
            return True
    return False
|
|
14379
|
-
|
|
14380
|
-
|
|
14381
|
-
def _record_list_value_text(value: JSONValue) -> str:
    """Convert a field value to stripped text.

    Strings are stripped, ``None`` becomes the empty string, and any other
    value is serialised with ``_stringify_json`` and then stripped.
    """
    if isinstance(value, str):
        return value.strip()
    if value is None:
        return ""
    serialised = _stringify_json(value)
    return serialised.strip()
|
|
14387
|
-
|
|
14388
|
-
|
|
14389
|
-
def _normalize_lookup_query_text(value: str) -> str:
    """Normalise text for lookup comparison by removing all whitespace and lower-casing."""
    without_whitespace = re.sub(r"\s+", "", value)
    return without_whitespace.lower()
|
|
14391
|
-
|
|
14392
|
-
|
|
14393
15283
|
def _truncate_text(value: str, limit: int) -> str:
|
|
14394
15284
|
if len(value) <= limit:
|
|
14395
15285
|
return value
|