@josephyan/qingflow-cli 1.0.10 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,18 +2,22 @@ from __future__ import annotations
2
2
 
3
3
  import csv
4
4
  import html
5
+ import mimetypes
5
6
  import json
6
7
  import os
7
8
  import re
8
9
  import time
10
+ import zipfile
9
11
  from copy import deepcopy
10
12
  from dataclasses import dataclass
11
13
  from datetime import UTC, datetime, timedelta
12
14
  from decimal import Decimal, InvalidOperation
15
+ from io import BytesIO
13
16
  from pathlib import Path
14
17
  from typing import Any, cast
15
18
  from urllib.parse import parse_qs, unquote, urlsplit
16
19
  from uuid import uuid4
20
+ from xml.etree import ElementTree
17
21
 
18
22
  from mcp.server.fastmcp import FastMCP
19
23
 
@@ -34,6 +38,7 @@ from .directory_tools import _directory_has_more, _directory_items
34
38
 
35
39
  DEFAULT_QUERY_PAGE_SIZE = 50
36
40
  DEFAULT_LIST_PAGE_SIZE = 200
41
+ DEFAULT_RECORD_LIST_RETURN_LIMIT = 10
37
42
  BACKEND_RECORD_ACCESS_PAGE_SIZE = 1000
38
43
  DEFAULT_RECORD_ACCESS_SHARD_ROWS = 20_000
39
44
  RECORD_ACCESS_UNBOUNDED_ROW_THRESHOLD = 50_000
@@ -43,6 +48,14 @@ RECORD_GET_DETAIL_LOG_PAGE_SIZE = 10
43
48
  RECORD_GET_MEDIA_MAX_IMAGES = 30
44
49
  RECORD_GET_MEDIA_MAX_IMAGE_BYTES = 20 * 1024 * 1024
45
50
  RECORD_GET_MEDIA_MAX_TOTAL_BYTES = 100 * 1024 * 1024
51
+ RECORD_GET_FILE_MAX_FILES = 50
52
+ RECORD_GET_FILE_MAX_BYTES = 50 * 1024 * 1024
53
+ RECORD_GET_FILE_MAX_TOTAL_BYTES = 200 * 1024 * 1024
54
+ RECORD_GET_FILE_TIME_BUDGET_SECONDS = 55.0
55
+ RECORD_GET_FILE_MIN_REMAINING_SECONDS = 8.0
56
+ RECORD_GET_FILE_EXTRACT_PREVIEW_CHARS = 20_000
57
+ RECORD_GET_FILE_EXTRACT_XLSX_MAX_ROWS_PER_SHEET = 200
58
+ RECORD_GET_FILE_EXTRACT_PDF_MAX_PAGES = 50
46
59
  DEFAULT_ANALYSIS_PAGE_SIZE = 1000
47
60
  DEFAULT_SCAN_MAX_PAGES = 10
48
61
  DEFAULT_ANALYSIS_SCAN_MAX_PAGES = 100
@@ -355,7 +368,7 @@ class RecordTools(ToolBase):
355
368
  description=(
356
369
  "Browse Qingflow records with a schema-first list DSL. "
357
370
  "Use record_browse_schema_get first, then pass field_id-only columns, where, and order_by clauses. "
358
- "This route is for browse/export/sample inspection only, not analysis."
371
+ "This route returns up to 10 rows plus total counts for browse, sample inspection, and fuzzy record lookup; it is not for analysis."
359
372
  )
360
373
  )
361
374
  def record_list(
@@ -366,7 +379,6 @@ class RecordTools(ToolBase):
366
379
  query_fields: list[JSONObject | int] | None = None,
367
380
  where: list[JSONObject] | None = None,
368
381
  order_by: list[JSONObject] | None = None,
369
- limit: int = 50,
370
382
  page: int = 1,
371
383
  view_id: str | None = None,
372
384
  output_profile: str = "normal",
@@ -379,7 +391,6 @@ class RecordTools(ToolBase):
379
391
  query_fields=query_fields or [],
380
392
  where=where or [],
381
393
  order_by=order_by or [],
382
- limit=limit,
383
394
  page=page,
384
395
  view_id=view_id,
385
396
  list_type=None,
@@ -1618,8 +1629,8 @@ class RecordTools(ToolBase):
1618
1629
  query_fields: list[JSONObject | int] | None = None,
1619
1630
  where: list[JSONObject],
1620
1631
  order_by: list[JSONObject],
1621
- limit: int,
1622
- page: int,
1632
+ limit: int = DEFAULT_RECORD_LIST_RETURN_LIMIT,
1633
+ page: int = 1,
1623
1634
  view_id: str | None = None,
1624
1635
  list_type: int | None = None,
1625
1636
  view_key: str | None = None,
@@ -1664,127 +1675,133 @@ class RecordTools(ToolBase):
1664
1675
  },
1665
1676
  )
1666
1677
  )
1667
- resolved_columns = normalized_columns or self._derive_public_list_columns_for_public(
1668
- profile=profile,
1669
- app_key=app_key,
1670
- resolved_view=view_route,
1671
- )
1672
- resolved_query_fields = self._resolve_record_list_query_fields_for_public(
1673
- profile=profile,
1674
- app_key=app_key,
1675
- resolved_view=view_route,
1676
- selectors=normalized_query_field_selectors,
1677
- )
1678
+ filters = self._normalize_record_list_where(where)
1679
+ sorts = self._normalize_record_list_order_by(order_by)
1678
1680
 
1679
- raw = self.record_query(
1680
- profile=profile,
1681
- query_mode="list",
1682
- app_key=app_key,
1683
- apply_id=None,
1684
- page_num=page,
1685
- page_size=DEFAULT_LIST_PAGE_SIZE,
1686
- requested_pages=1,
1687
- scan_max_pages=1,
1688
- auto_expand_pages=False,
1689
- query_key=normalized_query,
1690
- search_que_ids=resolved_query_fields,
1691
- filters=self._normalize_record_list_where(where),
1692
- sorts=self._normalize_record_list_order_by(order_by),
1693
- max_rows=limit,
1694
- max_columns=len(resolved_columns),
1695
- select_columns=resolved_columns,
1696
- amount_column=None,
1697
- time_range={},
1698
- stat_policy={},
1699
- strict_full=False,
1700
- output_profile="verbose" if normalized_output_profile in {"verbose", "normalized"} else DEFAULT_OUTPUT_PROFILE,
1701
- list_type=view_route.list_type if view_route.list_type is not None else DEFAULT_RECORD_LIST_TYPE,
1702
- view_key=view_route.view_selection.view_key if view_route.view_selection is not None else None,
1703
- view_name=view_route.view_selection.view_name if view_route.view_selection is not None else None,
1704
- )
1705
- list_data = cast(JSONObject, cast(JSONObject, raw["data"])["list"])
1706
- pagination = cast(JSONObject, list_data["pagination"])
1707
- warnings: list[JSONObject] = []
1708
- warnings.extend(legacy_warnings)
1709
- warnings.extend(compatibility_warnings)
1710
- warnings.extend(_view_filter_trust_warnings(view_route))
1711
- warning = _normalize_optional_text(list_data.get("analysis_warning"))
1712
- if warning:
1713
- warnings.append({"code": "BROWSE_ONLY", "message": warning})
1714
- list_type_used = _coerce_count(pagination.get("list_type_used"))
1715
- if list_type_used is not None and list_type_used != DEFAULT_RECORD_LIST_TYPE:
1716
- warnings.append(
1717
- {
1718
- "code": "LIST_TYPE_FALLBACK",
1719
- "message": (
1720
- f"record_list not accessible via listType={DEFAULT_RECORD_LIST_TYPE}; "
1721
- f"fell back to listType={list_type_used} ({get_record_list_type_label(list_type_used)})."
1722
- ),
1723
- }
1681
+ def runner(session_profile, context):
1682
+ browse_scope = self._build_browse_read_scope(
1683
+ profile,
1684
+ context,
1685
+ app_key,
1686
+ view_route,
1687
+ force_refresh=False,
1724
1688
  )
1725
- rows = list_data.get("rows", [])
1726
- normalized_public_rows = _normalize_public_record_rows(rows if isinstance(rows, list) else [])
1727
- lookup_payload = _build_record_list_lookup_payload(
1728
- query=normalized_query,
1729
- items=normalized_public_rows,
1730
- pagination=pagination,
1731
- limit=limit,
1732
- )
1733
- response: JSONObject = {
1734
- "profile": profile,
1735
- "ws_id": raw.get("ws_id"),
1736
- "ok": bool(raw.get("ok", True)),
1737
- "request_route": raw.get("request_route"),
1738
- "warnings": warnings,
1739
- "verification": _view_filter_verification_payload(view_route),
1740
- "output_profile": normalized_output_profile,
1741
- "data": {
1742
- "app_key": app_key,
1743
- "items": normalized_public_rows,
1744
- "pagination": {
1745
- "page": page,
1746
- "limit": limit,
1747
- "returned_items": pagination.get("returned_items"),
1748
- "result_amount": pagination.get("result_amount"),
1749
- "list_type_used": list_type_used,
1750
- },
1751
- "selection": {
1752
- "columns": [_column_selector_payload(field_id) for field_id in resolved_columns],
1753
- "query_fields": [_column_selector_payload(field_id) for field_id in resolved_query_fields],
1754
- "view": _accessible_view_payload(view_route),
1689
+ index = cast(FieldIndex, browse_scope["index"])
1690
+ selected_fields = (
1691
+ self._resolve_record_list_columns(normalized_columns, index, view_route=view_route)
1692
+ if normalized_columns
1693
+ else self._derive_record_list_fields_from_index(index)
1694
+ )
1695
+ resolved_columns = [field.que_id for field in selected_fields]
1696
+ resolved_query_fields = self._resolve_record_list_query_fields(
1697
+ normalized_query_field_selectors,
1698
+ index,
1699
+ view_route=view_route,
1700
+ )
1701
+ match_rules = self._resolve_record_list_match_rules(context, filters, index, view_route=view_route)
1702
+ sort_rules = self._resolve_record_list_sort_rules(sorts, index, view_route=view_route)
1703
+ raw = self._record_list_query_view_fields(
1704
+ session_profile=session_profile,
1705
+ context=context,
1706
+ app_key=app_key,
1707
+ view_route=view_route,
1708
+ page_num=page,
1709
+ page_size=DEFAULT_LIST_PAGE_SIZE,
1710
+ query_key=normalized_query,
1711
+ search_que_ids=resolved_query_fields or None,
1712
+ match_rules=match_rules,
1713
+ sort_rules=sort_rules,
1714
+ max_rows=limit,
1715
+ selected_fields=selected_fields,
1716
+ output_profile="verbose" if normalized_output_profile in {"verbose", "normalized"} else DEFAULT_OUTPUT_PROFILE,
1717
+ )
1718
+ list_data = cast(JSONObject, cast(JSONObject, raw["data"])["list"])
1719
+ pagination = cast(JSONObject, list_data["pagination"])
1720
+ warnings: list[JSONObject] = []
1721
+ warnings.extend(legacy_warnings)
1722
+ warnings.extend(compatibility_warnings)
1723
+ warnings.extend(_view_filter_trust_warnings(view_route))
1724
+ warning = _normalize_optional_text(list_data.get("analysis_warning"))
1725
+ if warning:
1726
+ warnings.append({"code": "BROWSE_ONLY", "message": warning})
1727
+ list_type_used = _coerce_count(pagination.get("list_type_used"))
1728
+ if list_type_used is not None and list_type_used != DEFAULT_RECORD_LIST_TYPE:
1729
+ warnings.append(
1730
+ {
1731
+ "code": "LIST_TYPE_FALLBACK",
1732
+ "message": (
1733
+ f"record_list not accessible via listType={DEFAULT_RECORD_LIST_TYPE}; "
1734
+ f"fell back to listType={list_type_used} ({get_record_list_type_label(list_type_used)})."
1735
+ ),
1736
+ }
1737
+ )
1738
+ rows = list_data.get("rows", [])
1739
+ normalized_public_rows = _normalize_public_record_rows(rows if isinstance(rows, list) else [])
1740
+ lookup_payload = _build_record_list_lookup_payload(
1741
+ query=normalized_query,
1742
+ items=normalized_public_rows,
1743
+ pagination=pagination,
1744
+ )
1745
+ total_count = _coerce_count(pagination.get("result_amount"))
1746
+ returned_count = _coerce_count(pagination.get("returned_items"))
1747
+ if returned_count is None:
1748
+ returned_count = len(normalized_public_rows)
1749
+ truncated = bool(total_count is not None and total_count > returned_count)
1750
+ response: JSONObject = {
1751
+ "profile": profile,
1752
+ "ws_id": raw.get("ws_id"),
1753
+ "ok": bool(raw.get("ok", True)),
1754
+ "request_route": raw.get("request_route"),
1755
+ "warnings": warnings,
1756
+ "verification": _view_filter_verification_payload(view_route),
1757
+ "output_profile": normalized_output_profile,
1758
+ "data": {
1759
+ "app_key": app_key,
1760
+ "items": normalized_public_rows,
1761
+ "pagination": {
1762
+ "returned_count": returned_count,
1763
+ "total_count": total_count,
1764
+ "truncated": truncated,
1765
+ },
1766
+ "selection": {
1767
+ "columns": [_column_selector_payload(field_id) for field_id in resolved_columns],
1768
+ "query_fields": [_column_selector_payload(field_id) for field_id in resolved_query_fields],
1769
+ "view": _accessible_view_payload(view_route),
1770
+ },
1755
1771
  },
1756
- },
1757
- }
1758
- if lookup_payload is not None:
1759
- response["lookup"] = lookup_payload
1760
- if normalized_output_profile == "normalized":
1761
- normalized_rows = list_data.get("normalized_rows")
1762
- if isinstance(normalized_rows, list):
1763
- item_by_apply_id = {
1764
- _coerce_count(item.get("apply_id")): item
1765
- for item in cast(list[JSONObject], response["data"]["items"])
1766
- if isinstance(item, dict) and _coerce_count(item.get("apply_id")) is not None
1767
- }
1768
- for entry in normalized_rows:
1769
- if not isinstance(entry, dict):
1770
- continue
1771
- apply_id = _coerce_count(entry.get("apply_id"))
1772
- if apply_id is None:
1773
- continue
1774
- target = item_by_apply_id.get(apply_id)
1775
- if target is None:
1776
- continue
1777
- target["normalized_record"] = cast(JSONObject, entry.get("normalized_record") or {})
1778
- target["normalized_ambiguous_fields"] = cast(JSONObject, entry.get("normalized_ambiguous_fields") or {})
1779
- if normalized_output_profile == "verbose":
1780
- response["data"]["debug"] = {
1781
- "completeness": raw.get("completeness"),
1782
- "evidence": raw.get("evidence"),
1783
- "resolved_mappings": raw.get("resolved_mappings"),
1784
- "row_cap_hit": list_data.get("row_cap_hit"),
1785
- "sample_only": list_data.get("sample_only"),
1786
1772
  }
1787
- return response
1773
+ if lookup_payload is not None:
1774
+ response["lookup"] = lookup_payload
1775
+ if normalized_output_profile == "normalized":
1776
+ normalized_rows = list_data.get("normalized_rows")
1777
+ if isinstance(normalized_rows, list):
1778
+ item_by_apply_id = {
1779
+ _coerce_count(item.get("apply_id")): item
1780
+ for item in cast(list[JSONObject], response["data"]["items"])
1781
+ if isinstance(item, dict) and _coerce_count(item.get("apply_id")) is not None
1782
+ }
1783
+ for entry in normalized_rows:
1784
+ if not isinstance(entry, dict):
1785
+ continue
1786
+ apply_id = _coerce_count(entry.get("apply_id"))
1787
+ if apply_id is None:
1788
+ continue
1789
+ target = item_by_apply_id.get(apply_id)
1790
+ if target is None:
1791
+ continue
1792
+ target["normalized_record"] = cast(JSONObject, entry.get("normalized_record") or {})
1793
+ target["normalized_ambiguous_fields"] = cast(JSONObject, entry.get("normalized_ambiguous_fields") or {})
1794
+ if normalized_output_profile == "verbose":
1795
+ response["data"]["debug"] = {
1796
+ "completeness": raw.get("completeness"),
1797
+ "evidence": raw.get("evidence"),
1798
+ "resolved_mappings": raw.get("resolved_mappings"),
1799
+ "row_cap_hit": list_data.get("row_cap_hit"),
1800
+ "sample_only": list_data.get("sample_only"),
1801
+ }
1802
+ return response
1803
+
1804
+ return self._run_record_tool(profile, runner)
1788
1805
 
1789
1806
  @tool_cn_name("记录访问")
1790
1807
  def record_access(
@@ -2304,12 +2321,23 @@ class RecordTools(ToolBase):
2304
2321
  fields=fields,
2305
2322
  references=references,
2306
2323
  )
2324
+ file_assets = self._record_get_file_assets(
2325
+ context,
2326
+ app_key=app_key,
2327
+ record_id=record_id_int,
2328
+ resolved_view=resolved_view,
2329
+ audit_node_id=cast(int | None, audit_context.get("audit_node_id")),
2330
+ fields=fields,
2331
+ references=references,
2332
+ media_assets=media_assets,
2333
+ )
2307
2334
  context_integrity = _record_detail_context_integrity(
2308
2335
  references=references,
2309
2336
  data_logs=data_logs,
2310
2337
  workflow_logs=workflow_logs,
2311
2338
  associated_resources=associated_resources,
2312
2339
  media_assets=media_assets,
2340
+ file_assets=file_assets,
2313
2341
  unavailable_context=unavailable_context,
2314
2342
  )
2315
2343
  payload: JSONObject = {
@@ -2332,6 +2360,7 @@ class RecordTools(ToolBase):
2332
2360
  "requested_focus_fields": [_column_selector_payload(field_id) for field_id in requested_focus_field_ids],
2333
2361
  "references": references,
2334
2362
  "media_assets": media_assets,
2363
+ "file_assets": file_assets,
2335
2364
  "data_logs": data_logs,
2336
2365
  "workflow_logs": workflow_logs,
2337
2366
  "associated_resources": associated_resources,
@@ -2867,6 +2896,53 @@ class RecordTools(ToolBase):
2867
2896
  ],
2868
2897
  }
2869
2898
 
2899
+ def _record_get_file_assets(
2900
+ self,
2901
+ context, # type: ignore[no-untyped-def]
2902
+ *,
2903
+ app_key: str,
2904
+ record_id: int,
2905
+ resolved_view: AccessibleViewRoute,
2906
+ audit_node_id: int | None,
2907
+ fields: list[JSONObject],
2908
+ references: list[JSONObject],
2909
+ media_assets: JSONObject,
2910
+ ) -> JSONObject:
2911
+ """Collect and localize file assets from the frontend detail context."""
2912
+ try:
2913
+ def refresh_source_url(candidate: JSONObject) -> str | None:
2914
+ return self._record_get_refreshed_media_source_url(
2915
+ context,
2916
+ app_key=app_key,
2917
+ record_id=record_id,
2918
+ resolved_view=resolved_view,
2919
+ audit_node_id=audit_node_id,
2920
+ candidate=candidate,
2921
+ )
2922
+
2923
+ return _record_detail_file_assets_payload(
2924
+ backend=self.backend,
2925
+ context=context,
2926
+ app_key=app_key,
2927
+ record_id=record_id,
2928
+ fields=fields,
2929
+ references=references,
2930
+ media_assets=media_assets,
2931
+ refresh_source_url=refresh_source_url,
2932
+ )
2933
+ except Exception as exc: # defensive: file assets should never break the core record detail.
2934
+ return {
2935
+ "status": "unavailable",
2936
+ "local_dir": None,
2937
+ "items": [],
2938
+ "warnings": [
2939
+ {
2940
+ "code": "FILE_ASSETS_UNAVAILABLE",
2941
+ "message": f"record_get could not collect file assets: {exc}",
2942
+ }
2943
+ ],
2944
+ }
2945
+
2870
2946
  def _record_get_refreshed_media_source_url(
2871
2947
  self,
2872
2948
  context, # type: ignore[no-untyped-def]
@@ -7896,6 +7972,172 @@ class RecordTools(ToolBase):
7896
7972
 
7897
7973
  return self._run_record_tool(profile, runner)
7898
7974
 
7975
def _record_list_query_view_fields(
    self,
    *,
    session_profile,
    context,
    app_key: str,
    view_route: AccessibleViewRoute,
    page_num: int,
    page_size: int,
    query_key: str | None,
    search_que_ids: list[int] | None,
    match_rules: list[JSONObject],
    sort_rules: list[JSONObject],
    max_rows: int,
    selected_fields: list[FormField],
    output_profile: str,
) -> JSONObject:
    """Run public record_list with fields already resolved from the selected view schema.

    Fetches a single backend page (``page_num`` clamped to at least 1) through
    ``self._search_page`` and flattens at most ``max_rows`` of its rows onto
    ``selected_fields``.

    List-type selection:
      * when the route carries a view selection, one request is made with the
        route's ``list_type`` (or ``DEFAULT_RECORD_LIST_TYPE`` when it is unset);
      * when the route only carries a non-default ``list_type``, that single type
        is used;
      * otherwise ``DEFAULT_RECORD_LIST_TYPE`` is tried first, then 14, 1, 2 and 12,
        advancing only while ``self._should_retry_list_type_fallback`` accepts the
        error and another candidate remains.

    Returns:
        A response object whose ``data.list`` holds ``rows``, cap/sample flags,
        ``pagination`` and ``applied_limits``.  When ``output_profile`` is
        ``"verbose"`` it additionally carries ``normalized_rows``, ``completeness``,
        ``evidence`` and ``resolved_mappings``.

    Raises:
        QingflowApiError: the last backend error when no candidate list type
            succeeds (re-raised as-is).
    """
    view_selection = view_route.view_selection
    current_page = max(page_num, 1)
    used_list_type: int | None = None
    if view_selection is not None:
        fallback_list_types = [view_route.list_type if view_route.list_type is not None else DEFAULT_RECORD_LIST_TYPE]
    elif view_route.list_type is not None and view_route.list_type != DEFAULT_RECORD_LIST_TYPE:
        fallback_list_types = [view_route.list_type]
    else:
        fallback_list_types = [DEFAULT_RECORD_LIST_TYPE, 14, 1, 2, 12]
    last_error: QingflowApiError | None = None
    page: JSONObject | None = None
    # Compare positions, not values: a candidate that merely equals the final
    # list type must not be mistaken for the last attempt.
    last_position = len(fallback_list_types) - 1
    for position, candidate_list_type in enumerate(fallback_list_types):
        try:
            page = self._search_page(
                context,
                app_key=app_key,
                view_selection=view_selection,
                page_num=current_page,
                page_size=page_size,
                query_key=query_key,
                match_rules=match_rules,
                sorts=sort_rules,
                search_que_ids=search_que_ids,
                list_type=candidate_list_type,
            )
        except QingflowApiError as exc:
            last_error = exc
            if self._should_retry_list_type_fallback(exc) and position < last_position:
                continue
            raise
        else:
            # A view selection is reported with no list type; only the plain
            # list-type route echoes which candidate succeeded.
            used_list_type = None if view_selection is not None else candidate_list_type
            break
    if page is None:
        if last_error is not None:
            raise last_error
        raise_tool_error(QingflowApiError.config_error("record_list failed: no accessible listType"))

    page_rows = page.get("list")
    items = page_rows if isinstance(page_rows, list) else []
    reported_total = _coerce_count(page.get("total"))
    if reported_total is None:
        reported_total = _coerce_count(page.get("count"))
    result_amount = _effective_total(page, page_size)
    has_more = _page_has_more(page, current_page, page_size, len(items))
    rows: list[JSONObject] = []
    normalized_rows: list[JSONObject] = []
    page_apply_order: list[int] = []
    page_answer_map: dict[int, list[JSONValue]] = {}
    for item in items:
        # Enforce the cap before taking a row so a non-positive cap yields no rows.
        if len(rows) >= max_rows:
            break
        if not isinstance(item, dict):
            continue
        answers = item.get("answers")
        answer_list = answers if isinstance(answers, list) else []
        apply_id = _coerce_count(item.get("applyId")) or _coerce_count(item.get("id"))
        row = _build_flat_row(answer_list, selected_fields, apply_id=apply_id)
        rows.append(row)
        if apply_id is not None:
            page_apply_order.append(apply_id)
            page_answer_map[apply_id] = cast(list[JSONValue], answer_list)
    if output_profile == "verbose" and page_apply_order:
        # Normalized records are only produced for rows that carry an apply id.
        for apply_id in page_apply_order:
            normalized_record, normalized_ambiguous_fields = _build_normalized_row_from_answers(
                page_answer_map.get(apply_id, []),
                selected_fields,
            )
            normalized_rows.append(
                {
                    "apply_id": apply_id,
                    "normalized_record": normalized_record,
                    "normalized_ambiguous_fields": normalized_ambiguous_fields,
                }
            )
    # Fall back to the number of rows kept when no total could be derived.
    effective_result_amount = result_amount if result_amount is not None else len(rows)
    completeness = _build_completeness(
        result_amount=effective_result_amount,
        returned_items=len(rows),
        fetched_pages=1,
        requested_pages=1,
        has_more=has_more,
        next_page_token=None,
        is_complete=not has_more and len(rows) < max_rows,
        omitted_items=max(0, effective_result_amount - len(rows)),
        extra={},
    )
    evidence = {
        "query_id": _query_id(),
        "app_key": app_key,
        "filters": _echo_filters(match_rules),
        "selected_columns": [field.que_title for field in selected_fields],
        "time_range": None,
        "source_pages": [current_page],
        "view": _view_selection_payload(view_selection),
        "backend_reported_total": reported_total,
    }
    response: JSONObject = {
        "profile": session_profile.profile,
        "ws_id": session_profile.selected_ws_id,
        "ok": True,
        "request_route": self._request_route_payload(context),
        "data": {
            "mode": "list",
            "source_tool": "record_list",
            "view": _view_selection_payload(view_selection),
            "list": {
                "rows": rows,
                "row_cap_hit": _list_row_cap_hit(returned_items=len(rows), row_cap=max_rows),
                "sample_only": _list_sample_only(
                    returned_items=len(rows),
                    row_cap=max_rows,
                    result_amount=effective_result_amount,
                ),
                "safe_for_final_conclusion": False,
                "analysis_warning": _list_sample_warning(
                    returned_items=len(rows),
                    row_cap=max_rows,
                    result_amount=effective_result_amount,
                ),
                "pagination": {
                    "page_num": current_page,
                    "page_size": page_size,
                    "requested_pages": 1,
                    "result_amount": effective_result_amount,
                    "returned_items": len(rows),
                    "list_type_used": used_list_type,
                },
                "applied_limits": {
                    "row_cap": max_rows,
                    "column_cap": len(selected_fields),
                    "selected_columns": [field.que_title for field in selected_fields],
                },
            },
        },
        "output_profile": output_profile,
        "next_page_token": None,
    }
    if output_profile == "verbose":
        cast(JSONObject, cast(JSONObject, response["data"])["list"])["normalized_rows"] = normalized_rows
        response["completeness"] = completeness
        response["evidence"] = evidence
        response["resolved_mappings"] = {
            "select_columns": [_field_mapping_entry("row", field, requested=field.que_title) for field in selected_fields],
            "filters": [],
            "time_range": None,
        }
    return response
8140
+
7899
8141
  def _get_form_schema(self, profile: str, context, app_key: str, *, force_refresh: bool) -> JSONObject: # type: ignore[no-untyped-def]
7900
8142
  """执行内部辅助逻辑。"""
7901
8143
  cache_key = (profile, app_key, "applicant_node", None)
@@ -8392,63 +8634,7 @@ class RecordTools(ToolBase):
8392
8634
  force_refresh=False,
8393
8635
  )
8394
8636
  index = cast(FieldIndex, browse_scope["index"])
8395
- visible_question_ids = cast(set[int], browse_scope["visible_question_ids"])
8396
- resolved: list[int] = []
8397
- seen: set[int] = set()
8398
- for selector in selectors:
8399
- try:
8400
- field = self._resolve_field_selector(selector, index, location="record_list.query_fields")
8401
- except RecordInputError as exc:
8402
- if exc.error_code == "FIELD_NOT_FOUND":
8403
- raise RecordInputError(
8404
- message=(
8405
- f"record_list query field_id '{selector}' is not in the selected view schema "
8406
- f"({resolved_view.view_id})."
8407
- ),
8408
- error_code="QUERY_FIELD_NOT_IN_VIEW_SCHEMA",
8409
- fix_hint="Call record_browse_schema_get for this exact view_id and pass only field_id values from its fields[].",
8410
- details={
8411
- "location": "record_list.query_fields",
8412
- "requested": selector,
8413
- "view_id": resolved_view.view_id,
8414
- "view_name": resolved_view.name,
8415
- },
8416
- ) from exc
8417
- raise
8418
- if field.que_id not in visible_question_ids:
8419
- raise RecordInputError(
8420
- message=(
8421
- f"record_list query field_id '{field.que_id}' is not readable in the selected view "
8422
- f"({resolved_view.view_id})."
8423
- ),
8424
- error_code="QUERY_FIELD_NOT_IN_VIEW_SCHEMA",
8425
- fix_hint="Call record_browse_schema_get for this exact view_id and pass only field_id values from its fields[].",
8426
- details={
8427
- "location": "record_list.query_fields",
8428
- "requested": selector,
8429
- "field_id": field.que_id,
8430
- "view_id": resolved_view.view_id,
8431
- "view_name": resolved_view.name,
8432
- },
8433
- )
8434
- if field.que_id in seen:
8435
- continue
8436
- resolved.append(field.que_id)
8437
- seen.add(field.que_id)
8438
- if len(resolved) > BACKEND_LIST_SEARCH_FIELD_LIMIT:
8439
- raise RecordInputError(
8440
- message=(
8441
- f"record_list query_fields supports at most {BACKEND_LIST_SEARCH_FIELD_LIMIT} fields."
8442
- ),
8443
- error_code="QUERY_FIELDS_TOO_MANY",
8444
- fix_hint="Narrow query_fields to the most likely title/name/customer/number fields, or omit query_fields to use the backend default search scope.",
8445
- details={
8446
- "location": "record_list.query_fields",
8447
- "max_fields": BACKEND_LIST_SEARCH_FIELD_LIMIT,
8448
- "received": len(resolved),
8449
- },
8450
- )
8451
- return resolved
8637
+ return self._resolve_record_list_query_fields(selectors, index, view_route=resolved_view)
8452
8638
 
8453
8639
  return cast(list[int], self._run_record_tool(profile, runner))
8454
8640
 
@@ -8460,7 +8646,7 @@ class RecordTools(ToolBase):
8460
8646
  resolved_view: AccessibleViewRoute,
8461
8647
  ) -> list[int]:
8462
8648
  """执行内部辅助逻辑。"""
8463
- browse_scope = self._build_browse_write_scope(
8649
+ browse_scope = self._build_browse_read_scope(
8464
8650
  profile,
8465
8651
  context,
8466
8652
  app_key,
@@ -8468,33 +8654,7 @@ class RecordTools(ToolBase):
8468
8654
  force_refresh=False,
8469
8655
  )
8470
8656
  index = cast(FieldIndex, browse_scope["index"])
8471
- visible_question_ids = cast(set[int], browse_scope["visible_question_ids"])
8472
- ordered_visible_fields = [
8473
- field
8474
- for field in self._schema_fields_for_mode(
8475
- profile,
8476
- context,
8477
- app_key,
8478
- index,
8479
- schema_mode="browse",
8480
- resolved_view=resolved_view,
8481
- )
8482
- if field.que_id in visible_question_ids and field.que_type not in LAYOUT_ONLY_QUE_TYPES
8483
- ]
8484
- field_ids = [field.que_id for field in ordered_visible_fields[:MAX_LIST_COLUMN_LIMIT]]
8485
- if not field_ids:
8486
- field_ids = [
8487
- field.que_id
8488
- for field in index.by_id.values()
8489
- if field.que_type not in LAYOUT_ONLY_QUE_TYPES
8490
- ][:MAX_LIST_COLUMN_LIMIT]
8491
- if not field_ids:
8492
- raise_tool_error(
8493
- QingflowApiError.config_error(
8494
- "record_list could not determine readable columns for the selected view"
8495
- )
8496
- )
8497
- return field_ids
8657
+ return [field.que_id for field in self._derive_record_list_fields_from_index(index)]
8498
8658
 
8499
8659
  def _get_view_question_ids(self, profile: str, context, view_key: str) -> set[int]: # type: ignore[no-untyped-def]
8500
8660
  """执行内部辅助逻辑。"""
@@ -10446,14 +10606,161 @@ class RecordTools(ToolBase):
10446
10606
  seen.add(field.que_id)
10447
10607
  return fields
10448
10608
 
10449
- def _resolve_summary_preview_fields(
10609
def _derive_record_list_fields_from_index(self, index: FieldIndex) -> list[FormField]:
    """Choose the default record_list columns directly from a field index.

    Iterates ``index.by_id`` in its stored order, drops every field whose
    ``que_type`` is in ``LAYOUT_ONLY_QUE_TYPES``, and keeps the first
    ``MAX_LIST_COLUMN_LIMIT`` of the remainder.  When nothing is left, the
    problem is reported through ``raise_tool_error`` as a configuration error.
    """
    displayable: list[FormField] = []
    for candidate in index.by_id.values():
        if candidate.que_type in LAYOUT_ONLY_QUE_TYPES:
            continue
        displayable.append(candidate)
    chosen = displayable[:MAX_LIST_COLUMN_LIMIT]
    if chosen:
        return chosen
    raise_tool_error(
        QingflowApiError.config_error(
            "record_list could not determine readable columns for the selected view"
        )
    )
    return chosen
10622
+
10623
+ def _resolve_record_list_columns(
10450
10624
  self,
10451
- selectors: list[str | int],
10625
+ selectors: list[int],
10452
10626
  index: FieldIndex,
10453
- amount_field: FormField | None,
10454
- time_field: FormField | None,
10455
10627
  *,
10456
- max_columns: int | None,
10628
+ view_route: AccessibleViewRoute,
10629
+ ) -> list[FormField]:
10630
+ if not selectors:
10631
+ raise_tool_error(QingflowApiError.config_error("columns is required"))
10632
+ fields: list[FormField] = []
10633
+ seen: set[int] = set()
10634
+ for selector in selectors:
10635
+ try:
10636
+ field = self._resolve_field_selector(selector, index, location="record_list.columns")
10637
+ except RecordInputError as exc:
10638
+ if exc.error_code == "FIELD_NOT_FOUND":
10639
+ raise self._record_list_field_not_in_view_error(
10640
+ exc,
10641
+ location="record_list.columns",
10642
+ error_code="FIELD_NOT_IN_VIEW_SCHEMA",
10643
+ view_route=view_route,
10644
+ ) from exc
10645
+ raise
10646
+ if field.que_id in seen:
10647
+ continue
10648
+ fields.append(field)
10649
+ seen.add(field.que_id)
10650
+ return fields
10651
+
10652
def _resolve_record_list_query_fields(
    self,
    selectors: list[int],
    index: FieldIndex,
    *,
    view_route: AccessibleViewRoute,
) -> list[int]:
    """Resolve record_list ``query_fields`` selectors into unique ``que_id`` values.

    Selectors are resolved one by one through ``self._resolve_field_selector``;
    ids are returned in first-seen order.  A ``FIELD_NOT_FOUND`` input error is
    re-raised as ``QUERY_FIELD_NOT_IN_VIEW_SCHEMA`` for the selected view, and a
    ``QUERY_FIELDS_TOO_MANY`` input error is raised as soon as the number of
    distinct ids exceeds ``BACKEND_LIST_SEARCH_FIELD_LIMIT``.
    """
    # Keys keep insertion order; values are unused.
    unique_ids: dict[int, None] = {}
    for selector in selectors:
        try:
            resolved_field = self._resolve_field_selector(selector, index, location="record_list.query_fields")
        except RecordInputError as error:
            if error.error_code != "FIELD_NOT_FOUND":
                raise
            raise self._record_list_field_not_in_view_error(
                error,
                location="record_list.query_fields",
                error_code="QUERY_FIELD_NOT_IN_VIEW_SCHEMA",
                view_route=view_route,
            ) from error
        if resolved_field.que_id in unique_ids:
            continue
        unique_ids[resolved_field.que_id] = None
        # Checked after every new id so the error fires before later selectors are resolved.
        if len(unique_ids) > BACKEND_LIST_SEARCH_FIELD_LIMIT:
            raise RecordInputError(
                message=(
                    f"record_list query_fields supports at most {BACKEND_LIST_SEARCH_FIELD_LIMIT} fields."
                ),
                error_code="QUERY_FIELDS_TOO_MANY",
                fix_hint="Narrow query_fields to the most likely title/name/customer/number fields, or omit query_fields to use the backend default search scope.",
                details={
                    "location": "record_list.query_fields",
                    "max_fields": BACKEND_LIST_SEARCH_FIELD_LIMIT,
                    "received": len(unique_ids),
                },
            )
    return list(unique_ids)
10691
+
10692
+ def _resolve_record_list_match_rules(
10693
+ self,
10694
+ context, # type: ignore[no-untyped-def]
10695
+ filters: list[JSONObject],
10696
+ index: FieldIndex,
10697
+ *,
10698
+ view_route: AccessibleViewRoute,
10699
+ ) -> list[JSONObject]:
10700
+ try:
10701
+ return self._resolve_match_rules(context, filters, index)
10702
+ except RecordInputError as exc:
10703
+ if exc.error_code == "FIELD_NOT_FOUND":
10704
+ raise self._record_list_field_not_in_view_error(
10705
+ exc,
10706
+ location="record_list.where",
10707
+ error_code="FILTER_FIELD_NOT_IN_VIEW_SCHEMA",
10708
+ view_route=view_route,
10709
+ ) from exc
10710
+ raise
10711
+
10712
+ def _resolve_record_list_sort_rules(
10713
+ self,
10714
+ sorts: list[JSONObject],
10715
+ index: FieldIndex,
10716
+ *,
10717
+ view_route: AccessibleViewRoute,
10718
+ ) -> list[JSONObject]:
10719
+ try:
10720
+ return self._resolve_sorts(sorts, index)
10721
+ except RecordInputError as exc:
10722
+ if exc.error_code == "FIELD_NOT_FOUND":
10723
+ raise self._record_list_field_not_in_view_error(
10724
+ exc,
10725
+ location="record_list.order_by",
10726
+ error_code="SORT_FIELD_NOT_IN_VIEW_SCHEMA",
10727
+ view_route=view_route,
10728
+ ) from exc
10729
+ raise
10730
+
10731
def _record_list_field_not_in_view_error(
    self,
    exc: RecordInputError,
    *,
    location: str,
    error_code: str,
    view_route: AccessibleViewRoute,
) -> RecordInputError:
    """Build (not raise) the "field is not in the selected view schema" input error.

    The offending selector is read from ``exc.details["requested"]`` when
    ``exc.details`` is a dict; otherwise it is reported as ``None``.  The caller
    chooses the ``location`` and ``error_code`` stamped on the new error.
    """
    source_details = exc.details
    if not isinstance(source_details, dict):
        source_details = {}
    requested = source_details.get("requested")
    message = (
        f"{location} field_id '{requested}' is not in the selected view schema "
        f"({view_route.view_id})."
    )
    error_details = {
        "location": location,
        "requested": requested,
        "view_id": view_route.view_id,
        "view_name": view_route.name,
    }
    return RecordInputError(
        message=message,
        error_code=error_code,
        fix_hint="Call record_browse_schema_get for this exact view_id and pass only field_id values from its fields[].",
        details=error_details,
    )
10755
+
10756
+ def _resolve_summary_preview_fields(
10757
+ self,
10758
+ selectors: list[str | int],
10759
+ index: FieldIndex,
10760
+ amount_field: FormField | None,
10761
+ time_field: FormField | None,
10762
+ *,
10763
+ max_columns: int | None,
10457
10764
  ) -> list[FormField]:
10458
10765
  """执行内部辅助逻辑。"""
10459
10766
  if selectors:
@@ -13412,6 +13719,19 @@ _RECORD_MEDIA_IMG_SRC_RE = re.compile(r"""<img\b[^>]*\bsrc\s*=\s*["']?([^"'\s>]+
13412
13719
  _RECORD_MEDIA_MD_IMAGE_RE = re.compile(r"""!\[[^\]]*]\(([^)\s]+)(?:\s+["'][^"']*["'])?\)""")
13413
13720
  _RECORD_MEDIA_URL_RE = re.compile(r"""https?://[^\s<>"')\]]+""", re.IGNORECASE)
13414
13721
  _RECORD_MEDIA_IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".svg"}
13722
+ _RECORD_FILE_EXTENSIONS = _RECORD_MEDIA_IMAGE_EXTENSIONS | {
13723
+ ".csv",
13724
+ ".doc",
13725
+ ".docx",
13726
+ ".json",
13727
+ ".md",
13728
+ ".pdf",
13729
+ ".text",
13730
+ ".txt",
13731
+ ".xls",
13732
+ ".xlsm",
13733
+ ".xlsx",
13734
+ }
13415
13735
  _RECORD_MEDIA_IMAGE_URL_KEYS = {
13416
13736
  "image",
13417
13737
  "imageurl",
@@ -13431,6 +13751,15 @@ _RECORD_MEDIA_IMAGE_URL_KEYS = {
13431
13751
  "url",
13432
13752
  "value",
13433
13753
  }
13754
# Normalized mapping keys whose value may hold a file URL: every image URL key
# plus the download/link style keys below.
_RECORD_FILE_URL_KEYS = {
    *_RECORD_MEDIA_IMAGE_URL_KEYS,
    "downloadurl",
    "download_url",
    "file",
    "href",
    "link",
    "path",
}
# Normalized mapping keys whose value may hold a display name for the file.
_RECORD_FILE_NAME_KEYS = {"name", "otherinfo", "filename", "file_name", "title"}
13434
13763
 
13435
13764
 
13436
13765
  def _record_detail_media_assets_payload(
@@ -13656,6 +13985,286 @@ def _record_detail_media_assets_payload(
13656
13985
  return {"status": status, "local_dir": str(local_dir) if items else None, "items": items, "warnings": warnings}
13657
13986
 
13658
13987
 
13988
def _record_detail_file_assets_payload(
    *,
    backend: Any,
    context: BackendRequestContext,
    app_key: str,
    record_id: int,
    fields: list[JSONObject],
    references: list[JSONObject],
    media_assets: JSONObject,
    refresh_source_url: Any | None = None,
) -> JSONObject:
    """Download the files referenced by a record and describe each one.

    Candidates are collected from ``fields`` and from every reference's
    ``target_fields``. Each distinct ``source_url`` becomes one item with a
    ``file_asset_id`` that is also attached to the owning field dict.

    Per candidate, in order:
      * stop when the remaining time budget is too small (only after at least
        one item exists);
      * reuse an already-downloaded entry of ``media_assets`` when one matches;
      * otherwise download, retrying once with a refreshed URL obtained from
        ``refresh_source_url(candidate)`` after a 401/403 (unless the strategy
        is ``"referer_acl"``);
      * enforce the per-file and total byte budgets;
      * write the bytes to ``local_dir`` and attempt text extraction.

    Returns a dict with ``status`` (``"none"``, ``"partial"`` or ``"ok"``),
    ``local_dir``, ``items`` and ``warnings``. ``local_dir`` is only reported
    when the directory was actually created by this call.
    """
    candidates: list[JSONObject] = []
    source_record_id = _public_record_id_text(record_id)
    for field in fields:
        if isinstance(field, dict):
            candidates.extend(
                _record_detail_file_candidates_from_field(
                    field,
                    source_app_key=app_key,
                    source_record_id=source_record_id,
                    forced_source=None,
                )
            )
    for reference in references:
        if not isinstance(reference, dict):
            continue
        target_fields = reference.get("target_fields") if isinstance(reference.get("target_fields"), list) else []
        target_app_key = _normalize_optional_text(reference.get("target_app_key")) or app_key
        target_record_id = _normalize_optional_text(reference.get("target_record_id"))
        for field in target_fields:
            if isinstance(field, dict):
                candidates.extend(
                    _record_detail_file_candidates_from_field(
                        field,
                        source_app_key=target_app_key,
                        source_record_id=target_record_id,
                        forced_source="reference_target",
                    )
                )
    if not candidates:
        return {"status": "none", "local_dir": None, "items": [], "warnings": []}

    local_dir = _record_detail_file_assets_dir(uuid4().hex)
    local_dir_created = False
    items: list[JSONObject] = []
    warnings: list[JSONObject] = []
    file_by_url: dict[str, str] = {}
    media_by_url = _record_detail_media_assets_by_url(media_assets)
    media_by_asset_id = _record_detail_media_assets_by_asset_id(media_assets)
    total_bytes = 0
    downloaded_count = 0
    deadline = time.monotonic() + RECORD_GET_FILE_TIME_BUDGET_SECONDS
    stopped_for_time_budget = False
    limit_warning_emitted = False
    environment_prefix_cache: dict[str, str] = {}

    def ensure_local_dir() -> None:
        # Created lazily so runs without a successful download leave no empty directory.
        nonlocal local_dir_created
        if not local_dir_created:
            local_dir.mkdir(parents=True, exist_ok=True)
            local_dir_created = True

    for candidate in candidates:
        # The first item is always attempted; later ones need enough remaining time.
        if items and time.monotonic() + RECORD_GET_FILE_MIN_REMAINING_SECONDS >= deadline:
            stopped_for_time_budget = True
            warnings.append(
                {
                    "code": "FILE_ASSET_TIME_BUDGET_EXCEEDED",
                    "message": "record_get stopped downloading additional file assets to stay within the internal time budget.",
                    "time_budget_seconds": RECORD_GET_FILE_TIME_BUDGET_SECONDS,
                }
            )
            break
        source_url = _normalize_optional_text(candidate.get("source_url"))
        owner = candidate.get("_owner")
        if not source_url or not isinstance(owner, dict):
            continue
        existing_asset_id = file_by_url.get(source_url)
        if existing_asset_id:
            # Same URL seen before: share the asset id instead of downloading again.
            _record_detail_attach_file_asset_id(owner, existing_asset_id)
            continue
        file_asset_id = f"file_{len(items) + 1:04d}"
        file_by_url[source_url] = file_asset_id
        _record_detail_attach_file_asset_id(owner, file_asset_id)
        base_item = _record_detail_file_asset_base_item(candidate, file_asset_id=file_asset_id)

        media_item = media_by_url.get(source_url)
        if media_item is None:
            media_item = _record_detail_media_item_from_owner_asset_ids(owner, media_by_asset_id, candidate)
        if isinstance(media_item, dict) and media_item.get("asset_id") not in (None, ""):
            base_item["media_asset_id"] = media_item.get("asset_id")
        if downloaded_count >= RECORD_GET_FILE_MAX_FILES:
            items.append(
                {
                    **base_item,
                    "local_path": None,
                    "mime_type": None,
                    "size_bytes": None,
                    "access_status": "too_large",
                    "download_strategy": "skipped_limit",
                    "readable_by_agent": False,
                    "extraction": {"status": "skipped_too_large", "text_path": None, "preview": None},
                }
            )
            # Every skipped asset still gets an item; the warning is reported once.
            if not limit_warning_emitted:
                limit_warning_emitted = True
                warnings.append(
                    {
                        "code": "FILE_ASSET_LIMIT_EXCEEDED",
                        "message": f"record_get stopped downloading files after {RECORD_GET_FILE_MAX_FILES} assets.",
                    }
                )
            continue

        reused_media_path = _normalize_optional_text(media_item.get("local_path")) if isinstance(media_item, dict) else None
        if reused_media_path and media_item.get("access_status") == "downloaded":
            # The media pass already fetched this asset; point at its file instead of re-downloading.
            file_name = _record_detail_file_name_from_candidate(candidate, source_url=source_url, fallback_id=file_asset_id)
            mime_type = _normalize_optional_text(media_item.get("mime_type")) or _record_detail_mime_from_url(source_url)
            items.append(
                {
                    **base_item,
                    "download_strategy": media_item.get("download_strategy"),
                    "storage_auth_type": media_item.get("storage_auth_type"),
                    "storage_cookie_prefix": media_item.get("storage_cookie_prefix"),
                    "redirected": media_item.get("redirected"),
                    "file_name": file_name,
                    "local_path": reused_media_path,
                    "mime_type": mime_type,
                    "size_bytes": media_item.get("size_bytes"),
                    "access_status": "downloaded",
                    "readable_by_agent": True,
                    "extraction": {"status": "unsupported", "text_path": None, "preview": None},
                }
            )
            downloaded_count += 1
            continue

        download_strategy = _record_detail_media_download_strategy(source_url)
        content: bytes = b""
        download_meta: JSONObject = {}
        # The last download error is kept in ``failure`` because the ``except``
        # target name is unbound once its clause ends.
        failure: QingflowApiError | None = None
        blocked = False
        try:
            content, download_meta = _record_detail_download_media_content(
                backend=backend,
                context=context,
                source_url=source_url,
                warnings=warnings,
                environment_prefix_cache=environment_prefix_cache,
                requested_strategy=download_strategy,
            )
        except QingflowApiError as exc:
            failure = exc
            blocked = exc.http_status in {401, 403}
            if blocked and download_strategy != "referer_acl" and callable(refresh_source_url):
                refreshed_url = _normalize_optional_text(refresh_source_url(candidate))
                if refreshed_url and refreshed_url != source_url:
                    refreshed_strategy = _record_detail_media_download_strategy(refreshed_url)
                    try:
                        content, download_meta = _record_detail_download_media_content(
                            backend=backend,
                            context=context,
                            source_url=refreshed_url,
                            warnings=warnings,
                            environment_prefix_cache=environment_prefix_cache,
                            requested_strategy=(
                                download_strategy
                                if download_strategy == "decrypted_file_url_then_storage_cookie_redirect"
                                else refreshed_strategy
                            ),
                        )
                    except QingflowApiError as refreshed_exc:
                        failure = refreshed_exc
                        blocked = refreshed_exc.http_status in {401, 403}
                    else:
                        failure = None
                        source_url = refreshed_url
                        base_item["source_url"] = refreshed_url
                        warnings.append(
                            {
                                "code": "FILE_ASSET_STORAGE_URL_REFRESHED",
                                "file_asset_id": file_asset_id,
                                "message": "record_get refreshed the record detail once before downloading this file asset.",
                            }
                        )
        if failure is not None:
            warning_code = "STORAGE_COOKIE_AUTH_FAILED" if blocked and download_strategy != "referer_acl" else "FILE_ASSET_DOWNLOAD_FAILED"
            items.append(
                {
                    **base_item,
                    "storage_auth_type": _record_detail_storage_auth_type(source_url),
                    "storage_cookie_prefix": environment_prefix_cache.get("value"),
                    "redirected": False,
                    "local_path": None,
                    "mime_type": _record_detail_mime_from_url(source_url),
                    "size_bytes": None,
                    "access_status": "blocked_private_url" if blocked else "download_failed",
                    "download_strategy": download_strategy,
                    "readable_by_agent": False,
                    "extraction": {"status": "failed", "text_path": None, "preview": None},
                }
            )
            warnings.append(
                {
                    "code": warning_code,
                    "file_asset_id": file_asset_id,
                    "message": f"record_get could not download file asset {file_asset_id}: {failure.message}",
                    "http_status": failure.http_status,
                }
            )
            continue

        if not isinstance(content, bytes):
            content = bytes(content or b"")
        file_name = _record_detail_file_name_from_candidate(candidate, source_url=source_url, fallback_id=file_asset_id)
        mime_type = _record_detail_file_mime_from_content_or_name(content, source_url=source_url, file_name=file_name)
        size_bytes = len(content)
        if size_bytes > RECORD_GET_FILE_MAX_BYTES or total_bytes + size_bytes > RECORD_GET_FILE_MAX_TOTAL_BYTES:
            items.append(
                {
                    **base_item,
                    **download_meta,
                    "file_name": file_name,
                    "local_path": None,
                    "mime_type": mime_type,
                    "size_bytes": size_bytes,
                    "access_status": "too_large",
                    "readable_by_agent": False,
                    "extraction": {"status": "skipped_too_large", "text_path": None, "preview": None},
                }
            )
            warnings.append(
                {
                    "code": "FILE_ASSET_SIZE_LIMIT_EXCEEDED",
                    "file_asset_id": file_asset_id,
                    "message": "record_get skipped a file asset because it exceeded the internal file size budget.",
                }
            )
            continue

        ensure_local_dir()
        extension = _record_detail_file_extension(mime_type, source_url=source_url, file_name=file_name)
        local_path = local_dir / f"{file_asset_id}{extension}"
        local_path.write_bytes(content)
        extraction = _record_detail_extract_file_asset_text(
            content,
            mime_type=mime_type,
            file_name=file_name,
            local_dir=local_dir,
            file_asset_id=file_asset_id,
        )
        if extraction.get("status") == "failed":
            warnings.append(
                {
                    "code": "FILE_ASSET_EXTRACTION_FAILED",
                    "file_asset_id": file_asset_id,
                    "message": f"record_get downloaded file asset {file_asset_id}, but text extraction failed.",
                }
            )
        total_bytes += size_bytes
        downloaded_count += 1
        items.append(
            {
                **base_item,
                **download_meta,
                "file_name": file_name,
                "local_path": str(local_path),
                "mime_type": mime_type,
                "size_bytes": size_bytes,
                "access_status": "downloaded",
                "readable_by_agent": extraction.get("status") == "ok" or _record_detail_image_mime_from_bytes(content) is not None,
                "extraction": extraction,
            }
        )

    if not items:
        status = "none"
    elif stopped_for_time_budget or any(
        item.get("access_status") != "downloaded"
        or cast(JSONObject, item.get("extraction", {})).get("status") == "failed"
        for item in items
    ):
        status = "partial"
    else:
        status = "ok"
    # Report the directory only when it exists: items that failed, were skipped,
    # or reuse a media file never cause it to be created.
    return {
        "status": status,
        "local_dir": str(local_dir) if local_dir_created else None,
        "items": items,
        "warnings": warnings,
    }
14266
+
14267
+
13659
14268
  def _record_detail_media_candidates_from_field(
13660
14269
  field: JSONObject,
13661
14270
  *,
@@ -13736,6 +14345,104 @@ def _record_detail_media_candidates_from_field(
13736
14345
  return candidates
13737
14346
 
13738
14347
 
14348
def _record_detail_file_candidates_from_field(
    field: JSONObject,
    *,
    source_app_key: str | None,
    source_record_id: str | None,
    forced_source: str | None,
) -> list[JSONObject]:
    """Collect downloadable-file candidates from one field payload.

    ``field["value"]`` is walked recursively (strings, lists, mappings) and
    ``field["display_value"]`` is scanned as rich text. Each distinct
    normalized URL yields one candidate dict that keeps a reference to
    ``field`` under ``"_owner"``. ``forced_source``, when truthy, replaces the
    detected ``source`` label on every candidate.
    """
    field_id = _coerce_count(field.get("field_id"))
    field_title = _normalize_optional_text(field.get("title"))
    field_type = _normalize_optional_text(field.get("type"))
    found: list[JSONObject] = []
    known_urls: set[str] = set()

    def add_candidate(url: str | None, *, source: str, path: str, name: str | None = None, file_hint: bool = False) -> None:
        clean_url = _record_detail_normalize_media_url(url)
        if not clean_url or clean_url in known_urls or not _record_detail_supported_file_url(clean_url):
            return
        # Without a structural hint the URL or name must itself look like a file.
        if not (file_hint or _record_detail_url_or_name_looks_like_file(clean_url, name)):
            return
        known_urls.add(clean_url)
        found.append(
            {
                "_owner": field,
                "kind": "file",
                "source": forced_source or source,
                "source_path": path,
                "field_id": field_id,
                "field_title": field_title,
                "source_app_key": source_app_key,
                "source_record_id": source_record_id,
                "source_url": clean_url,
                "file_name": name,
            }
        )

    def candidate_name_from_mapping(value: dict[Any, Any]) -> str | None:
        # First scalar, non-empty value stored under a recognised name key.
        for key, item in value.items():
            if _record_detail_media_key(key) not in _RECORD_FILE_NAME_KEYS:
                continue
            text = None if isinstance(item, (dict, list)) else _normalize_optional_text(item)
            if text:
                return text
        return None

    def scan_text(value: str, *, path: str, source: str, file_hint: bool = False) -> None:
        # Embedded images (HTML <img src> and markdown) first, then bare URLs.
        for pattern in (_RECORD_MEDIA_IMG_SRC_RE, _RECORD_MEDIA_MD_IMAGE_RE):
            for match in pattern.finditer(value):
                add_candidate(match.group(1), source="rich_text", path=path, file_hint=True)
        for match in _RECORD_MEDIA_URL_RE.finditer(value):
            add_candidate(match.group(0), source=source, path=path, file_hint=file_hint)

    def scan_value(value: JSONValue, *, path: str, source: str, file_hint: bool = False) -> None:
        if isinstance(value, str):
            scan_text(value, path=path, source=source, file_hint=file_hint)
            return
        if isinstance(value, list):
            for position, element in enumerate(value):
                scan_value(cast(JSONValue, element), path=f"{path}[{position}]", source=source, file_hint=file_hint)
            return
        if not isinstance(value, dict):
            return

        attachment = _extract_attachment_item(cast(JSONValue, value))
        if attachment:
            add_candidate(
                _normalize_optional_text(attachment.get("value")),
                source=source,
                path=path,
                name=_normalize_optional_text(attachment.get("name")),
                file_hint=True,
            )
        mapping_name = candidate_name_from_mapping(value)
        for key, item in value.items():
            normalized_key = _record_detail_media_key(key)
            item_text = None if isinstance(item, (dict, list)) else _normalize_optional_text(item)
            child_path = f"{path}.{key}"
            child_source = source
            child_hint = file_hint
            if normalized_key in _RECORD_FILE_URL_KEYS:
                if source == "attachment":
                    child_source = "attachment"
                elif source == "subtable":
                    child_source = "subtable"
                else:
                    child_source = "image_field"
                # Generic "value"/"url" keys are a file hint only inside attachments.
                child_hint = source == "attachment" or normalized_key not in {"value", "url"}
                if item_text:
                    add_candidate(item_text, source=child_source, path=child_path, name=mapping_name, file_hint=child_hint)
            scan_value(cast(JSONValue, item), path=child_path, source=child_source, file_hint=child_hint)

    value = cast(JSONValue, field.get("value"))
    display_value = cast(JSONValue, field.get("display_value"))
    if field_type in ("attachment", "subtable"):
        scan_value(value, path="value", source=field_type, file_hint=True)
    else:
        scan_value(value, path="value", source="image_field", file_hint=False)
    scan_value(display_value, path="display_value", source="rich_text", file_hint=False)
    return found
14444
+
14445
+
13739
14446
  def _record_detail_attach_asset_id(field: JSONObject, asset_id: str) -> None:
13740
14447
  asset_ids = field.get("asset_ids")
13741
14448
  if not isinstance(asset_ids, list):
@@ -13745,6 +14452,15 @@ def _record_detail_attach_asset_id(field: JSONObject, asset_id: str) -> None:
13745
14452
  asset_ids.append(asset_id)
13746
14453
 
13747
14454
 
14455
+ def _record_detail_attach_file_asset_id(field: JSONObject, file_asset_id: str) -> None:
14456
+ asset_ids = field.get("file_asset_ids")
14457
+ if not isinstance(asset_ids, list):
14458
+ asset_ids = []
14459
+ field["file_asset_ids"] = asset_ids
14460
+ if file_asset_id not in asset_ids:
14461
+ asset_ids.append(file_asset_id)
14462
+
14463
+
13748
14464
  def _record_detail_media_asset_base_item(candidate: JSONObject, *, asset_id: str) -> JSONObject:
13749
14465
  payload: JSONObject = {
13750
14466
  "asset_id": asset_id,
@@ -13760,12 +14476,77 @@ def _record_detail_media_asset_base_item(candidate: JSONObject, *, asset_id: str
13760
14476
  return payload
13761
14477
 
13762
14478
 
14479
+ def _record_detail_file_asset_base_item(candidate: JSONObject, *, file_asset_id: str) -> JSONObject:
14480
+ payload: JSONObject = {
14481
+ "file_asset_id": file_asset_id,
14482
+ "kind": candidate.get("kind") or "file",
14483
+ "source": candidate.get("source") or "attachment",
14484
+ "field_id": candidate.get("field_id"),
14485
+ "field_title": candidate.get("field_title"),
14486
+ "source_url": candidate.get("source_url"),
14487
+ }
14488
+ for key in ("source_path", "source_app_key", "source_record_id", "file_name"):
14489
+ if candidate.get(key) not in (None, ""):
14490
+ payload[key] = candidate.get(key)
14491
+ return payload
14492
+
14493
+
14494
def _record_detail_media_assets_by_url(media_assets: JSONObject) -> dict[str, JSONObject]:
    """Index ``media_assets["items"]`` by normalized ``source_url``.

    Non-dict entries and entries without a URL are ignored; when several
    entries share a URL the first one wins.
    """
    raw_items = media_assets.get("items")
    entries = raw_items if isinstance(raw_items, list) else []
    lookup: dict[str, JSONObject] = {}
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        url = _normalize_optional_text(entry.get("source_url"))
        if not url or url in lookup:
            continue
        lookup[url] = cast(JSONObject, entry)
    return lookup
14504
+
14505
+
14506
def _record_detail_media_assets_by_asset_id(media_assets: JSONObject) -> dict[str, JSONObject]:
    """Index ``media_assets["items"]`` by normalized ``asset_id``.

    Non-dict entries and entries without an id are ignored; when several
    entries share an id the first one wins.
    """
    raw_items = media_assets.get("items")
    entries = raw_items if isinstance(raw_items, list) else []
    lookup: dict[str, JSONObject] = {}
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        identifier = _normalize_optional_text(entry.get("asset_id"))
        if not identifier or identifier in lookup:
            continue
        lookup[identifier] = cast(JSONObject, entry)
    return lookup
14516
+
14517
+
14518
+ def _record_detail_media_item_from_owner_asset_ids(
14519
+ owner: JSONObject,
14520
+ media_by_asset_id: dict[str, JSONObject],
14521
+ candidate: JSONObject,
14522
+ ) -> JSONObject | None:
14523
+ asset_ids = owner.get("asset_ids") if isinstance(owner.get("asset_ids"), list) else []
14524
+ if len(asset_ids) != 1:
14525
+ return None
14526
+ media_item = media_by_asset_id.get(str(asset_ids[0]))
14527
+ if not isinstance(media_item, dict):
14528
+ return None
14529
+ candidate_name = _normalize_optional_text(candidate.get("file_name"))
14530
+ media_name = _normalize_optional_text(media_item.get("file_name"))
14531
+ if candidate_name and media_name and candidate_name != media_name:
14532
+ return None
14533
+ if candidate.get("field_id") not in (None, media_item.get("field_id")):
14534
+ return None
14535
+ return media_item
14536
+
14537
+
13763
14538
  def _record_detail_media_assets_dir(run_id: str) -> Path:
13764
14539
  custom_home = os.environ.get("QINGFLOW_MCP_RECORD_ASSETS_HOME")
13765
14540
  base_dir = Path(custom_home).expanduser() if custom_home else get_mcp_home() / "record-assets"
13766
14541
  return base_dir / run_id
13767
14542
 
13768
14543
 
14544
+ def _record_detail_file_assets_dir(run_id: str) -> Path:
14545
+ custom_home = os.environ.get("QINGFLOW_MCP_RECORD_FILES_HOME")
14546
+ base_dir = Path(custom_home).expanduser() if custom_home else get_mcp_home() / "record-files"
14547
+ return base_dir / run_id
14548
+
14549
+
13769
14550
def _record_detail_media_download_headers(context: BackendRequestContext) -> dict[str, str]:
    """Build the request headers used for asset downloads.

    ``Origin`` is the origin derived from ``context``; ``Referer`` is that
    origin followed by ``/``.
    """
    origin = _record_detail_context_origin(context)
    headers: dict[str, str] = {"User-Agent": DEFAULT_USER_AGENT}
    headers["Referer"] = f"{origin}/"
    headers["Origin"] = origin
    return headers
@@ -13970,6 +14751,11 @@ def _record_detail_supported_media_url(url: str) -> bool:
13970
14751
  return parsed.scheme.lower() in {"http", "https"} or _record_detail_is_download_file_url(url)
13971
14752
 
13972
14753
 
14754
+ def _record_detail_supported_file_url(url: str) -> bool:
14755
+ parsed = urlsplit(url)
14756
+ return parsed.scheme.lower() in {"http", "https"} or _record_detail_is_download_file_url(url)
14757
+
14758
+
13973
14759
  def _record_detail_media_key(key: Any) -> str:
13974
14760
  return str(key or "").strip().replace("-", "_").lower()
13975
14761
 
@@ -13982,6 +14768,18 @@ def _record_detail_url_or_name_looks_like_image(url: str, name: str | None = Non
13982
14768
  return False
13983
14769
 
13984
14770
 
14771
def _record_detail_url_or_name_looks_like_file(url: str, name: str | None = None) -> bool:
    """Tell whether ``url`` (or the optional ``name``) looks like a file.

    Download-file URLs and Qingflow storage URLs always qualify. Otherwise the
    lower-cased, percent-decoded path of the URL and of the name is checked
    for a suffix listed in ``_RECORD_FILE_EXTENSIONS``.
    """
    if _record_detail_is_download_file_url(url):
        return True
    if _record_detail_is_qingflow_storage_url(url):
        return True
    known_suffixes = tuple(_RECORD_FILE_EXTENSIONS)
    for text in (url, name or ""):
        if not text:
            continue
        # Fall back to the raw text when it has no path component.
        lowered_path = unquote(urlsplit(text).path).lower() or text.lower()
        if lowered_path.endswith(known_suffixes):
            return True
    return False
14781
+
14782
+
13985
14783
  def _record_detail_mime_from_url(url: str) -> str | None:
13986
14784
  path = unquote(urlsplit(url).path).lower()
13987
14785
  if path.endswith(".png"):
@@ -13996,9 +14794,210 @@ def _record_detail_mime_from_url(url: str) -> str | None:
13996
14794
  return "image/bmp"
13997
14795
  if path.endswith(".svg"):
13998
14796
  return "image/svg+xml"
14797
+ if path.endswith(".pdf"):
14798
+ return "application/pdf"
14799
+ if path.endswith(".docx"):
14800
+ return "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
14801
+ if path.endswith(".xlsx"):
14802
+ return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
14803
+ if path.endswith(".xlsm"):
14804
+ return "application/vnd.ms-excel.sheet.macroEnabled.12"
14805
+ if path.endswith(".csv"):
14806
+ return "text/csv"
14807
+ if path.endswith(".txt") or path.endswith(".text"):
14808
+ return "text/plain"
14809
+ if path.endswith(".md"):
14810
+ return "text/markdown"
14811
+ if path.endswith(".json"):
14812
+ return "application/json"
14813
+ return None
14814
+
14815
+
14816
def _record_detail_file_name_from_candidate(candidate: JSONObject, *, source_url: str, fallback_id: str) -> str:
    """Choose a display file name for ``candidate``.

    Preference order: the candidate's own ``file_name``, then the last path
    segment of ``source_url`` (percent-decoded), then ``fallback_id``.
    """
    explicit_name = _normalize_optional_text(candidate.get("file_name"))
    if explicit_name:
        return explicit_name
    url_path = unquote(urlsplit(source_url).path)
    return Path(url_path).name or fallback_id
14824
+
14825
+
14826
def _record_detail_file_mime_from_content_or_name(content: bytes, *, source_url: str, file_name: str) -> str | None:
    """Determine a MIME type for a downloaded file.

    Checks, in order: image magic bytes, the ``%PDF`` header, ``mimetypes``
    on the file name (or the URL when the name is empty), the URL-suffix
    table, a small suffix fallback table, and finally whether the bytes look
    like text. Returns ``None`` when nothing matches.
    """
    sniffed_image = _record_detail_image_mime_from_bytes(content)
    if sniffed_image:
        return sniffed_image
    if content.startswith(b"%PDF"):
        return "application/pdf"
    name_or_url = file_name or source_url
    guessed = mimetypes.guess_type(name_or_url)[0] or _record_detail_mime_from_url(source_url)
    if guessed:
        return guessed
    lowered = name_or_url.lower()
    suffix_table = (
        (".docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
        (".xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
        (".xlsm", "application/vnd.ms-excel.sheet.macroEnabled.12"),
        (".csv", "text/csv"),
        (".json", "application/json"),
    )
    for suffix, mime in suffix_table:
        if lowered.endswith(suffix):
            return mime
    return "text/plain" if _record_detail_bytes_look_like_text(content) else None
14000
14849
 
14001
14850
 
14851
+ def _record_detail_file_extension(mime_type: str | None, *, source_url: str, file_name: str) -> str:
14852
+ for value in (file_name, unquote(urlsplit(source_url).path)):
14853
+ suffix = Path(value).suffix.lower()
14854
+ if suffix and re.fullmatch(r"\.[a-z0-9]{1,10}", suffix):
14855
+ return suffix
14856
+ if mime_type:
14857
+ extension = mimetypes.guess_extension(mime_type)
14858
+ if extension:
14859
+ return ".jpg" if extension == ".jpe" else extension
14860
+ return ".bin"
14861
+
14862
+
14863
+ def _record_detail_bytes_look_like_text(content: bytes) -> bool:
14864
+ if not content:
14865
+ return True
14866
+ sample = content[:4096]
14867
+ if b"\x00" in sample:
14868
+ return False
14869
+ try:
14870
+ sample.decode("utf-8")
14871
+ return True
14872
+ except UnicodeDecodeError:
14873
+ try:
14874
+ sample.decode("gb18030")
14875
+ return True
14876
+ except UnicodeDecodeError:
14877
+ return False
14878
+
14879
+
14880
+ def _record_detail_extract_file_asset_text(
14881
+ content: bytes,
14882
+ *,
14883
+ mime_type: str | None,
14884
+ file_name: str,
14885
+ local_dir: Path,
14886
+ file_asset_id: str,
14887
+ ) -> JSONObject:
14888
+ normalized_name = file_name.lower()
14889
+ try:
14890
+ text: str | None
14891
+ if normalized_name.endswith(".docx") or mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
14892
+ text = _record_detail_extract_docx_text(content)
14893
+ elif normalized_name.endswith((".xlsx", ".xlsm")) or mime_type in {
14894
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
14895
+ "application/vnd.ms-excel.sheet.macroEnabled.12",
14896
+ }:
14897
+ text = _record_detail_extract_xlsx_text(content)
14898
+ elif normalized_name.endswith(".pdf") or mime_type == "application/pdf":
14899
+ text = _record_detail_extract_pdf_text(content)
14900
+ elif normalized_name.endswith(".json") or mime_type == "application/json":
14901
+ text = _record_detail_decode_json_text(content)
14902
+ elif normalized_name.endswith((".csv", ".txt", ".text", ".md")) or (mime_type or "").startswith("text/"):
14903
+ text = _record_detail_decode_text(content)
14904
+ else:
14905
+ text = None
14906
+ except Exception as exc:
14907
+ return {"status": "failed", "text_path": None, "preview": None, "error": str(exc)}
14908
+ if text is None:
14909
+ return {"status": "unsupported", "text_path": None, "preview": None}
14910
+ text_path = local_dir / f"{file_asset_id}.txt"
14911
+ text_path.write_text(text, encoding="utf-8")
14912
+ preview = text[:RECORD_GET_FILE_EXTRACT_PREVIEW_CHARS]
14913
+ return {
14914
+ "status": "ok",
14915
+ "text_path": str(text_path),
14916
+ "preview": preview,
14917
+ "preview_truncated": len(text) > RECORD_GET_FILE_EXTRACT_PREVIEW_CHARS,
14918
+ }
14919
+
14920
+
14921
+ def _record_detail_decode_text(content: bytes) -> str:
14922
+ for encoding in ("utf-8-sig", "utf-8", "gb18030"):
14923
+ try:
14924
+ return content.decode(encoding)
14925
+ except UnicodeDecodeError:
14926
+ continue
14927
+ return content.decode("utf-8", errors="replace")
14928
+
14929
+
14930
def _record_detail_decode_json_text(content: bytes) -> str:
    """Decode ``content`` and pretty-print it when it is valid JSON.

    Valid JSON is re-serialized with two-space indentation and non-ASCII
    characters kept as-is; anything else is returned as the decoded text.
    """
    decoded = _record_detail_decode_text(content)
    try:
        parsed = json.loads(decoded)
        pretty = json.dumps(parsed, ensure_ascii=False, indent=2)
    except ValueError:
        # Not JSON after all: hand back the raw decoded text.
        return decoded
    return pretty
14936
+
14937
+
14938
+ def _record_detail_extract_docx_text(content: bytes) -> str:
14939
+ with zipfile.ZipFile(BytesIO(content)) as archive:
14940
+ document_xml = archive.read("word/document.xml")
14941
+ root = ElementTree.fromstring(document_xml)
14942
+ ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
14943
+ body = root.find(f"{ns}body")
14944
+ if body is None:
14945
+ return ""
14946
+
14947
+ def node_text(node: ElementTree.Element) -> str:
14948
+ return "".join(text_node.text or "" for text_node in node.iter(f"{ns}t")).strip()
14949
+
14950
+ lines: list[str] = []
14951
+ for child in list(body):
14952
+ if child.tag == f"{ns}p":
14953
+ line = node_text(child)
14954
+ if line:
14955
+ lines.append(line)
14956
+ elif child.tag == f"{ns}tbl":
14957
+ for row in child.iter(f"{ns}tr"):
14958
+ cells = [node_text(cell) for cell in row.iter(f"{ns}tc")]
14959
+ cells = [cell for cell in cells if cell]
14960
+ if cells:
14961
+ lines.append(" | ".join(cells))
14962
+ return "\n".join(lines)
14963
+
14964
+
14965
def _record_detail_extract_xlsx_text(content: bytes) -> str:
    """Render an XLSX/XLSM workbook as tab-separated text.

    Each worksheet starts with a ``# <title>`` line followed by its rows.
    Rows without any non-empty cell are omitted, and at most
    ``RECORD_GET_FILE_EXTRACT_XLSX_MAX_ROWS_PER_SHEET`` rows per sheet are
    read before a "skipped rows" marker is appended. The workbook is opened
    read-only with ``data_only=True`` and is always closed.
    """
    from openpyxl import load_workbook

    workbook = load_workbook(BytesIO(content), read_only=True, data_only=True)
    try:
        row_limit = RECORD_GET_FILE_EXTRACT_XLSX_MAX_ROWS_PER_SHEET
        chunks: list[str] = []
        for sheet in workbook.worksheets:
            chunks.append(f"# {sheet.title}")
            for row_number, row in enumerate(sheet.iter_rows(values_only=True), start=1):
                if row_number > row_limit:
                    chunks.append(f"... skipped rows after {row_limit}")
                    break
                rendered = [str(cell) if cell is not None else "" for cell in row]
                if any(rendered):
                    chunks.append("\t".join(rendered).rstrip())
        return "\n".join(chunks)
    finally:
        workbook.close()
14985
+
14986
+
14987
def _record_detail_extract_pdf_text(content: bytes) -> str:
    """Extract text from the first pages of a PDF.

    At most ``RECORD_GET_FILE_EXTRACT_PDF_MAX_PAGES`` pages are read. Each page
    with non-blank text contributes a ``# Page <n>`` section; a trailing marker
    is added when the document has more pages than the limit. Sections are
    separated by a blank line.
    """
    from pypdf import PdfReader

    reader = PdfReader(BytesIO(content))
    sections: list[str] = []
    page_number = 0
    for page in reader.pages[:RECORD_GET_FILE_EXTRACT_PDF_MAX_PAGES]:
        page_number += 1
        stripped = (page.extract_text() or "").strip()
        if not stripped:
            continue
        sections.append(f"# Page {page_number}\n{stripped}")
    if len(reader.pages) > RECORD_GET_FILE_EXTRACT_PDF_MAX_PAGES:
        sections.append(f"... skipped pages after {RECORD_GET_FILE_EXTRACT_PDF_MAX_PAGES}")
    return "\n\n".join(sections)
14999
+
15000
+
14002
15001
  def _record_detail_image_mime_from_bytes(content: bytes) -> str | None:
14003
15002
  if content.startswith(b"\x89PNG\r\n\x1a\n"):
14004
15003
  return "image/png"
@@ -14038,6 +15037,7 @@ def _record_detail_context_integrity(
14038
15037
  workflow_logs: JSONObject,
14039
15038
  associated_resources: list[JSONObject],
14040
15039
  media_assets: JSONObject,
15040
+ file_assets: JSONObject,
14041
15041
  unavailable_context: list[JSONObject],
14042
15042
  ) -> JSONObject:
14043
15043
  reference_unavailable = any(item.get("target_detail_completeness") != "full" for item in references)
@@ -14049,6 +15049,7 @@ def _record_detail_context_integrity(
14049
15049
  "workflow_logs": workflow_logs.get("status") or "unknown",
14050
15050
  "associated_resources": "full" if associated_resources or not any(item.get("section") == "associated_resources" for item in unavailable_context) else "unavailable",
14051
15051
  "media_assets": media_assets.get("status") or "unknown",
15052
+ "file_assets": file_assets.get("status") or "unknown",
14052
15053
  "unavailable_count": len(unavailable_context),
14053
15054
  "safe_for_record_fact_conclusion": True,
14054
15055
  "safe_for_full_log_conclusion": False,
@@ -14063,6 +15064,7 @@ def _record_detail_semantic_context(payload: JSONObject) -> str:
14063
15064
  fields = payload.get("fields") if isinstance(payload.get("fields"), list) else []
14064
15065
  references = payload.get("references") if isinstance(payload.get("references"), list) else []
14065
15066
  media_assets = payload.get("media_assets") if isinstance(payload.get("media_assets"), dict) else {}
15067
+ file_assets = payload.get("file_assets") if isinstance(payload.get("file_assets"), dict) else {}
14066
15068
  data_logs = payload.get("data_logs") if isinstance(payload.get("data_logs"), dict) else {}
14067
15069
  workflow_logs = payload.get("workflow_logs") if isinstance(payload.get("workflow_logs"), dict) else {}
14068
15070
  associated_resources = payload.get("associated_resources") if isinstance(payload.get("associated_resources"), list) else []
@@ -14110,6 +15112,20 @@ def _record_detail_semantic_context(payload: JSONObject) -> str:
14110
15112
  f"(fieldId={_semantic_escape(item.get('field_id'))}),"
14111
15113
  f"本地路径:{_semantic_escape(item.get('local_path')) or '无'},{_semantic_escape(readable_text)}。"
14112
15114
  )
15115
+ file_items = file_assets.get("items") if isinstance(file_assets.get("items"), list) else []
15116
+ if file_items:
15117
+ lines.extend(["", "文件附件:"])
15118
+ for item in file_items:
15119
+ if not isinstance(item, dict):
15120
+ continue
15121
+ extraction = item.get("extraction") if isinstance(item.get("extraction"), dict) else {}
15122
+ readable_text = "可由智能体读取" if item.get("readable_by_agent") else f"不可直接读取({item.get('access_status') or 'unknown'})"
15123
+ lines.append(
15124
+ f"- 文件 {_semantic_escape(item.get('file_asset_id'))}「{_semantic_escape(item.get('file_name'))}」"
15125
+ f"来自字段「{_semantic_escape(item.get('field_title'))}」(fieldId={_semantic_escape(item.get('field_id'))}),"
15126
+ f"本地路径:{_semantic_escape(item.get('local_path')) or '无'},"
15127
+ f"提取文本:{_semantic_escape(extraction.get('text_path')) or '无'},{_semantic_escape(readable_text)}。"
15128
+ )
14113
15129
  lines.extend(["", "最近数据日志:"])
14114
15130
  _append_semantic_log_lines(lines, data_logs)
14115
15131
  lines.extend(["", "最近流程日志:"])
@@ -14227,7 +15243,6 @@ def _build_record_list_lookup_payload(
14227
15243
  query: str | None,
14228
15244
  items: list[JSONObject],
14229
15245
  pagination: JSONObject,
14230
- limit: int,
14231
15246
  ) -> JSONObject | None:
14232
15247
  if not query:
14233
15248
  return None
@@ -14236,17 +15251,7 @@ def _build_record_list_lookup_payload(
14236
15251
  if returned_items is None:
14237
15252
  returned_items = len(items)
14238
15253
  truncated = bool(reported_total is not None and reported_total > returned_items)
14239
- scored: list[tuple[int, int, JSONObject]] = []
14240
- for index, item in enumerate(items):
14241
- candidate = _record_list_candidate_payload(item, query=query)
14242
- score = _coerce_count(candidate.get("score")) or 0
14243
- if score <= 0:
14244
- candidate["score"] = 40
14245
- candidate["match_reason"] = "backend_match_without_selected_field_evidence"
14246
- scored.append((int(candidate["score"]), index, candidate))
14247
- scored.sort(key=lambda entry: (-entry[0], entry[1]))
14248
- candidates = [entry[2] for entry in scored[: min(limit, LOOKUP_CONFIRMATION_CANDIDATE_LIMIT)]]
14249
- confidence = _record_list_lookup_confidence(candidates, truncated=truncated)
15254
+ confidence = _record_list_lookup_confidence(returned_items=returned_items, reported_total=reported_total, truncated=truncated)
14250
15255
  next_action = {
14251
15256
  "single_high": "record_get",
14252
15257
  "multiple": "ask_user",
@@ -14256,140 +15261,25 @@ def _build_record_list_lookup_payload(
14256
15261
  return {
14257
15262
  "mode": "candidate_locator",
14258
15263
  "query": query,
14259
- "reported_total": reported_total,
14260
- "returned_candidates": len(candidates),
15264
+ "total_count": reported_total,
15265
+ "returned_count": returned_items,
15266
+ "truncated": truncated,
14261
15267
  "confidence": confidence,
14262
15268
  "next_action": next_action,
14263
- "candidates": candidates,
14264
15269
  }
14265
15270
 
14266
15271
 
14267
- def _record_list_lookup_confidence(candidates: list[JSONObject], *, truncated: bool) -> str:
14268
- if not candidates:
15272
+ def _record_list_lookup_confidence(*, returned_items: int, reported_total: int | None, truncated: bool) -> str:
15273
+ if returned_items <= 0:
14269
15274
  return "none"
14270
15275
  if truncated:
14271
15276
  return "truncated"
14272
- if len(candidates) == 1:
14273
- return "single_high"
14274
- top = _coerce_count(candidates[0].get("score")) or 0
14275
- second = _coerce_count(candidates[1].get("score")) or 0
14276
- if top >= 90 and (top - second) >= 10:
15277
+ effective_total = reported_total if reported_total is not None else returned_items
15278
+ if effective_total == 1:
14277
15279
  return "single_high"
14278
15280
  return "multiple"
14279
15281
 
14280
15282
 
14281
- def _record_list_candidate_payload(item: JSONObject, *, query: str) -> JSONObject:
14282
- query_norm = _normalize_lookup_query_text(query)
14283
- matched_fields: list[JSONObject] = []
14284
- best_score = 0
14285
- for key, value in item.items():
14286
- if key in {"normalized_record", "normalized_ambiguous_fields"}:
14287
- continue
14288
- value_text = _record_list_value_text(value)
14289
- if not value_text:
14290
- continue
14291
- score, match_type = _record_list_field_match_score(key, value_text, query_norm)
14292
- if score <= 0:
14293
- continue
14294
- best_score = max(best_score, score)
14295
- matched_fields.append(
14296
- {
14297
- "title": key,
14298
- "value": _truncate_text(value_text, 120),
14299
- "match_type": match_type,
14300
- "score": score,
14301
- }
14302
- )
14303
- if matched_fields:
14304
- best_score = min(100, best_score + min(10, (len(matched_fields) - 1) * 3))
14305
- record_id = _normalize_optional_text(item.get("record_id")) or _normalize_optional_text(item.get("apply_id"))
14306
- display_fields = _record_list_display_fields(item)
14307
- return {
14308
- "record_id": record_id,
14309
- "title": _record_list_candidate_title(item) or record_id,
14310
- "score": best_score,
14311
- "matched_fields": sorted(matched_fields, key=lambda entry: int(entry.get("score") or 0), reverse=True)[:6],
14312
- "display_fields": display_fields,
14313
- }
14314
-
14315
-
14316
- def _record_list_field_match_score(field_title: str, value_text: str, query_norm: str) -> tuple[int, str]:
14317
- if not query_norm:
14318
- return 0, "none"
14319
- value_norm = _normalize_lookup_query_text(value_text)
14320
- if not value_norm:
14321
- return 0, "none"
14322
- title_norm = _normalize_lookup_query_text(field_title)
14323
- title_like = _record_list_title_like(field_title)
14324
- id_like = field_title in {"record_id", "apply_id"} or "编号" in field_title or "id" == title_norm
14325
- if value_norm == query_norm:
14326
- if id_like:
14327
- return 100, "exact_identifier"
14328
- if title_like:
14329
- return 96, "exact_title"
14330
- return 86, "exact"
14331
- if query_norm in value_norm:
14332
- if id_like:
14333
- return 92, "contains_identifier"
14334
- if title_like:
14335
- return 88, "contains_title"
14336
- return 68, "contains"
14337
- if title_like and value_norm in query_norm and len(value_norm) >= 2:
14338
- return 72, "reverse_contains_title"
14339
- return 0, "none"
14340
-
14341
-
14342
- def _record_list_candidate_title(item: JSONObject) -> str | None:
14343
- for key, value in item.items():
14344
- if key in {"record_id", "apply_id"}:
14345
- continue
14346
- if _record_list_title_like(key):
14347
- text = _record_list_value_text(value)
14348
- if text:
14349
- return _truncate_text(text, 80)
14350
- for key, value in item.items():
14351
- if key in {"record_id", "apply_id", "normalized_record", "normalized_ambiguous_fields"}:
14352
- continue
14353
- text = _record_list_value_text(value)
14354
- if text:
14355
- return _truncate_text(text, 80)
14356
- return None
14357
-
14358
-
14359
- def _record_list_display_fields(item: JSONObject) -> list[JSONObject]:
14360
- display: list[JSONObject] = []
14361
- for key, value in item.items():
14362
- if key in {"record_id", "apply_id", "normalized_record", "normalized_ambiguous_fields"}:
14363
- continue
14364
- text = _record_list_value_text(value)
14365
- if not text:
14366
- continue
14367
- display.append({"title": key, "value": _truncate_text(text, 120)})
14368
- if len(display) >= 6:
14369
- break
14370
- return display
14371
-
14372
-
14373
- def _record_list_title_like(field_title: str) -> bool:
14374
- lowered = field_title.lower()
14375
- return any(
14376
- token in field_title or token in lowered
14377
- for token in ("标题", "名称", "名字", "客户", "公司", "项目", "商机", "线索", "主题", "编号", "name", "title", "customer", "company")
14378
- )
14379
-
14380
-
14381
- def _record_list_value_text(value: JSONValue) -> str:
14382
- if value is None:
14383
- return ""
14384
- if isinstance(value, str):
14385
- return value.strip()
14386
- return _stringify_json(value).strip()
14387
-
14388
-
14389
- def _normalize_lookup_query_text(value: str) -> str:
14390
- return re.sub(r"\s+", "", value).lower()
14391
-
14392
-
14393
15283
  def _truncate_text(value: str, limit: int) -> str:
14394
15284
  if len(value) <= limit:
14395
15285
  return value