contract-archive-cli 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. contract_archive/__init__.py +2 -0
  2. contract_archive/archive/__init__.py +64 -0
  3. contract_archive/archive/db.py +126 -0
  4. contract_archive/archive/ingest.py +667 -0
  5. contract_archive/archive/migrations/001_init.sql +62 -0
  6. contract_archive/archive/migrations/002_obligations.sql +25 -0
  7. contract_archive/archive/migrations/003_document_types.sql +31 -0
  8. contract_archive/archive/migrations/004_seals_subjects.sql +36 -0
  9. contract_archive/archive/migrations/005_completeness.sql +18 -0
  10. contract_archive/archive/party_registry.py +276 -0
  11. contract_archive/archive/paths.py +113 -0
  12. contract_archive/archive/repository.py +918 -0
  13. contract_archive/cli.py +455 -0
  14. contract_archive/cli_common.py +293 -0
  15. contract_archive/cli_config.py +96 -0
  16. contract_archive/cli_introspect.py +204 -0
  17. contract_archive/cli_party.py +166 -0
  18. contract_archive/cli_query.py +492 -0
  19. contract_archive/cli_render.py +575 -0
  20. contract_archive/config.py +257 -0
  21. contract_archive/errors.py +163 -0
  22. contract_archive/extraction/__init__.py +14 -0
  23. contract_archive/extraction/amount_check.py +87 -0
  24. contract_archive/extraction/contract_extractor.py +103 -0
  25. contract_archive/extraction/document_extractor.py +546 -0
  26. contract_archive/extraction/evidence_page_fix.py +99 -0
  27. contract_archive/extraction/llm_extractor.py +207 -0
  28. contract_archive/extraction/normalize.py +210 -0
  29. contract_archive/extraction/property_fee.py +79 -0
  30. contract_archive/extraction/vision_seal.py +390 -0
  31. contract_archive/pipelines/__init__.py +9 -0
  32. contract_archive/pipelines/mineru_pipeline.py +955 -0
  33. contract_archive/pipelines/vl_ocr.py +160 -0
  34. contract_archive/schemas/__init__.py +67 -0
  35. contract_archive/schemas/document.py +408 -0
  36. contract_archive/utils/__init__.py +27 -0
  37. contract_archive/utils/device.py +51 -0
  38. contract_archive/utils/http_env.py +54 -0
  39. contract_archive/utils/pdf.py +207 -0
  40. contract_archive_cli-0.2.7.dist-info/METADATA +386 -0
  41. contract_archive_cli-0.2.7.dist-info/RECORD +44 -0
  42. contract_archive_cli-0.2.7.dist-info/WHEEL +4 -0
  43. contract_archive_cli-0.2.7.dist-info/entry_points.txt +2 -0
  44. contract_archive_cli-0.2.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,166 @@
1
+ """
2
+ `party` 子命令:管理 known_parties 身份基准库(查看/录入/删除主体固有标识)。
3
+
4
+ 独立文件——cli.py 已逼近 1000 行红线,不能再塞。known_parties.json 含真实 PII,
5
+ 故本命令只在本地档案库读写,不提供导出/分享。基准的"首见入库"由 ingest 自动完成,
6
+ 本命令组负责人工查看与修正:set 覆盖(纠正被 OCR 读错的首见基准)、rm 删除。
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import json as _json
11
+ import sys
12
+ from pathlib import Path
13
+ from typing import Optional
14
+
15
+ import typer
16
+ from rich.table import Table
17
+
18
+ from .archive.party_registry import PartyRegistry, group_by_value
19
+ from .archive.paths import ArchivePaths, default_archive_root
20
+ # 复用 cli_common 的全局 console(理由同 cli_config):自建实例会让全局 --no-color 失效。
21
+ from .cli_common import OutputFormat, console, err_console
22
+ from .config import load_settings
23
+
24
# Typer sub-application that groups the `party` subcommands.
# pretty_exceptions_show_locals is switched off so that a traceback never
# dumps local variables (which may hold PII) to the terminal.
party_app = typer.Typer(
    # Per clig.dev: a bare `party` prints the help listing list/show/set/rm
    # rather than failing with "Missing command".
    no_args_is_help=True,
    pretty_exceptions_show_locals=False,
    context_settings={"help_option_names": ["-h", "--help"]},
    help="管理 known_parties 身份基准库(主体固有标识的跨文档核对基准)",
)

# Shared `--archive/-a` option definition reused by every `party` subcommand.
_archive_opt = typer.Option(
    None,
    "--archive",
    "-a",
    help="档案库根目录;不传则用 CONTRACT_ARCHIVE_DIR 或 XDG 默认",
)
38
+
39
+
40
def _resolve_archive(archive_opt: Optional[Path]) -> ArchivePaths:
    """Resolve the archive root directory and wrap it in ``ArchivePaths``.

    Precedence: the explicit ``--archive`` flag, then ``archive_dir`` from
    ``load_settings()``, then ``default_archive_root()``. The original author
    notes this mirrors ``cli._resolve_archive`` and is duplicated here to
    avoid a circular import.

    Args:
        archive_opt: Value of the ``--archive`` option, or ``None`` when the
            flag was not given.

    Returns:
        An ``ArchivePaths`` whose root is user-expanded and resolved.
    """
    chosen = archive_opt
    if not chosen:
        # Settings are only consulted when no flag was passed.
        from_settings = load_settings().archive_dir
        chosen = default_archive_root() if not from_settings else Path(from_settings)
    absolute_root = chosen.expanduser().resolve()
    return ArchivePaths(root=absolute_root)
48
+
49
+
50
def _load_registry(archive_opt: Optional[Path]) -> PartyRegistry:
    """Load the party registry from the resolved archive's known_parties path."""
    registry_file = _resolve_archive(archive_opt).known_parties_path
    return PartyRegistry.load(registry_file)
52
+
53
+
54
@party_app.command("list")
def list_parties(
    archive: Optional[Path] = _archive_opt,
    fmt: OutputFormat = typer.Option(OutputFormat.table, "--format", help="table | json"),
) -> None:
    """列出基准库里所有主体及其固有标识。"""
    # The docstring above is the help text Typer shows for this command, so it
    # is kept verbatim. In English: list every party in the registry together
    # with its identifiers.
    parties = _load_registry(archive).all_parties()

    # Machine-readable output: dump the mapping as-is (not grouped by value) to
    # stdout. This branch runs before the emptiness check, so an empty registry
    # is serialised too rather than producing no output.
    if fmt is OutputFormat.json:
        print(_json.dumps(parties, ensure_ascii=False, indent=2))
        return

    # Table mode with nothing to show: hint goes to stderr, stdout stays empty.
    if not parties:
        err_console.print("[yellow]known_parties 为空——入库文档后会自动录入首见标识。[/yellow]")
        return

    table = Table(title=f"known_parties · {len(parties)} 个主体")
    column_specs = (
        ("主体", {"style": "cyan", "no_wrap": True}),
        ("标识", {}),
        ("值", {"overflow": "fold"}),
        ("首见", {"style": "dim"}),
    )
    for header, options in column_specs:
        table.add_column(header, **options)

    # Rows come from group_by_value (the original comment says it folds labels
    # sharing the same value). The party name is printed on the first row only.
    for party_name, identifiers in parties.items():
        for position, (label, record) in enumerate(group_by_value(identifiers)):
            shown_name = "" if position else party_name
            first_seen = str(record.get("first_seen_doc", ""))[:12]
            table.add_row(shown_name, label, record.get("value", ""), first_seen)
    console.print(table)
82
+
83
+
84
@party_app.command("show")
def show_party(
    name: str = typer.Argument(..., help="主体名(姓名或机构全称)"),
    archive: Optional[Path] = _archive_opt,
    fmt: OutputFormat = typer.Option(OutputFormat.table, "--format", help="table | json"),
) -> None:
    """查看某主体的全部标识基准。"""
    # The docstring above is the help text Typer shows for this command, so it
    # is kept verbatim. In English: show all identifier baselines of one party.
    want_json = fmt is OutputFormat.json
    identifiers = _load_registry(archive).get(name)

    if not identifiers:
        # JSON mode writes a not_found envelope to stdout so that a downstream
        # `| jq` never receives empty input; table mode reports on stderr.
        # Both exit with status 1.
        if want_json:
            print(_json.dumps({"error": "not_found", "name": name}, ensure_ascii=False))
        else:
            err_console.print(f"[red]未找到主体: {name}[/red]")
        raise typer.Exit(1)

    if want_json:
        print(_json.dumps(identifiers, ensure_ascii=False, indent=2))
        return

    # The count in the title is the number of grouped rows, not raw labels.
    grouped = group_by_value(identifiers)
    table = Table(title=f"{name} · {len(grouped)} 项标识")
    column_specs = (
        ("标识", {"style": "cyan"}),
        ("值", {"overflow": "fold"}),
        ("角色", {"style": "dim"}),
        ("首见出处", {"style": "dim"}),
    )
    for header, options in column_specs:
        table.add_column(header, **options)
    for label, record in grouped:
        first_seen = str(record.get("first_seen_doc", ""))
        table.add_row(label, record.get("value", ""), record.get("role", ""), first_seen)
    console.print(table)
113
+
114
+
115
@party_app.command("set")
def set_party(
    name: str = typer.Argument(..., help="主体名"),
    label: str = typer.Argument(..., help="标识名,如 身份证号 / 电话 / 银行账号"),
    value: str = typer.Argument(..., help="标识值"),
    archive: Optional[Path] = _archive_opt,
) -> None:
    """手动录入/修正某主体的标识基准(覆盖既有值;用于纠正首见时被 OCR 读错的基准)。"""
    # The docstring above is the help text Typer shows for this command, so it
    # is kept verbatim. In English: manually enter or correct one identifier
    # baseline of a party, overwriting any existing value.
    store_path = _resolve_archive(archive).known_parties_path
    registry = PartyRegistry.load(store_path)
    try:
        registry.set(name, label, value)
    except ValueError as exc:
        # A ValueError from the registry is reported on stderr; exit status 1.
        err_console.print(f"[red]{exc}[/red]")
        raise typer.Exit(1)
    else:
        registry.save()

    # State-change confirmations go to stderr so stdout stays reserved for data.
    err_console.print(f"[green]已设置[/green] {name}·{label} → {store_path}")
    err_console.print(
        "[yellow]注意:known_parties.json 明文存 PII,已设为仅本人可读(0600),请勿提交或分享。[/yellow]"
    )
136
+
137
+
138
@party_app.command("rm")
def rm_party(
    name: str = typer.Argument(..., help="主体名"),
    label: Optional[str] = typer.Argument(None, help="标识名;省略则删除该主体全部标识"),
    archive: Optional[Path] = _archive_opt,
    yes: bool = typer.Option(False, "--yes", "-y", help="跳过确认(删整个主体时需要)"),
) -> None:
    """删除某主体的某标识;不给 label 则删除整个主体。"""
    # The docstring above is the help text Typer shows for this command, so it
    # is kept verbatim. In English: delete one identifier of a party, or the
    # whole party when no label is given.
    store_path = _resolve_archive(archive).known_parties_path
    registry = PartyRegistry.load(store_path)
    target = name if not label else f"{name}·{label}"

    # Removing a whole party (label omitted) is the dangerous path, so it is
    # guarded: without --yes a non-interactive stdin is refused outright and an
    # interactive one must confirm. Removing a single named label is not guarded.
    whole_party = label is None
    if whole_party and not yes:
        if not sys.stdin.isatty():
            err_console.print(
                f"[red]拒绝在非交互环境删除整个主体 {name}:请加 --yes 确认[/red]"
            )
            raise typer.Exit(1)
        confirmed = typer.confirm(f"删除主体 {name} 的全部标识基准?", default=False)
        if not confirmed:
            err_console.print("[yellow]aborted[/yellow]")
            raise typer.Exit(0)

    removed = registry.remove(name, label)
    if not removed:
        err_console.print(f"[red]未找到: {target}[/red]")
        raise typer.Exit(1)
    # Only persist when something was actually removed.
    registry.save()
    err_console.print(f"[green]已删除[/green] {target}")
@@ -0,0 +1,492 @@
1
+ """
2
+ 只读查询/展示命令:list / search / show / raw / stats / todo / seals。
3
+
4
+ 这些命令不写库、不调用付费 API,是档案库的"读侧"。它们用 @app.command 挂到
5
+ cli_common 的主 app 上——import 本模块即触发注册(见 cli.py 的组装段)。
6
+
7
+ 依赖方向:本模块只 import cli_common(基础设施)+ archive 读函数 + cli_render
8
+ (纯渲染),绝不回头 import cli(写命令模块),以保持 DAG、避免循环 import。
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import json as _json
13
+ import sys
14
+ from dataclasses import asdict
15
+ from pathlib import Path
16
+ from typing import Optional
17
+
18
+ import typer
19
+ from rich.table import Table
20
+
21
+ from .archive import (
22
+ SearchFilter,
23
+ Stats,
24
+ collect_stats,
25
+ list_documents,
26
+ list_obligations,
27
+ list_seals,
28
+ load_document_text,
29
+ open_archive_db,
30
+ search_documents,
31
+ )
32
+ from .cli_common import (
33
+ Actor,
34
+ ColorWhen,
35
+ DocStatus,
36
+ DocType,
37
+ OrderBy,
38
+ OutputFormat,
39
+ _archive_empty,
40
+ _archive_opt,
41
+ _resolve_archive,
42
+ _resolve_ident,
43
+ app,
44
+ color_disabled,
45
+ console,
46
+ err_console,
47
+ not_found_json,
48
+ )
49
+ from .cli_render import (
50
+ build_list_table,
51
+ build_search_table,
52
+ build_show_table,
53
+ color_legend,
54
+ extracted_terms,
55
+ render_highlighted,
56
+ row_to_dict,
57
+ seal_rows_to_dict,
58
+ )
59
+
60
+ # ---------- list ----------
61
+
62
+
63
@app.command("list")
def list_cmd(
    archive: Optional[Path] = _archive_opt,
    limit: int = typer.Option(50, "--limit", "-n", help="最多返回 N 条(按排序截断)"),
    order_by: OrderBy = typer.Option(
        OrderBy.ingested_at, "--order-by", help="排序字段"
    ),
    status: Optional[DocStatus] = typer.Option(
        None, "--status", help="过滤状态;默认全部"
    ),
    doc_type: Optional[DocType] = typer.Option(
        None, "--type", help="按文档类型过滤"
    ),
    incomplete: bool = typer.Option(
        False, "--incomplete", help="只列疑似不完整的合同(缺签章/缺要素)"
    ),
    fmt: OutputFormat = typer.Option(OutputFormat.table, "--format", help="table | json"),
) -> None:
    """列出档案库内已索引文档。"""
    # The docstring above is the help text Typer shows for this command, so it
    # is kept verbatim. In English: list the documents indexed in the archive.
    #
    # Output: a JSON array on stdout for --format json, otherwise the table
    # built by build_list_table. Returns early when _archive_empty reports an
    # empty archive (that helper is responsible for any output in that case).
    paths = _resolve_archive(archive)
    if _archive_empty(paths, fmt):
        return

    conn = open_archive_db(paths.db_path)
    try:
        rows = list_documents(
            conn,
            limit=limit,
            order_by=order_by.value,
            status=status.value if status else None,
            doc_type=doc_type.value if doc_type else None,
            incomplete=incomplete,
        )
    finally:
        # Close the connection even when the query raises.
        conn.close()

    if fmt is OutputFormat.json:
        print(_json.dumps([row_to_dict(r) for r in rows], ensure_ascii=False, indent=2))
        return

    console.print(build_list_table(rows, paths.root))
101
+
102
+
103
+ # ---------- search ----------
104
+
105
+
106
@app.command()
def search(
    archive: Optional[Path] = _archive_opt,
    name: Optional[str] = typer.Option(None, "--name", help="合同名包含(LIKE)"),
    party: Optional[str] = typer.Option(
        None, "--party", help="甲方 OR 乙方包含(LIKE)"
    ),
    amount_min: Optional[float] = typer.Option(
        None, "--amount-min", help="金额下限(元)"
    ),
    amount_max: Optional[float] = typer.Option(
        None, "--amount-max", help="金额上限(元)"
    ),
    signed_after: Optional[str] = typer.Option(
        None, "--signed-after", help="签订日 ≥ YYYY-MM-DD"
    ),
    signed_before: Optional[str] = typer.Option(
        None, "--signed-before", help="签订日 ≤ YYYY-MM-DD"
    ),
    expire_before: Optional[str] = typer.Option(
        None, "--expire-before", help="到期日 ≤ YYYY-MM-DD(找快到期)"
    ),
    auto_renewal: Optional[bool] = typer.Option(
        None,
        "--auto-renewal/--no-auto-renewal",
        help="是否自动续约",
    ),
    has_risk: bool = typer.Option(False, "--has-risk", help="只显示有风险条款的"),
    deadline_before: Optional[str] = typer.Option(
        None,
        "--deadline-before",
        help="存在 deadline ≤ YYYY-MM-DD 的义务(找近期待办合同)",
    ),
    deadline_after: Optional[str] = typer.Option(
        None, "--deadline-after", help="存在 deadline ≥ YYYY-MM-DD 的义务"
    ),
    actor: Optional[Actor] = typer.Option(
        None, "--actor", help="义务 actor"
    ),
    status: Optional[DocStatus] = typer.Option(None, "--status", help="过滤状态"),
    has_seal: Optional[bool] = typer.Option(
        None, "--has-seal/--no-seal", help="有/无印章(默认不过滤);只想列章本身用 seals 命令"
    ),
    seal_owner: Optional[str] = typer.Option(
        None, "--seal-owner", help="盖章主体包含(LIKE)"
    ),
    seal_type: Optional[str] = typer.Option(
        None, "--seal-type", help="印章类型包含(LIKE),如 合同专用章/公章"
    ),
    subject: Optional[str] = typer.Option(
        None, "--subject", help="主体包含(LIKE),覆盖所有文档类型(含合同甲乙方)"
    ),
    limit: int = typer.Option(50, "--limit", "-n", help="最多返回 N 条(按排序截断)"),
    fmt: OutputFormat = typer.Option(OutputFormat.table, "--format", help="table | json"),
) -> None:
    """多字段 AND 过滤查询。"""
    # The docstring above is the help text Typer shows for this command, so it
    # is kept verbatim. In English: query documents with several filters
    # combined by AND.
    #
    # Output: a JSON array on stdout for --format json, otherwise the table
    # built by build_search_table. Returns early when _archive_empty reports
    # an empty archive.
    paths = _resolve_archive(archive)
    if _archive_empty(paths, fmt):
        return

    # Build the filter before touching the database so that a failure here
    # cannot leave a connection open. Amounts arrive in yuan and are stored in
    # the *_cents fields as integers (yuan * 100, rounded).
    flt = SearchFilter(
        name=name,
        party=party,
        amount_min_cents=int(round(amount_min * 100)) if amount_min is not None else None,
        amount_max_cents=int(round(amount_max * 100)) if amount_max is not None else None,
        signed_after=signed_after,
        signed_before=signed_before,
        expire_before=expire_before,
        auto_renewal=auto_renewal,
        has_risk=has_risk,
        deadline_before=deadline_before,
        deadline_after=deadline_after,
        actor=actor.value if actor else None,
        status=status.value if status else None,
        has_seal=has_seal,
        seal_owner=seal_owner,
        seal_type=seal_type,
        subject=subject,
        limit=limit,
    )

    conn = open_archive_db(paths.db_path)
    try:
        rows = search_documents(conn, flt)
    finally:
        # Close the connection even when the query raises.
        conn.close()

    if fmt is OutputFormat.json:
        print(_json.dumps([row_to_dict(r) for r in rows], ensure_ascii=False, indent=2))
        return

    console.print(build_search_table(rows))
194
+
195
+
196
+ # ---------- show ----------
197
+
198
+
199
@app.command()
def show(
    ident: str = typer.Argument(..., help="档案 id (整数) 或 sha 前缀 (>=4 字符)"),
    archive: Optional[Path] = _archive_opt,
    fmt: OutputFormat = typer.Option(OutputFormat.table, "--format", help="table | json"),
) -> None:
    """显示单条档案详情。"""
    # The docstring above is the help text Typer shows for this command, so it
    # is kept verbatim. In English: show the details of a single archive entry.
    #
    # Raises typer.Exit(1) when the database file does not exist or when
    # `ident` resolves to nothing.
    paths = _resolve_archive(archive)

    # `show` asks for one specific record, so a missing database is an error.
    # Table mode reports on stderr; JSON mode calls not_found_json so that a
    # downstream `| jq` does not receive empty input.
    if not paths.db_path.exists():
        if fmt is OutputFormat.json:
            not_found_json(ident)
        else:
            err_console.print(f"[yellow]archive empty: {paths.db_path}[/yellow]")
        raise typer.Exit(1)

    conn = open_archive_db(paths.db_path)
    try:
        row = _resolve_ident(conn, ident)
    finally:
        # Close the connection even when the lookup raises.
        conn.close()

    if not row:
        if fmt is OutputFormat.json:
            not_found_json(ident)
        else:
            err_console.print(f"[red]not found: {ident}[/red]")
        raise typer.Exit(1)

    if fmt is OutputFormat.json:
        print(_json.dumps(
            row_to_dict(row, archive_root=paths.root, include_original_source=False),
            ensure_ascii=False,
            indent=2,
        ))
        return

    console.print(build_show_table(row, paths.root))
235
+
236
+
237
+ # ---------- raw ----------
238
+
239
+
240
@app.command()
def raw(
    ident: str = typer.Argument(..., help="档案 id (整数) 或 sha 前缀 (>=4 字符)"),
    archive: Optional[Path] = _archive_opt,
    color: ColorWhen = typer.Option(
        ColorWhen.auto, "--color",
        help="auto=仅 TTY 上色(管道纯文本)| always(配 less -R)| never",
    ),
) -> None:
    """
    打印文档原文(MinerU OCR 输出的纯文本)到 stdout。

    与 show 互补:show 看 LLM 抽出的结构化字段,raw 看抽取所依据的原始文本——
    这正是抽取时喂给 LLM 的同一份内容(raw_text.txt,缺失则退回 markdown.md),
    用于核对抽取结果是否忠于原文。

    交互终端下默认按抽取来源给命中关键字着色(当事人/金额/日期/风险/字段),
    一眼看出哪些被 LLM 识别到;管道(非 TTY)时输出纯文本,不破坏 raw|grep / raw|less。
    """
    # The docstring above is the help text Typer shows for this command, so it
    # is kept verbatim. In English: print the document's OCR text to stdout,
    # optionally highlighting the terms returned by extracted_terms.
    #
    # Raises typer.Exit(1) when the database file does not exist, when `ident`
    # resolves to nothing, or when no text could be loaded.
    paths = _resolve_archive(archive)

    # Like `show`: one specific record is requested, so a missing database or
    # an unknown ident is an error, reported on stderr.
    if not paths.db_path.exists():
        err_console.print(f"[yellow]archive empty: {paths.db_path}[/yellow]")
        raise typer.Exit(1)

    conn = open_archive_db(paths.db_path)
    try:
        row = _resolve_ident(conn, ident)
    finally:
        # Close the connection even when the lookup raises.
        conn.close()

    if not row:
        err_console.print(f"[red]not found: {ident}[/red]")
        raise typer.Exit(1)

    # output_dir may be empty (the original comment mentions records whose
    # ingest failed). Path("") / "mineru" is the *relative* path "mineru",
    # i.e. a directory under the current working directory, which may exist
    # and hold unrelated text. Skip the load in that case so the command falls
    # through to the "no OCR text" error instead of reading the wrong files.
    output_dir = row.output_dir
    mineru_dir = Path(output_dir or "") / "mineru"
    text = load_document_text(mineru_dir) if output_dir else ""
    if not text:
        err_console.print(
            f"[red]no OCR text for id={row.id} sha={row.short_sha}: {mineru_dir}[/red]"
        )
        raise typer.Exit(1)

    # Colour decision: `always` forces colour; `auto` colours only when stdout
    # is a TTY and color_disabled() is false; `never` disables it.
    # Piped output therefore stays plain text under the default.
    use_color = color is ColorWhen.always or (
        color is ColorWhen.auto and sys.stdout.isatty() and not color_disabled()
    )
    if not use_color:
        print(text)
        return

    terms = extracted_terms(row)
    # The legend goes to stderr (and only when stderr is a TTY) so it never
    # mixes into the document text on stdout.
    legend = color_legend(terms)
    if legend and sys.stderr.isatty():
        print(legend, file=sys.stderr)
    sys.stdout.write(render_highlighted(text, terms))
    # Terminate the output with a newline when the source text lacks one.
    if not text.endswith("\n"):
        sys.stdout.write("\n")
300
+
301
+
302
+ # ---------- stats ----------
303
+
304
+
305
@app.command()
def stats(
    archive: Optional[Path] = _archive_opt,
    fmt: OutputFormat = typer.Option(OutputFormat.table, "--format", help="table | json"),
) -> None:
    """档案库统计:总数 / status 分布 / 按月签订分布 / 近 30 天到期数。"""
    # The docstring above is the help text Typer shows for this command, so it
    # is kept verbatim. In English: archive statistics (total, status
    # distribution, signings per month, count expiring within 30 days).
    paths = _resolve_archive(archive)

    # A missing database is treated as an archive with zero documents: a
    # zero-valued Stats goes through the same rendering path, so the JSON
    # output is always an object.
    if paths.db_path.exists():
        conn = open_archive_db(paths.db_path)
        try:
            s = collect_stats(conn)
        finally:
            # Close the connection even when collecting statistics raises.
            conn.close()
    else:
        s = Stats(
            total=0, by_status={}, by_sign_month={},
            new_this_month=0, expiring_within_30d=0,
        )

    if fmt is OutputFormat.json:
        print(_json.dumps(asdict(s), ensure_ascii=False, indent=2))
        return

    table = Table(title=f"Archive Stats · {paths.root}")
    table.add_column("metric", style="cyan")
    table.add_column("value")
    table.add_row("total", str(s.total))
    table.add_row(
        "by_status",
        # Sorted by status key; "-" stands in for an empty distribution.
        ", ".join(f"{k}={v}" for k, v in sorted(s.by_status.items())) or "-",
    )
    table.add_row("new_this_month", str(s.new_this_month))
    table.add_row("expiring_within_30d", str(s.expiring_within_30d))
    table.add_row(
        "by_sign_month",
        # One "month: count" line per entry, in the mapping's own order.
        "\n".join(f"{m}: {c}" for m, c in s.by_sign_month.items()) or "-",
    )
    console.print(table)
343
+
344
+
345
+ # ---------- todo ----------
346
+
347
+
348
@app.command()
def todo(
    archive: Optional[Path] = _archive_opt,
    actor: Optional[Actor] = typer.Option(
        None, "--actor", help="义务 actor"
    ),
    before: Optional[str] = typer.Option(
        None, "--before", help="deadline ≤ YYYY-MM-DD"
    ),
    after: Optional[str] = typer.Option(
        None, "--after", help="deadline ≥ YYYY-MM-DD"
    ),
    include_undated: bool = typer.Option(
        False, "--include-undated", help="同时显示无 deadline 的义务"
    ),
    within_days: Optional[int] = typer.Option(
        None,
        "--within-days",
        help="便捷选项:deadline 在今天到 N 天内(等价于 --after today --before today+N)",
    ),
    limit: int = typer.Option(50, "--limit", "-n", help="最多返回 N 条(按排序截断)"),
    fmt: OutputFormat = typer.Option(OutputFormat.table, "--format", help="table | json"),
) -> None:
    """
    跨合同列出待办义务("催办看板")。按 deadline 升序。

    用例:
    contract-archive todo --within-days 30 本月需要做的事
    contract-archive todo --actor party_b 乙方所有待办
    contract-archive todo --actor party_a --before 2026-12-31
    contract-archive todo --include-undated 含无日期的(如"签订当日支付定金")
    """
    # The docstring above is the help text Typer shows for this command, so it
    # is kept verbatim. In English: list pending obligations across contracts.
    #
    # Output: a JSON array of obligation objects on stdout for --format json,
    # otherwise a table. Returns early when _archive_empty reports an empty
    # archive.
    from datetime import date, timedelta

    if within_days is not None:
        # Sample the clock once so both ends of the window derive from the
        # same day even if the command runs across midnight. Explicit
        # --before/--after values take precedence over the derived ones.
        today = date.today()
        before = before or (today + timedelta(days=within_days)).isoformat()
        after = after or today.isoformat()

    paths = _resolve_archive(archive)
    if _archive_empty(paths, fmt):
        return

    conn = open_archive_db(paths.db_path)
    try:
        items = list_obligations(
            conn,
            actor=actor.value if actor else None,
            before=before,
            after=after,
            include_undated=include_undated,
            limit=limit,
        )
    finally:
        # Close the connection even when the query raises.
        conn.close()

    if fmt is OutputFormat.json:
        print(
            _json.dumps(
                [
                    {
                        "doc_id": it.doc_id,
                        "contract_name": it.contract_name,
                        "actor": it.actor,
                        "action": it.action,
                        "deadline": it.deadline,
                        "evidence": it.evidence,
                    }
                    for it in items
                ],
                ensure_ascii=False,
                indent=2,
            )
        )
        return

    table = Table(title=f"Todo · {len(items)} obligation(s)")
    table.add_column("deadline", style="cyan")
    table.add_column("actor")
    table.add_column("action", overflow="fold")
    table.add_column("contract", overflow="fold", style="dim")
    table.add_column("doc", justify="right", style="dim")
    # Display labels for known actor codes; unknown codes are shown as-is.
    actor_label = {"party_a": "甲方", "party_b": "乙方", "both": "双方"}
    for it in items:
        deadline = it.deadline or "[dim]无日期[/dim]"
        table.add_row(
            deadline,
            actor_label.get(it.actor, it.actor),
            it.action,
            it.contract_name or "-",
            f"#{it.doc_id}",
        )
    console.print(table)
438
+
439
+
440
+ # ---------- seals ----------
441
+
442
+
443
@app.command("seals")
def seals_cmd(
    archive: Optional[Path] = _archive_opt,
    owner: Optional[str] = typer.Option(
        None, "--owner", "--seal-owner", help="盖章主体包含(LIKE);与 search 的 --seal-owner 同义"
    ),
    seal_type: Optional[str] = typer.Option(
        None, "--type", "--seal-type",
        help="印章类型包含(LIKE),如 合同专用章/公章;与 search 的 --seal-type 同义",
    ),
    limit: int = typer.Option(200, "--limit", "-n", help="最多列 N 枚印章"),
    fmt: OutputFormat = typer.Option(OutputFormat.table, "--format", help="table | json"),
) -> None:
    """
    跨文档列印章:某主体有哪些章、各出现在哪些文档(按主体/类型聚合阅读)。

    与 search 互补:seals 列【印章】本身;要按印章条件筛【文档】(哪些合同盖了某章)
    用 `search --has-seal/--seal-owner/--seal-type`。

    用例:
    contract-archive seals 全部印章
    contract-archive seals --seal-owner 示例公司 某公司的章
    contract-archive seals --seal-type 合同专用章
    """
    # The docstring above is the help text Typer shows for this command, so it
    # is kept verbatim. In English: list seals across documents, optionally
    # filtered by owner and seal type.
    #
    # Output: seal_rows_to_dict(rows) as JSON on stdout for --format json,
    # otherwise a table. Returns early when _archive_empty reports an empty
    # archive.
    paths = _resolve_archive(archive)
    if _archive_empty(paths, fmt):
        return

    conn = open_archive_db(paths.db_path)
    try:
        rows = list_seals(conn, owner=owner, seal_type=seal_type, limit=limit)
    finally:
        # Close the connection even when the query raises.
        conn.close()

    if fmt is OutputFormat.json:
        print(_json.dumps(seal_rows_to_dict(rows), ensure_ascii=False, indent=2))
        return

    table = Table(title=f"Seals · {len(rows)} 枚")
    table.add_column("owner", overflow="fold", style="magenta")
    table.add_column("type")
    table.add_column("raw_text", overflow="fold", style="dim")
    table.add_column("doc", overflow="fold")
    table.add_column("id", justify="right", style="dim")
    for r in rows:
        table.add_row(
            # Placeholders for missing owner / type / title.
            r.owner or "?",
            r.seal_type or "-",
            r.raw_text,
            r.title or "-",
            f"#{r.doc_id}",
        )
    console.print(table)