@tcos/broker-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/.claude/skills/parse-statement/SKILL.md +134 -0
  2. package/.claude/skills/parse-statement/examples.md +257 -0
  3. package/.claude/skills/parse-statement/trigger-tests/cases.yaml +133 -0
  4. package/README.md +153 -0
  5. package/dist/cli/index.d.ts +17 -0
  6. package/dist/cli/index.d.ts.map +1 -0
  7. package/dist/cli/index.js +150 -0
  8. package/dist/cli/index.js.map +1 -0
  9. package/dist/core/cleaning.d.ts +78 -0
  10. package/dist/core/cleaning.d.ts.map +1 -0
  11. package/dist/core/cleaning.js +217 -0
  12. package/dist/core/cleaning.js.map +1 -0
  13. package/dist/core/pipeline.d.ts +49 -0
  14. package/dist/core/pipeline.d.ts.map +1 -0
  15. package/dist/core/pipeline.js +66 -0
  16. package/dist/core/pipeline.js.map +1 -0
  17. package/dist/core/registry.d.ts +24 -0
  18. package/dist/core/registry.d.ts.map +1 -0
  19. package/dist/core/registry.js +53 -0
  20. package/dist/core/registry.js.map +1 -0
  21. package/dist/index.d.ts +9 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +29 -0
  24. package/dist/index.js.map +1 -0
  25. package/dist/parsers/phillip/extract.py +90 -0
  26. package/dist/parsers/phillip/extractor.d.ts +215 -0
  27. package/dist/parsers/phillip/extractor.d.ts.map +1 -0
  28. package/dist/parsers/phillip/extractor.js +1012 -0
  29. package/dist/parsers/phillip/extractor.js.map +1 -0
  30. package/dist/parsers/phillip/formatter.d.ts +113 -0
  31. package/dist/parsers/phillip/formatter.d.ts.map +1 -0
  32. package/dist/parsers/phillip/formatter.js +760 -0
  33. package/dist/parsers/phillip/formatter.js.map +1 -0
  34. package/dist/parsers/phillip/index.d.ts +25 -0
  35. package/dist/parsers/phillip/index.d.ts.map +1 -0
  36. package/dist/parsers/phillip/index.js +59 -0
  37. package/dist/parsers/phillip/index.js.map +1 -0
  38. package/dist/types/formatter.d.ts +47 -0
  39. package/dist/types/formatter.d.ts.map +1 -0
  40. package/dist/types/formatter.js +9 -0
  41. package/dist/types/formatter.js.map +1 -0
  42. package/dist/types/plugin.d.ts +14 -0
  43. package/dist/types/plugin.d.ts.map +1 -0
  44. package/dist/types/plugin.js +5 -0
  45. package/dist/types/plugin.js.map +1 -0
  46. package/dist/types/raw.d.ts +136 -0
  47. package/dist/types/raw.d.ts.map +1 -0
  48. package/dist/types/raw.js +11 -0
  49. package/dist/types/raw.js.map +1 -0
  50. package/dist/types/statement.d.ts +55 -0
  51. package/dist/types/statement.d.ts.map +1 -0
  52. package/dist/types/statement.js +12 -0
  53. package/dist/types/statement.js.map +1 -0
  54. package/package.json +64 -0
  55. package/src/parsers/phillip/extract.py +90 -0
@@ -0,0 +1,134 @@
1
+ ---
2
+ name: parse-statement
3
+ description: 解析券商 PDF 结单文件,提取交易记录、持仓和资产汇总为结构化 JSON
4
+ ---
5
+
6
+ ## 触发条件
7
+
8
+ 以下情况触发此 Skill:
9
+
10
+ - 用户提供 PDF 文件,要求解析/读取/提取数据
11
+ - 用户提到「结单」、「月结单」、「交易结单」、「对账单」、「持仓报告」、「brokerage statement」
12
+ - 用户要求将 PDF 转为 JSON、提取交易记录、查看持仓
13
+ - 关键词:辉立证券、Phillip Securities、结单解析、parse statement
14
+
15
+ ## Setup
16
+
17
+ 运行以下 setup 脚本确保环境就绪:
18
+
19
+ ```bash
20
+ #!/usr/bin/env bash
21
+ set -e
22
+
23
+ # 1. 检查并安装 CLI
24
+ if ! command -v tcos-parse &>/dev/null; then
25
+ echo "Installing tcos-parse..."
26
+ npm install -g @tcos/broker-parser
27
+ fi
28
+
29
+ # 2. 检查 Python 3
30
+ if ! command -v python3 &>/dev/null; then
31
+ if command -v brew &>/dev/null; then
32
+ brew install python3
33
+ elif command -v apt-get &>/dev/null; then
34
+ sudo apt-get install -y python3 python3-pip
35
+ else
36
+ echo "ERROR: Cannot install Python automatically. Please install Python 3 manually."
37
+ exit 1
38
+ fi
39
+ fi
40
+
41
+ # 3. 检查并安装 pdfplumber
42
+ if ! python3 -c "import pdfplumber" 2>/dev/null; then
43
+ pip3 install pdfplumber
44
+ fi
45
+
46
+ # 4. 检查 poppler(提供 pdftotext 等 PDF 命令行工具)
47
+ if ! command -v pdftotext &>/dev/null; then
48
+ if command -v brew &>/dev/null; then
49
+ brew install poppler
50
+ elif command -v apt-get &>/dev/null; then
51
+ sudo apt-get install -y poppler-utils
52
+ fi
53
+ fi
54
+
55
+ echo "tcos-parse setup complete"
56
+ tcos-parse --version
57
+ ```
58
+
59
+ ## 使用方式
60
+
61
+ ```bash
62
+ # 基础解析
63
+ tcos-parse <pdf> # 解析并输出 JSON 到 stdout
64
+ tcos-parse <pdf> -o out.json # 输出到文件
65
+
66
+ # 指定券商(跳过自动检测)
67
+ tcos-parse -b phillip <pdf> # 指定券商为 phillip
68
+
69
+ # 阶段控制
70
+ tcos-parse <pdf> --raw # 只输出 Stage1 原始提取数据
71
+ tcos-parse <pdf> --no-clean # 跳过 Stage3 清理步骤
72
+
73
+ # 检测与查询
74
+ tcos-parse --detect <pdf> # 检测 PDF 所属券商
75
+ tcos-parse --list-parsers # 列出支持的券商解析器
76
+
77
+ # 输出控制
78
+ tcos-parse <pdf> -v # 显示各阶段耗时(输出到 stderr)
79
+ tcos-parse <pdf> -q # 静默模式,只输出 JSON(无额外提示信息)
80
+ ```
81
+
82
+ ### 选项说明
83
+
84
+ | 选项 | 说明 |
85
+ | ---------------- | -------------------------------------- |
86
+ | `-o, --output` | 输出到文件而非 stdout |
87
+ | `-b, --broker` | 指定券商名称,跳过自动检测 |
88
+ | `--raw` | 只输出 Stage1 原始表格数据,不做格式化 |
89
+ | `--no-clean` | 跳过 Stage3 数据清理步骤 |
90
+ | `--detect` | 检测 PDF 所属券商及置信度 |
91
+ | `--list-parsers` | 列出所有可用的券商解析器 |
92
+ | `-v, --verbose` | 显示各阶段耗时详情(输出到 stderr) |
93
+ | `-q, --quiet` | 静默模式,仅输出纯 JSON |
94
+
95
+ ## 输出格式
96
+
97
+ 完整解析结果(StatementData):
98
+
99
+ ```json
100
+ {
101
+ "broker": "phillip",
102
+ "accountCode": "M000001",
103
+ "statementDate": "2024-01-31",
104
+ "transactions": [
105
+ {
106
+ "date": "2024-01-15",
107
+ "ticker": "00700",
108
+ "name": "TENCENT",
109
+ "type": "BUY",
110
+ "quantity": 100,
111
+ "price": 298.4,
112
+ "amount": 29840.0,
113
+ "fee": 50.0,
114
+ "currency": "HKD"
115
+ }
116
+ ],
117
+ "holdings": [
118
+ {
119
+ "ticker": "00700",
120
+ "name": "TENCENT",
121
+ "quantity": 100,
122
+ "avgCost": 298.4,
123
+ "marketValue": 30000.0,
124
+ "currency": "HKD"
125
+ }
126
+ ],
127
+ "assets": {
128
+ "totalAssets": 150000.0,
129
+ "cashBalance": 120000.0,
130
+ "marketValue": 30000.0,
131
+ "currency": "HKD"
132
+ }
133
+ }
134
+ ```
@@ -0,0 +1,257 @@
1
+ # parse-statement 使用示例
2
+
3
+ ## 示例 1:基础解析 — 查看交易记录
4
+
5
+ **用户说**:帮我解析这份辉立的结单
6
+
7
+ **操作过程**:
8
+
9
+ 1. 运行 setup 安装环境(首次使用)
10
+ 2. 执行解析命令
11
+
12
+ ```bash
13
+ tcos-parse /path/to/statement_202401.pdf
14
+ ```
15
+
16
+ **输出**:
17
+
18
+ ```json
19
+ {
20
+ "broker": "phillip",
21
+ "accountCode": "M000001",
22
+ "statementDate": "2024-01-31",
23
+ "transactions": [
24
+ {
25
+ "date": "2024-01-10",
26
+ "ticker": "00700",
27
+ "name": "TENCENT",
28
+ "type": "BUY",
29
+ "quantity": 100,
30
+ "price": 298.4,
31
+ "amount": 29840.0,
32
+ "fee": 50.0,
33
+ "currency": "HKD"
34
+ },
35
+ {
36
+ "date": "2024-01-22",
37
+ "ticker": "09988",
38
+ "name": "BABA-SW",
39
+ "type": "SELL",
40
+ "quantity": 200,
41
+ "price": 72.5,
42
+ "amount": 14500.0,
43
+ "fee": 30.0,
44
+ "currency": "HKD"
45
+ }
46
+ ],
47
+ "holdings": [
48
+ {
49
+ "ticker": "00700",
50
+ "name": "TENCENT",
51
+ "quantity": 100,
52
+ "avgCost": 298.4,
53
+ "marketValue": 30000.0,
54
+ "currency": "HKD"
55
+ }
56
+ ],
57
+ "assets": {
58
+ "totalAssets": 150000.0,
59
+ "cashBalance": 120000.0,
60
+ "marketValue": 30000.0,
61
+ "currency": "HKD"
62
+ }
63
+ }
64
+ ```
65
+
66
+ ---
67
+
68
+ ## 示例 2:输出到文件
69
+
70
+ **用户说**:解析后保存到文件,方便我后续导入系统
71
+
72
+ ```bash
73
+ tcos-parse /path/to/statement_202401.pdf -o result.json
74
+ ```
75
+
76
+ **输出(stderr)**:
77
+
78
+ ```
79
+ Written to result.json
80
+ ```
81
+
82
+ 解析结果已写入 `result.json`,不会输出到终端。
83
+
84
+ ---
85
+
86
+ ## 示例 3:调试 — 查看原始提取数据
87
+
88
+ **用户说**:解析结果不对,我想看看 PDF 里原始提取出了什么
89
+
90
+ 使用 `--raw` 选项只执行 Stage1(pdfplumber 提取),跳过格式化和清理:
91
+
92
+ ```bash
93
+ tcos-parse /path/to/statement_202401.pdf --raw
94
+ ```
95
+
96
+ **输出**:
97
+
98
+ ```json
99
+ {
100
+ "pages": [
101
+ {
102
+ "pageNumber": 1,
103
+ "tables": [
104
+ [
105
+ ["Date", "Stock Code", "Description", "Buy/Sell", "Qty", "Price", "Amount"],
106
+ ["10/01/2024", "00700", "TENCENT", "B", "100", "298.40", "29,840.00"]
107
+ ]
108
+ ]
109
+ }
110
+ ]
111
+ }
112
+ ```
113
+
114
+ ---
115
+
116
+ ## 示例 4:调试 — 查看各阶段耗时
117
+
118
+ **用户说**:解析好慢,想看看慢在哪一步
119
+
120
+ 使用 `-v` 查看各阶段耗时(耗时输出到 stderr,不污染 JSON):
121
+
122
+ ```bash
123
+ tcos-parse /path/to/statement_202401.pdf -v
124
+ ```
125
+
126
+ **stderr 输出**:
127
+
128
+ ```
129
+ [timing] detect=120ms stage1=850ms stage2=30ms clean=15ms total=1015ms
130
+ ```
131
+
132
+ **stdout 输出**:正常的 JSON 解析结果。
133
+
134
+ 可以配合重定向只看耗时:
135
+
136
+ ```bash
137
+ tcos-parse /path/to/statement_202401.pdf -v > /dev/null
138
+ ```
139
+
140
+ ---
141
+
142
+ ## 示例 5:检测 PDF 所属券商
143
+
144
+ **用户说**:这个 PDF 是哪家券商的结单?
145
+
146
+ ```bash
147
+ tcos-parse --detect /path/to/unknown_statement.pdf
148
+ ```
149
+
150
+ **输出**:
151
+
152
+ ```
153
+ Detected broker: phillip (confidence: 0.95)
154
+ ```
155
+
156
+ 静默模式只输出券商名称:
157
+
158
+ ```bash
159
+ tcos-parse --detect /path/to/unknown_statement.pdf -q
160
+ ```
161
+
162
+ **输出**:
163
+
164
+ ```
165
+ phillip
166
+ ```
167
+
168
+ ---
169
+
170
+ ## 示例 6:列出支持的券商
171
+
172
+ **用户说**:目前支持解析哪些券商的结单?
173
+
174
+ ```bash
175
+ tcos-parse --list-parsers
176
+ ```
177
+
178
+ **输出**:
179
+
180
+ ```
181
+ Available parsers:
182
+ phillip — Phillip Securities (辉立证券)
183
+ ```
184
+
185
+ ---
186
+
187
+ ## 示例 7:指定券商跳过自动检测
188
+
189
+ **用户说**:我知道这是辉立的结单,不需要自动检测
190
+
191
+ 使用 `-b` 直接指定券商,省去检测步骤:
192
+
193
+ ```bash
194
+ tcos-parse -b phillip /path/to/statement_202401.pdf
195
+ ```
196
+
197
+ ---
198
+
199
+ ## 示例 8:管道处理 — 用 jq 过滤特定交易
200
+
201
+ **用户说**:我只想看买入交易
202
+
203
+ ```bash
204
+ tcos-parse /path/to/statement_202401.pdf -q | jq '.transactions[] | select(.type == "BUY")'
205
+ ```
206
+
207
+ **输出**:
208
+
209
+ ```json
210
+ {
211
+ "date": "2024-01-10",
212
+ "ticker": "00700",
213
+ "name": "TENCENT",
214
+ "type": "BUY",
215
+ "quantity": 100,
216
+ "price": 298.4,
217
+ "amount": 29840.0,
218
+ "fee": 50.0,
219
+ "currency": "HKD"
220
+ }
221
+ ```
222
+
223
+ 统计交易笔数:
224
+
225
+ ```bash
226
+ tcos-parse /path/to/statement_202401.pdf -q | jq '.transactions | length'
227
+ ```
228
+
229
+ ---
230
+
231
+ ## 示例 9:批量解析多份结单
232
+
233
+ **用户说**:我有几个月的结单,想一起解析
234
+
235
+ 用 shell 循环批量处理,每份结单输出到独立文件:
236
+
237
+ ```bash
238
+ for pdf in /path/to/statements/*.pdf; do
239
+ name=$(basename "$pdf" .pdf)
240
+ tcos-parse "$pdf" -q -o "${name}.json"
241
+ echo "Done: $pdf -> ${name}.json"
242
+ done
243
+ ```
244
+
245
+ ---
246
+
247
+ ## 示例 10:跳过清理步骤
248
+
249
+ **用户说**:Stage3 清理把我的某些数据删了,我想跳过清理
250
+
251
+ 使用 `--no-clean` 跳过 Stage3 数据清理:
252
+
253
+ ```bash
254
+ tcos-parse /path/to/statement_202401.pdf --no-clean
255
+ ```
256
+
257
+ 解析管道只执行 Stage1(提取)和 Stage2(格式化),不做去重和清洗。
@@ -0,0 +1,133 @@
1
+ # promptfoo Skill 触发率评估用例
2
+ # 正例(22 个):应触发 parse-statement skill
3
+ # 负例(12 个):不应触发
4
+
5
+ version: 1
6
+ testCases:
7
+ # ── 正例(positive)──────────────────────────────────────────────
8
+
9
+ # 直接要求解析(中英文)
10
+ - description: 中文直接要求解析 PDF 结单
11
+ input: '帮我解析这个 PDF 结单'
12
+ expected: triggered
13
+ - description: 英文要求解析券商结单
14
+ input: 'Parse this brokerage statement for me'
15
+ expected: triggered
16
+ - description: 英文解析 PDF 月结单
17
+ input: 'Can you parse my monthly PDF statement?'
18
+ expected: triggered
19
+
20
+ # 各种"结单"同义词
21
+ - description: 月结单
22
+ input: '这是我上个月的月结单,帮我分析一下'
23
+ expected: triggered
24
+ - description: 交易结单
25
+ input: '我有一份交易结单需要解析'
26
+ expected: triggered
27
+ - description: 对账单
28
+ input: '我的对账单来了,帮我解析'
29
+ expected: triggered
30
+ - description: 持仓报告(PDF)
31
+ input: '帮我读取这份 PDF 持仓报告'
32
+ expected: triggered
33
+
34
+ # 不同券商关键词
35
+ - description: 辉立关键词
36
+ input: '我有一份辉立的月结单'
37
+ expected: triggered
38
+ - description: Phillip 关键词
39
+ input: 'I have a Phillip Securities statement PDF'
40
+ expected: triggered
41
+ - description: 辉立证券全称
42
+ input: '帮我解析辉立证券的这份结单 PDF'
43
+ expected: triggered
44
+
45
+ # 不同动词
46
+ - description: 动词"读取"
47
+ input: '帮我读取这个 PDF 里的交易数据'
48
+ expected: triggered
49
+ - description: 动词"提取"
50
+ input: '从这个 PDF 里提取所有交易记录'
51
+ expected: triggered
52
+ - description: 动词"转换"
53
+ input: '把这个 PDF 转成 JSON 格式'
54
+ expected: triggered
55
+ - description: 动词"分析"
56
+ input: '分析一下这份结单 PDF'
57
+ expected: triggered
58
+ - description: 关键词"结单解析"
59
+ input: '能帮我做结单解析吗'
60
+ expected: triggered
61
+
62
+ # 用户描述场景
63
+ - description: 收到结单的场景
64
+ input: '我收到了券商发来的结单,帮我看看里面的交易'
65
+ expected: triggered
66
+ - description: 用户提供 PDF 路径
67
+ input: '帮我看看这个 PDF /tmp/statement_202401.pdf'
68
+ expected: triggered
69
+ - description: 询问 PDF 文件中的交易内容
70
+ input: '帮我看看这个 PDF 文件里有什么交易'
71
+ expected: triggered
72
+
73
+ # 组合场景
74
+ - description: 查看交易记录
75
+ input: '帮我从结单里查看我的交易记录'
76
+ expected: triggered
77
+ - description: 查看持仓情况
78
+ input: '帮我看看这份结单里的持仓情况'
79
+ expected: triggered
80
+ - description: 查看资产汇总
81
+ input: '解析一下这份 PDF,我想看资产汇总'
82
+ expected: triggered
83
+ - description: 批量解析
84
+ input: '我有几份 PDF 结单,能一起解析吗'
85
+ expected: triggered
86
+
87
+ # ── 负例(negative)──────────────────────────────────────────────
88
+
89
+ # 一般股市问题(无结单/PDF 语境)
90
+ - description: 一般股市行情问题
91
+ input: '帮我分析一下股市行情'
92
+ expected: not_triggered
93
+ - description: 股票推荐
94
+ input: '你觉得腾讯的股票值得买吗'
95
+ expected: not_triggered
96
+ - description: 港股交易时间
97
+ input: '港股的交易时间是什么时候'
98
+ expected: not_triggered
99
+
100
+ # 其他文档格式
101
+ - description: Excel 表格
102
+ input: '帮我解析这个 Excel 表格'
103
+ expected: not_triggered
104
+ - description: Word 文档
105
+ input: '帮我读取这份 Word 文档'
106
+ expected: not_triggered
107
+ - description: CSV 文件
108
+ input: '帮我导入这个 CSV 交易数据'
109
+ expected: not_triggered
110
+
111
+ # 一般财务问题(不涉及解析)
112
+ - description: 税务计算问题
113
+ input: '我今年的股票收益要交多少税'
114
+ expected: not_triggered
115
+ - description: 开户咨询
116
+ input: '怎么在辉立证券开户'
117
+ expected: not_triggered
118
+
119
+ # PDF 但不是结单
120
+ - description: 合同 PDF
121
+ input: '帮我看看这份 PDF 合同'
122
+ expected: not_triggered
123
+ - description: 招股说明书
124
+ input: '帮我读取这份 PDF 招股说明书'
125
+ expected: not_triggered
126
+
127
+ # IPO 相关但不涉及解析
128
+ - description: IPO 概念问题
129
+ input: '什么是 IPO?'
130
+ expected: not_triggered
131
+ - description: 申购操作问题
132
+ input: '怎么在手机上申购新股'
133
+ expected: not_triggered
package/README.md ADDED
@@ -0,0 +1,153 @@
1
+ # @tcos/broker-parser
2
+
3
+ > Parse brokerage PDF statements into structured JSON
4
+
5
+ [![CI](https://github.com/biggersun/broker-parser/actions/workflows/ci.yml/badge.svg)](https://github.com/biggersun/broker-parser/actions)
6
+ [![npm](https://img.shields.io/npm/v/@tcos/broker-parser)](https://www.npmjs.com/package/@tcos/broker-parser)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](./LICENSE)
8
+
9
+ ## Supported Brokers
10
+
11
+ | Broker | Status |
12
+ | ----------------------------- | --------- |
13
+ | Phillip Securities (辉立证券) | Supported |
14
+
15
+ ## Quick Start
16
+
17
+ ### Option 1: CLI
18
+
19
+ ```bash
20
+ # Install
21
+ npm install -g @tcos/broker-parser
22
+
23
+ # Parse a PDF statement
24
+ tcos-parse statement.pdf
25
+
26
+ # Output to file
27
+ tcos-parse statement.pdf -o result.json
28
+ ```
29
+
30
+ ### Option 2: As an npm Package
31
+
32
+ ```typescript
33
+ import { ParsePipeline, PluginRegistry, PhillipPlugin } from '@tcos/broker-parser';
34
+
35
+ const registry = new PluginRegistry();
36
+ registry.register(new PhillipPlugin());
37
+ const pipeline = new ParsePipeline(registry);
38
+
39
+ const result = await pipeline.parse('./statement.pdf');
40
+ console.log(result.data);
41
+ ```
42
+
43
+ ### Option 3: Claude Code Skill (Recommended for Non-Technical Users)
44
+
45
+ After installing, tell Claude: "Help me parse this PDF statement"
46
+
47
+ ## Prerequisites
48
+
49
+ - Node.js 18+
50
+ - Python 3.8+ with `pdfplumber` (`pip install pdfplumber`)
51
+ - poppler
52
+ - macOS: `brew install poppler`
53
+ - Ubuntu: `sudo apt-get install poppler-utils`
54
+
55
+ ## CLI Reference
56
+
57
+ | Command | Description |
58
+ | ------------------------------ | ------------------------------------ |
59
+ | `tcos-parse <pdf>` | Parse PDF, output JSON to stdout |
60
+ | `tcos-parse <pdf> -o out.json` | Output to file |
61
+ | `tcos-parse <pdf> --raw` | Output Stage1 raw data only |
62
+ | `tcos-parse <pdf> --no-clean` | Skip Stage3 cleaning step |
63
+ | `tcos-parse <pdf> -b phillip` | Specify broker (skip auto-detect) |
64
+ | `tcos-parse --detect <pdf>` | Detect which broker a PDF belongs to |
65
+ | `tcos-parse --list-parsers` | List available broker parsers |
66
+ | `tcos-parse <pdf> -v` | Show stage timing to stderr |
67
+ | `tcos-parse <pdf> -q` | Quiet mode, output JSON only |
68
+
69
+ ## Pipeline Architecture
70
+
71
+ ```
72
+ PDF File
73
+ |
74
+ Stage1: Extract (pdfplumber)
75
+ | -> RawTableData
76
+ |
77
+ Stage2: Format (rule engine)
78
+ | -> StatementData
79
+ |
80
+ Stage3: Clean (dedup, normalize)
81
+ | -> StatementData (cleaned)
82
+ v
83
+ JSON Output
84
+ ```
85
+
86
+ ## Output Format
87
+
88
+ ```json
89
+ {
90
+ "accountCode": "M000001",
91
+ "clientName": "USER A",
92
+ "statementDate": "2024-01-31",
93
+ "transactions": [
94
+ {
95
+ "transactionDate": "2024-01-15",
96
+ "stockCode": "1234",
97
+ "stockName": "EXAMPLE CO",
98
+ "transactionType": "BUY",
99
+ "quantity": 1000,
100
+ "price": 12.34,
101
+ "amount": -12340.0,
102
+ "currency": "HKD"
103
+ }
104
+ ],
105
+ "ipo": [],
106
+ "snapshots": [
107
+ {
108
+ "symbol": "HKD",
109
+ "assetCategory": "Cash",
110
+ "quantity": 50000.0,
111
+ "currency": "HKD"
112
+ }
113
+ ]
114
+ }
115
+ ```
116
+
117
+ Key type definitions:
118
+
119
+ - **`StatementData`** — Full parsed statement (account info, transactions, IPO records, holdings snapshots)
120
+ - **`TradeData`** — Individual trade record (BUY, SELL, DIVIDEND, FEE, etc.)
121
+ - **`IPOData`** — IPO subscription/allotment record
122
+ - **`SnapshotData`** — Holdings snapshot (cash balances, stock positions)
123
+
124
+ ## Adding a New Broker
125
+
126
+ See [CONTRIBUTING.md](./CONTRIBUTING.md) for the guide on implementing a new broker plugin.
127
+
128
+ ## Development
129
+
130
+ ```bash
131
+ # Install dependencies
132
+ npm install
133
+
134
+ # Run CI tests (Stage2 + CLI, no PDF dependency)
135
+ npm test
136
+
137
+ # Run all tests including Stage1 (requires local PDFs)
138
+ npm run test:local
139
+
140
+ # Lint & format
141
+ npm run lint
142
+ npm run format:check
143
+
144
+ # Type check
145
+ npm run typecheck
146
+
147
+ # Build
148
+ npm run build
149
+ ```
150
+
151
+ ## License
152
+
153
+ MIT
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * CLI 入口 — tcos-parse 命令
4
+ *
5
+ * 用法:
6
+ * tcos-parse <pdf> # 解析并输出 JSON 到 stdout
7
+ * tcos-parse <pdf> -o out.json # 输出到文件
8
+ * tcos-parse <pdf> --raw # 只输出 Stage1 原始数据
9
+ * tcos-parse <pdf> --no-clean # 跳过 Stage3 清理
10
+ * tcos-parse -b phillip <pdf> # 指定券商
11
+ * tcos-parse --detect <pdf> # 检测 PDF 所属券商
12
+ * tcos-parse --list-parsers # 列出支持的券商
13
+ * tcos-parse -v <pdf> # 显示各阶段耗时
14
+ * tcos-parse -q <pdf> # 静默模式,只输出 JSON
15
+ */
16
+ export {};
17
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/cli/index.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;;GAaG"}