@tcos/broker-parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/parse-statement/SKILL.md +134 -0
- package/.claude/skills/parse-statement/examples.md +257 -0
- package/.claude/skills/parse-statement/trigger-tests/cases.yaml +133 -0
- package/README.md +153 -0
- package/dist/cli/index.d.ts +17 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +150 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/core/cleaning.d.ts +78 -0
- package/dist/core/cleaning.d.ts.map +1 -0
- package/dist/core/cleaning.js +217 -0
- package/dist/core/cleaning.js.map +1 -0
- package/dist/core/pipeline.d.ts +49 -0
- package/dist/core/pipeline.d.ts.map +1 -0
- package/dist/core/pipeline.js +66 -0
- package/dist/core/pipeline.js.map +1 -0
- package/dist/core/registry.d.ts +24 -0
- package/dist/core/registry.d.ts.map +1 -0
- package/dist/core/registry.js +53 -0
- package/dist/core/registry.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +29 -0
- package/dist/index.js.map +1 -0
- package/dist/parsers/phillip/extract.py +90 -0
- package/dist/parsers/phillip/extractor.d.ts +215 -0
- package/dist/parsers/phillip/extractor.d.ts.map +1 -0
- package/dist/parsers/phillip/extractor.js +1012 -0
- package/dist/parsers/phillip/extractor.js.map +1 -0
- package/dist/parsers/phillip/formatter.d.ts +113 -0
- package/dist/parsers/phillip/formatter.d.ts.map +1 -0
- package/dist/parsers/phillip/formatter.js +760 -0
- package/dist/parsers/phillip/formatter.js.map +1 -0
- package/dist/parsers/phillip/index.d.ts +25 -0
- package/dist/parsers/phillip/index.d.ts.map +1 -0
- package/dist/parsers/phillip/index.js +59 -0
- package/dist/parsers/phillip/index.js.map +1 -0
- package/dist/types/formatter.d.ts +47 -0
- package/dist/types/formatter.d.ts.map +1 -0
- package/dist/types/formatter.js +9 -0
- package/dist/types/formatter.js.map +1 -0
- package/dist/types/plugin.d.ts +14 -0
- package/dist/types/plugin.d.ts.map +1 -0
- package/dist/types/plugin.js +5 -0
- package/dist/types/plugin.js.map +1 -0
- package/dist/types/raw.d.ts +136 -0
- package/dist/types/raw.d.ts.map +1 -0
- package/dist/types/raw.js +11 -0
- package/dist/types/raw.js.map +1 -0
- package/dist/types/statement.d.ts +55 -0
- package/dist/types/statement.d.ts.map +1 -0
- package/dist/types/statement.js +12 -0
- package/dist/types/statement.js.map +1 -0
- package/package.json +64 -0
- package/src/parsers/phillip/extract.py +90 -0
|
@@ -0,0 +1,760 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Phillip 规则格式化器 (Stage 2)
|
|
4
|
+
*
|
|
5
|
+
* 将 Stage 1 输出的 RawTableData 转换为 StatementData。
|
|
6
|
+
* 使用规则引擎进行分类、字段提取、合并等操作。
|
|
7
|
+
*
|
|
8
|
+
* 规则对齐 LLM Prompt (DEEPSEEK_SYS_PROMPT):
|
|
9
|
+
* 1. 日期格式转换 (DD/MM/YY → YYYY-MM-DD)
|
|
10
|
+
* 2. 交易分类 (IPO, REDEEM, DIVIDEND 等)
|
|
11
|
+
* 3. 股票代码/名称提取
|
|
12
|
+
* 4. 数量/价格解析
|
|
13
|
+
* 5. 交易类型映射
|
|
14
|
+
* 6. IPO 中间记录过滤 (Apply/Loan/Refund 从 transactions 中丢弃)
|
|
15
|
+
* 7. 空金额记录过滤
|
|
16
|
+
* 8. 内部转账记录过滤 (金额为 0 的 Withdraw/Deposit)
|
|
17
|
+
*/
|
|
18
|
+
// Define `exports.__esModule = true` on this CommonJS module; this flag is
// conventionally read by loaders/bundlers for ES-module interop.
Object.defineProperty(exports, "__esModule", { value: true });
|
|
19
|
+
// Pre-declare the named export as `undefined`; the class itself is assigned
// to this property by the last statement of the file.
exports.PhillipRuleFormatter = void 0;
|
|
20
|
+
// ============================================================================
|
|
21
|
+
// 分类规则定义
|
|
22
|
+
// ============================================================================
|
|
23
|
+
/**
 * Classification rules, evaluated in array order: the first rule with a
 * pattern contained in the transaction text wins, so more specific rules must
 * precede more general ones. Plain-string patterns are matched as
 * case-sensitive substrings. A rule may carry `subTypeRules`, which are
 * evaluated the same way (first match wins) to refine the category.
 */
const CATEGORY_RULES = [
    {
        pattern: ['IPO', '新股認購', '新股认购', '公开发售', '公開發售', 'PUBLIC OFFER'],
        category: 'IPO',
        subTypeRules: [
            { pattern: ['ALLOT', '配發', '配发', '中籤', '中签'], subType: 'IPO_ALLOT' },
            { pattern: ['HANDLING FEE', '手續費', '手续费', 'IPO FEE'], subType: 'IPO_FEE' },
            { pattern: ['INTEREST', '利息'], subType: 'IPO_INTEREST' },
            { pattern: ['Apply Deposit', '10%', '申請按金', '申请按金'], subType: 'IPO_APPLY_CASH' },
            { pattern: ['Refund', '退款'], subType: 'IPO_REFUND' },
            { pattern: ['REPAY', '還款', '还款'], subType: 'IPO_REPAY' },
            { pattern: ['APPLY', '申請孖展', '申请孖展', '孖展', 'LOAN'], subType: 'IPO_APPLY_MARGIN' },
        ],
    },
    {
        // NOTE: "Reverse redeem" must be matched before the REDEEM rule below.
        // A reversed redemption puts the money back into the fund, i.e. it is
        // a BUY, not a SELL. Misclassifying it makes FIFO report phantom P&L
        // (see bug: fjl_260109_reverse_redeem_stage2).
        pattern: ['Reverse 贖回', 'Reverse 赎回', 'Reverse REDEEM'],
        category: 'REVERSE_REDEEM',
    },
    {
        pattern: ['贖回', '赎回', 'REDEEM'],
        category: 'REDEEM',
    },
    {
        // NOTE: 'DIV' must not be used as a pattern: it would also match
        // stock names such as 'DIVERSIFIED'.
        pattern: ['股息', 'DIVIDEND'],
        category: 'DIVIDEND',
    },
    {
        // NOTE: 'INT' must not be used as a pattern: it would also match
        // 'INTL' (e.g. ZIJIN GOLD INTL). 'INT ADJ' (interest adjustment) is a
        // safe full phrase.
        pattern: ['利息', 'INTEREST', 'INT ADJ'],
        category: 'INTEREST',
    },
    {
        pattern: ['提貨', '提货', 'W/D'],
        category: 'WITHDRAW',
    },
    {
        // NOTE: 'DEP' must not be used as a pattern: it would also match
        // 'DEPT', 'DEEP', etc.
        pattern: ['存入', 'Deposit', '存貨', '存货'],
        category: 'DEPOSIT',
    },
    {
        // NOTE: 'SUB' must not be used as a pattern: it would also match
        // 'SUBWAY', 'SUBSIDIARY', etc.
        pattern: ['購買', '购买', 'Subscribe'],
        category: 'FUND_SUBSCRIBE',
    },
    {
        pattern: ['賣出', '卖出', 'Sell', 'Sold'],
        category: 'STOCK_SELL',
    },
    {
        pattern: ['買入', '买入', 'Buy', 'Bought'],
        category: 'STOCK_BUY',
    },
    {
        pattern: ['Epayment', '電子轉帳', '电子转账'],
        category: 'EPAYMENT',
    },
    {
        pattern: ['eDDA', 'Direct Debit', '直接轉帳', '直接转账'],
        category: 'EDDA',
    },
    {
        // Currency conversions are filtered out and never stored as
        // transactions. Both traditional and simplified spellings are listed,
        // like every other rule in this table.
        pattern: ['兌換', '兑换', 'CONVERT', '貨幣轉換', '货币转换', 'Currency Exchange'],
        category: 'CURRENCY_EXCHANGE',
    },
];
|
|
96
|
+
/**
 * Maps the raw transaction-type label of a statement row to the normalized
 * transaction type.
 *
 * The map has a null prototype so that a lookup with an arbitrary label
 * (e.g. 'constructor' or 'toString') can never resolve to an inherited
 * Object.prototype member; unknown labels simply yield `undefined`.
 */
const TRANSACTION_TYPE_MAP = Object.freeze(Object.assign(Object.create(null), {
    Payment: 'BUY',
    Receipt: 'SELL',
    Withdraw: 'WITHDRAWAL',
    Deposit: 'DEPOSIT',
    Buy: 'BUY',
    Sell: 'SELL',
}));
|
|
105
|
+
// ============================================================================
|
|
106
|
+
// 规则格式化器实现
|
|
107
|
+
// ============================================================================
|
|
108
|
+
/**
 * Rule-based formatter for Phillip statements (Stage 2).
 *
 * Converts the raw table data produced by Stage 1 into structured statement
 * data: normalized transactions, IPO apply/allot records and holding
 * snapshots. Classification relies on the module-level CATEGORY_RULES and
 * TRANSACTION_TYPE_MAP tables.
 */
class PhillipRuleFormatter {
    /**
     * @param {object} [config] - Optional overrides, merged over the defaults
     *   `{ type: 'rule', enableMerge: true }`.
     */
    constructor(config) {
        this._config = {
            type: 'rule',
            enableMerge: true,
            ...config,
        };
    }
    /** Formatter configuration (exposed for external readers). */
    get config() {
        return this._config;
    }
    /**
     * Formats raw table data into structured statement data.
     *
     * The work is synchronous; the method is async only so that the same
     * interface can be satisfied by asynchronous implementations (e.g. an
     * LLM-based formatter).
     *
     * @param {object} rawData - Stage 1 output; `transactions`, `holdings` and
     *   `accountInfo` are read from it (each may be absent).
     * @returns {Promise<object>} Statement data with `accountCode`,
     *   `clientName`, `period`, `statementDate`, `transactions`, `ipo` and
     *   `snapshots`.
     */
    // eslint-disable-next-line @typescript-eslint/require-await
    async format(rawData) {
        const accountInfo = rawData.accountInfo ?? {};
        const { transactions, ipoRecords } = this.formatTransactions(rawData.transactions);
        const snapshots = this.formatHoldings(rawData.holdings);
        const statementDate = this.convertDateFormat(accountInfo.statementDate);
        return {
            accountCode: accountInfo.accountCode || '',
            clientName: accountInfo.clientName || '',
            period: statementDate,
            statementDate,
            transactions,
            ipo: ipoRecords,
            snapshots,
        };
    }
    /**
     * Formats raw transaction rows.
     *
     * Rules applied, in order:
     * - rows with neither a debit nor a credit amount are dropped;
     * - currency-exchange rows are dropped;
     * - IPO allotments produce an `allot` IPO record AND a BUY transaction;
     * - IPO cash/margin applications are merged per stock code into a single
     *   `apply` IPO record (they never reach `transactions`);
     * - IPO handling fees and IPO interest become transactions; every other
     *   IPO sub-type (refund, repay, ...) is dropped;
     * - zero-amount Withdraw/Deposit rows (internal transfers) are dropped;
     * - Epayment rows become WITHDRAWAL, eDDA rows become DEPOSIT;
     * - everything else is mapped through `mapTransactionType`.
     *
     * @param {Array<object>} rawTransactions - Raw rows (a missing value is
     *   treated as an empty list).
     * @returns {{transactions: Array<object>, ipoRecords: Array<object>}}
     *   `ipoRecords` lists the merged apply records first, then the allot
     *   records.
     */
    formatTransactions(rawTransactions) {
        const transactions = [];
        // IPO applications grouped by stock code.
        // NOTE(review): rows whose code cannot be parsed all share the key
        // 'N/A' and are therefore merged together - confirm this is intended.
        const ipoApplyMap = new Map();
        const ipoAllotRecords = [];
        for (const raw of rawTransactions ?? []) {
            // Skip rows without any amount.
            if (this.isEmpty(raw.debit) && this.isEmpty(raw.credit)) {
                continue;
            }
            // Parse against an empty string when the text is missing; the
            // emitted `description` still carries the raw value untouched.
            const particulars = raw.particulars ?? '';
            const { category, subType } = this.parseCategory(particulars);
            if (category === 'CURRENCY_EXCHANGE') {
                continue;
            }
            const stockInfo = this.parseStockInfo(particulars);
            const qtyPrice = this.parseQuantityPriceFromEnd(particulars);
            const transactionDate = this.convertDateFormat(raw.tradeDate);
            const settlementDate = raw.settleDate ? this.convertDateFormat(raw.settleDate) : undefined;
            // Signed amount: negative = money out (debit), positive = money in (credit).
            const amount = raw.credit ? raw.credit : raw.debit ? -raw.debit : 0;
            // Single factory for transaction entries so that every entry has
            // the same keys in the same order; `fields` overrides the defaults.
            const makeTrade = (fields) => ({
                transactionDate,
                settlementDate,
                refNo: raw.refNo,
                stockCode: 'N/A',
                stockName: '',
                transactionType: 'OTHER',
                quantity: undefined,
                price: undefined,
                amount,
                fee: undefined,
                currency: 'HKD',
                description: raw.particulars,
                ...fields,
            });
            if (category === 'IPO') {
                const stockCode = stockInfo?.code || 'N/A';
                const stockName = stockInfo?.name || '';
                if (this.getIPOType(subType) === 'allot') {
                    // Allotments use the dedicated parser; the generic result is only a fallback.
                    const allot = this.parseIpoAllotQuantityPrice(particulars);
                    const quantity = allot?.quantity ?? qtyPrice?.quantity;
                    const price = allot?.price ?? qtyPrice?.price;
                    ipoAllotRecords.push({
                        transactionDate,
                        settlementDate,
                        stockCode,
                        stockName,
                        quantity,
                        price,
                        amount: Math.abs(amount),
                        fee: undefined,
                        currency: 'HKD',
                        description: raw.particulars,
                        type: 'allot',
                    });
                    // An allotment is also recorded as a BUY transaction.
                    transactions.push(makeTrade({ stockCode, stockName, transactionType: 'BUY', quantity, price }));
                }
                else if (subType === 'IPO_APPLY_CASH' || subType === 'IPO_APPLY_MARGIN') {
                    // Only genuine applications feed the merged apply record.
                    const existing = ipoApplyMap.get(stockCode);
                    if (existing) {
                        existing.totalAmount += Math.abs(amount);
                        existing.descriptions.push(raw.particulars);
                        // Keep the first quantity/price that actually has a value.
                        if (qtyPrice?.quantity && !existing.quantity) {
                            existing.quantity = qtyPrice.quantity;
                        }
                        if (qtyPrice?.price && !existing.price) {
                            existing.price = qtyPrice.price;
                        }
                    }
                    else {
                        ipoApplyMap.set(stockCode, {
                            stockCode,
                            stockName,
                            quantity: qtyPrice?.quantity,
                            price: qtyPrice?.price,
                            totalAmount: Math.abs(amount),
                            transactionDate,
                            settlementDate,
                            descriptions: [raw.particulars],
                        });
                    }
                }
                else if (subType === 'IPO_FEE') {
                    transactions.push(makeTrade({
                        stockCode,
                        stockName,
                        transactionType: 'IPO_FEE',
                        fee: Math.abs(amount),
                    }));
                }
                else if (subType === 'IPO_INTEREST') {
                    transactions.push(makeTrade({ stockCode, stockName, transactionType: 'IPO_INTEREST' }));
                }
                // Any other IPO sub-type (refund, repay, ...) produces nothing.
                continue;
            }
            // Internal transfers: zero-amount Withdraw/Deposit rows are dropped.
            if ((category === 'WITHDRAW' || category === 'DEPOSIT') && Math.abs(amount) < 0.01) {
                continue;
            }
            if (category === 'EPAYMENT') {
                transactions.push(makeTrade({ transactionType: 'WITHDRAWAL' }));
                continue;
            }
            if (category === 'EDDA') {
                transactions.push(makeTrade({ transactionType: 'DEPOSIT' }));
                continue;
            }
            transactions.push(makeTrade({
                stockCode: stockInfo?.code || 'N/A',
                stockName: stockInfo?.name || '',
                transactionType: this.mapTransactionType(raw.transType, category),
                quantity: qtyPrice?.quantity,
                price: qtyPrice?.price,
            }));
        }
        // Turn the merged applications into IPO records.
        const ipoRecords = [];
        for (const apply of ipoApplyMap.values()) {
            // Prefer quantity x price; fall back to the accumulated amount
            // when either factor is missing.
            const calculatedAmount = apply.quantity && apply.price
                ? apply.quantity * apply.price
                : apply.totalAmount;
            ipoRecords.push({
                transactionDate: apply.transactionDate,
                settlementDate: apply.settlementDate,
                stockCode: apply.stockCode,
                stockName: apply.stockName,
                quantity: apply.quantity,
                price: apply.price,
                amount: calculatedAmount,
                fee: undefined,
                currency: 'HKD',
                description: apply.descriptions[0], // first description of the group
                type: 'apply',
            });
        }
        ipoRecords.push(...ipoAllotRecords);
        return { transactions, ipoRecords };
    }
    /**
     * Formats holdings into snapshots.
     *
     * Entries whose symbol or name contains "(Base)" are excluded. Fund
     * entries recognized as the Phillip HKD money market fund get a fixed
     * bilingual description.
     *
     * @param {Array<object>} rawHoldings - Raw holdings (a missing value is
     *   treated as an empty list).
     * @returns {Array<object>} Snapshot entries.
     */
    formatHoldings(rawHoldings) {
        const isPhillipMoneyFund = (raw) => (raw.name?.includes('Phillip') && raw.name?.includes('Money')) ||
            raw.name?.includes('輝立港元貨幣市場基金') ||
            raw.name?.includes('辉立港元货币市场基金') ||
            raw.symbol?.includes('PHILLIP');
        return (rawHoldings ?? [])
            .filter((raw) => !(raw.symbol?.includes('(Base)') || raw.name?.includes('(Base)')))
            .map((raw) => {
            const isFund = raw.symbol?.includes('UT.') || raw.assetType === 'Fund';
            const description = isFund && isPhillipMoneyFund(raw)
                ? 'Phillip HKD Money Market Fund 輝立港元貨幣市場基金'
                : raw.name || '';
            return {
                symbol: raw.symbol,
                assetCategory: this.mapAssetCategory(raw.assetType),
                quantity: raw.quantity || 0,
                marketPrice: raw.marketPrice,
                marketValue: raw.marketValue,
                currency: raw.currency || 'HKD',
                description,
            };
        });
    }
    // ============================================================================
    // Parsing helpers
    // ============================================================================
    /**
     * Tells whether an amount is missing.
     * @param {*} value
     * @returns {boolean} True for `undefined`, `null` and `0`.
     */
    isEmpty(value) {
        return value === undefined || value === null || value === 0;
    }
    /**
     * Classifies a transaction text with CATEGORY_RULES (first match wins,
     * for rules and for sub-type rules alike).
     *
     * @param {string} particulars - Transaction text.
     * @returns {{category?: string, subType?: string}} Empty object when no
     *   rule matches or the text is missing.
     */
    parseCategory(particulars) {
        if (!particulars) {
            return {};
        }
        const matchesAny = (pattern) => {
            const candidates = Array.isArray(pattern) ? pattern : [pattern];
            return candidates.some((p) => (p instanceof RegExp ? p.test(particulars) : particulars.includes(p)));
        };
        for (const rule of CATEGORY_RULES) {
            if (!matchesAny(rule.pattern)) {
                continue;
            }
            const subRule = (rule.subTypeRules ?? []).find((candidate) => matchesAny(candidate.pattern));
            return { category: rule.category, subType: subRule?.subType };
        }
        return {};
    }
    /**
     * Extracts the stock code and name from a transaction text.
     *
     * Tried in order:
     * - A:  "<Chinese name>/X<exchange>/<code>", e.g. "龍旗科技/XHKG/009611";
     * - A2: "/X<exchange>/<code>" without an adjacent Chinese name (non-IPO
     *       rows only, because IPO rows may contain "/XHKG/" in another layout);
     * - the Phillip HKD money market fund, which has the fixed code
     *   PHILLIP_HKD_MMF;
     * - B:  a parenthesized 3-6 digit code such as "(100)", padded to 6 digits.
     *
     * @param {string} particulars - Transaction text.
     * @returns {{code: string, name?: string}|null} Null when nothing is found.
     */
    parseStockInfo(particulars) {
        if (!particulars) {
            return null;
        }
        // Leading English name, e.g. "LONGCHEER" in "LONGCHEER 100 32.2000"
        // (hyphenated names such as "BAO PHARMA-B" are supported).
        const englishNameMatch = particulars.match(/^([A-Z][A-Z\s-]+?)(?:\s+\d)/);
        const englishName = englishNameMatch ? englishNameMatch[1].trim() : undefined;
        // IPO subscription marker, traditional spelling first.
        const ipoMarker = ['新股認購', '新股认购'].find((marker) => particulars.includes(marker));
        // Pattern A: Chinese name (may contain full-width characters) directly before /X.../code.
        const stockTradeMatch = particulars.match(/([\u4e00-\u9fff\uff00-\uffef\u3000-\u303f]+)\/X[A-Z]{2,4}\/(\d{5,6})/);
        if (stockTradeMatch) {
            const chineseName = stockTradeMatch[1].trim();
            const code = stockTradeMatch[2];
            return { code, name: englishName ? `${englishName} ${chineseName}` : chineseName };
        }
        // Pattern A2, e.g. "MINIMAX-WP 20 288.0000 5,760.00 股票 賣出 /XHKG/000100".
        // Skipped for IPO rows in BOTH spellings of the subscription marker.
        const exchangeCodeMatch = particulars.match(/\/X[A-Z]{2,4}\/(\d{5,6})/);
        const isIpoRow = particulars.includes('IPO') || ipoMarker !== undefined;
        if (exchangeCodeMatch && !isIpoRow) {
            const code = exchangeCodeMatch[1];
            const chineseMatch = particulars.match(/([\u4e00-\u9fff]{2,})(?=\/X)/);
            if (englishName && chineseMatch) {
                return { code, name: `${englishName} ${chineseMatch[1]}` };
            }
            if (englishName) {
                return { code, name: englishName };
            }
            return { code };
        }
        // Phillip HKD money market fund: fixed code.
        const isPhillipFund = (particulars.includes('PHILLIP') && particulars.includes('MMF')) ||
            particulars.includes('輝立港元貨幣市場基金') ||
            particulars.includes('辉立港元货币市场基金');
        if (isPhillipFund) {
            return { code: 'PHILLIP_HKD_MMF', name: '輝立港元貨幣市場基金' };
        }
        // Pattern B: "(100)", "(2698)", "(09988)" - digits only, so "(GROUP)" is ignored.
        const codeMatch = particulars.match(/\((\d{3,6})\)/);
        if (!codeMatch) {
            return null;
        }
        const code = this.normalizeStockCode(codeMatch[1]);
        const codeIndex = codeMatch.index;
        let name;
        // Name option 1: the text between the IPO marker and the code, which
        // copes with names such as "EASTROC BEVERAGE (GROUP) CO., LTD.".
        if (codeIndex > 0 && ipoMarker !== undefined) {
            const nameStart = particulars.indexOf(ipoMarker) + ipoMarker.length;
            // The marker must precede the code: substring() silently swaps
            // its arguments when start > end and would return unrelated text.
            if (nameStart <= codeIndex) {
                const cleanedName = particulars
                    .substring(nameStart, codeIndex)
                    .trim()
                    // Remove payment/receipt noise words.
                    .replace(/\s*(支付|收入|Payment|Receipt)\s*/g, ' ')
                    .replace(/\s+/g, ' ')
                    .trim();
                if (cleanedName.length > 0) {
                    name = cleanedName;
                }
            }
        }
        // Name option 2 (fallback): a company name right after the code.
        if (!name) {
            const trailingName = particulars.match(/\(\d{3,6}\)\s*([A-Z][A-Z\s.,()]+(?:LIMITED|LTD\.?|CO\.?))/i);
            if (trailingName) {
                const chineseMatch = particulars.match(/([\u4e00-\u9fff]{2,})/);
                name = chineseMatch
                    ? `${trailingName[1].trim()} ${chineseMatch[1]}`
                    : trailingName[1].trim();
            }
        }
        return { code, name };
    }
    /**
     * Parses quantity and price of an IPO allotment.
     *
     * The quantity follows an "allotted shares/amount" label (English, or
     * Chinese in either script) and may use thousands separators; the price
     * follows "@". Examples:
     * - "IPO 新股認購 MINIMAX GROUP INC. (100) ALLOTED AMOUNT 獲配金額 20 支付 @165"
     * - "IPO 新股認購 MINIMAX GROUP INC. (100) ALLOTED SHARES 獲配股數 20 股票 存貨"
     *
     * @param {string} particulars - Transaction text.
     * @returns {{quantity?: number, price?: number}|null} Null when neither
     *   value is present.
     */
    parseIpoAllotQuantityPrice(particulars) {
        if (!particulars) {
            return null;
        }
        const quantityMatch = particulars.match(/(?:獲配股數|获配股数|獲配金額|获配金额|ALLOT{1,2}ED\s+(?:SHARES|AMOUNT))\s+(\d[\d,]*)/i);
        const priceMatch = particulars.match(/@\s*(\d+(?:\.\d+)?)/);
        const quantity = quantityMatch ? parseInt(quantityMatch[1].replace(/,/g, ''), 10) : undefined;
        const price = priceMatch ? parseFloat(priceMatch[1]) : undefined;
        if (quantity === undefined && price === undefined) {
            return null;
        }
        return { quantity, price };
    }
    /**
     * Parses quantity and price of a stock buy/sell row.
     *
     * Layout: NAME QUANTITY PRICE AMOUNT 股票 賣出|買入 /XHKG/code, e.g.
     * "MINIMAX-WP 20 288.0000 5,760.00 股票 賣出 /XHKG/000100" gives
     * quantity 20 and price 288. Quantity and price may carry thousands
     * separators.
     *
     * @param {string} particulars - Transaction text.
     * @returns {{quantity: number, price: number}|null}
     */
    parseStockTradeQuantityPrice(particulars) {
        if (!particulars) {
            return null;
        }
        const match = particulars.match(/^[A-Z][A-Z0-9\s-]+?\s+(\d[\d,]*)\s+(\d[\d,]*(?:\.\d+)?)\s+[\d,]+(?:\.\d+)?\s+股票\s+(?:賣出|買入|卖出|买入)/);
        if (!match) {
            return null;
        }
        return {
            quantity: parseInt(match[1].replace(/,/g, ''), 10),
            price: parseFloat(match[2].replace(/,/g, '')),
        };
    }
    /**
     * Parses quantity and price, preferring a "quantity @price" pair at the
     * end of the text (e.g. "... 1,000 @9.8" or "... 40,000 @40").
     *
     * Order: stock trade layout, then "quantity @price" anchored at the end,
     * then the same pair anywhere, then `parseQuantityPrice`. The quantity
     * must start with a digit and keeps its decimals ("641.5 @11.4114" gives
     * 641.5).
     *
     * @param {string} particulars - Transaction text.
     * @returns {{quantity?: number, price?: number}|null}
     */
    parseQuantityPriceFromEnd(particulars) {
        if (!particulars) {
            return null;
        }
        const stockTrade = this.parseStockTradeQuantityPrice(particulars);
        if (stockTrade) {
            return stockTrade;
        }
        const pair = particulars.match(/(\d[\d,]*(?:\.\d+)?)\s*@\s*([\d.]+)\s*$/) ??
            particulars.match(/(\d[\d,]*(?:\.\d+)?)\s*@\s*([\d.]+)/);
        if (pair) {
            return {
                quantity: parseFloat(pair[1].replace(/,/g, '')),
                price: parseFloat(pair[2]),
            };
        }
        // Fall back to the looser formats (e.g. "REDEEM 86.65 ... @11.4219").
        return this.parseQuantityPrice(particulars);
    }
    /**
     * Parses quantity and price from looser layouts.
     *
     * Specific layouts are tried before the generic one so that an unrelated
     * number earlier in the text cannot be mistaken for the quantity:
     * 1. "申請按金 1,000 ... @165" (IPO application deposit, either script);
     * 2. "REDEEM 86.65 ... @11.4219";
     * 3. generic "<number> ... @<price>", e.g. "641.5 PHILLIP HKD MMF @11.4114";
     *    the number must start with a digit, so punctuation such as the comma
     *    in "CO., LTD." is never captured;
     * 4. "W/D 86.65" (withdrawal, no price).
     *
     * @param {string} particulars - Transaction text.
     * @returns {{quantity: number, price?: number}|null}
     */
    parseQuantityPrice(particulars) {
        if (!particulars) {
            return null;
        }
        const pricedLayouts = [
            /(?:申請按金|申请按金)\s+([\d,]+).*?@\s*([\d.]+)/,
            /REDEEM\s+([\d,]+\.?\d*)\s+.*?@\s*([\d.]+)/i,
            /(\d[\d,]*\.?\d*)\s*(?:.*?)@\s*([\d.]+)/,
        ];
        for (const layout of pricedLayouts) {
            const match = particulars.match(layout);
            if (match) {
                return {
                    quantity: parseFloat(match[1].replace(/,/g, '')),
                    price: parseFloat(match[2]),
                };
            }
        }
        const withdrawal = particulars.match(/W\/D\s+([\d,]+\.?\d*)/i);
        if (withdrawal) {
            return {
                quantity: parseFloat(withdrawal[1].replace(/,/g, '')),
                price: undefined,
            };
        }
        return null;
    }
    /**
     * Converts a statement date to YYYY-MM-DD.
     *
     * Accepts D/M/YY and D/M/YYYY (one or two digits for day and month);
     * two-digit years are always mapped to 20YY. Surrounding whitespace is
     * ignored. Values already in YYYY-MM-DD form are returned as is.
     *
     * @param {string} dateStr - Date text.
     * @returns {string} Converted date; '' for a missing value; the input
     *   itself when the format is not recognized.
     */
    convertDateFormat(dateStr) {
        if (!dateStr) {
            return '';
        }
        const text = String(dateStr).trim();
        if (/^\d{4}-\d{2}-\d{2}$/.test(text)) {
            return text;
        }
        const match = text.match(/^(\d{1,2})\/(\d{1,2})\/(\d{2}|\d{4})$/);
        if (!match) {
            return dateStr;
        }
        const [, day, month, year] = match;
        const fullYear = year.length === 2 ? String(2000 + parseInt(year, 10)) : year;
        return `${fullYear}-${month.padStart(2, '0')}-${day.padStart(2, '0')}`;
    }
    /**
     * Maps a row to its normalized transaction type.
     *
     * The category decides first; only when it is inconclusive is the raw
     * type label looked up in TRANSACTION_TYPE_MAP.
     *
     * @param {string} [rawType] - Raw transaction-type label of the row.
     * @param {string} [category] - Category returned by `parseCategory`.
     * @returns {string} Normalized type; 'OTHER' when nothing applies.
     */
    mapTransactionType(rawType, category) {
        switch (category) {
            case 'REDEEM': // a redemption is a sale of fund units
            case 'STOCK_SELL':
                return 'SELL';
            case 'DIVIDEND':
                return 'DIVIDEND';
            case 'INTEREST':
                return 'FEE'; // interest is booked as a fee
            case 'WITHDRAW':
                return 'WITHDRAWAL';
            case 'DEPOSIT':
                return 'DEPOSIT';
            case 'FUND_SUBSCRIBE':
            case 'STOCK_BUY':
            case 'REVERSE_REDEEM': // reversed redemption: money flows back into the fund
                return 'BUY';
            default:
                break;
        }
        // Own-property check so that labels such as 'constructor' cannot
        // resolve to an inherited Object.prototype member.
        if (rawType && Object.prototype.hasOwnProperty.call(TRANSACTION_TYPE_MAP, rawType)) {
            return TRANSACTION_TYPE_MAP[rawType];
        }
        return 'OTHER';
    }
    /**
     * Maps an IPO sub-type to the IPO phase.
     * @param {string} [subType]
     * @returns {'allot'|'apply'} 'allot' for IPO_ALLOT; every other sub-type
     *   belongs to the apply phase.
     */
    getIPOType(subType) {
        return subType === 'IPO_ALLOT' ? 'allot' : 'apply';
    }
    /**
     * Maps a raw asset type to an asset category by case-insensitive keyword.
     * @param {string} [assetType]
     * @returns {'Cash'|'Fund'|'Stock'|'Bond'|'Other'}
     */
    mapAssetCategory(assetType) {
        if (!assetType) {
            return 'Other';
        }
        const lower = assetType.toLowerCase();
        if (lower.includes('cash') || lower === 'hkd' || lower === 'usd') {
            return 'Cash';
        }
        if (lower.includes('fund') || lower.includes('mmf')) {
            return 'Fund';
        }
        if (lower.includes('stock')) {
            return 'Stock';
        }
        if (lower.includes('bond')) {
            return 'Bond';
        }
        return 'Other';
    }
    /**
     * Left-pads a purely numeric stock code to 6 digits so that codes agree
     * with the 6-digit form used in "/XHKG/009611"-style references, e.g.
     * 100 -> 000100, 9980 -> 009980, 09988 -> 009988. Other codes are
     * returned unchanged.
     *
     * @param {string} code
     * @returns {string}
     */
    normalizeStockCode(code) {
        const isShortNumeric = /^\d+$/.test(code) && code.length < 6;
        return isShortNumeric ? code.padStart(6, '0') : code;
    }
}
|
|
759
|
+
// Publish the formatter class as this module's named CommonJS export.
exports.PhillipRuleFormatter = PhillipRuleFormatter;
|
|
760
|
+
//# sourceMappingURL=formatter.js.map
|