@tcos/broker-parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/parse-statement/SKILL.md +134 -0
- package/.claude/skills/parse-statement/examples.md +257 -0
- package/.claude/skills/parse-statement/trigger-tests/cases.yaml +133 -0
- package/README.md +153 -0
- package/dist/cli/index.d.ts +17 -0
- package/dist/cli/index.d.ts.map +1 -0
- package/dist/cli/index.js +150 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/core/cleaning.d.ts +78 -0
- package/dist/core/cleaning.d.ts.map +1 -0
- package/dist/core/cleaning.js +217 -0
- package/dist/core/cleaning.js.map +1 -0
- package/dist/core/pipeline.d.ts +49 -0
- package/dist/core/pipeline.d.ts.map +1 -0
- package/dist/core/pipeline.js +66 -0
- package/dist/core/pipeline.js.map +1 -0
- package/dist/core/registry.d.ts +24 -0
- package/dist/core/registry.d.ts.map +1 -0
- package/dist/core/registry.js +53 -0
- package/dist/core/registry.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +29 -0
- package/dist/index.js.map +1 -0
- package/dist/parsers/phillip/extract.py +90 -0
- package/dist/parsers/phillip/extractor.d.ts +215 -0
- package/dist/parsers/phillip/extractor.d.ts.map +1 -0
- package/dist/parsers/phillip/extractor.js +1012 -0
- package/dist/parsers/phillip/extractor.js.map +1 -0
- package/dist/parsers/phillip/formatter.d.ts +113 -0
- package/dist/parsers/phillip/formatter.d.ts.map +1 -0
- package/dist/parsers/phillip/formatter.js +760 -0
- package/dist/parsers/phillip/formatter.js.map +1 -0
- package/dist/parsers/phillip/index.d.ts +25 -0
- package/dist/parsers/phillip/index.d.ts.map +1 -0
- package/dist/parsers/phillip/index.js +59 -0
- package/dist/parsers/phillip/index.js.map +1 -0
- package/dist/types/formatter.d.ts +47 -0
- package/dist/types/formatter.d.ts.map +1 -0
- package/dist/types/formatter.js +9 -0
- package/dist/types/formatter.js.map +1 -0
- package/dist/types/plugin.d.ts +14 -0
- package/dist/types/plugin.d.ts.map +1 -0
- package/dist/types/plugin.js +5 -0
- package/dist/types/plugin.js.map +1 -0
- package/dist/types/raw.d.ts +136 -0
- package/dist/types/raw.d.ts.map +1 -0
- package/dist/types/raw.js +11 -0
- package/dist/types/raw.js.map +1 -0
- package/dist/types/statement.d.ts +55 -0
- package/dist/types/statement.d.ts.map +1 -0
- package/dist/types/statement.js +12 -0
- package/dist/types/statement.js.map +1 -0
- package/package.json +64 -0
- package/src/parsers/phillip/extract.py +90 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* PluginRegistry — 券商插件注册表
|
|
4
|
+
*
|
|
5
|
+
* 管理所有已注册的 IBrokerPlugin 实例,支持:
|
|
6
|
+
* - 按名称精确获取
|
|
7
|
+
* - 列出全部已注册插件
|
|
8
|
+
* - 自动检测 PDF 所属券商(取置信度最高者)
|
|
9
|
+
*/
|
|
10
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
11
|
+
exports.PluginRegistry = void 0;
|
|
12
|
+
/**
 * Registry of broker plugins, keyed by plugin name.
 *
 * Supports exact lookup by name, listing every registered plugin, and
 * auto-detecting which plugin should handle a PDF by asking each plugin for a
 * confidence score and keeping the highest one.
 */
class PluginRegistry {
    constructor() {
        // name -> plugin. A Map iterates in insertion order, so listPlugins()
        // and autoDetect() visit plugins in the order they were registered.
        this.plugins = new Map();
    }

    /**
     * Register a broker plugin under `plugin.name`.
     *
     * Registering another plugin with the same name replaces the earlier one.
     *
     * @param {object} plugin - Plugin exposing `name` and `detect(filePath)`.
     */
    register(plugin) {
        this.plugins.set(plugin.name, plugin);
    }

    /**
     * Look up a plugin by its exact name.
     *
     * @param {string} name - Name the plugin was registered under.
     * @returns {object} The registered plugin.
     * @throws {Error} If no plugin is registered under `name`.
     */
    getPlugin(name) {
        const plugin = this.plugins.get(name);
        if (!plugin) {
            throw new Error(`Unknown broker: "${name}". Use --list-parsers to see available parsers.`);
        }
        return plugin;
    }

    /**
     * List every registered plugin.
     *
     * @returns {object[]} A new array of plugins in registration order.
     */
    listPlugins() {
        return Array.from(this.plugins.values());
    }

    /**
     * Auto-detect which broker plugin a PDF belongs to.
     *
     * Calls `detect(filePath)` on every registered plugin, one at a time, and
     * returns the plugin with the highest score, provided that score reaches
     * `minConfidence`. On a tie the plugin registered first wins, because a
     * later plugin must score strictly higher to replace the current best.
     * Scores of 0 or below never select a plugin.
     *
     * @param {string} filePath - Path of the PDF to inspect.
     * @param {number} [minConfidence=0.5] - Lowest score accepted as a match.
     * @returns {Promise<object>} The best-matching plugin.
     * @throws {Error} If no plugin scores at least `minConfidence`.
     */
    async autoDetect(filePath, minConfidence = 0.5) {
        let bestPlugin = null;
        let bestScore = 0;
        for (const plugin of this.plugins.values()) {
            const score = await plugin.detect(filePath);
            if (score > bestScore) {
                bestScore = score;
                bestPlugin = plugin;
            }
        }
        if (!bestPlugin || bestScore < minConfidence) {
            throw new Error('Cannot auto-detect broker from this PDF. Try specifying with -b (e.g., -b phillip).');
        }
        return bestPlugin;
    }
}
|
|
52
|
+
exports.PluginRegistry = PluginRegistry;
|
|
53
|
+
//# sourceMappingURL=registry.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"registry.js","sourceRoot":"","sources":["../../src/core/registry.ts"],"names":[],"mappings":";AAAA;;;;;;;GAOG;;;AAIH,MAAa,cAAc;IAA3B;QACmB,YAAO,GAAG,IAAI,GAAG,EAAyB,CAAC;IA6C9D,CAAC;IA3CC,eAAe;IACf,QAAQ,CAAC,MAAqB;QAC5B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IACxC,CAAC;IAED,sBAAsB;IACtB,SAAS,CAAC,IAAY;QACpB,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACtC,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,oBAAoB,IAAI,iDAAiD,CAAC,CAAC;QAC7F,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,gBAAgB;IAChB,WAAW;QACT,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAC3C,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,UAAU,CAAC,QAAgB;QAC/B,IAAI,UAAU,GAAyB,IAAI,CAAC;QAC5C,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;YAC3C,MAAM,KAAK,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YAC5C,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;gBACtB,SAAS,GAAG,KAAK,CAAC;gBAClB,UAAU,GAAG,MAAM,CAAC;YACtB,CAAC;QACH,CAAC;QAED,IAAI,CAAC,UAAU,IAAI,SAAS,GAAG,GAAG,EAAE,CAAC;YACnC,MAAM,IAAI,KAAK,CACb,qFAAqF,CACtF,CAAC;QACJ,CAAC;QAED,OAAO,UAAU,CAAC;IACpB,CAAC;CACF;AA9CD,wCA8CC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
export * from './types/statement';
|
|
2
|
+
export * from './types/raw';
|
|
3
|
+
export * from './types/formatter';
|
|
4
|
+
export * from './types/plugin';
|
|
5
|
+
export { PhillipPlugin } from './parsers/phillip';
|
|
6
|
+
export { ParsePipeline } from './core/pipeline';
|
|
7
|
+
export type { ParseOptions, ParseResult, ParseTimings } from './core/pipeline';
|
|
8
|
+
export { PluginRegistry } from './core/registry';
|
|
9
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAEA,cAAc,mBAAmB,CAAC;AAClC,cAAc,aAAa,CAAC;AAC5B,cAAc,mBAAmB,CAAC;AAClC,cAAc,gBAAgB,CAAC;AAC/B,OAAO,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAChD,YAAY,EAAE,YAAY,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAC/E,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// broker-parser 公共 API 入口
|
|
3
|
+
/**
 * TypeScript-emitted CommonJS helper: exposes property `k` of module object
 * `m` on `o` under the name `k2` (defaults to `k`).
 *
 * Reuses an existing `this.__createBinding` when one is present. Otherwise,
 * where `Object.create` exists, the property is defined through a descriptor:
 * the source descriptor is kept only when it is an accessor on an
 * `__esModule` object, or a data property that is neither writable nor
 * configurable; in every other case an enumerable getter that reads `m[k]`
 * on each access is installed. Without `Object.create` the current value is
 * simply copied.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
|
|
14
|
+
/**
 * TypeScript-emitted CommonJS helper behind `export * from '...'`.
 *
 * Binds every enumerable property of module `m` onto `exports` via
 * `__createBinding`, skipping `default` and any name `exports` already owns,
 * so names already present on `exports` are never overwritten.
 */
var __exportStar = (this && this.__exportStar) || function(m, exports) {
    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
};
|
|
17
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
18
|
+
exports.PluginRegistry = exports.ParsePipeline = exports.PhillipPlugin = void 0;
|
|
19
|
+
__exportStar(require("./types/statement"), exports);
|
|
20
|
+
__exportStar(require("./types/raw"), exports);
|
|
21
|
+
__exportStar(require("./types/formatter"), exports);
|
|
22
|
+
__exportStar(require("./types/plugin"), exports);
|
|
23
|
+
var phillip_1 = require("./parsers/phillip");
|
|
24
|
+
Object.defineProperty(exports, "PhillipPlugin", { enumerable: true, get: function () { return phillip_1.PhillipPlugin; } });
|
|
25
|
+
var pipeline_1 = require("./core/pipeline");
|
|
26
|
+
Object.defineProperty(exports, "ParsePipeline", { enumerable: true, get: function () { return pipeline_1.ParsePipeline; } });
|
|
27
|
+
var registry_1 = require("./core/registry");
|
|
28
|
+
Object.defineProperty(exports, "PluginRegistry", { enumerable: true, get: function () { return registry_1.PluginRegistry; } });
|
|
29
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAAA,0BAA0B;;;;;;;;;;;;;;;;;AAE1B,oDAAkC;AAClC,8CAA4B;AAC5B,oDAAkC;AAClC,iDAA+B;AAC/B,6CAAkD;AAAzC,wGAAA,aAAa,OAAA;AACtB,4CAAgD;AAAvC,yGAAA,aAAa,OAAA;AAEtB,4CAAiD;AAAxC,0GAAA,cAAc,OAAA"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
pdfplumber 字符提取脚本
|
|
4
|
+
|
|
5
|
+
供 TypeScript 规则引擎调用,输出 JSON 格式的字符数据。
|
|
6
|
+
|
|
7
|
+
使用方法:
|
|
8
|
+
python3 extract.py <pdf_path> [--json]
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import pdfplumber
|
|
12
|
+
import json
|
|
13
|
+
import sys
|
|
14
|
+
import os
|
|
15
|
+
from typing import List, Dict, Any
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def extract_chars_from_pdf(pdf_path: str) -> Dict[str, Any]:
    """
    Extract every page's characters, with their coordinates, from a PDF.

    Args:
        pdf_path: Path of the PDF file to open with pdfplumber.

    Returns:
        A JSON-serialisable dict shaped like:
        {
            "file": "<base name of pdf_path>",
            "totalPages": <number of pages>,
            "pages": [
                {
                    "pageNum": 1,
                    "chars": [
                        {"text": "A", "x0": 10.0, "x1": 15.0, "top": 20.0, "bottom": 30.0},
                        ...
                    ],
                    "text": "<full page text>"
                }
            ]
        }
        Page numbers are 1-based, coordinates are rounded to 2 decimal
        places, and "text" is "" when pdfplumber extracts no text for a page.
    """
    result = {
        "file": os.path.basename(pdf_path),
        "totalPages": 0,
        "pages": []
    }

    with pdfplumber.open(pdf_path) as pdf:
        result["totalPages"] = len(pdf.pages)

        for page_num, page in enumerate(pdf.pages, start=1):
            result["pages"].append({
                "pageNum": page_num,
                # Keep only the glyph and its bounding box; any other
                # per-character attributes are dropped.
                "chars": [
                    {
                        "text": char["text"],
                        "x0": round(char["x0"], 2),
                        "x1": round(char["x1"], 2),
                        "top": round(char["top"], 2),
                        "bottom": round(char["bottom"], 2)
                    }
                    for char in page.chars
                ],
                # extract_text() may return None; normalise that to "".
                "text": page.extract_text() or ""
            })

    return result
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def main():
    """
    Command-line entry point: extract a PDF and print the result as JSON.

    Reads the PDF path from sys.argv[1]. Any further arguments (such as
    --json) are accepted but not read; the output is always a single JSON
    document on stdout. Exits with status 1, after printing a message to
    stderr, when no path is given or the path is not an existing file.
    """
    if len(sys.argv) < 2:
        # Name the script as it was actually invoked rather than hard-coding
        # a file name that may not match the installed one.
        script = os.path.basename(sys.argv[0])
        print(f"Usage: python3 {script} <pdf_path> [--json]", file=sys.stderr)
        sys.exit(1)

    pdf_path = sys.argv[1]

    # isfile (not exists) so a directory is rejected here with a clean error
    # instead of failing later inside the PDF reader.
    if not os.path.isfile(pdf_path):
        print(f"Error: File not found: {pdf_path}", file=sys.stderr)
        sys.exit(1)

    result = extract_chars_from_pdf(pdf_path)

    # The payload is written with ensure_ascii=False, so non-ASCII statement
    # text goes out raw. Pin stdout to UTF-8 so the output does not depend on
    # the platform's default encoding, which may be unable to encode it.
    # NOTE(review): assumes the consumer decodes stdout as UTF-8 - confirm.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")

    # Always emit JSON (consumed by the TypeScript caller).
    print(json.dumps(result, ensure_ascii=False))
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
if __name__ == "__main__":
|
|
90
|
+
main()
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phillip pdfplumber Stage 1 提取器
|
|
3
|
+
*
|
|
4
|
+
* 纯 PDF 表格提取,不做业务逻辑处理。
|
|
5
|
+
* 输出统一的 RawTableData 格式,供 Stage 2 处理。
|
|
6
|
+
*
|
|
7
|
+
* 职责:
|
|
8
|
+
* 1. 调用 pdfplumber 提取字符坐标
|
|
9
|
+
* 2. 按行/列分组
|
|
10
|
+
* 3. 提取原始字段值
|
|
11
|
+
* 4. 不做分类、不做字段合并
|
|
12
|
+
*/
|
|
13
|
+
import { RawTableData, IStage1Extractor, PdfplumberExtractorConfig } from '../../types/raw';
|
|
14
|
+
/**
 * Stage 1 extractor for Phillip statements, backed by pdfplumber.
 */
export declare class PhillipPdfplumberExtractor implements IStage1Extractor {
    private config;
    constructor(config?: Partial<Omit<PdfplumberExtractorConfig, 'type'>>);
    /**
     * Extract raw table data from a PDF.
     */
    extract(pdfPath: string): Promise<RawTableData>;
    /**
     * Invoke the pdfplumber Python script.
     */
    private callPdfplumber;
    /**
     * Extract account information.
     *
     * Example of the Phillip daily statement format:
     *   客戶名稱 Name : <CLIENT NAME>     客戶編號 A/C No : <ACCOUNT NO>
     *   日期 Issue Date : 05/11/25
     */
    private extractAccountInfo;
    /**
     * Extract transaction records.
     *
     * Cross-page state: inTransactionSection and lastTransaction are carried
     * from page to page, so that page 2+ of a multi-page statement can keep
     * extracting transactions even when it has no section marker.
     */
    private extractTransactions;
    /**
     * Merge continuation rows that span pages.
     * Detection rule: an empty-string tradeDate marks a continuation row.
     */
    private mergeCrossPageContinuations;
    /**
     * Extract transaction records from a single page (with cross-page state).
     *
     * Differences from the original extractPageTransactions:
     * - receives the previous page's state (whether inside the transaction
     *   section, and the last transaction)
     * - returns the state after processing this page, for the next page to
     *   continue from
     * - ensures transactions are extracted correctly on page 2+ of a
     *   multi-page statement that has no section marker
     *
     * @param chars - character data for the page
     * @param _warnings - array of warning messages
     * @param initialInSection - whether extraction starts inside the transaction section
     * @param initialLastTx - initial last transaction (used to merge continuation rows)
     * @returns the transaction data and the updated state
     */
    private extractPageTransactionsWithState;
    /**
     * Parse a single transaction row.
     * Extracts raw field values only; performs no business classification.
     */
    private parseTransactionLine;
    /**
     * Smart field extraction based on column position.
     *
     * Column boundary reference for Phillip daily statements:
     * - tradeDate:   x = 30-65
     * - settleDate:  x = 65-110
     * - product:     x = 100-145 (optional, e.g. UT)
     * - refNo:       x = 130-180
     * - type:        x = 170-230
     * - particulars: x = 195-490
     * - debit:       x = 480-530
     * - credit:      x = 530-580
     */
    private extractFieldsWithSmartBoundaries;
    /**
     * Extract the particulars text from a continuation row.
     */
    private extractContinuationText;
    /**
     * Group characters by Y coordinate.
     */
    private groupCharsByY;
    /**
     * Group characters by gap.
     */
    private groupCharsByGap;
    /**
     * Get the text of a line.
     */
    private getLineText;
    /**
     * Check whether any keyword matches.
     */
    private matchesAny;
    /**
     * Check whether a line is a table header or a separator row.
     */
    private isHeaderOrSeparator;
    /**
     * Check whether a line is a PDF page header or page footer.
     *
     * Page-level titles and footers repeat on every page of a multi-page
     * statement and are distinct from the transaction table's header row.
     * They must be skipped when extracting transactions across pages.
     *
     * Note: getLineText concatenates characters directly, possibly without
     * spaces (e.g. "A/CNo:" rather than "A/C No :"), so the match patterns
     * need to tolerate missing spaces.
     */
    private isPageHeaderOrFooter;
    /**
     * Parse an amount.
     */
    private parseAmount;
    /**
     * Extract holdings data.
     *
     * Consists of two parts:
     * 1. Account Details - cash balances
     * 2. Securities Portfolio - stock/fund holdings
     *
     * Note: Securities Portfolio extraction can span pages; state is passed
     * between pages.
     *
     * @param pdfData - pdfplumber output data
     * @param warnings - array of warning messages
     * @returns array of holdings data
     */
    private extractHoldings;
    /**
     * Extract cash balances from the Account Details section.
     *
     * Example of the Account Details format in a Phillip daily statement:
     * | Currency | Balance C/F | Unsettled T+1 | ... | Available Balance |
     * | HKD      | 63,832.41   | 0.00          | ... | 63,832.41         |
     * | USD      | -0.30       | 0.00          | ... | -0.30             |
     * | HKD(Base)| 63,830.07   | ...           | ... | 63,830.07         |  <- skipped
     *
     * @param chars - character data for the page
     * @param warnings - array of warning messages
     * @returns array of cash balances
     */
    private extractAccountDetailsHoldings;
    /**
     * Check whether a line is the Account Details header row.
     */
    private isAccountDetailsHeader;
    /**
     * Parse a cash balance row.
     *
     * Row format: Currency Balance_C/F Unsettled_T+1 ... Available_Balance Ref_ExRate DR_Int_Rate
     * Example:    HKD 63,832.41 0.00 ... 63,832.41 1.0000 列表1(Sch1)
     *
     * @param chars - character data for the row
     * @returns the cash balance holding, or null
     */
    private parseCashBalanceLine;
    /**
     * Extract stock/fund holdings from the Securities Portfolio section
     * (with cross-page state).
     *
     * - receives the previous page's state (whether inside the Portfolio
     *   section, the current currency, and the last holding)
     * - returns the state after processing this page, for the next page to
     *   continue from
     * - no longer relies on an end marker (such as E. & O. E.) to decide
     *   where the section ends
     *
     * @param chars - character data for the page
     * @param warnings - array of warning messages
     * @param initialInSection - whether extraction starts inside the Portfolio section
     * @param initialCurrency - initial currency
     * @param initialLastHolding - initial last holding (used to merge continuation rows)
     * @returns the holdings data and the updated state
     */
    private extractPortfolioHoldingsWithState;
    /**
     * Check whether a line is a Portfolio header or summary row.
     */
    private isPortfolioHeaderOrFooter;
    /**
     * Check whether a line is a continuation row (Chinese name row).
     *
     * Characteristics of a continuation row:
     * 1. starts with "股票" or "基金"
     * 2. has no numeric amount on the right-hand side
     */
    private isPortfolioContinuationLine;
    /**
     * Extract the Chinese name from a continuation row.
     */
    private extractChineseNameFromLine;
    /**
     * Parse a single holdings row.
     *
     * Column boundaries (based on pdfplumber coordinate analysis):
     * - Product (Equity/UT):         x0 < 60
     * - Market (XHKG/OTCU/XNGS):     x0 = 80-115
     * - InstrumentCd:                x0 = 115-165
     * - DisplayName:                 x0 = 160-270
     * - Qty B/F:                     x0 = 260-295
     * - LastBoughtOn (optional date): x0 = 295-340
     * - Qty C/F:                     x0 = 340-385
     * - ClsPrice:                    x0 = 385-435
     * - Market Value:                x0 = 435-485
     * - MgnRatio:                    x0 = 490-520
     * - Margin Value:                x0 = 530-570
     *
     * @param chars - character data for the row
     * @param currency - current currency
     * @returns the holdings data, or null
     */
    private parsePortfolioLine;
}
|
|
215
|
+
//# sourceMappingURL=extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../../../src/parsers/phillip/extractor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAKH,OAAO,EACL,YAAY,EAIZ,gBAAgB,EAChB,yBAAyB,EAC1B,MAAM,iBAAiB,CAAC;AA4FzB;;GAEG;AACH,qBAAa,0BAA2B,YAAW,gBAAgB;IACjE,OAAO,CAAC,MAAM,CAA4B;gBAE9B,MAAM,CAAC,EAAE,OAAO,CAAC,IAAI,CAAC,yBAAyB,EAAE,MAAM,CAAC,CAAC;IAQrE;;OAEG;IACG,OAAO,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IA6BrD;;OAEG;YACW,cAAc;IAwC5B;;;;;;OAMG;IACH,OAAO,CAAC,kBAAkB;IA4B1B;;;;;OAKG;IACH,OAAO,CAAC,mBAAmB;IAuB3B;;;OAGG;IACH,OAAO,CAAC,2BAA2B;IAiBnC;;;;;;;;;;;;;OAaG;IACH,OAAO,CAAC,gCAAgC;IAkFxC;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IAsB5B;;;;;;;;;;;;OAYG;IACH,OAAO,CAAC,gCAAgC;IA8GxC;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAmB/B;;OAEG;IACH,OAAO,CAAC,aAAa;IAUrB;;OAEG;IACH,OAAO,CAAC,eAAe;IAgCvB;;OAEG;IACH,OAAO,CAAC,WAAW;IAOnB;;OAEG;IACH,OAAO,CAAC,UAAU;IAIlB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAsB3B;;;;;;;;OAQG;IACH,OAAO,CAAC,oBAAoB;IA2B5B;;OAEG;IACH,OAAO,CAAC,WAAW;IAWnB;;;;;;;;;;;;OAYG;IACH,OAAO,CAAC,eAAe;IAoCvB;;;;;;;;;;;;OAYG;IACH,OAAO,CAAC,6BAA6B;IA2CrC;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAiB9B;;;;;;;;OAQG;IACH,OAAO,CAAC,oBAAoB;IAmC5B;;;;;;;;;;;;;OAaG;IACH,OAAO,CAAC,iCAAiC;IA0FzC;;OAEG;IACH,OAAO,CAAC,yBAAyB;IAyBjC;;;;;;OAMG;IACH,OAAO,CAAC,2BAA2B;IAiBnC;;OAEG;IACH,OAAO,CAAC,0BAA0B;IAYlC;;;;;;;;;;;;;;;;;;;OAmBG;IACH,OAAO,CAAC,kBAAkB;CAyG3B"}
|