@tcos/broker-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/.claude/skills/parse-statement/SKILL.md +134 -0
  2. package/.claude/skills/parse-statement/examples.md +257 -0
  3. package/.claude/skills/parse-statement/trigger-tests/cases.yaml +133 -0
  4. package/README.md +153 -0
  5. package/dist/cli/index.d.ts +17 -0
  6. package/dist/cli/index.d.ts.map +1 -0
  7. package/dist/cli/index.js +150 -0
  8. package/dist/cli/index.js.map +1 -0
  9. package/dist/core/cleaning.d.ts +78 -0
  10. package/dist/core/cleaning.d.ts.map +1 -0
  11. package/dist/core/cleaning.js +217 -0
  12. package/dist/core/cleaning.js.map +1 -0
  13. package/dist/core/pipeline.d.ts +49 -0
  14. package/dist/core/pipeline.d.ts.map +1 -0
  15. package/dist/core/pipeline.js +66 -0
  16. package/dist/core/pipeline.js.map +1 -0
  17. package/dist/core/registry.d.ts +24 -0
  18. package/dist/core/registry.d.ts.map +1 -0
  19. package/dist/core/registry.js +53 -0
  20. package/dist/core/registry.js.map +1 -0
  21. package/dist/index.d.ts +9 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +29 -0
  24. package/dist/index.js.map +1 -0
  25. package/dist/parsers/phillip/extract.py +90 -0
  26. package/dist/parsers/phillip/extractor.d.ts +215 -0
  27. package/dist/parsers/phillip/extractor.d.ts.map +1 -0
  28. package/dist/parsers/phillip/extractor.js +1012 -0
  29. package/dist/parsers/phillip/extractor.js.map +1 -0
  30. package/dist/parsers/phillip/formatter.d.ts +113 -0
  31. package/dist/parsers/phillip/formatter.d.ts.map +1 -0
  32. package/dist/parsers/phillip/formatter.js +760 -0
  33. package/dist/parsers/phillip/formatter.js.map +1 -0
  34. package/dist/parsers/phillip/index.d.ts +25 -0
  35. package/dist/parsers/phillip/index.d.ts.map +1 -0
  36. package/dist/parsers/phillip/index.js +59 -0
  37. package/dist/parsers/phillip/index.js.map +1 -0
  38. package/dist/types/formatter.d.ts +47 -0
  39. package/dist/types/formatter.d.ts.map +1 -0
  40. package/dist/types/formatter.js +9 -0
  41. package/dist/types/formatter.js.map +1 -0
  42. package/dist/types/plugin.d.ts +14 -0
  43. package/dist/types/plugin.d.ts.map +1 -0
  44. package/dist/types/plugin.js +5 -0
  45. package/dist/types/plugin.js.map +1 -0
  46. package/dist/types/raw.d.ts +136 -0
  47. package/dist/types/raw.d.ts.map +1 -0
  48. package/dist/types/raw.js +11 -0
  49. package/dist/types/raw.js.map +1 -0
  50. package/dist/types/statement.d.ts +55 -0
  51. package/dist/types/statement.d.ts.map +1 -0
  52. package/dist/types/statement.js +12 -0
  53. package/dist/types/statement.js.map +1 -0
  54. package/package.json +64 -0
  55. package/src/parsers/phillip/extract.py +90 -0
@@ -0,0 +1,150 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ /**
4
+ * CLI 入口 — tcos-parse 命令
5
+ *
6
+ * 用法:
7
+ * tcos-parse <pdf> # 解析并输出 JSON 到 stdout
8
+ * tcos-parse <pdf> -o out.json # 输出到文件
9
+ * tcos-parse <pdf> --raw # 只输出 Stage1 原始数据
10
+ * tcos-parse <pdf> --no-clean # 跳过 Stage3 清理
11
+ * tcos-parse -b phillip <pdf> # 指定券商
12
+ * tcos-parse --detect <pdf> # 检测 PDF 所属券商
13
+ * tcos-parse --list-parsers # 列出支持的券商
14
+ * tcos-parse -v <pdf> # 显示各阶段耗时
15
+ * tcos-parse -q <pdf> # 静默模式,只输出 JSON
16
+ */
17
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
18
+ if (k2 === undefined) k2 = k;
19
+ var desc = Object.getOwnPropertyDescriptor(m, k);
20
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
21
+ desc = { enumerable: true, get: function() { return m[k]; } };
22
+ }
23
+ Object.defineProperty(o, k2, desc);
24
+ }) : (function(o, m, k, k2) {
25
+ if (k2 === undefined) k2 = k;
26
+ o[k2] = m[k];
27
+ }));
28
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
29
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
30
+ }) : function(o, v) {
31
+ o["default"] = v;
32
+ });
33
+ var __importStar = (this && this.__importStar) || (function () {
34
+ var ownKeys = function(o) {
35
+ ownKeys = Object.getOwnPropertyNames || function (o) {
36
+ var ar = [];
37
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
38
+ return ar;
39
+ };
40
+ return ownKeys(o);
41
+ };
42
+ return function (mod) {
43
+ if (mod && mod.__esModule) return mod;
44
+ var result = {};
45
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
46
+ __setModuleDefault(result, mod);
47
+ return result;
48
+ };
49
+ })();
50
+ Object.defineProperty(exports, "__esModule", { value: true });
51
+ const fs = __importStar(require("fs"));
52
+ const path = __importStar(require("path"));
53
+ const commander_1 = require("commander");
54
+ const pipeline_1 = require("../core/pipeline");
55
+ const registry_1 = require("../core/registry");
56
+ const phillip_1 = require("../parsers/phillip");
57
// Build the plugin registry and the pipeline that runs on top of it.
const registry = new registry_1.PluginRegistry();
registry.register(new phillip_1.PhillipPlugin());
const pipeline = new pipeline_1.ParsePipeline(registry);
const program = new commander_1.Command();
/**
 * Report a fatal error on stderr and terminate with exit code 1.
 * Shared by the parse path and by the top-level rejection handler so that
 * every failure is reported in the same `Error: <message>` form.
 */
function reportFatal(err) {
    const message = err instanceof Error ? err.message : String(err);
    process.stderr.write(`Error: ${message}\n`);
    process.exit(1);
}
program
    .name('tcos-parse')
    .description('Parse brokerage PDF statements into structured JSON')
    .version('0.1.0');
// Main command: parse a PDF, or run one of the auxiliary modes
// (--list-parsers, --detect).
program
    .argument('[pdf]', 'PDF statement file to parse')
    .option('-o, --output <file>', 'output to file (default: stdout)')
    .option('-b, --broker <name>', 'specify broker (default: auto-detect)')
    .option('--raw', 'output Stage1 raw data only')
    .option('--no-clean', 'skip cleaning step')
    .option('-v, --verbose', 'show stage timing and details')
    .option('-q, --quiet', 'silent mode, only output JSON')
    .option('--detect', 'detect broker instead of parsing')
    .option('--list-parsers', 'list available broker parsers')
    .action(async (pdfArg, opts) => {
    // --list-parsers: print the registered broker parsers and stop.
    if (opts.listParsers) {
        const plugins = registry.listPlugins();
        console.log('Available parsers:');
        for (const p of plugins) {
            console.log(` ${p.name} — ${p.displayName}`);
        }
        return;
    }
    // Every other mode needs a PDF path.
    if (!pdfArg) {
        console.error('Error: PDF file path is required. Use --help for usage.');
        process.exit(1);
    }
    const pdfPath = path.resolve(pdfArg);
    // Fail early with a clear message when the file does not exist.
    if (!fs.existsSync(pdfPath)) {
        console.error(`Error: File not found: ${pdfPath}`);
        process.exit(1);
    }
    // --detect: report the plugin with the highest detect() score.
    // A detect() failure rejects this handler and is reported by the
    // parseAsync() rejection handler at the bottom of the script.
    if (opts.detect) {
        const plugins = registry.listPlugins();
        let bestName = 'unknown';
        let bestScore = 0;
        for (const p of plugins) {
            const score = await p.detect(pdfPath);
            if (score > bestScore) {
                bestScore = score;
                bestName = p.name;
            }
        }
        if (!opts.quiet) {
            console.log(`Detected broker: ${bestName} (confidence: ${bestScore.toFixed(2)})`);
        }
        else {
            console.log(bestName);
        }
        return;
    }
    // Parse mode.
    try {
        const result = await pipeline.parse(pdfPath, {
            broker: opts.broker,
            raw: opts.raw,
            noClean: !opts.clean, // commander --no-clean → clean=false → noClean=true
            verbose: opts.verbose,
        });
        // Timings go to stderr so that stdout stays pure JSON.
        if (opts.verbose) {
            const t = result.metadata.timings;
            process.stderr.write(`[timing] detect=${t.detect}ms stage1=${t.stage1}ms ` +
                `stage2=${t.stage2}ms clean=${t.clean}ms total=${t.total}ms\n`);
        }
        const json = JSON.stringify(result.data, null, 2);
        if (opts.output) {
            fs.writeFileSync(path.resolve(opts.output), json, 'utf-8');
            if (!opts.quiet) {
                process.stderr.write(`Written to ${opts.output}\n`);
            }
        }
        else {
            console.log(json);
        }
    }
    catch (err) {
        reportFatal(err);
    }
});
// The action handler is async, so parseAsync() is required: with parse() a
// rejection raised outside the try/catch above would surface as an unhandled
// promise rejection instead of a clean error message and exit code.
program.parseAsync().catch(reportFatal);
150
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/cli/index.ts"],"names":[],"mappings":";;AACA;;;;;;;;;;;;;GAaG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,uCAAyB;AACzB,2CAA6B;AAE7B,yCAAoC;AAEpC,+CAAiD;AACjD,+CAAkD;AAClD,gDAAmD;AAEnD,WAAW;AACX,MAAM,QAAQ,GAAG,IAAI,yBAAc,EAAE,CAAC;AACtC,QAAQ,CAAC,QAAQ,CAAC,IAAI,uBAAa,EAAE,CAAC,CAAC;AACvC,MAAM,QAAQ,GAAG,IAAI,wBAAa,CAAC,QAAQ,CAAC,CAAC;AAE7C,MAAM,OAAO,GAAG,IAAI,mBAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,YAAY,CAAC;KAClB,WAAW,CAAC,qDAAqD,CAAC;KAClE,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,aAAa;AACb,OAAO;KACJ,QAAQ,CAAC,OAAO,EAAE,6BAA6B,CAAC;KAChD,MAAM,CAAC,qBAAqB,EAAE,kCAAkC,CAAC;KACjE,MAAM,CAAC,qBAAqB,EAAE,uCAAuC,CAAC;KACtE,MAAM,CAAC,OAAO,EAAE,6BAA6B,CAAC;KAC9C,MAAM,CAAC,YAAY,EAAE,oBAAoB,CAAC;KAC1C,MAAM,CAAC,eAAe,EAAE,+BAA+B,CAAC;KACxD,MAAM,CAAC,aAAa,EAAE,+BAA+B,CAAC;KACtD,MAAM,CAAC,UAAU,EAAE,kCAAkC,CAAC;KACtD,MAAM,CAAC,gBAAgB,EAAE,+BAA+B,CAAC;KACzD,MAAM,CACL,KAAK,EACH,MAA0B,EAC1B,IASC,EACD,EAAE;IACF,yBAAyB;IACzB,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;QACrB,MAAM,OAAO,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC;QACvC,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,CAAC;QAClC,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,IAAI,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;QAChD,CAAC;QACD,OAAO;IACT,CAAC;IAED,iBAAiB;IACjB,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,CAAC,KAAK,CAAC,yDAAyD,CAAC,CAAC;QACzE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAErC,WAAW;IACX,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;QAC5B,OAAO,CAAC,KAAK,CAAC,0BAA0B,OAAO,EAAE,CAAC,CAAC;QACnD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,kBAAkB;IAClB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,MAAM,OAAO,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC;QACvC,IAAI,QAAQ,GAAG,SAAS,CAAC;QACzB,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YACtC,IAAI,KAAK,GAAG,SAAS,EAAE,CAAC;gBACtB,SAAS,GAAG,KAAK,CAAC;gBAClB,QAAQ,GAAG,CAAC,CAAC,IAAI,CAAC;YACpB,CAAC;QACH,CAAC;QACD,IAAI,
CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAChB,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,iBAAiB,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACpF,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACxB,CAAC;QACD,OAAO;IACT,CAAC;IAED,OAAO;IACP,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,CAAC,OAAO,EAAE;YAC3C,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,OAAO,EAAE,CAAC,IAAI,CAAC,KAAK,EAAE,oDAAoD;YAC1E,OAAO,EAAE,IAAI,CAAC,OAAO;SACtB,CAAC,CAAC;QAEH,0CAA0C;QAC1C,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,MAAM,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC;YAClC,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,mBAAmB,CAAC,CAAC,MAAM,aAAa,CAAC,CAAC,MAAM,KAAK;gBACnD,UAAU,CAAC,CAAC,MAAM,YAAY,CAAC,CAAC,KAAK,YAAY,CAAC,CAAC,KAAK,MAAM,CACjE,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;QAElD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;YAC3D,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;gBAChB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,cAAc,IAAI,CAAC,MAAM,IAAI,CAAC,CAAC;YACtD,CAAC;QACH,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACpB,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACjE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,UAAU,OAAO,IAAI,CAAC,CAAC;QAC5C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;AACH,CAAC,CACF,CAAC;AAEJ,OAAO,CAAC,KAAK,EAAE,CAAC"}
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Data-cleaning utilities.
3
+ *
4
+ * Shared cleaning logic extracted from StatementValidator, with no database dependency.
5
+ * Used by the Stage 3 validation tests of Parser Tuning.
6
+ *
7
+ * Differences from StatementValidator:
8
+ * - this module only applies rule-based cleaning and never queries a database to deduplicate
9
+ * - StatementValidator queries the database to decide whether a refNo already exists
10
+ */
11
+ import { TradeData, SnapshotData, IPOData, StatementData } from '../types/statement';
12
+ /**
13
+ * Switches for the individual cleaning rules.
14
+ */
15
+ export interface CleaningConfig {
16
+ skipZeroAmount: boolean; // filter transactions whose amount is null, undefined or 0
17
+ requireRefNo: boolean; // filter transactions that have no refNo
18
+ excludeHKDBase: boolean; // filter snapshots whose symbol is exactly 'HKD(Base)'
19
+ dedupeByRefNo: boolean; // deduplicate transactions that share a refNo
20
+ filterInvalidIPO: boolean; // filter IPO records missing stockCode or amount
21
+ }
22
+ /**
23
+ * Default cleaning configuration (every rule is enabled).
24
+ */
25
+ export declare const DEFAULT_CLEANING_CONFIG: CleaningConfig;
26
+ /**
27
+ * Outcome of one cleaning pass.
28
+ */
29
+ export interface CleaningResult<T> {
30
+ cleaned: T[]; // records that passed the rules
31
+ filtered: T[]; // records that were removed
32
+ filterReasons: string[]; // one message is pushed for each removed record
33
+ }
34
+ /**
35
+ * Clean transaction records.
36
+ *
37
+ * @param transactions - raw transaction records
38
+ * @param config - cleaning rule configuration
39
+ * @returns the cleaning result, holding both the kept and the filtered records
40
+ */
41
+ export declare function cleanTransactions(transactions: TradeData[], config: CleaningConfig): CleaningResult<TradeData>;
42
+ /**
43
+ * Normalize the transaction type.
44
+ * Derives a WITHDRAWAL / DEPOSIT type from the description text.
45
+ */
46
+ export declare function normalizeTransaction(tx: TradeData): TradeData;
47
+ /**
48
+ * Clean IPO records.
49
+ *
50
+ * @param ipoList - raw IPO records
51
+ * @param config - cleaning rule configuration
52
+ * @returns the cleaning result
53
+ */
54
+ export declare function cleanIPO(ipoList: IPOData[], config: CleaningConfig): CleaningResult<IPOData>;
55
+ /**
56
+ * Clean position snapshot records.
57
+ *
58
+ * @param snapshots - raw snapshot records
59
+ * @param config - cleaning rule configuration
60
+ * @returns the cleaning result
61
+ */
62
+ export declare function cleanSnapshots(snapshots: SnapshotData[], config: CleaningConfig): CleaningResult<SnapshotData>;
63
+ /**
64
+ * Clean a whole StatementData object.
65
+ *
66
+ * @param data - raw statement data
67
+ * @param config - cleaning rule overrides; keys left out fall back to DEFAULT_CLEANING_CONFIG
68
+ * @returns the cleaned statement data plus the per-section cleaning details
69
+ */
70
+ export declare function cleanStatementData(data: StatementData, config?: Partial<CleaningConfig>): {
71
+ result: StatementData;
72
+ details: {
73
+ transactions: CleaningResult<TradeData>;
74
+ ipo: CleaningResult<IPOData>;
75
+ snapshots: CleaningResult<SnapshotData>;
76
+ };
77
+ };
78
+ //# sourceMappingURL=cleaning.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cleaning.d.ts","sourceRoot":"","sources":["../../src/core/cleaning.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAOrF;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,cAAc,EAAE,OAAO,CAAC;IACxB,YAAY,EAAE,OAAO,CAAC;IACtB,cAAc,EAAE,OAAO,CAAC;IACxB,aAAa,EAAE,OAAO,CAAC;IACvB,gBAAgB,EAAE,OAAO,CAAC;CAC3B;AAED;;GAEG;AACH,eAAO,MAAM,uBAAuB,EAAE,cAMrC,CAAC;AAEF;;GAEG;AACH,MAAM,WAAW,cAAc,CAAC,CAAC;IAC/B,OAAO,EAAE,CAAC,EAAE,CAAC;IACb,QAAQ,EAAE,CAAC,EAAE,CAAC;IACd,aAAa,EAAE,MAAM,EAAE,CAAC;CACzB;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAC/B,YAAY,EAAE,SAAS,EAAE,EACzB,MAAM,EAAE,cAAc,GACrB,cAAc,CAAC,SAAS,CAAC,CAuE3B;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CAAC,EAAE,EAAE,SAAS,GAAG,SAAS,CAe7D;AAED;;;;;;GAMG;AACH,wBAAgB,QAAQ,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE,MAAM,EAAE,cAAc,GAAG,cAAc,CAAC,OAAO,CAAC,CA4B5F;AAED;;;;;;GAMG;AACH,wBAAgB,cAAc,CAC5B,SAAS,EAAE,YAAY,EAAE,EACzB,MAAM,EAAE,cAAc,GACrB,cAAc,CAAC,YAAY,CAAC,CA8B9B;AAED;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAChC,IAAI,EAAE,aAAa,EACnB,MAAM,GAAE,OAAO,CAAC,cAAc,CAAM,GACnC;IACD,MAAM,EAAE,aAAa,CAAC;IACtB,OAAO,EAAE;QACP,YAAY,EAAE,cAAc,CAAC,SAAS,CAAC,CAAC;QACxC,GAAG,EAAE,cAAc,CAAC,OAAO,CAAC,CAAC;QAC7B,SAAS,EAAE,cAAc,CAAC,YAAY,CAAC,CAAC;KACzC,CAAC;CACH,CAsBA"}
@@ -0,0 +1,217 @@
1
+ "use strict";
2
+ /**
3
+ * 数据清理工具模块
4
+ *
5
+ * 提取自 StatementValidator 的公共清理逻辑,不依赖数据库。
6
+ * 用于 Parser Tuning 的 Stage 3 验证测试。
7
+ *
8
+ * 与 StatementValidator 的区别:
9
+ * - 本模块只做基于规则的清理,不查询数据库去重
10
+ * - StatementValidator 会查询数据库判断 refNo 是否存在
11
+ */
12
+ Object.defineProperty(exports, "__esModule", { value: true });
13
+ exports.DEFAULT_CLEANING_CONFIG = void 0;
14
+ exports.cleanTransactions = cleanTransactions;
15
+ exports.normalizeTransaction = normalizeTransaction;
16
+ exports.cleanIPO = cleanIPO;
17
+ exports.cleanSnapshots = cleanSnapshots;
18
+ exports.cleanStatementData = cleanStatementData;
19
+ /**
20
+ * Keywords that mark a transaction description as IPO-related (kept consistent with Stage2).
21
+ */
22
+ const IPO_KEYWORDS = ['IPO', '新股', '认购', '認購', '公开发售', '公開發售', 'PUBLIC OFFER'];
23
+ /**
24
+ * Default cleaning configuration: every rule is switched on.
25
+ */
26
+ exports.DEFAULT_CLEANING_CONFIG = {
27
+ skipZeroAmount: true, // filter transactions whose amount is null, undefined or 0
28
+ requireRefNo: true, // filter transactions that have no refNo
29
+ excludeHKDBase: true, // filter snapshots whose symbol is 'HKD(Base)'
30
+ dedupeByRefNo: true, // deduplicate transactions that share a refNo
31
+ filterInvalidIPO: true, // filter IPO records missing stockCode or amount
32
+ };
33
/**
 * Clean a list of transaction records with rule-based filters.
 *
 * Rules, applied in this order to every record:
 *   1. `config.skipZeroAmount` — drop records whose amount is null, undefined or 0.
 *   2. `config.requireRefNo`   — drop records that have no refNo.
 *   3. `config.dedupeByRefNo`  — keep one record per refNo; when two records
 *      clash, the one with more own keys wins, otherwise the first one stays.
 *      Records without a refNo cannot be deduplicated and are kept as they are
 *      (previously they were silently lost when `requireRefNo` was off).
 * The survivors are passed through normalizeTransaction(). Records whose
 * description contains an IPO keyword are then kept only when their
 * transactionType is IPO_FEE, IPO_INTEREST or BUY.
 *
 * @param transactions - raw transaction records; null/empty yields an empty result
 * @param config - cleaning rule configuration
 * @returns `cleaned` records in first-seen order, the `filtered` records, and
 *          one entry in `filterReasons` for each filtered record
 */
function cleanTransactions(transactions, config) {
    if (!transactions || transactions.length === 0) {
        return { cleaned: [], filtered: [], filterReasons: [] };
    }
    const filtered = [];
    const filterReasons = [];
    // Survivors in first-seen order; slotByRefNo maps a refNo to its position
    // in `kept` so a more complete duplicate can replace the earlier record
    // without changing the output order.
    const kept = [];
    const slotByRefNo = new Map();
    for (const tx of transactions) {
        // Rule 1: empty or zero amount.
        if (config.skipZeroAmount) {
            if (tx.amount === null || tx.amount === undefined || tx.amount === 0) {
                filtered.push(tx);
                filterReasons.push(`交易 ${tx.refNo || 'unknown'}: 金额为空或零`);
                continue;
            }
        }
        // Rule 2: a reference number is mandatory.
        if (config.requireRefNo) {
            if (!tx.refNo) {
                filtered.push(tx);
                filterReasons.push(`交易: 缺少参考编号`);
                continue;
            }
        }
        // Rule 3: deduplicate by reference number.
        if (config.dedupeByRefNo && tx.refNo) {
            const slot = slotByRefNo.get(tx.refNo);
            if (slot !== undefined) {
                const existing = kept[slot];
                // Keep whichever record carries more fields.
                if (Object.keys(tx).length > Object.keys(existing).length) {
                    filtered.push(existing);
                    filterReasons.push(`交易 ${existing.refNo}: 被更完整的记录替代`);
                    kept[slot] = tx;
                }
                else {
                    filtered.push(tx);
                    filterReasons.push(`交易 ${tx.refNo}: 重复记录`);
                }
                continue;
            }
            slotByRefNo.set(tx.refNo, kept.length);
        }
        // Reached by: first occurrence of a refNo, any record when dedupe is
        // off, and refNo-less records when dedupe is on but refNo is optional.
        kept.push(tx);
    }
    // Normalize transaction types before the IPO check reads them.
    const normalized = kept.map((tx) => normalizeTransaction(tx));
    // IPO-related records are only allowed for these transaction types.
    const allowedIPOTypes = ['IPO_FEE', 'IPO_INTEREST', 'BUY'];
    const cleaned = normalized.filter((tx) => {
        const upDesc = (tx.description || '').toUpperCase();
        const isIPO = IPO_KEYWORDS.some((kw) => upDesc.includes(kw.toUpperCase()));
        if (!isIPO || allowedIPOTypes.includes(tx.transactionType)) {
            return true;
        }
        filtered.push(tx);
        filterReasons.push(`交易 ${tx.refNo}: IPO 相关但类型为 ${tx.transactionType}`);
        return false;
    });
    return { cleaned, filtered, filterReasons };
}
107
/**
 * Normalize the transaction type from the description text.
 *
 * The description is matched case-insensitively:
 *   - 'EPAYMENT' or '電子轉帳' → transactionType 'WITHDRAWAL'
 *   - 'EDDA', 'DIRECT DEBIT', 'DEP REFE' or '直接轉帳' → transactionType 'DEPOSIT'
 * The withdrawal markers take precedence when both kinds are present.
 *
 * The input record is never modified: when the type has to change, a shallow
 * copy carrying the new type is returned; otherwise the very same object is
 * returned. Callers must therefore use the return value.
 *
 * @param tx - transaction record to normalize
 * @returns the record with its transactionType normalized
 */
function normalizeTransaction(tx) {
    const desc = (tx.description || '').toUpperCase();
    const depositMarkers = ['EDDA', 'DIRECT DEBIT', 'DEP REFE', '直接轉帳'];
    let transactionType;
    if (desc.includes('EPAYMENT') || desc.includes('電子轉帳')) {
        transactionType = 'WITHDRAWAL';
    }
    else if (depositMarkers.some((marker) => desc.includes(marker))) {
        transactionType = 'DEPOSIT';
    }
    // Nothing matched, or the type is already right: hand back the original.
    if (transactionType === undefined || tx.transactionType === transactionType) {
        return tx;
    }
    // Copy instead of assigning so the caller's input data stays untouched.
    return { ...tx, transactionType };
}
124
/**
 * Clean IPO records.
 *
 * When `config.filterInvalidIPO` is on, a record is filtered if it has no
 * stockCode, or if its amount is null or undefined. Every other record is kept.
 *
 * @param ipoList - raw IPO records; null/empty yields an empty result
 * @param config - cleaning rule configuration
 * @returns the kept records, the filtered records and one reason per filtered record
 */
function cleanIPO(ipoList, config) {
    const kept = [];
    const rejected = [];
    const reasons = [];
    if (!ipoList || ipoList.length === 0) {
        return { cleaned: kept, filtered: rejected, filterReasons: reasons };
    }
    // Returns the rejection reason for a record, or null when it is acceptable.
    const findDefect = (entry) => {
        if (!config.filterInvalidIPO) {
            return null;
        }
        if (!entry.stockCode) {
            return `IPO: 缺少股票代码`;
        }
        if (entry.amount === undefined || entry.amount === null) {
            return `IPO ${entry.stockCode}: 金额为空`;
        }
        return null;
    };
    for (const entry of ipoList) {
        const defect = findDefect(entry);
        if (defect === null) {
            kept.push(entry);
        }
        else {
            rejected.push(entry);
            reasons.push(defect);
        }
    }
    return { cleaned: kept, filtered: rejected, filterReasons: reasons };
}
156
/**
 * Clean position snapshot records.
 *
 * Rules, applied in this order to every record:
 *   1. When `config.excludeHKDBase` is on, drop records whose symbol is
 *      exactly 'HKD(Base)'.
 *   2. For records whose assetCategory is 'Cash', keep only HKD and USD
 *      (symbol compared case-insensitively). A Cash record with a missing
 *      symbol is filtered by this rule instead of raising a TypeError.
 *
 * @param snapshots - raw snapshot records; null/empty yields an empty result
 * @param config - cleaning rule configuration
 * @returns the kept records, the filtered records and one reason per filtered record
 */
function cleanSnapshots(snapshots, config) {
    if (!snapshots || snapshots.length === 0) {
        return { cleaned: [], filtered: [], filterReasons: [] };
    }
    const cleaned = [];
    const filtered = [];
    const filterReasons = [];
    for (const item of snapshots) {
        // Rule 1: exclude HKD(Base).
        if (config.excludeHKDBase && item.symbol === 'HKD(Base)') {
            filtered.push(item);
            filterReasons.push(`快照 ${item.symbol}: HKD(Base) 被排除`);
            continue;
        }
        // Rule 2: among cash positions only HKD and USD are kept.
        if (item.assetCategory === 'Cash') {
            // Tolerate a missing or non-string symbol, like description elsewhere.
            const currency = String(item.symbol || '').toUpperCase();
            if (currency !== 'HKD' && currency !== 'USD') {
                filtered.push(item);
                filterReasons.push(`快照 ${item.symbol}: 非 HKD/USD 现金被排除`);
                continue;
            }
        }
        cleaned.push(item);
    }
    return { cleaned, filtered, filterReasons };
}
189
/**
 * Clean a whole StatementData object.
 *
 * The supplied config is layered over DEFAULT_CLEANING_CONFIG, then the
 * transactions, ipo and snapshots sections are cleaned (a missing section is
 * treated as an empty list).
 *
 * @param data - raw statement data
 * @param config - cleaning rule overrides (optional)
 * @returns `result`: a copy of `data` whose three sections hold the cleaned
 *          records; `details`: the full cleaning result of each section
 */
function cleanStatementData(data, config = {}) {
    // Defaults first, caller overrides on top.
    const effectiveConfig = { ...exports.DEFAULT_CLEANING_CONFIG, ...config };
    // Clean the sections in the order transactions → ipo → snapshots.
    const details = {
        transactions: cleanTransactions(data.transactions || [], effectiveConfig),
        ipo: cleanIPO(data.ipo || [], effectiveConfig),
        snapshots: cleanSnapshots(data.snapshots || [], effectiveConfig),
    };
    const result = {
        ...data,
        transactions: details.transactions.cleaned,
        ipo: details.ipo.cleaned,
        snapshots: details.snapshots.cleaned,
    };
    return { result, details };
}
217
+ //# sourceMappingURL=cleaning.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cleaning.js","sourceRoot":"","sources":["../../src/core/cleaning.ts"],"names":[],"mappings":";AAAA;;;;;;;;;GASG;;;AA+CH,8CA0EC;AAMD,oDAeC;AASD,4BA4BC;AASD,wCAiCC;AASD,gDAgCC;AAlQD;;GAEG;AACH,MAAM,YAAY,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,CAAC,CAAC;AAa/E;;GAEG;AACU,QAAA,uBAAuB,GAAmB;IACrD,cAAc,EAAE,IAAI;IACpB,YAAY,EAAE,IAAI;IAClB,cAAc,EAAE,IAAI;IACpB,aAAa,EAAE,IAAI;IACnB,gBAAgB,EAAE,IAAI;CACvB,CAAC;AAWF;;;;;;GAMG;AACH,SAAgB,iBAAiB,CAC/B,YAAyB,EACzB,MAAsB;IAEtB,IAAI,CAAC,YAAY,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/C,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,aAAa,EAAE,EAAE,EAAE,CAAC;IAC1D,CAAC;IAED,MAAM,QAAQ,GAAgB,EAAE,CAAC;IACjC,MAAM,aAAa,GAAa,EAAE,CAAC;IACnC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAqB,CAAC;IAE/C,KAAK,MAAM,EAAE,IAAI,YAAY,EAAE,CAAC;QAC9B,cAAc;QACd,IAAI,MAAM,CAAC,cAAc,EAAE,CAAC;YAC1B,IAAI,EAAE,CAAC,MAAM,KAAK,IAAI,IAAI,EAAE,CAAC,MAAM,KAAK,SAAS,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACrE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAClB,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,KAAK,IAAI,SAAS,UAAU,CAAC,CAAC;gBAC1D,SAAS;YACX,CAAC;QACH,CAAC;QAED,iBAAiB;QACjB,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;YACxB,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC;gBACd,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAClB,aAAa,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;gBACjC,SAAS;YACX,CAAC;QACH,CAAC;QAED,iBAAiB;QACjB,IAAI,MAAM,CAAC,aAAa,IAAI,EAAE,CAAC,KAAK,EAAE,CAAC;YACrC,IAAI,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC5B,MAAM,QAAQ,GAAG,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,KAAK,CAAE,CAAC;gBAC1C,aAAa;gBACb,IAAI,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,CAAC;oBAC1D,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;oBACxB,aAAa,CAAC,IAAI,CAAC,MAAM,QAAQ,CAAC,KAAK,aAAa,CAAC,CAAC;oBACtD,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;gBAC9B,CAAC;qBAAM,CAAC;oBACN,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;oBAClB,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,KAAK,QAAQ,CAAC,CAAC;gBAC7C,CAAC;gBACD,SAAS;YACX,CAAC;YACD,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,KAAK,EA
AE,EAAE,CAAC,CAAC;QAC9B,CAAC;aAAM,IAAI,CAAC,MAAM,CAAC,aAAa,EAAE,CAAC;YACjC,4BAA4B;YAC5B,SAAS,CAAC,GAAG,CAAC,OAAO,SAAS,CAAC,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;IAED,UAAU;IACV,IAAI,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,oBAAoB,CAAC,EAAE,CAAC,CAAC,CAAC;IAEnF,iBAAiB;IACjB,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,EAAE,EAAE;QAC9B,MAAM,MAAM,GAAG,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;QACpD,MAAM,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;QAC3E,IAAI,KAAK,EAAE,CAAC;YACV,iBAAiB;YACjB,MAAM,eAAe,GAAG,CAAC,SAAS,EAAE,cAAc,EAAE,KAAK,CAAC,CAAC;YAC3D,IAAI,eAAe,CAAC,QAAQ,CAAC,EAAE,CAAC,eAAe,CAAC,EAAE,CAAC;gBACjD,OAAO,IAAI,CAAC;YACd,CAAC;YACD,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAClB,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,KAAK,gBAAgB,EAAE,CAAC,eAAe,EAAE,CAAC,CAAC;YACvE,OAAO,KAAK,CAAC;QACf,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;IAEH,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,CAAC;AAC9C,CAAC;AAED;;;GAGG;AACH,SAAgB,oBAAoB,CAAC,EAAa;IAChD,MAAM,IAAI,GAAG,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IAElD,IAAI,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACvD,EAAE,CAAC,eAAe,GAAG,YAAY,CAAC;IACpC,CAAC;SAAM,IACL,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;QACrB,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC;QAC7B,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;QACzB,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EACrB,CAAC;QACD,EAAE,CAAC,eAAe,GAAG,SAAS,CAAC;IACjC,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,QAAQ,CAAC,OAAkB,EAAE,MAAsB;IACjE,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrC,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,aAAa,EAAE,EAAE,EAAE,CAAC;IAC1D,CAAC;IAED,MAAM,OAAO,GAAc,EAAE,CAAC;IAC9B,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,MAAM,aAAa,GAAa,EAAE,CAAC;IAEnC,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;QAC3B,kBAAkB;QAClB,IAAI,MAAM,CAAC,gBAAgB,EAAE,CAAC;YAC5B,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;gBACpB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACpB,aA
Aa,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;gBAClC,SAAS;YACX,CAAC;YACD,IAAI,IAAI,CAAC,MAAM,KAAK,SAAS,IAAI,IAAI,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC;gBACtD,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACpB,aAAa,CAAC,IAAI,CAAC,OAAO,IAAI,CAAC,SAAS,QAAQ,CAAC,CAAC;gBAClD,SAAS;YACX,CAAC;QACH,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,CAAC;AAC9C,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,cAAc,CAC5B,SAAyB,EACzB,MAAsB;IAEtB,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzC,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,aAAa,EAAE,EAAE,EAAE,CAAC;IAC1D,CAAC;IAED,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,MAAM,aAAa,GAAa,EAAE,CAAC;IAEnC,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC7B,qBAAqB;QACrB,IAAI,MAAM,CAAC,cAAc,IAAI,IAAI,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;YACzD,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACpB,aAAa,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,MAAM,iBAAiB,CAAC,CAAC;YACvD,SAAS;QACX,CAAC;QAED,wBAAwB;QACxB,IAAI,IAAI,CAAC,aAAa,KAAK,MAAM,EAAE,CAAC;YAClC,IAAI,CAAC,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;gBACxD,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACpB,aAAa,CAAC,IAAI,CAAC,MAAM,IAAI,CAAC,MAAM,mBAAmB,CAAC,CAAC;gBACzD,SAAS;YACX,CAAC;QACH,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,CAAC;IAED,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,CAAC;AAC9C,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,kBAAkB,CAChC,IAAmB,EACnB,SAAkC,EAAE;IASpC,OAAO;IACP,MAAM,YAAY,GAAmB,EAAE,GAAG,+BAAuB,EAAE,GAAG,MAAM,EAAE,CAAC;IAE/E,QAAQ;IACR,MAAM,kBAAkB,GAAG,iBAAiB,CAAC,IAAI,CAAC,YAAY,IAAI,EAAE,EAAE,YAAY,CAAC,CAAC;IACpF,MAAM,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,EAAE,YAAY,CAAC,CAAC;IACzD,MAAM,eAAe,GAAG,cAAc,CAAC,IAAI,CAAC,SAAS,IAAI,EAAE,EAAE,YAAY,CAAC,CAAC;IAE3E,OAAO;QACL,MAAM,EAAE;YACN,GAAG,IAAI;YACP,YAAY,EAAE,kBAAkB,CAAC,OAAO;YACxC,GAAG,EAAE,SAAS,CAAC,OAAO;YACtB,SAAS,EAAE,eAAe,CAAC,OAAO;SACnC;QACD,OAAO,EAAE;YACP,YAAY,EAAE,kBAAkB;YAChC,GAAG,EAAE,SAAS;YACd,SAAS,EAAE,eAAe;SAC3B;KACF,CAAC;AACJ,CAAC"}
@@ -0,0 +1,49 @@
1
+ /**
2
+ * ParsePipeline — orchestrator of the parsing pipeline.
3
+ *
4
+ * Chains Stage1 (extract) → Stage2 (format) → Stage3 (clean) and
5
+ * returns the structured result together with per-stage timing metadata.
6
+ *
7
+ * Supported modes:
8
+ * - raw: run Stage1 only and output the raw table data
9
+ * - noClean: skip the Stage3 cleaning step
10
+ */
11
+ import { StatementData } from '../types/statement';
12
+ import { RawTableData } from '../types/raw';
13
+ import { PluginRegistry } from './registry';
14
+ /** Parse options */
15
+ export interface ParseOptions {
16
+ /** Broker name to use; auto-detected when omitted */
17
+ broker?: string;
18
+ /** Output only the Stage1 raw data */
19
+ raw?: boolean;
20
+ /** Skip the Stage3 cleaning step */
21
+ noClean?: boolean;
22
+ /** Output timing details. NOTE(review): ParsePipeline.parse in pipeline.js does not appear to read this flag — confirm it is consumed by the caller only */
23
+ verbose?: boolean;
24
+ }
25
+ /** Time spent in each stage (milliseconds) */
26
+ export interface ParseTimings {
27
+ detect: number;
28
+ stage1: number;
29
+ stage2: number;
30
+ clean: number;
31
+ total: number;
32
+ }
33
+ /** Parse result */
34
+ export interface ParseResult {
35
+ data: StatementData | RawTableData; // raw Stage1 output in raw mode, formatted statement data otherwise
36
+ broker: string; // name of the plugin that handled the file
37
+ raw: boolean; // true when only Stage1 was run
38
+ metadata: {
39
+ timings: ParseTimings;
40
+ parserVersion: string;
41
+ };
42
+ }
43
+ export declare class ParsePipeline {
44
+ private readonly registry;
45
+ constructor(registry: PluginRegistry);
46
+ /** Run the full parsing pipeline */
47
+ parse(pdfPath: string, opts?: ParseOptions): Promise<ParseResult>;
48
+ }
49
+ //# sourceMappingURL=pipeline.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAG5C,OAAO,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAE5C,WAAW;AACX,MAAM,WAAW,YAAY;IAC3B,oBAAoB;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,wBAAwB;IACxB,GAAG,CAAC,EAAE,OAAO,CAAC;IACd,mBAAmB;IACnB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,aAAa;IACb,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,gBAAgB;AAChB,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf;AAED,WAAW;AACX,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,aAAa,GAAG,YAAY,CAAC;IACnC,MAAM,EAAE,MAAM,CAAC;IACf,GAAG,EAAE,OAAO,CAAC;IACb,QAAQ,EAAE;QACR,OAAO,EAAE,YAAY,CAAC;QACtB,aAAa,EAAE,MAAM,CAAC;KACvB,CAAC;CACH;AAED,qBAAa,aAAa;IACZ,OAAO,CAAC,QAAQ,CAAC,QAAQ;gBAAR,QAAQ,EAAE,cAAc;IAErD,eAAe;IACT,KAAK,CAAC,OAAO,EAAE,MAAM,EAAE,IAAI,GAAE,YAAiB,GAAG,OAAO,CAAC,WAAW,CAAC;CAmD5E"}
@@ -0,0 +1,66 @@
1
+ "use strict";
2
+ /**
3
+ * ParsePipeline — 解析管道编排器
4
+ *
5
+ * 编排 Stage1(提取) → Stage2(格式化) → Stage3(清理)流程,
6
+ * 返回结构化结果和各阶段耗时元数据。
7
+ *
8
+ * 支持模式:
9
+ * - raw: 只执行 Stage1,输出原始表格数据
10
+ * - noClean: 跳过 Stage3 清理步骤
11
+ */
12
+ Object.defineProperty(exports, "__esModule", { value: true });
13
+ exports.ParsePipeline = void 0;
14
+ const cleaning_1 = require("./cleaning");
15
class ParsePipeline {
    /**
     * @param registry - plugin registry used to look up or auto-detect the broker plugin
     */
    constructor(registry) {
        this.registry = registry;
    }
    /**
     * Run the parsing pipeline on one PDF.
     *
     * Steps: resolve the plugin (by `opts.broker`, otherwise auto-detect),
     * extract the raw data (Stage1), format it (Stage2) and clean it (Stage3).
     * With `opts.raw` the method returns right after Stage1; with
     * `opts.noClean` the Stage3 cleaning is skipped.
     *
     * @param pdfPath - path of the PDF to parse
     * @param opts - parse options
     * @returns the data, the plugin name, the raw flag and timing metadata
     */
    async parse(pdfPath, opts = {}) {
        const startedAt = Date.now();
        const timings = { detect: 0, stage1: 0, stage2: 0, clean: 0, total: 0 };
        // Step 1: resolve the plugin — explicit broker name or auto-detection.
        let mark = Date.now();
        let plugin;
        if (opts.broker) {
            plugin = this.registry.getPlugin(opts.broker);
        }
        else {
            plugin = await this.registry.autoDetect(pdfPath);
        }
        timings.detect = Date.now() - mark;
        // Step 2: Stage1 — extract the raw data.
        mark = Date.now();
        const rawData = await plugin.createExtractor().extract(pdfPath);
        timings.stage1 = Date.now() - mark;
        // Raw mode stops here and hands back the Stage1 output untouched.
        if (opts.raw) {
            timings.total = Date.now() - startedAt;
            return {
                data: rawData,
                broker: plugin.name,
                raw: true,
                metadata: { timings, parserVersion: '0.1.0' },
            };
        }
        // Step 3: Stage2 — format into statement data.
        mark = Date.now();
        const formatted = await plugin.createFormatter().format(rawData);
        timings.stage2 = Date.now() - mark;
        // Step 4: Stage3 — cleaning, unless the caller opted out.
        mark = Date.now();
        const statementData = opts.noClean
            ? formatted
            : (0, cleaning_1.cleanStatementData)(formatted).result;
        timings.clean = Date.now() - mark;
        timings.total = Date.now() - startedAt;
        return {
            data: statementData,
            broker: plugin.name,
            raw: false,
            metadata: { timings, parserVersion: '0.1.0' },
        };
    }
}
65
+ exports.ParsePipeline = ParsePipeline;
66
+ //# sourceMappingURL=pipeline.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":";AAAA;;;;;;;;;GASG;;;AAKH,yCAAgD;AAmChD,MAAa,aAAa;IACxB,YAA6B,QAAwB;QAAxB,aAAQ,GAAR,QAAQ,CAAgB;IAAG,CAAC;IAEzD,eAAe;IACf,KAAK,CAAC,KAAK,CAAC,OAAe,EAAE,OAAqB,EAAE;QAClD,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAChC,MAAM,OAAO,GAAiB,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;QAEtF,+BAA+B;QAC/B,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACnB,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM;YACxB,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC;YACtC,CAAC,CAAC,MAAM,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;QAC5C,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;QAEhC,wBAAwB;QACxB,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,CAAC,eAAe,EAAE,CAAC;QAC3C,MAAM,OAAO,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QACjD,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;QAEhC,uBAAuB;QACvB,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,YAAY,CAAC;YAC1C,OAAO;gBACL,IAAI,EAAE,OAAO;gBACb,MAAM,EAAE,MAAM,CAAC,IAAI;gBACnB,GAAG,EAAE,IAAI;gBACT,QAAQ,EAAE,EAAE,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE;aAC9C,CAAC;QACJ,CAAC;QAED,qBAAqB;QACrB,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,CAAC,eAAe,EAAE,CAAC;QAC3C,IAAI,aAAa,GAAG,MAAM,SAAS,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QACpD,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;QAEhC,wBAAwB;QACxB,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QACf,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAClB,MAAM,EAAE,MAAM,EAAE,GAAG,IAAA,6BAAkB,EAAC,aAAa,CAAC,CAAC;YACrD,aAAa,GAAG,MAAM,CAAC;QACzB,CAAC;QACD,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;QAE/B,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,YAAY,CAAC;QAE1C,OAAO;YACL,IAAI,EAAE,aAAa;YACnB,MAAM,EAAE,MAAM,CAAC,IAAI;YACnB,GAAG,EAAE,KAAK;YACV,QAAQ,EAAE,EAAE,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE;SAC9C,CAAC;IACJ,CAAC;CACF;AAvDD,sCAuDC"}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * PluginRegistry — registry of broker plugins.
3
+ *
4
+ * Manages every registered IBrokerPlugin instance and supports:
5
+ * - exact lookup by name
6
+ * - listing all registered plugins
7
+ * - auto-detecting which broker a PDF belongs to (the highest confidence wins)
8
+ */
9
+ import { IBrokerPlugin } from '../types/plugin';
10
+ export declare class PluginRegistry {
11
+ private readonly plugins;
12
+ /** Register a broker plugin */
13
+ register(plugin: IBrokerPlugin): void;
14
+ /** Get a plugin by name; throws when no plugin with that name is found */
15
+ getPlugin(name: string): IBrokerPlugin;
16
+ /** List all registered plugins */
17
+ listPlugins(): IBrokerPlugin[];
18
+ /**
19
+ * Auto-detect which broker a PDF belongs to.
20
+ * Calls detect() on every plugin and returns the plugin with the highest confidence that is >= 0.5
21
+ */
22
+ autoDetect(filePath: string): Promise<IBrokerPlugin>;
23
+ }
24
+ //# sourceMappingURL=registry.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"registry.d.ts","sourceRoot":"","sources":["../../src/core/registry.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAEhD,qBAAa,cAAc;IACzB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAoC;IAE5D,eAAe;IACf,QAAQ,CAAC,MAAM,EAAE,aAAa,GAAG,IAAI;IAIrC,sBAAsB;IACtB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,aAAa;IAQtC,gBAAgB;IAChB,WAAW,IAAI,aAAa,EAAE;IAI9B;;;OAGG;IACG,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC;CAoB3D"}