sec-edgar-api 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -45
- package/build/index.d.ts +2 -7
- package/build/index.js +16 -8
- package/build/services/DocumentParser/DocumentParser.d.ts +16 -0
- package/build/services/DocumentParser/DocumentParser.js +22 -0
- package/build/services/DocumentParser/XMLNode/ColNode.d.ts +18 -0
- package/build/services/DocumentParser/XMLNode/ColNode.js +74 -0
- package/build/services/DocumentParser/XMLNode/DocumentNode.d.ts +3 -0
- package/build/services/DocumentParser/XMLNode/DocumentNode.js +27 -0
- package/build/services/DocumentParser/XMLNode/HRNode.d.ts +3 -0
- package/build/services/DocumentParser/XMLNode/HRNode.js +27 -0
- package/build/services/DocumentParser/XMLNode/NonTableNode.d.ts +3 -0
- package/build/services/DocumentParser/XMLNode/NonTableNode.js +27 -0
- package/build/services/DocumentParser/XMLNode/RowNode.d.ts +30 -0
- package/build/services/DocumentParser/XMLNode/RowNode.js +177 -0
- package/build/services/DocumentParser/XMLNode/TableNode.d.ts +20 -0
- package/build/services/DocumentParser/XMLNode/TableNode.js +132 -0
- package/build/services/DocumentParser/XMLNode/XMLNode.d.ts +37 -0
- package/build/services/DocumentParser/XMLNode/XMLNode.js +154 -0
- package/build/services/DocumentParser/XMLParser.d.ts +40 -0
- package/build/services/DocumentParser/XMLParser.js +306 -0
- package/build/services/DocumentParser/index.d.ts +2 -0
- package/build/services/DocumentParser/index.js +4 -0
- package/build/services/DocumentParser/parsers/index.d.ts +9 -0
- package/build/services/DocumentParser/parsers/index.js +11 -0
- package/build/services/DocumentParser/parsers/parse-form-10k.d.ts +3 -0
- package/build/services/DocumentParser/parsers/parse-form-10k.js +78 -0
- package/build/services/DocumentParser/parsers/parse-form-13g.d.ts +8 -0
- package/build/services/DocumentParser/parsers/parse-form-13g.js +88 -0
- package/build/services/DocumentParser/parsers/parse-form-4.d.ts +8 -0
- package/build/services/DocumentParser/parsers/parse-form-4.js +220 -0
- package/build/services/ReportParser/ReportParser.d.ts +2 -2
- package/build/services/ReportParser/ReportParser.js +8 -4
- package/build/services/SecEdgarApi/RequestWrapper.d.ts +37 -0
- package/build/services/SecEdgarApi/RequestWrapper.js +147 -0
- package/build/services/SecEdgarApi/SecEdgarApi.d.ts +156 -22
- package/build/services/SecEdgarApi/SecEdgarApi.js +314 -32
- package/build/types/common.type.d.ts +12 -0
- package/build/types/common.type.js +2 -0
- package/build/types/index.d.ts +2 -0
- package/build/types/index.js +2 -0
- package/build/types/parsed-filings.type.d.ts +66 -0
- package/build/types/parsed-filings.type.js +2 -0
- package/build/types/submission.type.d.ts +19 -1
- package/package.json +1 -5
- package/build/.DS_Store +0 -0
- package/build/downloader.d.ts +0 -6
- package/build/downloader.js +0 -9
- package/build/services/FactFileReader/FactFileReader.d.ts +0 -30
- package/build/services/FactFileReader/FactFileReader.js +0 -36
- package/build/services/FactFileReader/index.d.ts +0 -2
- package/build/services/FactFileReader/index.js +0 -4
- package/build/services/FactsDownloader/Downloader.d.ts +0 -26
- package/build/services/FactsDownloader/Downloader.js +0 -102
- package/build/services/FactsDownloader/FactsDownloader.d.ts +0 -37
- package/build/services/FactsDownloader/FactsDownloader.js +0 -131
- package/build/services/FactsDownloader/Unzipper.d.ts +0 -40
- package/build/services/FactsDownloader/Unzipper.js +0 -40
- package/build/services/FactsDownloader/index.d.ts +0 -2
- package/build/services/FactsDownloader/index.js +0 -4
- package/build/services/SecEdgarApi/Client.d.ts +0 -44
- package/build/services/SecEdgarApi/Client.js +0 -104
- package/build/services/SecEdgarApi/Downloader.d.ts +0 -26
- package/build/services/SecEdgarApi/Downloader.js +0 -102
- package/build/services/SecEdgarApi/FactsDownloader.d.ts +0 -30
- package/build/services/SecEdgarApi/FactsDownloader.js +0 -124
- package/build/services/SecEdgarApi/SecConnector.d.ts +0 -47
- package/build/services/SecEdgarApi/SecConnector.js +0 -143
- package/build/services/SecEdgarApi/Unzipper.d.ts +0 -40
- package/build/services/SecEdgarApi/Unzipper.js +0 -40
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * Prototype-inheritance helper (TypeScript "extends" down-level emit).
 * Reuses an existing `this.__extends` when one is present; otherwise builds
 * a function that links `derived` to `base` both statically and through
 * the prototype chain.
 */
var __extends = (this && this.__extends) || (function () {
    // Chooses the static-inheritance strategy on first use, then replaces
    // itself with the chosen strategy for subsequent calls.
    var linkStatics = function (derived, base) {
        if (Object.setPrototypeOf) {
            linkStatics = Object.setPrototypeOf;
        }
        else if ({ __proto__: [] } instanceof Array) {
            linkStatics = function (target, proto) { target.__proto__ = proto; };
        }
        else {
            linkStatics = function (target, source) {
                for (var key in source) {
                    if (Object.prototype.hasOwnProperty.call(source, key)) {
                        target[key] = source[key];
                    }
                }
            };
        }
        return linkStatics(derived, base);
    };
    return function (derived, base) {
        var baseIsUsable = typeof base === "function" || base === null;
        if (!baseIsUsable) {
            throw new TypeError("Class extends value " + String(base) + " is not a constructor or null");
        }
        linkStatics(derived, base);
        if (base === null) {
            derived.prototype = Object.create(base);
            return;
        }
        // Surrogate constructor: yields an object that inherits from
        // base.prototype without running the base constructor.
        function Surrogate() { this.constructor = derived; }
        Surrogate.prototype = base.prototype;
        derived.prototype = new Surrogate();
    };
})();
|
|
17
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
18
|
+
exports.TableNode = void 0;
|
|
19
|
+
var XMLNode_1 = require("./XMLNode");
|
|
20
|
+
var TableNode = /** @class */ (function (_super) {
    __extends(TableNode, _super);
    /**
     * Node for a table. Its children are treated as rows and each row's
     * children as columns. On top of the base node it stores an optional
     * title and an optional explicitly assigned header row.
     */
    function TableNode() {
        var _this = _super !== null && _super.apply(this, arguments) || this;
        _this.title = null;
        _this.headerRow = null;
        return _this;
    }
    /** Returns the title, or '' when none has been set. */
    TableNode.prototype.getTitle = function () {
        var title = this.title;
        return title !== null && title !== void 0 ? title : '';
    };
    /** Stores the table title. */
    TableNode.prototype.setTitle = function (title) {
        this.title = title;
    };
    /** Returns the row nodes (delegates to the base implementation). */
    TableNode.prototype.getChildren = function () {
        return _super.prototype.getChildren.call(this);
    };
    /**
     * Removes the first row. Does nothing when the table has no rows
     * (previously this passed `undefined` to removeChild and threw).
     */
    TableNode.prototype.removeTopChild = function () {
        var topChild = this.getChildren()[0];
        if (topChild) {
            this.removeChild(topChild);
        }
    };
    /** Removes leading rows for as long as the first row reports itself empty. */
    TableNode.prototype.removeEmptyTopRows = function () {
        var topChild = this.getChildren()[0];
        while (topChild && topChild.getIsEmpty()) {
            this.removeChild(topChild);
            topChild = this.getChildren()[0];
        }
    };
    /**
     * Inserts `node` as the new first row, links it as the previous sibling
     * of the former first row and registers each column of the former first
     * row with the column that sits above it in `node`.
     *
     * Columns are matched by their colspan-expanded start position, the same
     * way XMLParser matches rows while parsing. The previous implementation
     * used the plain child index, which picks the wrong column as soon as
     * the lower row contains a colspan > 1.
     */
    TableNode.prototype.prependChild = function (node) {
        var prevTopChild = this.getChildren()[0];
        this.getChildren().unshift(node);
        if (node.getParent() !== this) {
            node.setParent(this);
        }
        if (prevTopChild === null || prevTopChild === void 0) {
            return;
        }
        prevTopChild.setPreviousSibling(node);
        // Width of a column in grid cells; anything that is not > 1 counts as 1.
        var spanOf = function (col) {
            var span = col.getColSpan();
            return span > 1 ? span : 1;
        };
        // Expanded view of the new top row: one entry per grid position.
        var colArrTop = [];
        node.getChildren().forEach(function (col) {
            var span = spanOf(col);
            for (var k = 0; k < span; k++) {
                colArrTop.push(col);
            }
        });
        var gridIndex = 0;
        prevTopChild.getChildren().forEach(function (col) {
            var topCol = colArrTop[gridIndex];
            // The top row may be narrower than the row below it.
            if (topCol && !col.getTopSiblings().includes(topCol)) {
                col.addTopSibling(topCol);
            }
            gridIndex += spanOf(col);
        });
    };
    /**
     * Returns one array per row, produced by each row's toArray.
     * @param parseValues forwarded to each row; defaults to true.
     */
    TableNode.prototype.toArray = function (parseValues) {
        if (parseValues === void 0) { parseValues = true; }
        return this.getChildren().map(function (row) { return row.toArray(parseValues); });
    };
    /** Explicitly sets the header row returned by getHeaderRow. */
    TableNode.prototype.setHeaderRow = function (row) {
        this.headerRow = row;
    };
    /**
     * Heuristically locates the header row.
     * @returns the row index, or null when no header row is found.
     */
    TableNode.prototype.getHeaderRowIndex = function () {
        var rows = this.getChildren();
        // assume body index starts with row that has a non-bold number in it
        // ('}}' is the marker that closes a bold run in node text).
        var bodyIndex = rows.findIndex(function (row) {
            return row.getChildren().some(function (col) {
                return typeof col.parseValue() === 'number' && !col.getText().includes('}}');
            });
        });
        var countValues = function (row) {
            return row.getChildren().map(function (col) { return col.parseValue(); }).filter(Boolean).length;
        };
        // if the header row is only one value, or empty, it's likely a label
        // in the table body, so keep moving up.
        var headerIndex = bodyIndex - 1;
        while (headerIndex >= 0 && rows[headerIndex] && countValues(rows[headerIndex]) <= 1) {
            headerIndex--;
        }
        return headerIndex >= 0 ? headerIndex : null;
    };
    /**
     * Merges the text of the rows above the header row into the header
     * row's columns (upper text is prefixed to the header text).
     * @param removeMergedChildren when true (default) the rows above the
     *        header row are removed afterwards.
     * @returns the header row, or undefined when no header row was found.
     */
    TableNode.prototype.mergeHeader = function (removeMergedChildren) {
        if (removeMergedChildren === void 0) { removeMergedChildren = true; }
        var foundIndex = this.getHeaderRowIndex();
        var headerRowIndex = foundIndex !== null && foundIndex !== void 0 ? foundIndex : -1;
        var headerRow = this.getChildren()[headerRowIndex];
        if (!headerRow) {
            return;
        }
        // NOTE(review): toTable is defined on the row node outside this file;
        // it is used here as an array of rows of cell values - confirm.
        var table = headerRow.toTable(false);
        var headerRowCols = headerRow.getChildren();
        // start from the row above the header row to merge.
        for (var rowIndex = headerRowIndex - 1; rowIndex >= 0; rowIndex--) {
            var curRow = table[rowIndex];
            if (!curRow) {
                continue;
            }
            // go through each header column to merge with the one above.
            for (var colIndex = 0; colIndex < curRow.length; colIndex++) {
                var headerCol = headerRowCols[colIndex];
                // Skip positions that have no header column to write into.
                if (!headerCol || table[headerRowIndex][colIndex] === null) {
                    continue;
                }
                // if prev header col is empty, get nearest to the left.
                var colIndexCur = colIndex;
                while (colIndexCur >= 0 && !curRow[colIndexCur]) {
                    colIndexCur--;
                }
                // if the value is empty, continue.
                var colValue = curRow[colIndexCur];
                if (!colValue || !this.parseValue("".concat(colValue))) {
                    continue;
                }
                headerCol.setText("".concat(colValue, " ").concat(headerCol.getText()).trim());
            }
        }
        if (removeMergedChildren) {
            while (this.getChildren().length > 0 && this.getChildren()[0] !== headerRow) {
                this.removeTopChild();
            }
        }
        return headerRow;
    };
    /**
     * If header row is not set, this will try to find it.
     * @returns the header row, or null when there is none.
     */
    TableNode.prototype.getHeaderRow = function () {
        if (this.headerRow !== null && this.headerRow !== void 0) {
            return this.headerRow;
        }
        var index = this.getHeaderRowIndex();
        var row = index === null ? void 0 : this.getChildren()[index];
        return row !== null && row !== void 0 ? row : null;
    };
    return TableNode;
}(XMLNode_1.XMLNode));
|
|
132
|
+
exports.TableNode = TableNode;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/** Options accepted by the XMLNode constructor. */
interface XMLNodeArgs {
    /** Raw attribute text of the element's opening tag. */
    attributesStr?: string;
    /** Dot-separated tag path of the element. */
    path?: string;
}
/** Options for XMLNode.getSiblings. */
interface GetSiblingsParams {
    /** Direction in which the sibling chain is walked. */
    dir: 'previous' | 'next';
    /** When true, the sibling that stopped the walk is appended to the result. */
    includeStopAtType?: boolean;
    /** The walk stops at the first sibling that is an instance of this type. */
    stopAtType?: new () => XMLNode;
}
/** Tree node with parent, child and sibling links plus accumulated text. */
export declare class XMLNode {
    private parent;
    private previousSibling;
    private nextSibling;
    private children;
    private text;
    private attributesStr;
    private path;
    constructor(args?: XMLNodeArgs);
    /** Dot-separated tag path given at construction, without segments containing ':'. */
    getPath(): string;
    getText(): string;
    setText(text: string): void;
    /** Attribute text exactly as given at construction. */
    getAttributesStr(): string;
    /** Attributes parsed from the attribute text, keyed by lower-cased name. */
    getAttributes(): Record<string, string>;
    getParent(): XMLNode | null;
    setParent(node: XMLNode | null): void;
    getChildren(): XMLNode[];
    addChild(node: XMLNode): void;
    removeChild(node: XMLNode): void;
    getPreviousSibling(): XMLNode | null;
    setPreviousSibling(node: XMLNode | null): void;
    getNextSibling(): XMLNode | null;
    setNextSibling(node: XMLNode | null): void;
    /** Collects siblings in one direction until a sibling of the stop type is reached. */
    getSiblings(params: GetSiblingsParams): XMLNode[];
    /** Returns the text found between '{{' and '}}' markers; defaults to this node's text. */
    extractBold(str?: string): string;
    /** Cleans the text and returns a number, a string, or null; defaults to this node's text. */
    parseValue(str?: string | null): string | number | null;
}
|
|
37
|
+
export {};
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.XMLNode = void 0;
|
|
4
|
+
var XMLNode = /** @class */ (function () {
    // Node text is accumulated from raw markup, so special characters can
    // arrive either literally or as HTML entities; both spellings are matched.
    var NBSP = /\u00a0|&nbsp;|&#160;/g;
    var NEWLINE_OR_NBSP = /\n|\u00a0|&nbsp;|&#160;/g;
    var REGISTERED = /\u00ae|&reg;|&#174;/g;
    var EN_DASH = /\u2013|&ndash;|&#8211;/g;
    var DASHES = /\u2013|\u2014|&ndash;|&mdash;|&#8211;|&#8212;/g;
    var CURLY_QUOTES = /\u2019|\u201c|\u201d|&rsquo;|&ldquo;|&rdquo;|&#8217;|&#8220;|&#8221;/g;
    /**
     * Base tree node: keeps parent/children/sibling links, the text
     * collected for the element, its raw attribute string and its tag path.
     * @param args optional `{ path, attributesStr }`; both default to ''.
     */
    function XMLNode(args) {
        var options = args !== null && args !== void 0 ? args : {};
        var attributesStr = options.attributesStr === void 0 ? '' : options.attributesStr;
        var path = options.path === void 0 ? '' : options.path;
        this.parent = null;
        this.previousSibling = null;
        this.nextSibling = null;
        this.children = [];
        this.text = '';
        this.attributesStr = attributesStr;
        // Path segments containing ':' (namespaced tags) are left out.
        this.path = path.split('.').filter(function (part) { return !part.includes(':'); }).join('.');
    }
    /**
     * Walks the sibling chain in one direction and returns the nodes passed.
     * @param params.dir 'previous' or 'next'.
     * @param params.stopAtType optional constructor; the walk stops at the
     *        first sibling that is an instance of it. When omitted the walk
     *        runs to the end of the chain (the old default of XMLNode matched
     *        every sibling, so the result was always empty).
     * @param params.includeStopAtType when true, the stopping sibling is
     *        appended to the result.
     * @throws Error('infinite loop') when the sibling chain is circular.
     */
    XMLNode.prototype.getSiblings = function (params) {
        var stopAtType = params.stopAtType;
        var includeStopAtType = params.includeStopAtType === void 0 ? false : params.includeStopAtType;
        var isPrevious = params.dir === 'previous';
        var isStop = function (node) {
            return typeof stopAtType === 'function' && node instanceof stopAtType;
        };
        var siblings = [];
        // Cycle detection replaces the former fixed cap of 1000 iterations,
        // which also rejected legitimately long chains.
        var visited = new Set();
        var cursor = isPrevious ? this.getPreviousSibling() : this.getNextSibling();
        while (cursor && !isStop(cursor)) {
            if (cursor === this || visited.has(cursor)) {
                throw new Error('infinite loop');
            }
            visited.add(cursor);
            siblings.push(cursor);
            cursor = isPrevious ? cursor.getPreviousSibling() : cursor.getNextSibling();
        }
        if (includeStopAtType && cursor && isStop(cursor)) {
            siblings.push(cursor);
        }
        return siblings;
    };
    /**
     * Returns the text enclosed in '{{' ... '}}' markers.
     * Adjacent runs ('}}{{') are joined with ' | ', separate runs with ' || ',
     * and whitespace is collapsed.
     * @param str text to scan; defaults to this node's text.
     */
    XMLNode.prototype.extractBold = function (str) {
        if (str === void 0) { str = this.text; }
        var runs = str
            .replace(/\n/g, '')
            .replace(/\}\}\{\{/g, ' | ')
            .replace(NBSP, ' ')
            .replace(EN_DASH, '- ')
            .match(/(?<=\{\{).*?(?=\}\})/g);
        var boldText = runs ? runs.join(' || ') : '';
        return boldText.replace(/\s+/g, ' ').trim();
    };
    /**
     * Cleans a cell/text value and converts it when it looks numeric.
     * @param str text to parse; defaults to this node's text.
     * @returns
     *  - null for null input, empty text, or a lone '$' / '-' placeholder
     *    (optionally surrounded by spaces, parentheses, commas or '%');
     *  - '-' when the raw input is exactly one dash character;
     *  - the cleaned text when it contains '%' and is otherwise numeric;
     *  - a number for numeric text (negative when it contains '(' or '-');
     *  - the cleaned text otherwise.
     */
    XMLNode.prototype.parseValue = function (str) {
        if (str === void 0) { str = this.text; }
        if (str === null) {
            return null;
        }
        var text = str
            .replace(NEWLINE_OR_NBSP, ' ')
            .replace(REGISTERED, '')
            .replace(DASHES, '-')
            .replace(CURLY_QUOTES, "'")
            .replace(/\}\}\{\{/g, ' ')
            .replace(/\{\{|\}\}/g, '')
            // global flag: collapse every whitespace run, not just the first.
            .replace(/\s+/g, ' ')
            .trim();
        if (str.replace(DASHES, '-') === '-') {
            return '-';
        }
        if (text === '') {
            return null;
        }
        var colNum = text.replace(/,|\(|\)|\%/g, '').trim();
        if (colNum === '-' || colNum === '$') {
            return null;
        }
        colNum = colNum.replace(/\-|\$/g, '');
        // Nothing but punctuation was present (e.g. '$ -' or '()').
        // Number('') is 0, so without this check such placeholders became 0.
        if (colNum.trim() === '') {
            return null;
        }
        var parsed = Number(colNum);
        if (isNaN(parsed)) {
            return text;
        }
        if (text.includes('%')) {
            return text.replace(/[^a-zA-Z\d\s:]/g, '') === '' ? null : text;
        }
        // NOTE(review): any hyphen marks the value as negative, so text such
        // as '2020-12-31' is returned as -20201231 - confirm this is intended.
        var isNegative = text.includes('(') || text.includes('-');
        if (!isNegative) {
            return parsed;
        }
        // Avoid returning -0 for inputs like '(0)'.
        return parsed === 0 ? 0 : parsed * -1;
    };
    /**
     * Sets the previous sibling and keeps the reverse links consistent:
     * the former previous sibling no longer points at this node, and the
     * new one gets this node as its next sibling.
     */
    XMLNode.prototype.setPreviousSibling = function (node) {
        var formerPrevious = this.previousSibling;
        this.previousSibling = node;
        if (formerPrevious !== null && formerPrevious !== void 0 && formerPrevious.getNextSibling() === this) {
            formerPrevious.setNextSibling(null);
        }
        if (node !== null && node !== void 0 && node.getNextSibling() !== this) {
            node.setNextSibling(this);
        }
    };
    /**
     * Sets the next sibling and keeps the reverse links consistent
     * (mirror image of setPreviousSibling).
     */
    XMLNode.prototype.setNextSibling = function (node) {
        var formerNext = this.nextSibling;
        this.nextSibling = node;
        if (formerNext !== null && formerNext !== void 0 && formerNext.getPreviousSibling() === this) {
            formerNext.setPreviousSibling(null);
        }
        if (node !== null && node !== void 0 && node.getPreviousSibling() !== this) {
            node.setPreviousSibling(this);
        }
    };
    /**
     * Removes `node` from the children and clears its parent link when that
     * link points here. A node that is not a child is left alone: the old
     * `splice(indexOf(node), 1)` removed the LAST child in that case because
     * indexOf returned -1. A missing node is ignored.
     * Sibling links of the removed node are not touched.
     */
    XMLNode.prototype.removeChild = function (node) {
        if (node === null || node === void 0) {
            return;
        }
        var index = this.children.indexOf(node);
        if (index !== -1) {
            this.children.splice(index, 1);
        }
        if (node.getParent() === this) {
            node.setParent(null);
        }
    };
    /**
     * Moves this node under `node` (or detaches it when null): it is removed
     * from the former parent's children and appended to the new parent's.
     * Re-assigning the current parent is a no-op; previously it moved the
     * node to the end of the parent's child list.
     */
    XMLNode.prototype.setParent = function (node) {
        var formerParent = this.parent;
        var hasNode = node !== null && node !== void 0;
        if (node === formerParent && (!hasNode || node.getChildren().includes(this))) {
            return;
        }
        this.parent = node;
        if (formerParent !== null && formerParent !== void 0 && formerParent.getChildren().includes(this)) {
            formerParent.removeChild(this);
        }
        if (hasNode && !node.getChildren().includes(this)) {
            node.addChild(this);
        }
    };
    XMLNode.prototype.getNextSibling = function () {
        return this.nextSibling;
    };
    XMLNode.prototype.getPreviousSibling = function () {
        return this.previousSibling;
    };
    XMLNode.prototype.getParent = function () {
        return this.parent;
    };
    /**
     * Parses the raw attribute string into an object keyed by lower-cased
     * attribute name. Handles double- and single-quoted values, values that
     * contain '=' (previously cut at the first '=' inside the value) and
     * names containing '-' or ':'. Unquoted values are not recognised.
     */
    XMLNode.prototype.getAttributes = function () {
        var attributes = {};
        var pattern = /([\w:-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
        var match = pattern.exec(this.attributesStr);
        while (match !== null) {
            attributes[match[1].toLowerCase()] = match[2] !== void 0 ? match[2] : match[3];
            match = pattern.exec(this.attributesStr);
        }
        return attributes;
    };
    XMLNode.prototype.getAttributesStr = function () {
        return this.attributesStr;
    };
    /** Returns the live children array (not a copy). */
    XMLNode.prototype.getChildren = function () {
        return this.children;
    };
    /** Appends `node` to the children and makes this node its parent. */
    XMLNode.prototype.addChild = function (node) {
        this.children.push(node);
        if (node.getParent() !== this) {
            node.setParent(this);
        }
    };
    XMLNode.prototype.getText = function () {
        return this.text;
    };
    XMLNode.prototype.setText = function (text) {
        this.text = text;
    };
    XMLNode.prototype.getPath = function () {
        return this.path;
    };
    return XMLNode;
}());
|
|
154
|
+
exports.XMLNode = XMLNode;
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { DocumentNode } from './XMLNode/DocumentNode';
|
|
2
|
+
/** Data passed to every iterateXML callback. */
interface OnCharacterData {
    /** Character at `index` in the input. */
    char: string;
    /** Position in the input string. */
    index: number;
    /** Dot-separated, lower-cased tag path. */
    path: string;
    /** Number of times `path` has been opened so far. */
    pathOccurrenceCount: number;
    /** Attribute text collected for the current tag. */
    attributesStr: string;
}
/** Plain callback used by iterateXML. */
type CharacterCallback = (data: OnCharacterData) => void;
/** Callback used by getTableTextMap: the base data plus the cell-text map being built. */
type TableCharacterCallback = (data: OnCharacterData & {
    textMap: Map<string, string>;
}) => void;
interface ParseTableNodesParams {
    xml: string;
}
interface Parse2Params {
    xml: string;
    onOpenTag?: CharacterCallback;
    onCharacter?: CharacterCallback;
    onCloseTag?: CharacterCallback;
}
interface IterateTablesParams {
    xml: string;
    /** When set, only tables directly under this path are considered. */
    parentPath?: string;
    /** Collapse consecutive spaces in cell text (the implementation defaults this to true). */
    trimSpaces?: boolean;
    onOpenTag?: TableCharacterCallback;
    onCharacter?: TableCharacterCallback;
    onCloseTag?: TableCharacterCallback;
}
export default class XMLParser {
    /**
     * Walks the markup once, invoking the callbacks, and returns the path of
     * every opening tag in document order.
     */
    iterateXML(params: Parse2Params): string[];
    /**
     * Returns text in each table cell mapped by `${table}.${row}.${col}`
     */
    getTableTextMap(params: IterateTablesParams): Map<string, string>;
    /** Builds a DocumentNode tree from the markup. */
    getDocumentNode(params: ParseTableNodesParams): DocumentNode;
}
|
|
40
|
+
export {};
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * Shallow-merge helper (TypeScript object-spread down-level emit).
 * Reuses an existing `this.__assign` when present; otherwise resolves to
 * Object.assign, or to an own-property copy loop where that is missing.
 */
var __assign = (this && this.__assign) || function () {
    var copyOwnProperties = function (target) {
        var sourceCount = arguments.length;
        for (var index = 1; index < sourceCount; index++) {
            var source = arguments[index];
            for (var key in source) {
                if (Object.prototype.hasOwnProperty.call(source, key)) {
                    target[key] = source[key];
                }
            }
        }
        return target;
    };
    // Replace this bootstrap with the chosen implementation, then delegate.
    __assign = Object.assign || copyOwnProperties;
    return __assign.apply(this, arguments);
};
|
|
13
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
14
|
+
var ColNode_1 = require("./XMLNode/ColNode");
|
|
15
|
+
var DocumentNode_1 = require("./XMLNode/DocumentNode");
|
|
16
|
+
var HRNode_1 = require("./XMLNode/HRNode");
|
|
17
|
+
var NonTableNode_1 = require("./XMLNode/NonTableNode");
|
|
18
|
+
var RowNode_1 = require("./XMLNode/RowNode");
|
|
19
|
+
var TableNode_1 = require("./XMLNode/TableNode");
|
|
20
|
+
var XMLParser = /** @class */ (function () {
    /** Minimal streaming parser for XML/HTML-like markup. */
    function XMLParser() {
    }
    /**
     * Scans `params.xml` once and reports opening tags, closing tags and
     * text characters through the optional callbacks.
     *
     * Every callback receives `{ char, index, path, pathOccurrenceCount,
     * attributesStr }`. `path` is the dot-separated, lower-cased tag path.
     * Tags listed as self-enclosing fire onOpenTag followed immediately by
     * onCloseTag and do not become part of the current path. Text before the
     * first opening tag is not reported. Markup starting with '<?' or '<!'
     * is reported character by character as text.
     *
     * Note: attribute text is only reset when a closing tag is seen, so a
     * nested opening tag also carries the attribute text of the tags opened
     * before it; '/' characters are dropped from attribute text.
     *
     * An unterminated tag at the end of the input stops the scan (it used to
     * loop forever for '</x' and throw a TypeError for '<x').
     *
     * @returns the path of every opening tag, in document order.
     * @throws Error('too many iterations') when a tag is longer than
     *         1,000,000 characters.
     */
    XMLParser.prototype.iterateXML = function (params) {
        var xml = params.xml, onCharacter = params.onCharacter, onCloseTag = params.onCloseTag, onOpenTag = params.onOpenTag;
        var selfEnclosingTags = new Set([
            'filename',
            'description',
            'br',
            'meta',
            'link',
            'img',
            'input',
            'hr',
            'area',
            'base',
            'col',
            'command',
            'embed',
            'keygen',
            'param',
            'source',
            'track',
            'wbr',
        ]);
        var spaceChars = new Set(['\n', '\r', '\t', ' ']);
        var pathOccurrenceCountMap = new Map();
        var emit = function (callback, data) {
            if (callback !== null && callback !== void 0) {
                callback(data);
            }
        };
        var curPath = '';
        var curAttributes = '';
        var didStart = false;
        var pathsArr = [];
        for (var i = 0; i < xml.length; i++) {
            var char = xml[i];
            var nextChar = xml[i + 1];
            var isOpenTag = char === '<' && nextChar !== '/' && nextChar !== '?' && nextChar !== '!';
            var isCloseTag = char === '<' && nextChar === '/';
            var seenCount = pathOccurrenceCountMap.get(curPath);
            var onCharacterData = {
                char: char,
                index: i,
                path: curPath,
                pathOccurrenceCount: seenCount !== null && seenCount !== void 0 ? seenCount : 0,
                attributesStr: curAttributes,
            };
            if (isOpenTag) {
                var openEnd = xml.indexOf('>', i);
                if (openEnd === -1) {
                    // Truncated input: no '>' left, nothing more to parse.
                    break;
                }
                if (openEnd - i - 1 > 1000000) {
                    throw new Error('too many iterations');
                }
                didStart = true;
                var curTag = '';
                var didEndTagName = false;
                for (var k = i + 1; k < openEnd; k++) {
                    var tagChar = xml[k];
                    // The tag name ends at the first whitespace or '/'.
                    didEndTagName = didEndTagName || spaceChars.has(tagChar) || tagChar === '/';
                    if (!didEndTagName) {
                        curTag += tagChar.toLowerCase();
                    }
                    else if (tagChar !== '/') {
                        curAttributes += tagChar;
                    }
                }
                i = openEnd;
                var pathNew = "".concat(curPath).concat(curPath.length > 0 ? '.' : '').concat(curTag).toLowerCase();
                var countBefore = pathOccurrenceCountMap.get(pathNew);
                var countAfter = (countBefore !== null && countBefore !== void 0 ? countBefore : 0) + 1;
                pathOccurrenceCountMap.set(pathNew, countAfter);
                onCharacterData.path = pathNew;
                onCharacterData.pathOccurrenceCount = countAfter;
                onCharacterData.attributesStr = curAttributes;
                pathsArr.push(pathNew);
                emit(onOpenTag, onCharacterData);
                if (selfEnclosingTags.has(curTag)) {
                    emit(onCloseTag, onCharacterData);
                }
                else {
                    curPath = pathNew;
                }
            }
            else if (isCloseTag) {
                var closeEnd = xml.indexOf('>', i);
                if (closeEnd === -1) {
                    // Truncated input: the closing tag never ends.
                    break;
                }
                i = closeEnd;
                emit(onCloseTag, onCharacterData);
                // Pop the last path segment. For a root-level tag there is no
                // '.', and slicing with -1 would merely chop one character
                // off the tag name ('div' -> 'di') instead of clearing it.
                var lastDot = curPath.lastIndexOf('.');
                curPath = lastDot === -1 ? '' : curPath.slice(0, lastDot);
                curAttributes = '';
            }
            else if (didStart) {
                emit(onCharacter, onCharacterData);
            }
        }
        return pathsArr;
    };
    /**
     * Returns text in each table cell mapped by `${table}.${row}.${col}`
     * (all three counters are 1-based).
     *
     * @param params.xml markup to scan.
     * @param params.parentPath when given, only tables whose path is exactly
     *        `${parentPath}.table` are counted (rows/cells directly or via
     *        thead/tbody/tfoot). When omitted, any tag named table / tr /
     *        td / th counts. The tag name itself is compared, so tags that
     *        merely end in those letters (e.g. 'path') are not counted.
     * @param params.trimSpaces collapse consecutive spaces (default true).
     * @param params.onOpenTag, onCharacter, onCloseTag optional callbacks;
     *        they receive the iterateXML data plus `textMap` (the map being
     *        built).
     */
    XMLParser.prototype.getTableTextMap = function (params) {
        var xml = params.xml, parentPath = params.parentPath, onCharacter = params.onCharacter, onCloseTag = params.onCloseTag, onOpenTag = params.onOpenTag;
        var trimSpaces = params.trimSpaces === void 0 ? true : params.trimSpaces;
        var rowPaths = new Set([
            "".concat(parentPath, ".table.tbody.tr"),
            "".concat(parentPath, ".table.thead.tr"),
            "".concat(parentPath, ".table.tfoot.tr"),
            "".concat(parentPath, ".table.tr"),
        ]);
        var colPaths = new Set([
            "".concat(parentPath, ".table.tbody.tr.td"),
            "".concat(parentPath, ".table.thead.tr.td"),
            "".concat(parentPath, ".table.tfoot.tr.td"),
            "".concat(parentPath, ".table.tr.td"),
            "".concat(parentPath, ".table.tbody.tr.th"),
            "".concat(parentPath, ".table.thead.tr.th"),
            "".concat(parentPath, ".table.tfoot.tr.th"),
            "".concat(parentPath, ".table.tr.th"),
        ]);
        var table = 0;
        var row = 0;
        var col = 0;
        var textByColKey = new Map();
        var spaceChars = new Set(['\n', '\r', '\t']);
        var currentKey = function () { return "".concat(table, ".").concat(row, ".").concat(col); };
        var currentText = function () {
            var text = textByColKey.get(currentKey());
            return text !== null && text !== void 0 ? text : '';
        };
        var withTextMap = function (data) { return Object.assign({}, data, { textMap: textByColKey }); };
        var forward = function (callback, data) {
            if (callback !== null && callback !== void 0) {
                callback(withTextMap(data));
            }
        };
        // Text gathered under column 0 lies outside any cell; drop it when blank.
        var dropBlankOutsideCell = function () {
            if (col === 0 && currentText().trim().length === 0) {
                textByColKey.delete(currentKey());
                return true;
            }
            return false;
        };
        this.iterateXML({
            xml: xml,
            onOpenTag: function (data) {
                var pathLower = data.path.toLowerCase();
                var tagName = pathLower.slice(pathLower.lastIndexOf('.') + 1);
                dropBlankOutsideCell();
                var isTable = parentPath ? pathLower === "".concat(parentPath, ".table") : tagName === 'table';
                var isRow = parentPath ? rowPaths.has(pathLower) : tagName === 'tr';
                var isCol = parentPath ? colPaths.has(pathLower) : tagName === 'td' || tagName === 'th';
                if (isTable) {
                    table++;
                    col = 0;
                    row = 0;
                }
                else if (isRow) {
                    row++;
                    col = 0;
                }
                else if (isCol) {
                    col++;
                }
                forward(onOpenTag, data);
            },
            onCharacter: function (data) {
                var char = spaceChars.has(data.char) ? ' ' : data.char;
                var textCur = currentText();
                var isRepeatedSpace = trimSpaces && char === ' ' && textCur.endsWith(' ');
                if (!isRepeatedSpace) {
                    textByColKey.set(currentKey(), "".concat(textCur).concat(char));
                }
                forward(onCharacter, data);
            },
            onCloseTag: function (data) {
                if (!dropBlankOutsideCell()) {
                    var textCur = currentText();
                    // Separate the text of adjacent elements with one space.
                    if (!textCur.endsWith(' ')) {
                        textByColKey.set(currentKey(), "".concat(textCur, " "));
                    }
                }
                forward(onCloseTag, data);
            },
        });
        return textByColKey;
    };
    /**
     * Builds a DocumentNode whose children are the top-level nodes found in
     * the markup: HR nodes, table nodes (with row and column nodes below
     * them) and non-table nodes that collect the remaining text.
     * Bold runs are wrapped in '{{' ... '}}' inside node text. Tags inside
     * nested tables do not create nodes; their text is appended to the
     * current node.
     */
    XMLParser.prototype.getDocumentNode = function (params) {
        var xml = params.xml;
        var rowsArr = [];
        var colsArr = [];
        var documentNode = new DocumentNode_1.DocumentNode();
        var curNode = null;
        var prevRowCols = [];
        var curRowCols = [];
        var isBold = false;
        var boldPath = null;
        var orNull = function (value) { return (value !== null && value !== void 0 ? value : null); };
        // Places `col` in the colspan-expanded view of the current row and
        // links it to the column occupying the same start position above.
        var pushColToRow = function (col) {
            var colSpan = col.getColSpan();
            var colIndex = curRowCols.length;
            col.setIndex(colIndex);
            for (var i = 0; i < colSpan; i++) {
                curRowCols.push(col);
            }
            var topSibling = prevRowCols[colIndex];
            if (topSibling !== null && topSibling !== void 0) {
                topSibling.addBottomSibling(col);
            }
        };
        this.iterateXML({
            xml: xml,
            onCloseTag: function () {
                // Close the bold run on the first closing tag seen while the
                // current node is still the one the run started in.
                if (curNode && curNode.getPath() === boldPath) {
                    var text = curNode.getText();
                    curNode.setText("".concat(text !== null && text !== void 0 ? text : '', "}}"));
                    boldPath = null;
                }
            },
            onCharacter: function (data) {
                if (curNode) {
                    var text = curNode.getText();
                    curNode.setText((text !== null && text !== void 0 ? text : '') + data.char);
                }
            },
            onOpenTag: function (data) {
                var path = data.path, attributesStr = data.attributesStr;
                var segments = path.split('.');
                var tableDepth = segments.filter(function (segment) { return segment === 'table'; }).length;
                // skip nested tables
                if (tableDepth > 1) {
                    return;
                }
                var tag = segments[segments.length - 1];
                // Counts actual 'table' segments rather than testing for the
                // substring, so tag names that merely contain 'table' are
                // not mistaken for table content.
                var isInTable = tableDepth > 0;
                var topLevelNodes = documentNode.getChildren();
                var prevTopLevelNode = topLevelNodes[topLevelNodes.length - 1];
                var wasHorizontalLine = prevTopLevelNode instanceof HRNode_1.HRNode;
                var wasNonTableNode = prevTopLevelNode instanceof NonTableNode_1.NonTableNode;
                var wasBold = isBold;
                var attributesLower = attributesStr.toLowerCase().replace(/\s/g, '');
                isBold =
                    tag === 'b' ||
                        tag === 'strong' ||
                        attributesLower.includes('font-weight:bold') ||
                        attributesLower.includes('font-weight:700') ||
                        attributesLower.includes('font-weight:800') ||
                        attributesLower.includes('font-weight:900');
                if (!isInTable) {
                    prevRowCols = [];
                    curRowCols = [];
                }
                if (tag === 'hr' && !isInTable) {
                    var hr = new HRNode_1.HRNode({ attributesStr: attributesStr, path: path });
                    hr.setPreviousSibling(orNull(prevTopLevelNode));
                    topLevelNodes.push(hr);
                    curNode = hr;
                }
                else if (tag === 'table') {
                    var table = new TableNode_1.TableNode({ attributesStr: attributesStr, path: path });
                    table.setPreviousSibling(orNull(prevTopLevelNode));
                    topLevelNodes.push(table);
                    curNode = table;
                }
                else if (tag === 'tr') {
                    var row = new RowNode_1.RowNode({ attributesStr: attributesStr, path: path });
                    var prevRow = rowsArr[rowsArr.length - 1];
                    // The most recent top-level node is taken as the row's table.
                    row.setParent(prevTopLevelNode);
                    var sameTable = prevRow !== null && prevRow !== void 0 && prevRow.getParent() === row.getParent();
                    row.setPreviousSibling(sameTable ? prevRow : null);
                    rowsArr.push(row);
                    prevRowCols = curRowCols;
                    curRowCols = [];
                    curNode = row;
                }
                else if (tag === 'td' || tag === 'th') {
                    var col = new ColNode_1.ColNode({ attributesStr: attributesStr, path: path });
                    var prevCol = colsArr[colsArr.length - 1];
                    col.setParent(rowsArr[rowsArr.length - 1]);
                    var sameRow = prevCol !== null && prevCol !== void 0 && prevCol.getParent() === col.getParent();
                    col.setPreviousSibling(sameRow ? prevCol : null);
                    colsArr.push(col);
                    pushColToRow(col);
                    curNode = col;
                }
                else if ((!isInTable && !wasNonTableNode) || (wasHorizontalLine && tag !== 'hr')) {
                    var node = new NonTableNode_1.NonTableNode({ attributesStr: attributesStr, path: path });
                    node.setPreviousSibling(orNull(prevTopLevelNode));
                    topLevelNodes.push(node);
                    curNode = node;
                }
                else if (curNode && !curNode.getText().endsWith('\n')) {
                    // Any other tag only marks a line break in the current node.
                    curNode.setText("".concat(curNode.getText().trim(), "\n"));
                }
                if (curNode && isBold && !wasBold && !curNode.getText().endsWith('{{')) {
                    curNode.setText("".concat(curNode.getText().trim(), "{{"));
                }
                if (isBold) {
                    boldPath = curNode ? orNull(curNode.getPath()) : null;
                }
            },
        });
        return documentNode;
    };
    return XMLParser;
}());
|
|
306
|
+
exports.default = XMLParser;
|