md-spreadsheet-parser 1.2.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +14 -8
- package/dist/index.js +59 -34
- package/dist/interfaces/example-spreadsheet-types.d.ts +6 -0
- package/dist/interfaces/wasi-sockets-ip-name-lookup.d.ts +2 -2
- package/dist/parser.core.wasm +0 -0
- package/dist/parser.core2.wasm +0 -0
- package/dist/parser.core3.wasm +0 -0
- package/dist/parser.core4.wasm +0 -0
- package/dist/parser.core5.wasm +0 -0
- package/dist/parser.core6.wasm +0 -0
- package/dist/parser.core7.wasm +0 -0
- package/dist/parser.core8.wasm +0 -0
- package/dist/parser.core9.wasm +0 -0
- package/dist/parser.js +4846 -3081
- package/package.json +2 -2
- package/src/__pycache__/app.cpython-314.pyc +0 -0
- package/src/__pycache__/generated_adapter.cpython-314.pyc +0 -0
- package/src/generated_adapter.py +24 -0
- package/src/index.ts +73 -42
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "md-spreadsheet-parser",
|
|
3
|
-
"version": "1.2.2",
|
|
3
|
+
"version": "1.4.0",
|
|
4
4
|
"description": "A robust Markdown table parser and manipulator, powered by Python and WebAssembly.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
"author": "f-y",
|
|
25
25
|
"repository": {
|
|
26
26
|
"type": "git",
|
|
27
|
-
"url": "git+https://github.com/
|
|
27
|
+
"url": "git+https://github.com/fy-labs/md-spreadsheet-parser.git"
|
|
28
28
|
},
|
|
29
29
|
"license": "MIT",
|
|
30
30
|
"devDependencies": {
|
|
Binary file
|
|
Binary file
|
package/src/generated_adapter.py
CHANGED
|
@@ -67,6 +67,8 @@ def unwrap_table(obj: Any) -> Any:
|
|
|
67
67
|
class WitSheet:
|
|
68
68
|
name: Any = None
|
|
69
69
|
tables: Any = None
|
|
70
|
+
sheet_type: Any = None
|
|
71
|
+
content: Any = None
|
|
70
72
|
metadata: Any = None
|
|
71
73
|
|
|
72
74
|
def convert_sheet(obj: Any) -> WitSheet:
|
|
@@ -74,6 +76,8 @@ def convert_sheet(obj: Any) -> WitSheet:
|
|
|
74
76
|
res = WitSheet()
|
|
75
77
|
res.name = obj.name
|
|
76
78
|
res.tables = [convert_table(x) for x in obj.tables]
|
|
79
|
+
res.sheet_type = str(obj.sheet_type) if obj.sheet_type is not None else None
|
|
80
|
+
res.content = obj.content
|
|
77
81
|
res.metadata = json.dumps(obj.metadata or {}) if obj.metadata is not None else None
|
|
78
82
|
return res
|
|
79
83
|
|
|
@@ -84,6 +88,10 @@ def unwrap_sheet(obj: Any) -> Any:
|
|
|
84
88
|
kwargs['name'] = obj.name
|
|
85
89
|
if obj.tables is not None:
|
|
86
90
|
kwargs['tables'] = [unwrap_table(x) for x in obj.tables]
|
|
91
|
+
if obj.sheet_type is not None:
|
|
92
|
+
kwargs['sheet_type'] = obj.sheet_type
|
|
93
|
+
if obj.content is not None:
|
|
94
|
+
kwargs['content'] = obj.content
|
|
87
95
|
if obj.metadata is not None:
|
|
88
96
|
kwargs['metadata'] = json.loads(obj.metadata)
|
|
89
97
|
return models.Sheet(**kwargs)
|
|
@@ -91,13 +99,21 @@ def unwrap_sheet(obj: Any) -> Any:
|
|
|
91
99
|
@dataclass
|
|
92
100
|
class WitWorkbook:
|
|
93
101
|
sheets: Any = None
|
|
102
|
+
name: Any = None
|
|
103
|
+
start_line: Any = None
|
|
104
|
+
end_line: Any = None
|
|
94
105
|
metadata: Any = None
|
|
106
|
+
root_content: Any = None
|
|
95
107
|
|
|
96
108
|
def convert_workbook(obj: Any) -> WitWorkbook:
|
|
97
109
|
if obj is None: return None
|
|
98
110
|
res = WitWorkbook()
|
|
99
111
|
res.sheets = [convert_sheet(x) for x in obj.sheets]
|
|
112
|
+
res.name = obj.name
|
|
113
|
+
res.start_line = obj.start_line
|
|
114
|
+
res.end_line = obj.end_line
|
|
100
115
|
res.metadata = json.dumps(obj.metadata or {}) if obj.metadata is not None else None
|
|
116
|
+
res.root_content = obj.root_content
|
|
101
117
|
return res
|
|
102
118
|
|
|
103
119
|
def unwrap_workbook(obj: Any) -> Any:
|
|
@@ -105,8 +121,16 @@ def unwrap_workbook(obj: Any) -> Any:
|
|
|
105
121
|
kwargs = {}
|
|
106
122
|
if obj.sheets is not None:
|
|
107
123
|
kwargs['sheets'] = [unwrap_sheet(x) for x in obj.sheets]
|
|
124
|
+
if obj.name is not None:
|
|
125
|
+
kwargs['name'] = obj.name
|
|
126
|
+
if obj.start_line is not None:
|
|
127
|
+
kwargs['start_line'] = obj.start_line
|
|
128
|
+
if obj.end_line is not None:
|
|
129
|
+
kwargs['end_line'] = obj.end_line
|
|
108
130
|
if obj.metadata is not None:
|
|
109
131
|
kwargs['metadata'] = json.loads(obj.metadata)
|
|
132
|
+
if obj.root_content is not None:
|
|
133
|
+
kwargs['root_content'] = obj.root_content
|
|
110
134
|
return models.Workbook(**kwargs)
|
|
111
135
|
|
|
112
136
|
@dataclass
|
package/src/index.ts
CHANGED
|
@@ -144,16 +144,19 @@ export class Table {
|
|
|
144
144
|
startLine: number | undefined;
|
|
145
145
|
endLine: number | undefined;
|
|
146
146
|
|
|
147
|
-
constructor(data?: Partial<Table>) {
|
|
147
|
+
constructor(data?: Partial<Table> & Record<string, any>) {
|
|
148
148
|
if (data) {
|
|
149
|
-
this.headers = data.headers;
|
|
150
|
-
this.rows = data.rows;
|
|
151
|
-
this.alignments = data.alignments;
|
|
152
|
-
this.name = data.name;
|
|
153
|
-
this.description = data.description;
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
149
|
+
this.headers = data.headers ?? data.headers;
|
|
150
|
+
this.rows = data.rows ?? data.rows;
|
|
151
|
+
this.alignments = data.alignments ?? data.alignments;
|
|
152
|
+
this.name = data.name ?? data.name;
|
|
153
|
+
this.description = data.description ?? data.description;
|
|
154
|
+
{
|
|
155
|
+
const val = data.metadata ?? data.metadata;
|
|
156
|
+
this.metadata = (typeof val === 'string') ? JSON.parse(val) : val;
|
|
157
|
+
}
|
|
158
|
+
this.startLine = data.startLine ?? data.start_line;
|
|
159
|
+
this.endLine = data.endLine ?? data.end_line;
|
|
157
160
|
}
|
|
158
161
|
}
|
|
159
162
|
|
|
@@ -274,13 +277,20 @@ export class Table {
|
|
|
274
277
|
export class Sheet {
|
|
275
278
|
name: string | undefined;
|
|
276
279
|
tables: any[] | undefined;
|
|
280
|
+
sheetType: string | undefined;
|
|
281
|
+
content: any | undefined;
|
|
277
282
|
metadata: any | undefined;
|
|
278
283
|
|
|
279
|
-
constructor(data?: Partial<Sheet>) {
|
|
284
|
+
constructor(data?: Partial<Sheet> & Record<string, any>) {
|
|
280
285
|
if (data) {
|
|
281
|
-
this.name = data.name;
|
|
282
|
-
this.tables = (data.tables || []).map((x: any) => x instanceof Table ? x : new Table(x));
|
|
283
|
-
this.
|
|
286
|
+
this.name = data.name ?? data.name;
|
|
287
|
+
this.tables = ((data.tables ?? data.tables) || []).map((x: any) => x instanceof Table ? x : new Table(x));
|
|
288
|
+
this.sheetType = data.sheetType ?? data.sheet_type;
|
|
289
|
+
this.content = data.content ?? data.content;
|
|
290
|
+
{
|
|
291
|
+
const val = data.metadata ?? data.metadata;
|
|
292
|
+
this.metadata = (typeof val === 'string') ? JSON.parse(val) : val;
|
|
293
|
+
}
|
|
284
294
|
}
|
|
285
295
|
}
|
|
286
296
|
|
|
@@ -299,6 +309,8 @@ export class Sheet {
|
|
|
299
309
|
return {
|
|
300
310
|
name: this.name,
|
|
301
311
|
tables: (this.tables || []).map((t: any) => t.json ? t.json : t),
|
|
312
|
+
sheetType: this.sheetType,
|
|
313
|
+
content: this.content,
|
|
302
314
|
metadata: this.metadata ?? {},
|
|
303
315
|
};
|
|
304
316
|
}
|
|
@@ -360,12 +372,23 @@ export class Sheet {
|
|
|
360
372
|
|
|
361
373
|
export class Workbook {
|
|
362
374
|
sheets: any[] | undefined;
|
|
375
|
+
name: string | undefined;
|
|
376
|
+
startLine: number | undefined;
|
|
377
|
+
endLine: number | undefined;
|
|
363
378
|
metadata: any | undefined;
|
|
379
|
+
rootContent: any | undefined;
|
|
364
380
|
|
|
365
|
-
constructor(data?: Partial<Workbook>) {
|
|
381
|
+
constructor(data?: Partial<Workbook> & Record<string, any>) {
|
|
366
382
|
if (data) {
|
|
367
|
-
this.sheets = (data.sheets || []).map((x: any) => x instanceof Sheet ? x : new Sheet(x));
|
|
368
|
-
this.
|
|
383
|
+
this.sheets = ((data.sheets ?? data.sheets) || []).map((x: any) => x instanceof Sheet ? x : new Sheet(x));
|
|
384
|
+
this.name = data.name ?? data.name;
|
|
385
|
+
this.startLine = data.startLine ?? data.start_line;
|
|
386
|
+
this.endLine = data.endLine ?? data.end_line;
|
|
387
|
+
{
|
|
388
|
+
const val = data.metadata ?? data.metadata;
|
|
389
|
+
this.metadata = (typeof val === 'string') ? JSON.parse(val) : val;
|
|
390
|
+
}
|
|
391
|
+
this.rootContent = data.rootContent ?? data.root_content;
|
|
369
392
|
}
|
|
370
393
|
}
|
|
371
394
|
|
|
@@ -382,8 +405,10 @@ export class Workbook {
|
|
|
382
405
|
*/
|
|
383
406
|
get json(): any {
|
|
384
407
|
return {
|
|
408
|
+
name: this.name,
|
|
385
409
|
sheets: (this.sheets || []).map((s: any) => s.json ? s.json : s),
|
|
386
410
|
metadata: this.metadata ?? {},
|
|
411
|
+
rootContent: this.rootContent,
|
|
387
412
|
};
|
|
388
413
|
}
|
|
389
414
|
|
|
@@ -449,13 +474,13 @@ export class ParsingSchema {
|
|
|
449
474
|
stripWhitespace: boolean | undefined;
|
|
450
475
|
convertBrToNewline: boolean | undefined;
|
|
451
476
|
|
|
452
|
-
constructor(data?: Partial<ParsingSchema>) {
|
|
477
|
+
constructor(data?: Partial<ParsingSchema> & Record<string, any>) {
|
|
453
478
|
if (data) {
|
|
454
|
-
this.columnSeparator = data.columnSeparator;
|
|
455
|
-
this.headerSeparatorChar = data.headerSeparatorChar;
|
|
456
|
-
this.requireOuterPipes = data.requireOuterPipes;
|
|
457
|
-
this.stripWhitespace = data.stripWhitespace;
|
|
458
|
-
this.convertBrToNewline = data.convertBrToNewline;
|
|
479
|
+
this.columnSeparator = data.columnSeparator ?? data.column_separator;
|
|
480
|
+
this.headerSeparatorChar = data.headerSeparatorChar ?? data.header_separator_char;
|
|
481
|
+
this.requireOuterPipes = data.requireOuterPipes ?? data.require_outer_pipes;
|
|
482
|
+
this.stripWhitespace = data.stripWhitespace ?? data.strip_whitespace;
|
|
483
|
+
this.convertBrToNewline = data.convertBrToNewline ?? data.convert_br_to_newline;
|
|
459
484
|
}
|
|
460
485
|
}
|
|
461
486
|
|
|
@@ -471,22 +496,22 @@ export class MultiTableParsingSchema {
|
|
|
471
496
|
requireOuterPipes: boolean | undefined;
|
|
472
497
|
stripWhitespace: boolean | undefined;
|
|
473
498
|
convertBrToNewline: boolean | undefined;
|
|
474
|
-
rootMarker:
|
|
499
|
+
rootMarker: any | undefined;
|
|
475
500
|
sheetHeaderLevel: number | undefined;
|
|
476
501
|
tableHeaderLevel: number | undefined;
|
|
477
502
|
captureDescription: boolean | undefined;
|
|
478
503
|
|
|
479
|
-
constructor(data?: Partial<MultiTableParsingSchema>) {
|
|
504
|
+
constructor(data?: Partial<MultiTableParsingSchema> & Record<string, any>) {
|
|
480
505
|
if (data) {
|
|
481
|
-
this.columnSeparator = data.columnSeparator;
|
|
482
|
-
this.headerSeparatorChar = data.headerSeparatorChar;
|
|
483
|
-
this.requireOuterPipes = data.requireOuterPipes;
|
|
484
|
-
this.stripWhitespace = data.stripWhitespace;
|
|
485
|
-
this.convertBrToNewline = data.convertBrToNewline;
|
|
486
|
-
this.rootMarker = data.rootMarker;
|
|
487
|
-
this.sheetHeaderLevel = data.sheetHeaderLevel;
|
|
488
|
-
this.tableHeaderLevel = data.tableHeaderLevel;
|
|
489
|
-
this.captureDescription = data.captureDescription;
|
|
506
|
+
this.columnSeparator = data.columnSeparator ?? data.column_separator;
|
|
507
|
+
this.headerSeparatorChar = data.headerSeparatorChar ?? data.header_separator_char;
|
|
508
|
+
this.requireOuterPipes = data.requireOuterPipes ?? data.require_outer_pipes;
|
|
509
|
+
this.stripWhitespace = data.stripWhitespace ?? data.strip_whitespace;
|
|
510
|
+
this.convertBrToNewline = data.convertBrToNewline ?? data.convert_br_to_newline;
|
|
511
|
+
this.rootMarker = data.rootMarker ?? data.root_marker;
|
|
512
|
+
this.sheetHeaderLevel = data.sheetHeaderLevel ?? data.sheet_header_level;
|
|
513
|
+
this.tableHeaderLevel = data.tableHeaderLevel ?? data.table_header_level;
|
|
514
|
+
this.captureDescription = data.captureDescription ?? data.capture_description;
|
|
490
515
|
}
|
|
491
516
|
}
|
|
492
517
|
|
|
@@ -501,11 +526,17 @@ export class ConversionSchema {
|
|
|
501
526
|
customConverters: string | undefined;
|
|
502
527
|
fieldConverters: string | undefined;
|
|
503
528
|
|
|
504
|
-
constructor(data?: Partial<ConversionSchema>) {
|
|
529
|
+
constructor(data?: Partial<ConversionSchema> & Record<string, any>) {
|
|
505
530
|
if (data) {
|
|
506
|
-
this.booleanPairs = data.booleanPairs;
|
|
507
|
-
|
|
508
|
-
|
|
531
|
+
this.booleanPairs = data.booleanPairs ?? data.boolean_pairs;
|
|
532
|
+
{
|
|
533
|
+
const val = data.customConverters ?? data.custom_converters;
|
|
534
|
+
this.customConverters = (typeof val === 'string') ? JSON.parse(val) : val;
|
|
535
|
+
}
|
|
536
|
+
{
|
|
537
|
+
const val = data.fieldConverters ?? data.field_converters;
|
|
538
|
+
this.fieldConverters = (typeof val === 'string') ? JSON.parse(val) : val;
|
|
539
|
+
}
|
|
509
540
|
}
|
|
510
541
|
}
|
|
511
542
|
|
|
@@ -523,12 +554,12 @@ export class ExcelParsingSchema {
|
|
|
523
554
|
delimiter: string | undefined;
|
|
524
555
|
headerSeparator: string | undefined;
|
|
525
556
|
|
|
526
|
-
constructor(data?: Partial<ExcelParsingSchema>) {
|
|
557
|
+
constructor(data?: Partial<ExcelParsingSchema> & Record<string, any>) {
|
|
527
558
|
if (data) {
|
|
528
|
-
this.headerRows = data.headerRows;
|
|
529
|
-
this.fillMergedHeaders = data.fillMergedHeaders;
|
|
530
|
-
this.delimiter = data.delimiter;
|
|
531
|
-
this.headerSeparator = data.headerSeparator;
|
|
559
|
+
this.headerRows = data.headerRows ?? data.header_rows;
|
|
560
|
+
this.fillMergedHeaders = data.fillMergedHeaders ?? data.fill_merged_headers;
|
|
561
|
+
this.delimiter = data.delimiter ?? data.delimiter;
|
|
562
|
+
this.headerSeparator = data.headerSeparator ?? data.header_separator;
|
|
532
563
|
}
|
|
533
564
|
}
|
|
534
565
|
|