md-spreadsheet-parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +158 -0
- package/dist/interfaces/example-spreadsheet-types.d.ts +50 -0
- package/dist/interfaces/exports.d.ts +100 -0
- package/dist/interfaces/wasi-cli-environment.d.ts +4 -0
- package/dist/interfaces/wasi-cli-exit.d.ts +3 -0
- package/dist/interfaces/wasi-cli-stderr.d.ts +3 -0
- package/dist/interfaces/wasi-cli-stdin.d.ts +3 -0
- package/dist/interfaces/wasi-cli-stdout.d.ts +3 -0
- package/dist/interfaces/wasi-cli-terminal-input.d.ts +8 -0
- package/dist/interfaces/wasi-cli-terminal-output.d.ts +8 -0
- package/dist/interfaces/wasi-cli-terminal-stderr.d.ts +3 -0
- package/dist/interfaces/wasi-cli-terminal-stdin.d.ts +3 -0
- package/dist/interfaces/wasi-cli-terminal-stdout.d.ts +3 -0
- package/dist/interfaces/wasi-clocks-monotonic-clock.d.ts +8 -0
- package/dist/interfaces/wasi-clocks-wall-clock.d.ts +7 -0
- package/dist/interfaces/wasi-filesystem-preopens.d.ts +3 -0
- package/dist/interfaces/wasi-filesystem-types.d.ts +208 -0
- package/dist/interfaces/wasi-io-error.d.ts +9 -0
- package/dist/interfaces/wasi-io-poll.d.ts +11 -0
- package/dist/interfaces/wasi-io-streams.d.ts +40 -0
- package/dist/interfaces/wasi-random-insecure-seed.d.ts +2 -0
- package/dist/interfaces/wasi-random-insecure.d.ts +3 -0
- package/dist/interfaces/wasi-random-random.d.ts +3 -0
- package/dist/interfaces/wasi-sockets-instance-network.d.ts +3 -0
- package/dist/interfaces/wasi-sockets-ip-name-lookup.d.ts +15 -0
- package/dist/interfaces/wasi-sockets-network.d.ts +92 -0
- package/dist/interfaces/wasi-sockets-tcp-create-socket.d.ts +5 -0
- package/dist/interfaces/wasi-sockets-tcp.d.ts +54 -0
- package/dist/interfaces/wasi-sockets-udp-create-socket.d.ts +5 -0
- package/dist/interfaces/wasi-sockets-udp.d.ts +53 -0
- package/dist/parser.core.wasm +0 -0
- package/dist/parser.core2.wasm +0 -0
- package/dist/parser.core3.wasm +0 -0
- package/dist/parser.core4.wasm +0 -0
- package/dist/parser.core5.wasm +0 -0
- package/dist/parser.core6.wasm +0 -0
- package/dist/parser.core7.wasm +0 -0
- package/dist/parser.core8.wasm +0 -0
- package/dist/parser.core9.wasm +0 -0
- package/dist/parser.d.ts +68 -0
- package/dist/parser.js +28872 -0
- package/dist/parser.wasm +0 -0
- package/package.json +36 -0
- package/src/__pycache__/app.cpython-314.pyc +0 -0
- package/src/__pycache__/generated_adapter.cpython-314.pyc +0 -0
- package/src/app.py +164 -0
- package/src/client-adapters.ts +40 -0
- package/src/generated_adapter.py +247 -0
- package/src/index.ts +317 -0
package/dist/parser.wasm
ADDED
|
Binary file
|
package/package.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "md-spreadsheet-parser",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "A robust Markdown table parser and manipulator, powered by Python and WebAssembly.",
|
|
5
|
+
"main": "src/index.ts",
|
|
6
|
+
"types": "src/index.ts",
|
|
7
|
+
"type": "module",
|
|
8
|
+
"scripts": {
|
|
9
|
+
"build": "node scripts/build.mjs",
|
|
10
|
+
"test": "node scripts/test.mjs"
|
|
11
|
+
},
|
|
12
|
+
"files": [
|
|
13
|
+
"dist",
|
|
14
|
+
"src"
|
|
15
|
+
],
|
|
16
|
+
"keywords": [
|
|
17
|
+
"markdown",
|
|
18
|
+
"spreadsheet",
|
|
19
|
+
"parser",
|
|
20
|
+
"wasm",
|
|
21
|
+
"webassembly",
|
|
22
|
+
"python"
|
|
23
|
+
],
|
|
24
|
+
"author": "f-y",
|
|
25
|
+
"license": "MIT",
|
|
26
|
+
"devDependencies": {
|
|
27
|
+
"@bytecodealliance/jco": "^1.0.0",
|
|
28
|
+
"@bytecodealliance/preview2-shim": "^0.17.0",
|
|
29
|
+
"typescript": "^5.9.3",
|
|
30
|
+
"zod": "^4.3.4"
|
|
31
|
+
},
|
|
32
|
+
"dependencies": {
|
|
33
|
+
"execa": "^9.6.1",
|
|
34
|
+
"shelljs": "^0.10.0"
|
|
35
|
+
}
|
|
36
|
+
}
|
|
Binary file
|
|
Binary file
|
package/src/app.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import md_spreadsheet_parser.parsing
|
|
2
|
+
import md_spreadsheet_parser.generator
|
|
3
|
+
import md_spreadsheet_parser.loader
|
|
4
|
+
import dataclasses
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
from generated_adapter import *
|
|
8
|
+
|
|
9
|
+
class WitWorld:
    """Export surface that forwards each call to ``md_spreadsheet_parser``.

    Every method passes on only the arguments that are not ``None``, unwraps
    adapter records into library objects on the way in, and converts library
    results back into adapter records on the way out.
    """

    def clean_cell(self, cell: Any = None, schema: Any = None):
        """Forward to ``parsing.clean_cell`` and return its result unchanged."""
        call_args = {}
        if cell is not None:
            call_args['cell'] = cell
        if schema is not None:
            call_args['schema'] = unwrap_parsing_schema(schema)
        return md_spreadsheet_parser.parsing.clean_cell(**call_args)

    def split_row_gfm(self, line: Any = None, separator: Any = None):
        """Forward to ``parsing.split_row_gfm`` and return its result unchanged."""
        call_args = {}
        if line is not None:
            call_args['line'] = line
        if separator is not None:
            call_args['separator'] = separator
        return md_spreadsheet_parser.parsing.split_row_gfm(**call_args)

    def parse_row(self, line: Any = None, schema: Any = None):
        """Forward to ``parsing.parse_row`` and return its result unchanged."""
        call_args = {}
        if line is not None:
            call_args['line'] = line
        if schema is not None:
            call_args['schema'] = unwrap_parsing_schema(schema)
        return md_spreadsheet_parser.parsing.parse_row(**call_args)

    def parse_separator_row(self, row: Any = None, schema: Any = None):
        """Forward to ``parsing.parse_separator_row`` and convert each alignment."""
        call_args = {}
        if row is not None:
            call_args['row'] = row
        if schema is not None:
            call_args['schema'] = unwrap_parsing_schema(schema)
        alignments = md_spreadsheet_parser.parsing.parse_separator_row(**call_args)
        return [convert_alignment_type(item) for item in alignments]

    def is_separator_row(self, row: Any = None, schema: Any = None):
        """Forward to ``parsing.is_separator_row`` and return its result unchanged."""
        call_args = {}
        if row is not None:
            call_args['row'] = row
        if schema is not None:
            call_args['schema'] = unwrap_parsing_schema(schema)
        return md_spreadsheet_parser.parsing.is_separator_row(**call_args)

    def parse_table(self, markdown: Any = None, schema: Any = None):
        """Forward to ``parsing.parse_table`` and convert the resulting table."""
        call_args = {}
        if markdown is not None:
            call_args['markdown'] = markdown
        if schema is not None:
            call_args['schema'] = unwrap_parsing_schema(schema)
        parsed = md_spreadsheet_parser.parsing.parse_table(**call_args)
        return convert_table(parsed)

    def parse_sheet(self, markdown: Any = None, name: Any = None, schema: Any = None, start_line_offset: Any = None):
        """Forward to ``parsing.parse_sheet`` and convert the resulting sheet."""
        call_args = {}
        if markdown is not None:
            call_args['markdown'] = markdown
        if name is not None:
            call_args['name'] = name
        if schema is not None:
            call_args['schema'] = unwrap_multi_table_parsing_schema(schema)
        if start_line_offset is not None:
            call_args['start_line_offset'] = start_line_offset
        parsed = md_spreadsheet_parser.parsing.parse_sheet(**call_args)
        return convert_sheet(parsed)

    def parse_workbook(self, markdown: Any = None, schema: Any = None):
        """Forward to ``parsing.parse_workbook`` and convert the resulting workbook."""
        call_args = {}
        if markdown is not None:
            call_args['markdown'] = markdown
        if schema is not None:
            call_args['schema'] = unwrap_multi_table_parsing_schema(schema)
        parsed = md_spreadsheet_parser.parsing.parse_workbook(**call_args)
        return convert_workbook(parsed)

    def scan_tables(self, markdown: Any = None, schema: Any = None):
        """Forward to ``parsing.scan_tables`` and convert every table found."""
        call_args = {}
        if markdown is not None:
            call_args['markdown'] = markdown
        if schema is not None:
            call_args['schema'] = unwrap_multi_table_parsing_schema(schema)
        found = md_spreadsheet_parser.parsing.scan_tables(**call_args)
        return [convert_table(item) for item in found]

    def generate_table_markdown(self, table: Any = None, schema: Any = None):
        """Unwrap the table and forward to ``generator.generate_table_markdown``."""
        call_args = {}
        if table is not None:
            call_args['table'] = unwrap_table(table)
        if schema is not None:
            call_args['schema'] = unwrap_parsing_schema(schema)
        return md_spreadsheet_parser.generator.generate_table_markdown(**call_args)

    def generate_sheet_markdown(self, sheet: Any = None, schema: Any = None):
        """Unwrap the sheet and forward to ``generator.generate_sheet_markdown``."""
        call_args = {}
        if sheet is not None:
            call_args['sheet'] = unwrap_sheet(sheet)
        if schema is not None:
            call_args['schema'] = unwrap_parsing_schema(schema)
        return md_spreadsheet_parser.generator.generate_sheet_markdown(**call_args)

    def generate_workbook_markdown(self, workbook: Any = None, schema: Any = None):
        """Unwrap the workbook and forward to ``generator.generate_workbook_markdown``."""
        call_args = {}
        if workbook is not None:
            call_args['workbook'] = unwrap_workbook(workbook)
        if schema is not None:
            call_args['schema'] = unwrap_multi_table_parsing_schema(schema)
        return md_spreadsheet_parser.generator.generate_workbook_markdown(**call_args)

    def parse_table_from_file(self, source: Any = None, schema: Any = None):
        """Forward to ``loader.parse_table_from_file`` and convert the table."""
        call_args = {}
        if source is not None:
            call_args['source'] = source
        if schema is not None:
            call_args['schema'] = unwrap_parsing_schema(schema)
        loaded = md_spreadsheet_parser.loader.parse_table_from_file(**call_args)
        return convert_table(loaded)

    def parse_workbook_from_file(self, source: Any = None, schema: Any = None):
        """Forward to ``loader.parse_workbook_from_file`` and convert the workbook."""
        call_args = {}
        if source is not None:
            call_args['source'] = source
        if schema is not None:
            call_args['schema'] = unwrap_multi_table_parsing_schema(schema)
        loaded = md_spreadsheet_parser.loader.parse_workbook_from_file(**call_args)
        return convert_workbook(loaded)

    def scan_tables_from_file(self, source: Any = None, schema: Any = None):
        """Forward to ``loader.scan_tables_from_file`` and convert every table."""
        call_args = {}
        if source is not None:
            call_args['source'] = source
        if schema is not None:
            call_args['schema'] = unwrap_multi_table_parsing_schema(schema)
        found = md_spreadsheet_parser.loader.scan_tables_from_file(**call_args)
        return [convert_table(item) for item in found]

    def scan_tables_iter(self, source: Any = None, schema: Any = None):
        """Forward to ``loader.scan_tables_iter`` and return ``str()`` of its result."""
        call_args = {}
        if source is not None:
            call_args['source'] = source
        if schema is not None:
            call_args['schema'] = unwrap_multi_table_parsing_schema(schema)
        # NOTE(review): the loader result is stringified, not iterated. If it
        # is a lazy iterator this returns its repr rather than any tables --
        # confirm that is intended.
        outcome = md_spreadsheet_parser.loader.scan_tables_iter(**call_args)
        return str(outcome)

    def table_to_models(self, self_obj: Any, schema_cls: Any = None, conversion_schema: Any = None):
        """Call ``to_models`` on the unwrapped table; return one JSON string per model."""
        real_self = unwrap_table(self_obj)
        call_args = {}
        if schema_cls is not None:
            # schema_cls is looked up by name through resolve_model_class.
            call_args['schema_cls'] = resolve_model_class(schema_cls)
        if conversion_schema is not None:
            call_args['conversion_schema'] = unwrap_conversion_schema(conversion_schema)
        instances = real_self.to_models(**call_args)
        return [json.dumps(dataclasses.asdict(item)) for item in instances]

    def table_to_markdown(self, self_obj: Any, schema: Any = None):
        """Call ``to_markdown`` on the unwrapped table."""
        real_self = unwrap_table(self_obj)
        call_args = {}
        if schema is not None:
            call_args['schema'] = unwrap_parsing_schema(schema)
        return real_self.to_markdown(**call_args)

    def table_update_cell(self, self_obj: Any, row_idx: Any = None, col_idx: Any = None, value: Any = None):
        """Call ``update_cell`` on the unwrapped table and convert the result."""
        real_self = unwrap_table(self_obj)
        call_args = {}
        if row_idx is not None:
            call_args['row_idx'] = row_idx
        if col_idx is not None:
            call_args['col_idx'] = col_idx
        if value is not None:
            call_args['value'] = value
        updated = real_self.update_cell(**call_args)
        return convert_table(updated)

    def table_delete_row(self, self_obj: Any, row_idx: Any = None):
        """Call ``delete_row`` on the unwrapped table and convert the result."""
        real_self = unwrap_table(self_obj)
        call_args = {}
        if row_idx is not None:
            call_args['row_idx'] = row_idx
        updated = real_self.delete_row(**call_args)
        return convert_table(updated)

    def table_delete_column(self, self_obj: Any, col_idx: Any = None):
        """Call ``delete_column`` on the unwrapped table and convert the result."""
        real_self = unwrap_table(self_obj)
        call_args = {}
        if col_idx is not None:
            call_args['col_idx'] = col_idx
        updated = real_self.delete_column(**call_args)
        return convert_table(updated)

    def table_clear_column_data(self, self_obj: Any, col_idx: Any = None):
        """Call ``clear_column_data`` on the unwrapped table and convert the result."""
        real_self = unwrap_table(self_obj)
        call_args = {}
        if col_idx is not None:
            call_args['col_idx'] = col_idx
        updated = real_self.clear_column_data(**call_args)
        return convert_table(updated)

    def table_insert_row(self, self_obj: Any, row_idx: Any = None):
        """Call ``insert_row`` on the unwrapped table and convert the result."""
        real_self = unwrap_table(self_obj)
        call_args = {}
        if row_idx is not None:
            call_args['row_idx'] = row_idx
        updated = real_self.insert_row(**call_args)
        return convert_table(updated)

    def table_insert_column(self, self_obj: Any, col_idx: Any = None):
        """Call ``insert_column`` on the unwrapped table and convert the result."""
        real_self = unwrap_table(self_obj)
        call_args = {}
        if col_idx is not None:
            call_args['col_idx'] = col_idx
        updated = real_self.insert_column(**call_args)
        return convert_table(updated)

    def sheet_get_table(self, self_obj: Any, name: Any = None):
        """Call ``get_table`` on the unwrapped sheet and convert the result."""
        real_self = unwrap_sheet(self_obj)
        call_args = {}
        if name is not None:
            call_args['name'] = name
        located = real_self.get_table(**call_args)
        return convert_table(located)

    def sheet_to_markdown(self, self_obj: Any, schema: Any = None):
        """Call ``to_markdown`` on the unwrapped sheet."""
        real_self = unwrap_sheet(self_obj)
        call_args = {}
        if schema is not None:
            call_args['schema'] = unwrap_parsing_schema(schema)
        return real_self.to_markdown(**call_args)

    def workbook_get_sheet(self, self_obj: Any, name: Any = None):
        """Call ``get_sheet`` on the unwrapped workbook and convert the result."""
        real_self = unwrap_workbook(self_obj)
        call_args = {}
        if name is not None:
            call_args['name'] = name
        located = real_self.get_sheet(**call_args)
        return convert_sheet(located)

    def workbook_to_markdown(self, self_obj: Any, schema: Any = None):
        """Call ``to_markdown`` on the unwrapped workbook."""
        real_self = unwrap_workbook(self_obj)
        call_args = {}
        if schema is not None:
            call_args['schema'] = unwrap_multi_table_parsing_schema(schema)
        return real_self.to_markdown(**call_args)

    def workbook_add_sheet(self, self_obj: Any, name: Any = None):
        """Call ``add_sheet`` on the unwrapped workbook and convert the result."""
        real_self = unwrap_workbook(self_obj)
        call_args = {}
        if name is not None:
            call_args['name'] = name
        updated = real_self.add_sheet(**call_args)
        return convert_workbook(updated)

    def workbook_delete_sheet(self, self_obj: Any, index: Any = None):
        """Call ``delete_sheet`` on the unwrapped workbook and convert the result."""
        real_self = unwrap_workbook(self_obj)
        call_args = {}
        if index is not None:
            call_args['index'] = index
        updated = real_self.delete_sheet(**call_args)
        return convert_workbook(updated)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
|
|
2
|
+
/**
 * Map table rows to objects on the client when `schemaCls` is an object schema.
 *
 * - If `schemaCls` has a `parse` function (Zod-like), each row becomes a
 *   `{ header: cell }` object that is passed through `schemaCls.parse`.
 * - Otherwise `schemaCls` is treated as a `{ header: converter }` map: a cell
 *   whose header has a function entry is passed through that function, and
 *   every other cell is copied as-is.
 *
 * Cells whose header is missing or empty are skipped.
 *
 * @param headers Column names; required when `schemaCls` is an object.
 * @param rows Table rows, one array of cells per row.
 * @param schemaCls Schema object; any non-object value opts out.
 * @returns The mapped rows, or `null` when `schemaCls` is not an object so the
 *   caller can fall through to the WASM backend.
 * @throws Error when `schemaCls` is an object but `headers` or `rows` is missing.
 */
export function clientSideToModels(headers: string[] | undefined | null, rows: any[][], schemaCls: any): any[] | null {
    if (typeof schemaCls !== 'object' || schemaCls === null) {
        // Return null to indicate fallthrough to WASM backend
        return null;
    }

    // Client-Side Schema Support
    if (!headers) throw new Error('Table must have headers for client-side mapping');
    if (!rows) throw new Error('Table has no rows');
    const columns: string[] = headers;

    // 1. Zod-like Schema (has .parse method)
    if (typeof schemaCls.parse === 'function') {
        return rows.map((row: any) => {
            const rawObj: any = {};
            row.forEach((v: string, i: number) => {
                const h = columns[i];
                if (h) {
                    rawObj[h] = v;
                }
            });
            return schemaCls.parse(rawObj);
        });
    }

    // 2. Object Mapping Schema
    const builtins: any = Object.prototype;
    return rows.map((row: any) => {
        const obj: any = {};
        row.forEach((v: string, i: number) => {
            const h = columns[i];
            if (!h) return;
            const candidate = schemaCls[h];
            // A column named "constructor", "toString", "valueOf", ... would
            // otherwise pick up the function every object inherits from
            // Object.prototype and be run through it instead of being copied.
            const isConverter = typeof candidate === 'function' && candidate !== builtins[h];
            obj[h] = isConverter ? candidate(v) : v;
        });
        return obj;
    });
}
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from dataclasses import dataclass, asdict
|
|
3
|
+
from typing import Any
|
|
4
|
+
import md_spreadsheet_parser.models as models
|
|
5
|
+
import md_spreadsheet_parser.schemas as schemas
|
|
6
|
+
|
|
7
|
+
def resolve_model_class(name: str) -> Any:
    """Look up *name* as an attribute of ``models``, then of ``schemas``.

    ``models`` takes precedence: ``schemas`` is only consulted when ``models``
    has no attribute called *name*.

    Raises:
        ValueError: if neither module has the attribute, or the attribute
            found is falsy.
    """
    for namespace in (models, schemas):
        if not hasattr(namespace, name):
            continue
        found = getattr(namespace, name)
        if found:
            return found
        # The first module that has the attribute decides the outcome.
        break
    raise ValueError(f'Unknown model/schema class: {name}')
|
|
16
|
+
|
|
17
|
+
def convert_alignment_type(val: str) -> str:
    """Return *val* unchanged."""
    # Return string directly as WIT type is string
    alignment = val
    return alignment
|
|
20
|
+
|
|
21
|
+
@dataclass
class WitTable:
    """Adapter record for a table; every field defaults to ``None``."""

    # Copied as-is by convert_table / unwrap_table.
    headers: Any = None
    rows: Any = None
    alignments: Any = None
    name: Any = None
    description: Any = None
    # JSON text: written with json.dumps, read back with json.loads.
    metadata: Any = None
    start_line: Any = None
    end_line: Any = None
|
|
31
|
+
|
|
32
|
+
def convert_table(obj: Any) -> WitTable:
    """Build a ``WitTable`` from *obj*, or return ``None`` when *obj* is ``None``.

    Alignments are passed one by one through ``convert_alignment_type``;
    metadata is serialised to JSON text. Either stays ``None`` when the
    source attribute is ``None``.
    """
    if obj is None:
        return None
    return WitTable(
        headers=obj.headers,
        rows=obj.rows,
        alignments=(
            None if obj.alignments is None
            else [convert_alignment_type(item) for item in obj.alignments]
        ),
        name=obj.name,
        description=obj.description,
        # Falsy non-None metadata is serialised as an empty JSON object.
        metadata=None if obj.metadata is None else json.dumps(obj.metadata or {}),
        start_line=obj.start_line,
        end_line=obj.end_line,
    )
|
|
44
|
+
|
|
45
|
+
def unwrap_table(obj: Any) -> Any:
    """Build a ``models.Table`` from *obj*, or return ``None`` for ``None``.

    Only attributes that are not ``None`` are passed to the constructor;
    ``metadata`` is decoded from JSON text first.
    """
    if obj is None:
        return None
    attr_names = (
        'headers', 'rows', 'alignments', 'name',
        'description', 'metadata', 'start_line', 'end_line',
    )
    init_args = {}
    for attr in attr_names:
        current = getattr(obj, attr)
        if current is None:
            continue
        init_args[attr] = json.loads(current) if attr == 'metadata' else current
    return models.Table(**init_args)
|
|
65
|
+
|
|
66
|
+
@dataclass
class WitSheet:
    """Adapter record for a sheet; every field defaults to ``None``."""

    name: Any = None
    # List of converted tables (see convert_sheet).
    tables: Any = None
    # JSON text: written with json.dumps, read back with json.loads.
    metadata: Any = None
|
|
71
|
+
|
|
72
|
+
def convert_sheet(obj: Any) -> WitSheet:
    """Build a ``WitSheet`` from *obj*, or return ``None`` when *obj* is ``None``.

    Each table goes through ``convert_table``; metadata is serialised to
    JSON text and stays ``None`` when the source metadata is ``None``.
    """
    if obj is None:
        return None
    return WitSheet(
        name=obj.name,
        tables=[convert_table(item) for item in obj.tables],
        # Falsy non-None metadata is serialised as an empty JSON object.
        metadata=None if obj.metadata is None else json.dumps(obj.metadata or {}),
    )
|
|
79
|
+
|
|
80
|
+
def unwrap_sheet(obj: Any) -> Any:
    """Build a ``models.Sheet`` from *obj*, or return ``None`` for ``None``.

    Only attributes that are not ``None`` are passed to the constructor:
    tables are unwrapped one by one and metadata is decoded from JSON text.
    """
    if obj is None:
        return None
    init_args = {}
    sheet_name = obj.name
    if sheet_name is not None:
        init_args['name'] = sheet_name
    wrapped_tables = obj.tables
    if wrapped_tables is not None:
        init_args['tables'] = [unwrap_table(item) for item in wrapped_tables]
    encoded_metadata = obj.metadata
    if encoded_metadata is not None:
        init_args['metadata'] = json.loads(encoded_metadata)
    return models.Sheet(**init_args)
|
|
90
|
+
|
|
91
|
+
@dataclass
class WitWorkbook:
    """Adapter record for a workbook; every field defaults to ``None``."""

    # List of converted sheets (see convert_workbook).
    sheets: Any = None
    # JSON text: written with json.dumps, read back with json.loads.
    metadata: Any = None
|
|
95
|
+
|
|
96
|
+
def convert_workbook(obj: Any) -> WitWorkbook:
    """Build a ``WitWorkbook`` from *obj*, or return ``None`` for ``None``.

    Each sheet goes through ``convert_sheet``; metadata is serialised to
    JSON text and stays ``None`` when the source metadata is ``None``.
    """
    if obj is None:
        return None
    return WitWorkbook(
        sheets=[convert_sheet(item) for item in obj.sheets],
        # Falsy non-None metadata is serialised as an empty JSON object.
        metadata=None if obj.metadata is None else json.dumps(obj.metadata or {}),
    )
|
|
102
|
+
|
|
103
|
+
def unwrap_workbook(obj: Any) -> Any:
    """Build a ``models.Workbook`` from *obj*, or return ``None`` for ``None``.

    Only attributes that are not ``None`` are passed to the constructor:
    sheets are unwrapped one by one and metadata is decoded from JSON text.
    """
    if obj is None:
        return None
    init_args = {}
    wrapped_sheets = obj.sheets
    if wrapped_sheets is not None:
        init_args['sheets'] = [unwrap_sheet(item) for item in wrapped_sheets]
    encoded_metadata = obj.metadata
    if encoded_metadata is not None:
        init_args['metadata'] = json.loads(encoded_metadata)
    return models.Workbook(**init_args)
|
|
111
|
+
|
|
112
|
+
@dataclass
class WitParsingSchema:
    """Adapter record for single-table parsing options.

    Every field defaults to ``None``; ``unwrap_parsing_schema`` leaves
    ``None`` fields out when it builds the library schema.
    """

    column_separator: Any = None
    header_separator_char: Any = None
    require_outer_pipes: Any = None
    strip_whitespace: Any = None
    convert_br_to_newline: Any = None
|
|
119
|
+
|
|
120
|
+
def convert_parsing_schema(obj: Any) -> WitParsingSchema:
    """Copy the parsing options of *obj* into a ``WitParsingSchema``.

    Returns ``None`` when *obj* is ``None``; every field is copied as-is.
    """
    if obj is None:
        return None
    return WitParsingSchema(
        column_separator=obj.column_separator,
        header_separator_char=obj.header_separator_char,
        require_outer_pipes=obj.require_outer_pipes,
        strip_whitespace=obj.strip_whitespace,
        convert_br_to_newline=obj.convert_br_to_newline,
    )
|
|
129
|
+
|
|
130
|
+
def unwrap_parsing_schema(obj: Any) -> Any:
    """Build a ``schemas.ParsingSchema`` from *obj*, or return ``None`` for ``None``.

    Only attributes that are not ``None`` are passed to the constructor.
    """
    if obj is None:
        return None
    attr_names = (
        'column_separator',
        'header_separator_char',
        'require_outer_pipes',
        'strip_whitespace',
        'convert_br_to_newline',
    )
    init_args = {}
    for attr in attr_names:
        current = getattr(obj, attr)
        if current is not None:
            init_args[attr] = current
    return schemas.ParsingSchema(**init_args)
|
|
144
|
+
|
|
145
|
+
@dataclass
class WitMultiTableParsingSchema:
    """Adapter record for multi-table parsing options.

    Every field defaults to ``None``; ``unwrap_multi_table_parsing_schema``
    leaves ``None`` fields out when it builds the library schema.
    """

    # Same five fields as WitParsingSchema.
    column_separator: Any = None
    header_separator_char: Any = None
    require_outer_pipes: Any = None
    strip_whitespace: Any = None
    convert_br_to_newline: Any = None
    # Fields present only on the multi-table schema.
    root_marker: Any = None
    sheet_header_level: Any = None
    table_header_level: Any = None
    capture_description: Any = None
|
|
156
|
+
|
|
157
|
+
def convert_multi_table_parsing_schema(obj: Any) -> WitMultiTableParsingSchema:
    """Copy the options of *obj* into a ``WitMultiTableParsingSchema``.

    Returns ``None`` when *obj* is ``None``; every field is copied as-is.
    """
    if obj is None:
        return None
    return WitMultiTableParsingSchema(
        column_separator=obj.column_separator,
        header_separator_char=obj.header_separator_char,
        require_outer_pipes=obj.require_outer_pipes,
        strip_whitespace=obj.strip_whitespace,
        convert_br_to_newline=obj.convert_br_to_newline,
        root_marker=obj.root_marker,
        sheet_header_level=obj.sheet_header_level,
        table_header_level=obj.table_header_level,
        capture_description=obj.capture_description,
    )
|
|
170
|
+
|
|
171
|
+
def unwrap_multi_table_parsing_schema(obj: Any) -> Any:
    """Build a ``schemas.MultiTableParsingSchema`` from *obj*.

    Returns ``None`` when *obj* is ``None``. Only attributes that are not
    ``None`` are passed to the constructor.
    """
    if obj is None:
        return None
    attr_names = (
        'column_separator',
        'header_separator_char',
        'require_outer_pipes',
        'strip_whitespace',
        'convert_br_to_newline',
        'root_marker',
        'sheet_header_level',
        'table_header_level',
        'capture_description',
    )
    init_args = {}
    for attr in attr_names:
        current = getattr(obj, attr)
        if current is not None:
            init_args[attr] = current
    return schemas.MultiTableParsingSchema(**init_args)
|
|
193
|
+
|
|
194
|
+
@dataclass
class WitConversionSchema:
    """Adapter record for conversion options; every field defaults to ``None``."""

    # Written with str() by convert_conversion_schema.
    boolean_pairs: Any = None
    # JSON text: written with json.dumps, read back with json.loads.
    custom_converters: Any = None
    field_converters: Any = None
|
|
199
|
+
|
|
200
|
+
def convert_conversion_schema(obj: Any) -> WitConversionSchema:
    """Build a ``WitConversionSchema`` from *obj*, or return ``None`` for ``None``.

    ``boolean_pairs`` is rendered with ``str()``; the two converter mappings
    are serialised to JSON text. Each stays ``None`` when the source
    attribute is ``None``.
    """
    if obj is None:
        return None
    # NOTE(review): boolean_pairs is stringified here, while
    # unwrap_conversion_schema forwards it untouched, so a round trip does
    # not restore the original value -- confirm this is intended.
    return WitConversionSchema(
        boolean_pairs=None if obj.boolean_pairs is None else str(obj.boolean_pairs),
        # Falsy non-None mappings are serialised as an empty JSON object.
        custom_converters=(
            None if obj.custom_converters is None
            else json.dumps(obj.custom_converters or {})
        ),
        field_converters=(
            None if obj.field_converters is None
            else json.dumps(obj.field_converters or {})
        ),
    )
|
|
207
|
+
|
|
208
|
+
def unwrap_conversion_schema(obj: Any) -> Any:
    """Build a ``schemas.ConversionSchema`` from *obj*, or return ``None`` for ``None``.

    Only attributes that are not ``None`` are passed to the constructor.
    ``boolean_pairs`` is forwarded as-is; the two converter fields are
    decoded from JSON text.
    """
    if obj is None:
        return None
    json_encoded = ('custom_converters', 'field_converters')
    init_args = {}
    for attr in ('boolean_pairs',) + json_encoded:
        current = getattr(obj, attr)
        if current is None:
            continue
        init_args[attr] = json.loads(current) if attr in json_encoded else current
    return schemas.ConversionSchema(**init_args)
|
|
218
|
+
|
|
219
|
+
@dataclass
class WitExcelParsingSchema:
    """Adapter record for Excel parsing options.

    Every field defaults to ``None``; ``unwrap_excel_parsing_schema`` leaves
    ``None`` fields out when it builds the library schema.
    """

    header_rows: Any = None
    fill_merged_headers: Any = None
    delimiter: Any = None
    header_separator: Any = None
|
|
225
|
+
|
|
226
|
+
def convert_excel_parsing_schema(obj: Any) -> WitExcelParsingSchema:
    """Copy the options of *obj* into a ``WitExcelParsingSchema``.

    Returns ``None`` when *obj* is ``None``; every field is copied as-is.
    """
    if obj is None:
        return None
    return WitExcelParsingSchema(
        header_rows=obj.header_rows,
        fill_merged_headers=obj.fill_merged_headers,
        delimiter=obj.delimiter,
        header_separator=obj.header_separator,
    )
|
|
234
|
+
|
|
235
|
+
def unwrap_excel_parsing_schema(obj: Any) -> Any:
    """Build a ``schemas.ExcelParsingSchema`` from *obj*, or return ``None`` for ``None``.

    Only attributes that are not ``None`` are passed to the constructor.
    """
    if obj is None:
        return None
    attr_names = (
        'header_rows',
        'fill_merged_headers',
        'delimiter',
        'header_separator',
    )
    init_args = {}
    for attr in attr_names:
        current = getattr(obj, attr)
        if current is not None:
            init_args[attr] = current
    return schemas.ExcelParsingSchema(**init_args)
|
|
247
|
+
|