informatica-python 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- informatica_python/__init__.py +4 -0
- informatica_python/cli.py +83 -0
- informatica_python/converter.py +285 -0
- informatica_python/generators/__init__.py +0 -0
- informatica_python/generators/config_gen.py +159 -0
- informatica_python/generators/error_log_gen.py +140 -0
- informatica_python/generators/helper_gen.py +693 -0
- informatica_python/generators/mapping_gen.py +649 -0
- informatica_python/generators/sql_gen.py +132 -0
- informatica_python/generators/workflow_gen.py +234 -0
- informatica_python/models.py +281 -0
- informatica_python/parser.py +468 -0
- informatica_python/utils/__init__.py +0 -0
- informatica_python/utils/datatype_map.py +105 -0
- informatica_python/utils/expression_converter.py +128 -0
- informatica_python-1.0.0.dist-info/METADATA +118 -0
- informatica_python-1.0.0.dist-info/RECORD +20 -0
- informatica_python-1.0.0.dist-info/WHEEL +5 -0
- informatica_python-1.0.0.dist-info/entry_points.txt +2 -0
- informatica_python-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,468 @@
|
|
|
1
|
+
import xml.etree.ElementTree as ET
|
|
2
|
+
from typing import Optional
|
|
3
|
+
from informatica_python.models import (
|
|
4
|
+
PowermartDef, RepositoryDef, FolderDef, SourceDef, TargetDef,
|
|
5
|
+
MappingDef, TransformationDef, ConnectorDef, InstanceDef,
|
|
6
|
+
FieldDef, TableAttribute, SessionDef, WorkflowDef,
|
|
7
|
+
TaskInstanceDef, WorkflowLink, WorkflowVariable,
|
|
8
|
+
MappingVariable, TargetLoadOrder, SessionTransformInst,
|
|
9
|
+
ConnectionRef, ConfigDef, SchedulerDef, MappletDef,
|
|
10
|
+
ShortcutDef, TaskDef,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class InformaticaParser:
    """Parse Informatica PowerCenter XML exports into model objects.

    The parser is deliberately forgiving: problems are appended to
    ``self.errors`` / ``self.warnings`` instead of raising, so a partially
    broken export still yields a (possibly empty) ``PowermartDef``.
    """

    def __init__(self):
        self.errors = []    # parse failures (a PowermartDef is still returned)
        self.warnings = []  # non-fatal observations (e.g. bad numeric attributes)

    def parse_file(self, file_path: str) -> PowermartDef:
        """Parse an exported XML file into a ``PowermartDef``.

        On a parse error, retries with the DOCTYPE declaration stripped:
        PowerCenter exports reference an external DTD (``powrmart.dtd``)
        that is usually not present locally and can break strict parsers.
        """
        try:
            tree = ET.parse(file_path)
            root = tree.getroot()
            return self._parse_powermart(root)
        except ET.ParseError as e:
            self.errors.append(f"XML parse error: {e}")
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
                content = self._strip_dtd(content)
                root = ET.fromstring(content)
                return self._parse_powermart(root)
            except Exception as e2:
                # Best-effort fallback: record the failure and return an
                # empty model rather than propagate.
                self.errors.append(f"Fallback parse also failed: {e2}")
                return PowermartDef()

    def parse_string(self, xml_string: str) -> PowermartDef:
        """Parse XML content provided as a string (DOCTYPE is pre-stripped)."""
        try:
            cleaned = self._strip_dtd(xml_string)
            root = ET.fromstring(cleaned)
            return self._parse_powermart(root)
        except ET.ParseError as e:
            self.errors.append(f"XML parse error: {e}")
            return PowermartDef()

    def _strip_dtd(self, content: str) -> str:
        """Remove the DOCTYPE declaration, including any internal subset.

        Handles both ``<!DOCTYPE POWERMART SYSTEM "powrmart.dtd">`` and
        declarations with an internal subset ``<!DOCTYPE X [ ... ]>``.
        (A plain ``<!DOCTYPE[^>]*>`` pattern truncates at the first ``>``
        inside an internal subset and leaves invalid XML behind.)
        """
        import re
        return re.sub(
            r'<!DOCTYPE[^>\[]*(\[[^\]]*\])?[^>]*>', '', content, flags=re.DOTALL
        )

    def _attr(self, elem, name, default=""):
        """Return a stripped attribute value, or ``default`` when absent/blank."""
        val = elem.get(name, default)
        return val.strip() if val else default

    def _int_attr(self, elem, name, default=0) -> int:
        """Return an attribute as ``int``, tolerating missing or bad values.

        Non-numeric values are recorded as a warning and replaced with
        ``default`` instead of raising, matching the class's policy of
        never aborting a parse midway.
        """
        raw = self._attr(elem, name, str(default))
        try:
            return int(raw or default)
        except ValueError:
            self.warnings.append(
                f"Non-numeric attribute {name}={raw!r}; using {default}"
            )
            return default

    def _parse_powermart(self, elem) -> PowermartDef:
        """Parse the root POWERMART element and its REPOSITORY children."""
        pm = PowermartDef(
            creation_date=self._attr(elem, "CREATION_DATE"),
            repository_version=self._attr(elem, "REPOSITORY_VERSION"),
        )
        for repo_elem in elem.findall("REPOSITORY"):
            pm.repositories.append(self._parse_repository(repo_elem))
        return pm

    def _parse_repository(self, elem) -> RepositoryDef:
        """Parse a REPOSITORY element and its FOLDER children."""
        repo = RepositoryDef(
            name=self._attr(elem, "NAME"),
            version=self._attr(elem, "VERSION"),
            codepage=self._attr(elem, "CODEPAGE"),
            database_type=self._attr(elem, "DATABASETYPE"),
        )
        for folder_elem in elem.findall("FOLDER"):
            repo.folders.append(self._parse_folder(folder_elem))
        return repo

    def _parse_folder(self, elem) -> FolderDef:
        """Parse a FOLDER element and every object category it contains."""
        folder = FolderDef(
            name=self._attr(elem, "NAME"),
            owner=self._attr(elem, "OWNER"),
            description=self._attr(elem, "DESCRIPTION"),
            group=self._attr(elem, "GROUP"),
            shared=self._attr(elem, "SHARED", "NOTSHARED"),
            permissions=self._attr(elem, "PERMISSIONS"),
        )

        for src in elem.findall("SOURCE"):
            folder.sources.append(self._parse_source(src))

        for tgt in elem.findall("TARGET"):
            folder.targets.append(self._parse_target(tgt))

        for mapping in elem.findall("MAPPING"):
            folder.mappings.append(self._parse_mapping(mapping))

        for mapplet in elem.findall("MAPPLET"):
            folder.mapplets.append(self._parse_mapplet(mapplet))

        for session in elem.findall("SESSION"):
            folder.sessions.append(self._parse_session(session))

        for wf in elem.findall("WORKFLOW"):
            folder.workflows.append(self._parse_workflow(wf))

        for task in elem.findall("TASK"):
            folder.tasks.append(self._parse_task(task))

        for cfg in elem.findall("CONFIG"):
            folder.configs.append(self._parse_config(cfg))

        for sched in elem.findall("SCHEDULER"):
            folder.schedulers.append(self._parse_scheduler(sched))

        for sc in elem.findall("SHORTCUT"):
            folder.shortcuts.append(self._parse_shortcut(sc))

        for tx in elem.findall("TRANSFORMATION"):
            folder.transformations.append(self._parse_transformation(tx))

        # Worklets share the workflow structure; tag them via metadata so
        # downstream generators can tell them apart.
        for worklet in elem.findall("WORKLET"):
            wf_def = self._parse_workflow(worklet)
            wf_def.metadata["is_worklet"] = "YES"
            folder.workflows.append(wf_def)

        return folder

    def _parse_source(self, elem) -> SourceDef:
        """Parse a SOURCE definition with its fields and attributes."""
        src = SourceDef(
            name=self._attr(elem, "NAME"),
            database_type=self._attr(elem, "DATABASETYPE"),
            db_name=self._attr(elem, "DBDNAME"),
            owner_name=self._attr(elem, "OWNERNAME"),
            description=self._attr(elem, "DESCRIPTION"),
            business_name=self._attr(elem, "BUSINESSNAME"),
        )
        for sf in elem.findall("SOURCEFIELD"):
            src.fields.append(self._parse_source_field(sf))
        # FIELDATTRIBUTE and METADATAEXTENSION are folded into the same
        # attributes list as TABLEATTRIBUTE (all are NAME/VALUE pairs).
        for ta in elem.findall("TABLEATTRIBUTE"):
            src.attributes.append(self._parse_table_attribute(ta))
        for fa in elem.findall("FIELDATTRIBUTE"):
            src.attributes.append(self._parse_table_attribute(fa))
        for me in elem.findall("METADATAEXTENSION"):
            src.attributes.append(self._parse_table_attribute(me))
        return src

    def _parse_source_field(self, elem) -> FieldDef:
        """Parse a SOURCEFIELD element into a FieldDef."""
        return FieldDef(
            name=self._attr(elem, "NAME"),
            datatype=self._attr(elem, "DATATYPE"),
            precision=self._int_attr(elem, "PRECISION"),
            scale=self._int_attr(elem, "SCALE"),
            nullable=self._attr(elem, "NULLABLE", "NULL"),
            keytype=self._attr(elem, "KEYTYPE", "NOT A KEY"),
            field_number=self._int_attr(elem, "FIELDNUMBER"),
            hidden=self._attr(elem, "HIDDEN", "NO"),
            business_name=self._attr(elem, "BUSINESSNAME"),
            description=self._attr(elem, "DESCRIPTION"),
        )

    def _parse_target(self, elem) -> TargetDef:
        """Parse a TARGET definition with fields, attributes and indexes."""
        tgt = TargetDef(
            name=self._attr(elem, "NAME"),
            database_type=self._attr(elem, "DATABASETYPE"),
            description=self._attr(elem, "DESCRIPTION"),
            business_name=self._attr(elem, "BUSINESSNAME"),
            constraint=self._attr(elem, "CONSTRAINT"),
            table_options=self._attr(elem, "TABLEOPTIONS"),
        )
        for tf in elem.findall("TARGETFIELD"):
            tgt.fields.append(self._parse_target_field(tf))
        for ta in elem.findall("TABLEATTRIBUTE"):
            tgt.attributes.append(self._parse_table_attribute(ta))
        # Indexes are recorded as synthetic attributes ("INDEX_<name>").
        for ti in elem.findall("TARGETINDEX"):
            tgt.attributes.append(TableAttribute(
                name=f"INDEX_{self._attr(ti, 'NAME')}",
                value=self._attr(ti, 'DESCRIPTION'),
            ))
        return tgt

    def _parse_target_field(self, elem) -> FieldDef:
        """Parse a TARGETFIELD element into a FieldDef."""
        return FieldDef(
            name=self._attr(elem, "NAME"),
            datatype=self._attr(elem, "DATATYPE"),
            precision=self._int_attr(elem, "PRECISION"),
            scale=self._int_attr(elem, "SCALE"),
            nullable=self._attr(elem, "NULLABLE", "NULL"),
            keytype=self._attr(elem, "KEYTYPE", "NOT A KEY"),
            field_number=self._int_attr(elem, "FIELDNUMBER"),
            description=self._attr(elem, "DESCRIPTION"),
            business_name=self._attr(elem, "BUSINESSNAME"),
        )

    def _parse_transformation(self, elem) -> TransformationDef:
        """Parse a TRANSFORMATION element with fields and attributes."""
        tx = TransformationDef(
            name=self._attr(elem, "NAME"),
            type=self._attr(elem, "TYPE"),
            description=self._attr(elem, "DESCRIPTION"),
            reusable=self._attr(elem, "REUSABLE", "NO"),
        )
        for tf in elem.findall("TRANSFORMFIELD"):
            tx.fields.append(self._parse_transform_field(tf))
        for ta in elem.findall("TABLEATTRIBUTE"):
            tx.attributes.append(self._parse_table_attribute(ta))

        # Auxiliary child elements are preserved as opaque metadata keyed by
        # "<TAG>_<NAME>" so no exported information is lost.
        for child_tag in ["TRANSFORMFIELDATTR", "TRANSFORMFIELDATTRDEF", "INITPROP", "ERPINFO"]:
            for child in elem.findall(child_tag):
                key = f"{child_tag}_{self._attr(child, 'NAME', child_tag)}"
                tx.metadata[key] = self._attr(child, "VALUE", str(child.attrib))

        return tx

    def _parse_transform_field(self, elem) -> FieldDef:
        """Parse a TRANSFORMFIELD (port) element into a FieldDef."""
        return FieldDef(
            name=self._attr(elem, "NAME"),
            datatype=self._attr(elem, "DATATYPE"),
            precision=self._int_attr(elem, "PRECISION"),
            scale=self._int_attr(elem, "SCALE"),
            default_value=self._attr(elem, "DEFAULTVALUE"),
            expression=self._attr(elem, "EXPRESSION"),
            expression_type=self._attr(elem, "EXPRESSIONTYPE"),
            porttype=self._attr(elem, "PORTTYPE"),
            description=self._attr(elem, "DESCRIPTION"),
        )

    def _parse_table_attribute(self, elem) -> TableAttribute:
        """Parse any NAME/VALUE attribute element into a TableAttribute."""
        return TableAttribute(
            name=self._attr(elem, "NAME"),
            value=self._attr(elem, "VALUE"),
        )

    def _parse_connector(self, elem) -> ConnectorDef:
        """Parse a CONNECTOR (port-to-port link) element."""
        return ConnectorDef(
            from_field=self._attr(elem, "FROMFIELD"),
            from_instance=self._attr(elem, "FROMINSTANCE"),
            from_instance_type=self._attr(elem, "FROMINSTANCETYPE"),
            to_field=self._attr(elem, "TOFIELD"),
            to_instance=self._attr(elem, "TOINSTANCE"),
            to_instance_type=self._attr(elem, "TOINSTANCETYPE"),
        )

    def _parse_instance(self, elem) -> InstanceDef:
        """Parse an INSTANCE (transformation usage within a mapping)."""
        return InstanceDef(
            name=self._attr(elem, "NAME"),
            type=self._attr(elem, "TYPE"),
            transformation_name=self._attr(elem, "TRANSFORMATION_NAME"),
            transformation_type=self._attr(elem, "TRANSFORMATION_TYPE"),
            description=self._attr(elem, "DESCRIPTION"),
            reusable=self._attr(elem, "REUSABLE", "NO"),
        )

    def _parse_mapping(self, elem) -> MappingDef:
        """Parse a MAPPING with its transformations, connectors, instances,
        target load orders, variables and metadata extensions."""
        mapping = MappingDef(
            name=self._attr(elem, "NAME"),
            description=self._attr(elem, "DESCRIPTION"),
            is_valid=self._attr(elem, "ISVALID", "YES"),
        )

        for tx in elem.findall("TRANSFORMATION"):
            mapping.transformations.append(self._parse_transformation(tx))

        for conn in elem.findall("CONNECTOR"):
            mapping.connectors.append(self._parse_connector(conn))

        for inst in elem.findall("INSTANCE"):
            mapping.instances.append(self._parse_instance(inst))

        for tlo in elem.findall("TARGETLOADORDER"):
            mapping.target_load_orders.append(TargetLoadOrder(
                order=self._int_attr(tlo, "ORDER", 1),
                target_instance=self._attr(tlo, "TARGETINSTANCE"),
            ))

        for mv in elem.findall("MAPPINGVARIABLE"):
            mapping.variables.append(MappingVariable(
                name=self._attr(mv, "NAME"),
                datatype=self._attr(mv, "DATATYPE", "string"),
                default_value=self._attr(mv, "DEFAULTVALUE"),
                description=self._attr(mv, "DESCRIPTION"),
                is_expression_variable=self._attr(mv, "ISEXPRESSIONVARIABLE", "NO"),
                is_persistent=self._attr(mv, "ISPERSISTENT", "NO"),
                precision=self._int_attr(mv, "PRECISION"),
                scale=self._int_attr(mv, "SCALE"),
                usage_type=self._attr(mv, "USAGETYPE"),
            ))

        for me in elem.findall("METADATAEXTENSION"):
            mapping.metadata[self._attr(me, "NAME")] = self._attr(me, "VALUE")

        return mapping

    def _parse_mapplet(self, elem) -> MappletDef:
        """Parse a MAPPLET (reusable sub-mapping)."""
        mapplet = MappletDef(
            name=self._attr(elem, "NAME"),
            description=self._attr(elem, "DESCRIPTION"),
            is_valid=self._attr(elem, "ISVALID", "YES"),
        )
        for tx in elem.findall("TRANSFORMATION"):
            mapplet.transformations.append(self._parse_transformation(tx))
        for conn in elem.findall("CONNECTOR"):
            mapplet.connectors.append(self._parse_connector(conn))
        for inst in elem.findall("INSTANCE"):
            mapplet.instances.append(self._parse_instance(inst))
        return mapplet

    def _parse_session(self, elem) -> SessionDef:
        """Parse a SESSION: attributes, config references, per-transformation
        session settings (with connections), and session components."""
        session = SessionDef(
            name=self._attr(elem, "NAME"),
            mapping_name=self._attr(elem, "MAPPINGNAME"),
            description=self._attr(elem, "DESCRIPTION"),
            is_valid=self._attr(elem, "ISVALID", "YES"),
            reusable=self._attr(elem, "REUSABLE", "NO"),
        )

        # ".//" searches are deliberate: these elements appear at varying
        # nesting depths inside SESSION exports.
        for ta in elem.findall(".//ATTRIBUTE"):
            session.attributes.append(self._parse_table_attribute(ta))

        for cr in elem.findall(".//CONFIGREFERENCE"):
            session.config_references.append({
                "name": self._attr(cr, "REFOBJECTNAME"),
                "type": self._attr(cr, "TYPE"),
            })

        for sti in elem.findall(".//SESSTRANSFORMATIONINST"):
            st = SessionTransformInst(
                instance_name=self._attr(sti, "SINSTANCENAME"),
                pipeline=self._attr(sti, "PIPELINE"),
                stage=self._attr(sti, "STAGE"),
                transformation_name=self._attr(sti, "TRANSFORMATIONNAME"),
                transformation_type=self._attr(sti, "TRANSFORMATIONTYPE"),
                is_partitionable=self._attr(sti, "ISREPARTITIONPOINT", "NO"),
            )
            for ta in sti.findall("ATTRIBUTE"):
                st.attributes.append(self._parse_table_attribute(ta))
            for cr in sti.findall("CONNECTIONREFERENCE"):
                st.connections.append(ConnectionRef(
                    connection_name=self._attr(cr, "CONNECTIONNAME"),
                    connection_type=self._attr(cr, "CONNECTIONTYPE"),
                    connection_subtype=self._attr(cr, "CONNECTIONSUBTYPE"),
                    variable=self._attr(cr, "VARIABLE"),
                ))
            session.transform_instances.append(st)

        for comp in elem.findall(".//SESSIONCOMPONENT"):
            comp_data = {
                "name": self._attr(comp, "REFOBJECTNAME"),
                "type": self._attr(comp, "TYPE"),
                "attributes": [],
            }
            for ta in comp.findall("ATTRIBUTE"):
                comp_data["attributes"].append({
                    "name": self._attr(ta, "NAME"),
                    "value": self._attr(ta, "VALUE"),
                })
            session.components.append(comp_data)

        return session

    def _parse_workflow(self, elem) -> WorkflowDef:
        """Parse a WORKFLOW (or WORKLET) with task instances, links,
        variables, attributes and metadata extensions.

        Embedded SESSION children are not parsed here; sessions are picked
        up at folder level by ``_parse_folder``.
        """
        wf = WorkflowDef(
            name=self._attr(elem, "NAME"),
            description=self._attr(elem, "DESCRIPTION"),
            is_valid=self._attr(elem, "ISVALID", "YES"),
            reusable=self._attr(elem, "REUSABLE", "NO"),
            scheduler_name=self._attr(elem, "SCHEDULERNAME"),
        )

        for ti in elem.findall("TASKINSTANCE"):
            wf.task_instances.append(self._parse_task_instance(ti))

        for link in elem.findall("WORKFLOWLINK"):
            wf.links.append(WorkflowLink(
                from_instance=self._attr(link, "FROMTASK"),
                to_instance=self._attr(link, "TOTASK"),
                condition=self._attr(link, "CONDITION"),
                link_type=self._attr(link, "LINKTYPE"),
            ))

        for wv in elem.findall("WORKFLOWVARIABLE"):
            wf.variables.append(WorkflowVariable(
                name=self._attr(wv, "NAME"),
                datatype=self._attr(wv, "DATATYPE", "string"),
                default_value=self._attr(wv, "DEFAULTVALUE"),
                description=self._attr(wv, "DESCRIPTION"),
                is_null=self._attr(wv, "ISNULL", "NO"),
                is_persistent=self._attr(wv, "ISPERSISTENT", "NO"),
                is_user_defined=self._attr(wv, "ISUSERDEFINED", "YES"),
                precision=self._int_attr(wv, "PRECISION"),
                scale=self._int_attr(wv, "SCALE"),
                usage_type=self._attr(wv, "USAGETYPE"),
            ))

        for ta in elem.findall("ATTRIBUTE"):
            wf.attributes.append(self._parse_table_attribute(ta))

        for me in elem.findall("METADATAEXTENSION"):
            wf.metadata[self._attr(me, "NAME")] = self._attr(me, "VALUE")

        return wf

    def _parse_task_instance(self, elem) -> TaskInstanceDef:
        """Parse a TASKINSTANCE (a task placed inside a workflow)."""
        ti = TaskInstanceDef(
            name=self._attr(elem, "NAME"),
            task_name=self._attr(elem, "TASKNAME"),
            task_type=self._attr(elem, "TASKTYPE"),
            description=self._attr(elem, "DESCRIPTION"),
            is_valid=self._attr(elem, "ISVALID", "YES"),
            reusable=self._attr(elem, "REUSABLE", "NO"),
            fail_parent_if_instance_fails=self._attr(elem, "FAIL_PARENT_IF_INSTANCE_FAILS", "YES"),
            fail_parent_if_instance_did_not_run=self._attr(elem, "FAIL_PARENT_IF_INSTANCE_DID_NOT_RUN", "NO"),
            treat_input_link_as_and=self._attr(elem, "TREAT_INPUTLINKS_AS_AND", "YES"),
        )
        for ta in elem.findall("ATTRIBUTE"):
            ti.attributes.append(self._parse_table_attribute(ta))
        return ti

    def _parse_config(self, elem) -> ConfigDef:
        """Parse a CONFIG (session configuration) element."""
        cfg = ConfigDef(
            name=self._attr(elem, "NAME"),
            description=self._attr(elem, "DESCRIPTION"),
            is_valid=self._attr(elem, "ISVALID", "YES"),
        )
        for ta in elem.findall("ATTRIBUTE"):
            cfg.attributes.append(self._parse_table_attribute(ta))
        return cfg

    def _parse_scheduler(self, elem) -> SchedulerDef:
        """Parse a SCHEDULER; nested schedule detail elements are flattened
        into attributes keyed by "<TAG>_<attribute-name>"."""
        sched = SchedulerDef(
            name=self._attr(elem, "NAME"),
            description=self._attr(elem, "DESCRIPTION"),
            reusable=self._attr(elem, "REUSABLE", "NO"),
        )
        for ta in elem.findall("ATTRIBUTE"):
            sched.attributes.append(self._parse_table_attribute(ta))
        for child_tag in ["SCHEDULEINFO", "STARTOPTIONS", "ENDOPTIONS",
                          "SCHEDULEOPTIONS", "RECURRING", "CUSTOM",
                          "DAILYFREQUENCY", "REPEAT", "FILTER"]:
            for child in elem.findall(f".//{child_tag}"):
                for k, v in child.attrib.items():
                    sched.attributes.append(TableAttribute(
                        name=f"{child_tag}_{k}",
                        value=v,
                    ))
        return sched

    def _parse_shortcut(self, elem) -> ShortcutDef:
        """Parse a SHORTCUT (reference to an object in another folder/repo)."""
        return ShortcutDef(
            name=self._attr(elem, "NAME"),
            shortcut_type=self._attr(elem, "OBJECTSUBTYPE"),
            reference_name=self._attr(elem, "REFOBJECTNAME"),
            folder_name=self._attr(elem, "FOLDERNAME"),
            repository_name=self._attr(elem, "REPOSITORYNAME"),
        )

    def _parse_task(self, elem) -> TaskDef:
        """Parse a standalone TASK; VALUEPAIR children are folded into the
        same attributes list as ATTRIBUTE children."""
        task = TaskDef(
            name=self._attr(elem, "NAME"),
            type=self._attr(elem, "TYPE"),
            description=self._attr(elem, "DESCRIPTION"),
            reusable=self._attr(elem, "REUSABLE", "NO"),
        )
        for ta in elem.findall("ATTRIBUTE"):
            task.attributes.append(self._parse_table_attribute(ta))
        for vp in elem.findall("VALUEPAIR"):
            task.attributes.append(TableAttribute(
                name=self._attr(vp, "NAME"),
                value=self._attr(vp, "VALUE"),
            ))
        return task
|
File without changes
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# Mapping of Informatica datatype names (lower-cased) to Python type names.
# Unknown types fall back to "str" in get_python_type().
INFORMATICA_TO_PYTHON = {
    # integral types
    "bigint": "int",
    "integer": "int",
    "int": "int",
    "small integer": "int",
    "smallint": "int",
    "tinyint": "int",
    # fractional types (mapped to float; precision beyond a double is lost)
    "numeric": "float",
    "decimal": "float",
    "float": "float",
    "double": "float",
    "real": "float",
    "money": "float",
    "smallmoney": "float",
    # character types
    "string": "str",
    "nstring": "str",
    "text": "str",
    "ntext": "str",
    "varchar": "str",
    "nvarchar": "str",
    "char": "str",
    "nchar": "str",
    # binary types
    "binary": "bytes",
    "varbinary": "bytes",
    "image": "bytes",
    # temporal types (kept as strings; parsing is left to the consumer)
    "date/time": "str",
    "datetime": "str",
    "datetime2": "str",
    "date": "str",
    "time": "str",
    "timestamp": "str",
    # boolean types
    "bit": "bool",
    "boolean": "bool",
    # misc
    "uniqueidentifier": "str",
    "xml": "str",
    "sql_variant": "str",
}

# Mapping of Informatica datatype names (lower-cased) to PySpark type
# constructor strings. Entries containing "{precision}" are templates
# filled in by get_spark_type().
INFORMATICA_TO_SPARK = {
    "bigint": "LongType()",
    "integer": "IntegerType()",
    "int": "IntegerType()",
    "small integer": "ShortType()",
    "smallint": "ShortType()",
    "tinyint": "ByteType()",
    "numeric": "DecimalType({precision}, {scale})",
    "decimal": "DecimalType({precision}, {scale})",
    "float": "FloatType()",
    "double": "DoubleType()",
    "real": "FloatType()",
    "money": "DecimalType(19, 4)",
    "smallmoney": "DecimalType(10, 4)",
    "string": "StringType()",
    "nstring": "StringType()",
    "text": "StringType()",
    "ntext": "StringType()",
    "varchar": "StringType()",
    "nvarchar": "StringType()",
    "char": "StringType()",
    "nchar": "StringType()",
    "binary": "BinaryType()",
    "varbinary": "BinaryType()",
    "image": "BinaryType()",
    "date/time": "TimestampType()",
    "datetime": "TimestampType()",
    "datetime2": "TimestampType()",
    "date": "DateType()",
    "time": "StringType()",
    "timestamp": "TimestampType()",
    "bit": "BooleanType()",
    "boolean": "BooleanType()",
    "uniqueidentifier": "StringType()",
    "xml": "StringType()",
    "sql_variant": "StringType()",
}

# Mapping of Informatica DATABASETYPE attribute values (exact spelling as
# exported) to short lower-case identifiers used by the generators.
DB_TYPE_MAP = {
    "Microsoft SQL Server": "mssql",
    "Oracle": "oracle",
    "Sybase": "sybase",
    "Informix": "informix",
    "DB2": "db2",
    "Teradata": "teradata",
    "ODBC": "odbc",
    "Flat File": "flatfile",
    "XML": "xml",
    "SAP": "sap",
    "": "unknown",
}


def get_python_type(informatica_type):
    """Return the Python type name for an Informatica datatype.

    Lookup is case-insensitive; unknown, empty or ``None`` input falls
    back to ``"str"`` (previously ``None`` raised ``AttributeError``,
    unlike ``get_db_type`` which already guarded falsy input).
    """
    if not informatica_type:
        return "str"
    return INFORMATICA_TO_PYTHON.get(informatica_type.lower().strip(), "str")


def get_spark_type(informatica_type, precision=10, scale=0):
    """Return the PySpark type constructor string for an Informatica datatype.

    Decimal-like types are templates; ``precision`` and ``scale`` are
    substituted into them. Unknown, empty or ``None`` input falls back to
    ``"StringType()"``.
    """
    if not informatica_type:
        return "StringType()"
    spark_type = INFORMATICA_TO_SPARK.get(informatica_type.lower().strip(), "StringType()")
    if "{precision}" in spark_type:
        spark_type = spark_type.format(precision=precision, scale=scale)
    return spark_type


def get_db_type(database_type):
    """Return the short identifier for an Informatica DATABASETYPE value.

    The lookup is an exact match against the exported spelling (e.g.
    "Microsoft SQL Server"); unmapped values fall back to their
    lower-cased form, and falsy input yields ``"unknown"``.
    """
    return DB_TYPE_MAP.get(database_type, database_type.lower() if database_type else "unknown")