informatica-python 1.3.0__tar.gz → 1.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {informatica_python-1.3.0 → informatica_python-1.4.0}/PKG-INFO +1 -1
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/generators/helper_gen.py +19 -4
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/generators/mapping_gen.py +173 -19
- informatica_python-1.4.0/informatica_python/generators/workflow_gen.py +387 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python.egg-info/PKG-INFO +1 -1
- {informatica_python-1.3.0 → informatica_python-1.4.0}/pyproject.toml +1 -1
- {informatica_python-1.3.0 → informatica_python-1.4.0}/tests/test_converter.py +197 -0
- informatica_python-1.3.0/informatica_python/generators/workflow_gen.py +0 -290
- {informatica_python-1.3.0 → informatica_python-1.4.0}/README.md +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/__init__.py +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/cli.py +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/converter.py +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/generators/__init__.py +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/generators/config_gen.py +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/generators/error_log_gen.py +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/generators/sql_gen.py +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/models.py +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/parser.py +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/utils/__init__.py +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/utils/datatype_map.py +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/utils/expression_converter.py +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python.egg-info/SOURCES.txt +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python.egg-info/dependency_links.txt +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python.egg-info/entry_points.txt +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python.egg-info/requires.txt +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python.egg-info/top_level.txt +0 -0
- {informatica_python-1.3.0 → informatica_python-1.4.0}/setup.cfg +0 -0
{informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/generators/helper_gen.py
RENAMED
|
@@ -208,7 +208,11 @@ def _add_file_functions(lines, data_lib):
|
|
|
208
208
|
lines.append(" delimiter = file_config.get('delimiter', ',')")
|
|
209
209
|
lines.append(" header = file_config.get('header', True)")
|
|
210
210
|
lines.append(" encoding = file_config.get('encoding', 'utf-8')")
|
|
211
|
-
lines.append(" header_row = 0 if header else None")
|
|
211
|
+
lines.append(" header_row = file_config.get('header_row', 0 if header else None)")
|
|
212
|
+
lines.append(" skip_rows = file_config.get('skip_rows', 0)")
|
|
213
|
+
lines.append(" quotechar = file_config.get('quotechar', '\"')")
|
|
214
|
+
lines.append(" escapechar = file_config.get('escapechar', None)")
|
|
215
|
+
lines.append(" lineterminator = file_config.get('lineterminator', None)")
|
|
212
216
|
lines.append("")
|
|
213
217
|
lines.append(" logger.info(f'Reading file: {file_path} (ext={ext})')")
|
|
214
218
|
lines.append("")
|
|
@@ -245,7 +249,17 @@ def _add_file_functions(lines, data_lib):
|
|
|
245
249
|
lines.append(" return dd.read_csv(file_path, sep=delimiter, header=header_row)")
|
|
246
250
|
else:
|
|
247
251
|
lines.append(" if ext in ('.csv', '.dat', '.txt', ''):")
|
|
248
|
-
lines.append("
|
|
252
|
+
lines.append(" csv_kwargs = dict(sep=delimiter, header=header_row, encoding=encoding,")
|
|
253
|
+
lines.append(" quotechar=quotechar, escapechar=escapechar)")
|
|
254
|
+
lines.append(" if skip_rows:")
|
|
255
|
+
lines.append(" csv_kwargs['skiprows'] = skip_rows")
|
|
256
|
+
lines.append(" if lineterminator:")
|
|
257
|
+
lines.append(" csv_kwargs['lineterminator'] = lineterminator")
|
|
258
|
+
lines.append(" df = pd.read_csv(file_path, **csv_kwargs)")
|
|
259
|
+
lines.append(" if file_config.get('strip_trailing_blanks'):")
|
|
260
|
+
lines.append(" str_cols = df.select_dtypes(include=['object']).columns")
|
|
261
|
+
lines.append(" df[str_cols] = df[str_cols].apply(lambda c: c.str.rstrip())")
|
|
262
|
+
lines.append(" return df")
|
|
249
263
|
lines.append(" elif ext in ('.xlsx', '.xls'):")
|
|
250
264
|
lines.append(" return pd.read_excel(file_path, header=header_row)")
|
|
251
265
|
lines.append(" elif ext == '.xml':")
|
|
@@ -271,6 +285,7 @@ def _add_file_functions(lines, data_lib):
|
|
|
271
285
|
lines.append(" delimiter = file_config.get('delimiter', ',')")
|
|
272
286
|
lines.append(" header = file_config.get('header', True)")
|
|
273
287
|
lines.append(" encoding = file_config.get('encoding', 'utf-8')")
|
|
288
|
+
lines.append(" quotechar = file_config.get('quotechar', '\"')")
|
|
274
289
|
lines.append("")
|
|
275
290
|
lines.append(" os.makedirs(os.path.dirname(file_path) or '.', exist_ok=True)")
|
|
276
291
|
lines.append(" logger.info(f'Writing file: {file_path}')")
|
|
@@ -290,7 +305,7 @@ def _add_file_functions(lines, data_lib):
|
|
|
290
305
|
lines.append(" df.write_csv(file_path, separator=delimiter, has_header=header)")
|
|
291
306
|
elif data_lib == "dask":
|
|
292
307
|
lines.append(" if ext in ('.csv', '.dat', '.txt', ''):")
|
|
293
|
-
lines.append(" df.compute().to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding)")
|
|
308
|
+
lines.append(" df.compute().to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding, quotechar=quotechar)")
|
|
294
309
|
lines.append(" elif ext in ('.xlsx', '.xls'):")
|
|
295
310
|
lines.append(" df.compute().to_excel(file_path, header=header, index=False)")
|
|
296
311
|
lines.append(" elif ext == '.json':")
|
|
@@ -301,7 +316,7 @@ def _add_file_functions(lines, data_lib):
|
|
|
301
316
|
lines.append(" df.compute().to_csv(file_path, sep=delimiter, header=header, index=False)")
|
|
302
317
|
else:
|
|
303
318
|
lines.append(" if ext in ('.csv', '.dat', '.txt', ''):")
|
|
304
|
-
lines.append(" df.to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding)")
|
|
319
|
+
lines.append(" df.to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding, quotechar=quotechar)")
|
|
305
320
|
lines.append(" elif ext in ('.xlsx', '.xls'):")
|
|
306
321
|
lines.append(" df.to_excel(file_path, header=header, index=False)")
|
|
307
322
|
lines.append(" elif ext == '.json':")
|
{informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/generators/mapping_gen.py
RENAMED
|
@@ -135,6 +135,46 @@ def _inline_mapplets(mapping, folder):
|
|
|
135
135
|
return extra_transforms, extra_connectors + rewired_connectors, mapplet_instances
|
|
136
136
|
|
|
137
137
|
|
|
138
|
+
# Session-level attribute name -> key under which its value is stored in the
# per-instance override dict. Attributes not listed here are ignored.
_SESSION_ATTR_OVERRIDE_KEYS = {
    "Connection Information": "connection_info",
    "Source File Directory": "source_file_directory",
    "Source filename": "source_filename",
    "Output File Directory": "output_file_directory",
    "Output filename": "output_filename",
}


def _build_session_conn_overrides(mapping, folder):
    """Collect per-instance session overrides for ``mapping``.

    Walks every session in ``folder.sessions`` whose ``mapping_name`` equals
    ``mapping.name`` and gathers, for each session transformation instance:

    * connection details from each connection reference that has either a
      ``connection_name`` or a ``variable`` (stored under the keys
      ``connection_name``, ``connection_type``, ``connection_subtype`` and
      ``variable``; a later usable reference overwrites these four keys);
    * the non-empty values of the attributes listed in
      ``_SESSION_ATTR_OVERRIDE_KEYS``.

    Entries for the same instance are merged, so connection details never
    discard file/connection-info overrides that were already collected.

    Args:
        mapping: object exposing ``name``.
        folder: object exposing ``sessions``; each session exposes
            ``mapping_name`` and ``transform_instances``.

    Returns:
        dict mapping instance name (``instance_name``, falling back to
        ``transformation_name``) to a dict of override values. Instances
        without any usable override are absent.
    """
    overrides = {}
    for session in folder.sessions:
        if session.mapping_name != mapping.name:
            continue
        for sti in session.transform_instances:
            inst_name = sti.instance_name or sti.transformation_name

            for conn_ref in sti.connections:
                # A reference with neither a name nor a variable carries no
                # usable connection information.
                if not (conn_ref.connection_name or conn_ref.variable):
                    continue
                overrides.setdefault(inst_name, {}).update(
                    connection_name=conn_ref.connection_name,
                    connection_type=conn_ref.connection_type,
                    connection_subtype=conn_ref.connection_subtype,
                    variable=conn_ref.variable,
                )

            for attr in sti.attributes:
                key = _SESSION_ATTR_OVERRIDE_KEYS.get(attr.name)
                if key and attr.value:
                    overrides.setdefault(inst_name, {})[key] = attr.value
    return overrides
|
|
176
|
+
|
|
177
|
+
|
|
138
178
|
def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
139
179
|
data_lib: str = "pandas", mapping_index: int = 1) -> str:
|
|
140
180
|
lines = []
|
|
@@ -151,15 +191,20 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
151
191
|
inlined_transforms, inlined_connectors, mapplet_instance_names = _inline_mapplets(mapping, folder)
|
|
152
192
|
|
|
153
193
|
all_transforms = list(mapping.transformations) + inlined_transforms
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
194
|
+
if mapplet_instance_names:
|
|
195
|
+
kept_originals = [c for c in mapping.connectors
|
|
196
|
+
if c.from_instance not in mapplet_instance_names
|
|
197
|
+
and c.to_instance not in mapplet_instance_names]
|
|
198
|
+
all_connectors = kept_originals + inlined_connectors
|
|
199
|
+
else:
|
|
200
|
+
all_connectors = list(mapping.connectors)
|
|
157
201
|
|
|
158
202
|
source_map = _build_source_map(mapping, folder)
|
|
159
203
|
target_map = _build_target_map(mapping, folder)
|
|
160
204
|
transform_map = {t.name: t for t in all_transforms}
|
|
161
205
|
connector_graph = _build_connector_graph(all_connectors)
|
|
162
206
|
instance_map = {i.name: i for i in mapping.instances}
|
|
207
|
+
session_overrides = _build_session_conn_overrides(mapping, folder)
|
|
163
208
|
|
|
164
209
|
lines.append(f"def run_{_safe_name(mapping.name)}(config):")
|
|
165
210
|
lines.append(f' """Execute mapping: {mapping.name}"""')
|
|
@@ -176,6 +221,11 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
176
221
|
lines.append(f" {safe_var} = {default}")
|
|
177
222
|
lines.append("")
|
|
178
223
|
|
|
224
|
+
if session_overrides:
|
|
225
|
+
lines.append(" # Session connection overrides")
|
|
226
|
+
lines.append(f" _sess_overrides = {repr(session_overrides)}")
|
|
227
|
+
lines.append("")
|
|
228
|
+
|
|
179
229
|
source_dfs = {}
|
|
180
230
|
for src_name, src_def in source_map.items():
|
|
181
231
|
safe = _safe_name(src_name)
|
|
@@ -185,13 +235,23 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
185
235
|
if t.type in ("Source Qualifier", "Application Source Qualifier")]
|
|
186
236
|
if sq_transforms:
|
|
187
237
|
for sq in sq_transforms:
|
|
188
|
-
_generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map)
|
|
238
|
+
_generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides)
|
|
189
239
|
else:
|
|
190
240
|
for src_name, src_def in source_map.items():
|
|
191
241
|
safe = _safe_name(src_name)
|
|
242
|
+
override = session_overrides.get(src_name, {})
|
|
192
243
|
lines.append(f" # Read source: {src_name}")
|
|
193
|
-
if
|
|
194
|
-
|
|
244
|
+
if override.get("source_file_directory") or override.get("source_filename"):
|
|
245
|
+
src_dir = override.get("source_file_directory", ".")
|
|
246
|
+
src_file = override.get("source_filename", src_def.name)
|
|
247
|
+
lines.append(f" _src_path_{safe} = config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path',")
|
|
248
|
+
lines.append(f" os.path.join('{src_dir}', '{src_file}'))")
|
|
249
|
+
if src_def.flatfile:
|
|
250
|
+
_emit_flatfile_read(lines, safe, src_def)
|
|
251
|
+
else:
|
|
252
|
+
lines.append(f" df_{safe} = read_file(_src_path_{safe}, config.get('sources', {{}}).get('{src_def.name}', {{}}))")
|
|
253
|
+
elif src_def.database_type and src_def.database_type != "Flat File":
|
|
254
|
+
conn_name = override.get("connection_name") or (_safe_name(src_def.db_name) if src_def.db_name else "default")
|
|
195
255
|
schema = src_def.owner_name or "dbo"
|
|
196
256
|
lines.append(f" df_{safe} = read_from_db(config, 'SELECT * FROM {schema}.{src_name}', '{conn_name}')")
|
|
197
257
|
elif src_def.flatfile:
|
|
@@ -209,7 +269,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
|
|
|
209
269
|
_generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map)
|
|
210
270
|
|
|
211
271
|
for tgt_name, tgt_def in target_map.items():
|
|
212
|
-
_generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map)
|
|
272
|
+
_generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides)
|
|
213
273
|
|
|
214
274
|
lines.append("")
|
|
215
275
|
lines.append(f" log_mapping_end('{mapping.name}', start_time)")
|
|
@@ -401,7 +461,7 @@ def _get_processing_order(transformations, connector_graph, sq_transforms):
|
|
|
401
461
|
return ordered
|
|
402
462
|
|
|
403
463
|
|
|
404
|
-
def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map):
|
|
464
|
+
def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides=None):
|
|
405
465
|
sq_safe = _safe_name(sq.name)
|
|
406
466
|
sql_override = ""
|
|
407
467
|
pre_sql = ""
|
|
@@ -438,7 +498,8 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
438
498
|
if sql_override:
|
|
439
499
|
src_name = next(iter(connected_sources)) if connected_sources else "source"
|
|
440
500
|
src_def = source_map.get(src_name, SourceDef(name=src_name))
|
|
441
|
-
|
|
501
|
+
sq_override = (session_overrides or {}).get(sq.name, {}) or (session_overrides or {}).get(src_name, {})
|
|
502
|
+
conn_name = sq_override.get("connection_name") or (_safe_name(src_def.db_name) if src_def.db_name else "default")
|
|
442
503
|
|
|
443
504
|
lines.append(f" sql_{sq_safe} = '''")
|
|
444
505
|
for sql_line in sql_override.strip().split("\n"):
|
|
@@ -450,8 +511,9 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
|
|
|
450
511
|
src_name = next(iter(connected_sources))
|
|
451
512
|
src_def = source_map.get(src_name, SourceDef(name=src_name))
|
|
452
513
|
safe_src = _safe_name(src_name)
|
|
514
|
+
src_override = (session_overrides or {}).get(sq.name, {}) or (session_overrides or {}).get(src_name, {})
|
|
453
515
|
if src_def.database_type and src_def.database_type != "Flat File":
|
|
454
|
-
conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
|
|
516
|
+
conn_name = src_override.get("connection_name") or (_safe_name(src_def.db_name) if src_def.db_name else "default")
|
|
455
517
|
schema = src_def.owner_name or "dbo"
|
|
456
518
|
cols = ", ".join(f.name for f in src_def.fields) if src_def.fields else "*"
|
|
457
519
|
lines.append(f" df_{sq_safe} = read_from_db(config, 'SELECT {cols} FROM {schema}.{src_def.name}', '{conn_name}')")
|
|
@@ -996,21 +1058,31 @@ def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
996
1058
|
|
|
997
1059
|
ascending = top_bottom.upper() != "TOP"
|
|
998
1060
|
|
|
1061
|
+
rank_out_field = "RANKINDEX"
|
|
1062
|
+
for fld in tx.fields:
|
|
1063
|
+
if fld.name.upper() == "RANKINDEX" or "RANK" in fld.name.upper():
|
|
1064
|
+
pt = (fld.porttype or "").upper()
|
|
1065
|
+
if "OUTPUT" in pt and "INPUT" not in pt:
|
|
1066
|
+
rank_out_field = fld.name
|
|
1067
|
+
break
|
|
1068
|
+
|
|
999
1069
|
lines.append(f" df_{tx_safe} = {input_df}.copy()")
|
|
1000
1070
|
if rank_port and group_by_ports:
|
|
1001
1071
|
lines.append(f" # Rank by '{rank_port}' within groups {group_by_ports}")
|
|
1002
|
-
lines.append(f"
|
|
1072
|
+
lines.append(f" _rank_vals = df_{tx_safe}.groupby({group_by_ports})['{rank_port}'].rank(")
|
|
1003
1073
|
lines.append(f" method='min', ascending={ascending}")
|
|
1004
|
-
lines.append(f" )
|
|
1074
|
+
lines.append(f" )")
|
|
1075
|
+
lines.append(f" df_{tx_safe}['{rank_out_field}'] = _rank_vals.fillna(0).astype(int)")
|
|
1005
1076
|
if top_n:
|
|
1006
|
-
lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['
|
|
1077
|
+
lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['{rank_out_field}'] <= {top_n}].reset_index(drop=True)")
|
|
1007
1078
|
elif rank_port:
|
|
1008
1079
|
lines.append(f" # Rank by '{rank_port}' (no group-by)")
|
|
1009
|
-
lines.append(f"
|
|
1080
|
+
lines.append(f" _rank_vals = df_{tx_safe}['{rank_port}'].rank(method='min', ascending={ascending})")
|
|
1081
|
+
lines.append(f" df_{tx_safe}['{rank_out_field}'] = _rank_vals.fillna(0).astype(int)")
|
|
1010
1082
|
if top_n:
|
|
1011
|
-
lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['
|
|
1083
|
+
lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['{rank_out_field}'] <= {top_n}].reset_index(drop=True)")
|
|
1012
1084
|
else:
|
|
1013
|
-
lines.append(f" df_{tx_safe}['
|
|
1085
|
+
lines.append(f" df_{tx_safe}['{rank_out_field}'] = range(1, len(df_{tx_safe}) + 1)")
|
|
1014
1086
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
1015
1087
|
|
|
1016
1088
|
|
|
@@ -1087,7 +1159,7 @@ def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs):
|
|
|
1087
1159
|
source_dfs[tx.name] = f"df_{tx_safe}"
|
|
1088
1160
|
|
|
1089
1161
|
|
|
1090
|
-
def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map):
|
|
1162
|
+
def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides=None):
|
|
1091
1163
|
tgt_safe = _safe_name(tgt_name)
|
|
1092
1164
|
|
|
1093
1165
|
to_conns = connector_graph.get("to", {}).get(tgt_name, [])
|
|
@@ -1119,10 +1191,92 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
|
|
|
1119
1191
|
else:
|
|
1120
1192
|
lines.append(f" df_target_{tgt_safe} = {input_df}")
|
|
1121
1193
|
|
|
1122
|
-
|
|
1123
|
-
|
|
1194
|
+
_emit_type_casting(lines, tgt_safe, tgt_def)
|
|
1195
|
+
|
|
1196
|
+
tgt_override = (session_overrides or {}).get(tgt_name, {})
|
|
1197
|
+
tgt_conn = tgt_override.get("connection_name")
|
|
1198
|
+
|
|
1199
|
+
if tgt_override.get("output_file_directory") or tgt_override.get("output_filename"):
|
|
1200
|
+
out_dir = tgt_override.get("output_file_directory", ".")
|
|
1201
|
+
out_file = tgt_override.get("output_filename", tgt_def.name)
|
|
1202
|
+
lines.append(f" _tgt_path_{tgt_safe} = config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path',")
|
|
1203
|
+
lines.append(f" os.path.join('{out_dir}', '{out_file}'))")
|
|
1204
|
+
if tgt_def.flatfile:
|
|
1205
|
+
_emit_flatfile_write(lines, tgt_safe, tgt_def)
|
|
1206
|
+
else:
|
|
1207
|
+
lines.append(f" write_file(df_target_{tgt_safe}, _tgt_path_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
|
|
1208
|
+
elif tgt_def.database_type and tgt_def.database_type != "Flat File":
|
|
1209
|
+
conn_label = tgt_conn or "target"
|
|
1210
|
+
lines.append(f" write_to_db(config, df_target_{tgt_safe}, '{tgt_def.name}', '{conn_label}')")
|
|
1124
1211
|
elif tgt_def.flatfile:
|
|
1125
1212
|
_emit_flatfile_write(lines, tgt_safe, tgt_def)
|
|
1126
1213
|
else:
|
|
1127
1214
|
lines.append(f" write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
|
|
1128
1215
|
lines.append(f" config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
|
|
1216
|
+
|
|
1217
|
+
|
|
1218
|
+
# Lower-cased target datatype name -> (python type name, pandas dtype).
# Only the pandas dtype is consulted by _emit_type_casting below.
CAST_MAP = {
    "bigint": ("int", "Int64"),
    "integer": ("int", "Int32"),
    "int": ("int", "Int32"),
    "small integer": ("int", "Int16"),
    "smallint": ("int", "Int16"),
    "tinyint": ("int", "Int8"),
    "numeric": ("float", "float64"),
    "decimal": ("float", "float64"),
    "float": ("float", "float64"),
    "double": ("float", "float64"),
    "real": ("float", "float32"),
    "money": ("float", "float64"),
    "smallmoney": ("float", "float64"),
    "string": ("str", "object"),
    "nstring": ("str", "object"),
    "text": ("str", "object"),
    "ntext": ("str", "object"),
    "varchar": ("str", "object"),
    "nvarchar": ("str", "object"),
    "char": ("str", "object"),
    "nchar": ("str", "object"),
    "date/time": ("str", "datetime64[ns]"),
    "datetime": ("str", "datetime64[ns]"),
    "datetime2": ("str", "datetime64[ns]"),
    "date": ("str", "datetime64[ns]"),
    "timestamp": ("str", "datetime64[ns]"),
    "bit": ("bool", "boolean"),
    "boolean": ("bool", "boolean"),
}

# pandas dtype -> kind of cast statement to emit. Dtypes missing from this
# table (e.g. "object" for string columns) get no cast at all.
_CAST_KIND_BY_DTYPE = {
    "datetime64[ns]": "datetime",
    "Int64": "int",
    "Int32": "int",
    "Int16": "int",
    "Int8": "int",
    "float64": "float",
    "float32": "float",
    "boolean": "bool",
}


def _emit_type_casting(lines, tgt_safe, tgt_def):
    """Append generated code that casts target columns to their declared types.

    For every field of ``tgt_def`` whose datatype (case-insensitive, stripped)
    is in ``CAST_MAP`` and maps to a datetime, integer, float or boolean
    dtype, a guarded cast on ``df_target_<tgt_safe>`` is emitted:

    * datetime -> ``pd.to_datetime(..., errors='coerce')``
    * int      -> ``pd.to_numeric(..., errors='coerce')`` followed by the
      pandas nullable dtype when the field's ``nullable`` is ``"NULL"``,
      otherwise ``.fillna(0).astype(int)``
    * float    -> ``pd.to_numeric(..., errors='coerce')``
    * bool     -> ``.astype('boolean')``

    Nothing is appended when no field needs a cast. Fields with a missing
    datatype are skipped instead of raising.

    NOTE(review): the emitted lines use 4/8 spaces of indentation on the
    assumption that they land inside the generated mapping function body —
    confirm against the other emitters in this module.

    Args:
        lines: list of generated source lines; mutated in place.
        tgt_safe: identifier-safe target name used in ``df_target_<name>``.
        tgt_def: object exposing ``fields``; each field exposes ``name``,
            ``datatype`` and ``nullable``.
    """
    cast_ops = []
    for fld in tgt_def.fields:
        dt_key = (fld.datatype or "").lower().strip()
        entry = CAST_MAP.get(dt_key)
        if entry is None:
            continue
        pd_dtype = entry[1]
        cast_kind = _CAST_KIND_BY_DTYPE.get(pd_dtype)
        if cast_kind is None:
            continue
        cast_ops.append((fld.name, cast_kind, pd_dtype, fld.nullable == "NULL"))

    if not cast_ops:
        return

    frame = f"df_target_{tgt_safe}"
    lines.append("    # Type casting for target fields")
    for col_name, cast_kind, pd_dtype, nullable in cast_ops:
        # repr() keeps the generated literal valid even if the column name
        # itself contains a quote character.
        col = f"{frame}[{col_name!r}]"
        lines.append(f"    if {col_name!r} in {frame}.columns:")
        if cast_kind == "datetime":
            stmt = f"{col} = pd.to_datetime({col}, errors='coerce')"
        elif cast_kind == "int":
            numeric = f"pd.to_numeric({col}, errors='coerce')"
            if nullable:
                stmt = f"{col} = {numeric}.astype('{pd_dtype}')"
            else:
                # Non-nullable columns: replace unparseable values with 0
                # so a plain integer cast cannot fail on NaN.
                stmt = f"{col} = {numeric}.fillna(0).astype(int)"
        elif cast_kind == "float":
            stmt = f"{col} = pd.to_numeric({col}, errors='coerce')"
        else:  # "bool"
            stmt = f"{col} = {col}.astype('{pd_dtype}')"
        lines.append(f"        {stmt}")
|