informatica-python 1.3.0__tar.gz → 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {informatica_python-1.3.0 → informatica_python-1.4.0}/PKG-INFO +1 -1
  2. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/generators/helper_gen.py +19 -4
  3. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/generators/mapping_gen.py +173 -19
  4. informatica_python-1.4.0/informatica_python/generators/workflow_gen.py +387 -0
  5. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python.egg-info/PKG-INFO +1 -1
  6. {informatica_python-1.3.0 → informatica_python-1.4.0}/pyproject.toml +1 -1
  7. {informatica_python-1.3.0 → informatica_python-1.4.0}/tests/test_converter.py +197 -0
  8. informatica_python-1.3.0/informatica_python/generators/workflow_gen.py +0 -290
  9. {informatica_python-1.3.0 → informatica_python-1.4.0}/README.md +0 -0
  10. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/__init__.py +0 -0
  11. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/cli.py +0 -0
  12. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/converter.py +0 -0
  13. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/generators/__init__.py +0 -0
  14. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/generators/config_gen.py +0 -0
  15. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/generators/error_log_gen.py +0 -0
  16. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/generators/sql_gen.py +0 -0
  17. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/models.py +0 -0
  18. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/parser.py +0 -0
  19. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/utils/__init__.py +0 -0
  20. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/utils/datatype_map.py +0 -0
  21. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python/utils/expression_converter.py +0 -0
  22. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python.egg-info/SOURCES.txt +0 -0
  23. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python.egg-info/dependency_links.txt +0 -0
  24. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python.egg-info/entry_points.txt +0 -0
  25. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python.egg-info/requires.txt +0 -0
  26. {informatica_python-1.3.0 → informatica_python-1.4.0}/informatica_python.egg-info/top_level.txt +0 -0
  27. {informatica_python-1.3.0 → informatica_python-1.4.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: informatica-python
-Version: 1.3.0
+Version: 1.4.0
 Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
 License-Expression: MIT
 Requires-Python: >=3.8
@@ -208,7 +208,11 @@ def _add_file_functions(lines, data_lib):
     lines.append("    delimiter = file_config.get('delimiter', ',')")
     lines.append("    header = file_config.get('header', True)")
     lines.append("    encoding = file_config.get('encoding', 'utf-8')")
-    lines.append("    header_row = 0 if header else None")
+    lines.append("    header_row = file_config.get('header_row', 0 if header else None)")
+    lines.append("    skip_rows = file_config.get('skip_rows', 0)")
+    lines.append("    quotechar = file_config.get('quotechar', '\"')")
+    lines.append("    escapechar = file_config.get('escapechar', None)")
+    lines.append("    lineterminator = file_config.get('lineterminator', None)")
     lines.append("")
     lines.append("    logger.info(f'Reading file: {file_path} (ext={ext})')")
     lines.append("")
@@ -245,7 +249,17 @@ def _add_file_functions(lines, data_lib):
         lines.append("        return dd.read_csv(file_path, sep=delimiter, header=header_row)")
     else:
         lines.append("    if ext in ('.csv', '.dat', '.txt', ''):")
-        lines.append("        return pd.read_csv(file_path, sep=delimiter, header=header_row, encoding=encoding)")
+        lines.append("        csv_kwargs = dict(sep=delimiter, header=header_row, encoding=encoding,")
+        lines.append("                          quotechar=quotechar, escapechar=escapechar)")
+        lines.append("        if skip_rows:")
+        lines.append("            csv_kwargs['skiprows'] = skip_rows")
+        lines.append("        if lineterminator:")
+        lines.append("            csv_kwargs['lineterminator'] = lineterminator")
+        lines.append("        df = pd.read_csv(file_path, **csv_kwargs)")
+        lines.append("        if file_config.get('strip_trailing_blanks'):")
+        lines.append("            str_cols = df.select_dtypes(include=['object']).columns")
+        lines.append("            df[str_cols] = df[str_cols].apply(lambda c: c.str.rstrip())")
+        lines.append("        return df")
         lines.append("    elif ext in ('.xlsx', '.xls'):")
         lines.append("        return pd.read_excel(file_path, header=header_row)")
         lines.append("    elif ext == '.xml':")
@@ -271,6 +285,7 @@ def _add_file_functions(lines, data_lib):
     lines.append("    delimiter = file_config.get('delimiter', ',')")
     lines.append("    header = file_config.get('header', True)")
     lines.append("    encoding = file_config.get('encoding', 'utf-8')")
+    lines.append("    quotechar = file_config.get('quotechar', '\"')")
     lines.append("")
     lines.append("    os.makedirs(os.path.dirname(file_path) or '.', exist_ok=True)")
     lines.append("    logger.info(f'Writing file: {file_path}')")
@@ -290,7 +305,7 @@ def _add_file_functions(lines, data_lib):
         lines.append("        df.write_csv(file_path, separator=delimiter, has_header=header)")
     elif data_lib == "dask":
         lines.append("    if ext in ('.csv', '.dat', '.txt', ''):")
-        lines.append("        df.compute().to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding)")
+        lines.append("        df.compute().to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding, quotechar=quotechar)")
         lines.append("    elif ext in ('.xlsx', '.xls'):")
         lines.append("        df.compute().to_excel(file_path, header=header, index=False)")
         lines.append("    elif ext == '.json':")
@@ -301,7 +316,7 @@ def _add_file_functions(lines, data_lib):
         lines.append("        df.compute().to_csv(file_path, sep=delimiter, header=header, index=False)")
     else:
         lines.append("    if ext in ('.csv', '.dat', '.txt', ''):")
-        lines.append("        df.to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding)")
+        lines.append("        df.to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding, quotechar=quotechar)")
         lines.append("    elif ext in ('.xlsx', '.xls'):")
         lines.append("        df.to_excel(file_path, header=header, index=False)")
         lines.append("    elif ext == '.json':")
@@ -135,6 +135,46 @@ def _inline_mapplets(mapping, folder):
     return extra_transforms, extra_connectors + rewired_connectors, mapplet_instances


+def _build_session_conn_overrides(mapping, folder):
+    overrides = {}
+    for session in folder.sessions:
+        if session.mapping_name != mapping.name:
+            continue
+        for sti in session.transform_instances:
+            inst_name = sti.instance_name or sti.transformation_name
+            for conn_ref in sti.connections:
+                conn_key = conn_ref.connection_name or conn_ref.variable
+                if conn_key:
+                    overrides[inst_name] = {
+                        "connection_name": conn_ref.connection_name,
+                        "connection_type": conn_ref.connection_type,
+                        "connection_subtype": conn_ref.connection_subtype,
+                        "variable": conn_ref.variable,
+                    }
+            for attr in sti.attributes:
+                if attr.name == "Connection Information" and attr.value:
+                    if inst_name not in overrides:
+                        overrides[inst_name] = {}
+                    overrides[inst_name]["connection_info"] = attr.value
+                elif attr.name == "Source File Directory" and attr.value:
+                    if inst_name not in overrides:
+                        overrides[inst_name] = {}
+                    overrides[inst_name]["source_file_directory"] = attr.value
+                elif attr.name == "Source filename" and attr.value:
+                    if inst_name not in overrides:
+                        overrides[inst_name] = {}
+                    overrides[inst_name]["source_filename"] = attr.value
+                elif attr.name == "Output File Directory" and attr.value:
+                    if inst_name not in overrides:
+                        overrides[inst_name] = {}
+                    overrides[inst_name]["output_file_directory"] = attr.value
+                elif attr.name == "Output filename" and attr.value:
+                    if inst_name not in overrides:
+                        overrides[inst_name] = {}
+                    overrides[inst_name]["output_filename"] = attr.value
+    return overrides
+
+
 def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
                           data_lib: str = "pandas", mapping_index: int = 1) -> str:
     lines = []
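The dict `_build_session_conn_overrides` returns is keyed by transformation instance name; connection references and session attributes merge into one entry per instance. A hypothetical example of its shape (all names invented):

```python
overrides = {
    # From a session-level connection reference on a relational reader instance:
    "SQ_CUSTOMERS": {
        "connection_name": "ORA_DWH",
        "connection_type": "Relational",
        "connection_subtype": "Oracle",
        "variable": None,
    },
    # From 'Output File Directory' / 'Output filename' session attributes:
    "T_CUSTOMER_FILE": {
        "output_file_directory": "/data/out",
        "output_filename": "customers.dat",
    },
}
```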
@@ -151,15 +191,20 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
     inlined_transforms, inlined_connectors, mapplet_instance_names = _inline_mapplets(mapping, folder)

     all_transforms = list(mapping.transformations) + inlined_transforms
-    all_connectors = [c for c in mapping.connectors
-                      if c.from_instance not in mapplet_instance_names
-                      and c.to_instance not in mapplet_instance_names] + inlined_connectors
+    if mapplet_instance_names:
+        kept_originals = [c for c in mapping.connectors
+                          if c.from_instance not in mapplet_instance_names
+                          and c.to_instance not in mapplet_instance_names]
+        all_connectors = kept_originals + inlined_connectors
+    else:
+        all_connectors = list(mapping.connectors)

     source_map = _build_source_map(mapping, folder)
     target_map = _build_target_map(mapping, folder)
     transform_map = {t.name: t for t in all_transforms}
     connector_graph = _build_connector_graph(all_connectors)
     instance_map = {i.name: i for i in mapping.instances}
+    session_overrides = _build_session_conn_overrides(mapping, folder)

     lines.append(f"def run_{_safe_name(mapping.name)}(config):")
     lines.append(f'    """Execute mapping: {mapping.name}"""')
@@ -176,6 +221,11 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
         lines.append(f"    {safe_var} = {default}")
     lines.append("")

+    if session_overrides:
+        lines.append("    # Session connection overrides")
+        lines.append(f"    _sess_overrides = {repr(session_overrides)}")
+        lines.append("")
+
     source_dfs = {}
     for src_name, src_def in source_map.items():
         safe = _safe_name(src_name)
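Inlining the overrides with `repr()` is safe here because the dict holds only strings and `None`, which round-trip through `repr` as valid Python literals. A quick illustration (values invented):

```python
overrides = {"SQ_ORDERS": {"connection_name": "ORA_DWH", "variable": None}}
line = f"    _sess_overrides = {repr(overrides)}"
print(line)
# Output (itself valid Python, ready to land in the generated module):
#     _sess_overrides = {'SQ_ORDERS': {'connection_name': 'ORA_DWH', 'variable': None}}
```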
@@ -185,13 +235,23 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
                      if t.type in ("Source Qualifier", "Application Source Qualifier")]
     if sq_transforms:
         for sq in sq_transforms:
-            _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map)
+            _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides)
     else:
         for src_name, src_def in source_map.items():
             safe = _safe_name(src_name)
+            override = session_overrides.get(src_name, {})
             lines.append(f"    # Read source: {src_name}")
-            if src_def.database_type and src_def.database_type != "Flat File":
-                conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
+            if override.get("source_file_directory") or override.get("source_filename"):
+                src_dir = override.get("source_file_directory", ".")
+                src_file = override.get("source_filename", src_def.name)
+                lines.append(f"    _src_path_{safe} = config.get('sources', {{}}).get('{src_def.name}', {{}}).get('file_path',")
+                lines.append(f"        os.path.join('{src_dir}', '{src_file}'))")
+                if src_def.flatfile:
+                    _emit_flatfile_read(lines, safe, src_def)
+                else:
+                    lines.append(f"    df_{safe} = read_file(_src_path_{safe}, config.get('sources', {{}}).get('{src_def.name}', {{}}))")
+            elif src_def.database_type and src_def.database_type != "Flat File":
+                conn_name = override.get("connection_name") or (_safe_name(src_def.db_name) if src_def.db_name else "default")
                 schema = src_def.owner_name or "dbo"
                 lines.append(f"    df_{safe} = read_from_db(config, 'SELECT * FROM {schema}.{src_name}', '{conn_name}')")
             elif src_def.flatfile:
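For a source with a session-level file override, the emitted lines would render roughly as below. Note the precedence: an explicit `file_path` in the runtime config still beats the session's directory/filename, because the session path is only the `.get()` default. Names and paths here are invented, and `read_file` is stubbed:

```python
import os

config = {'sources': {}}  # no runtime override, so the session path wins

def read_file(path, file_config):  # stand-in for the generated helper
    print(f'would read {path} using {file_config}')

# Roughly what run_<mapping>() now contains for a hypothetical source CUSTOMERS:
_src_path_customers = config.get('sources', {}).get('CUSTOMERS', {}).get('file_path',
    os.path.join('/data/in', 'customers.dat'))
df_customers = read_file(_src_path_customers, config.get('sources', {}).get('CUSTOMERS', {}))
```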
@@ -209,7 +269,7 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
         _generate_transformation(lines, tx, connector_graph, source_dfs, transform_map, instance_map)

     for tgt_name, tgt_def in target_map.items():
-        _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map)
+        _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides)

     lines.append("")
     lines.append(f"    log_mapping_end('{mapping.name}', start_time)")
@@ -401,7 +461,7 @@ def _get_processing_order(transformations, connector_graph, sq_transforms):
     return ordered


-def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map):
+def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_graph, instance_map, session_overrides=None):
     sq_safe = _safe_name(sq.name)
     sql_override = ""
     pre_sql = ""
@@ -438,7 +498,8 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
     if sql_override:
         src_name = next(iter(connected_sources)) if connected_sources else "source"
         src_def = source_map.get(src_name, SourceDef(name=src_name))
-        conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
+        sq_override = (session_overrides or {}).get(sq.name, {}) or (session_overrides or {}).get(src_name, {})
+        conn_name = sq_override.get("connection_name") or (_safe_name(src_def.db_name) if src_def.db_name else "default")

         lines.append(f"    sql_{sq_safe} = '''")
         for sql_line in sql_override.strip().split("\n"):
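The `or`-chained lookup gives the qualifier-name entry priority and falls back to the source-name entry whenever the first `.get()` comes back empty, since an empty dict is falsy:

```python
session_overrides = {"CUSTOMERS": {"connection_name": "ORA_DWH"}}

# No entry under the qualifier's name, so .get() yields {} and the
# source-name entry is used instead:
sq_override = (session_overrides or {}).get("SQ_CUSTOMERS", {}) \
    or (session_overrides or {}).get("CUSTOMERS", {})
print(sq_override)  # {'connection_name': 'ORA_DWH'}
```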
@@ -450,8 +511,9 @@ def _generate_source_qualifier(lines, sq, source_map, source_dfs, connector_grap
         src_name = next(iter(connected_sources))
         src_def = source_map.get(src_name, SourceDef(name=src_name))
         safe_src = _safe_name(src_name)
+        src_override = (session_overrides or {}).get(sq.name, {}) or (session_overrides or {}).get(src_name, {})
         if src_def.database_type and src_def.database_type != "Flat File":
-            conn_name = _safe_name(src_def.db_name) if src_def.db_name else "default"
+            conn_name = src_override.get("connection_name") or (_safe_name(src_def.db_name) if src_def.db_name else "default")
             schema = src_def.owner_name or "dbo"
             cols = ", ".join(f.name for f in src_def.fields) if src_def.fields else "*"
             lines.append(f"    df_{sq_safe} = read_from_db(config, 'SELECT {cols} FROM {schema}.{src_def.name}', '{conn_name}')")
@@ -996,21 +1058,31 @@ def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):

     ascending = top_bottom.upper() != "TOP"

+    rank_out_field = "RANKINDEX"
+    for fld in tx.fields:
+        if fld.name.upper() == "RANKINDEX" or "RANK" in fld.name.upper():
+            pt = (fld.porttype or "").upper()
+            if "OUTPUT" in pt and "INPUT" not in pt:
+                rank_out_field = fld.name
+                break
+
     lines.append(f"    df_{tx_safe} = {input_df}.copy()")
     if rank_port and group_by_ports:
         lines.append(f"    # Rank by '{rank_port}' within groups {group_by_ports}")
-        lines.append(f"    df_{tx_safe}['RANKINDEX'] = df_{tx_safe}.groupby({group_by_ports})['{rank_port}'].rank(")
+        lines.append(f"    _rank_vals = df_{tx_safe}.groupby({group_by_ports})['{rank_port}'].rank(")
         lines.append(f"        method='min', ascending={ascending}")
-        lines.append(f"    ).astype(int)")
+        lines.append(f"    )")
+        lines.append(f"    df_{tx_safe}['{rank_out_field}'] = _rank_vals.fillna(0).astype(int)")
         if top_n:
-            lines.append(f"    df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['RANKINDEX'] <= {top_n}].reset_index(drop=True)")
+            lines.append(f"    df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['{rank_out_field}'] <= {top_n}].reset_index(drop=True)")
     elif rank_port:
         lines.append(f"    # Rank by '{rank_port}' (no group-by)")
-        lines.append(f"    df_{tx_safe}['RANKINDEX'] = df_{tx_safe}['{rank_port}'].rank(method='min', ascending={ascending}).astype(int)")
+        lines.append(f"    _rank_vals = df_{tx_safe}['{rank_port}'].rank(method='min', ascending={ascending})")
+        lines.append(f"    df_{tx_safe}['{rank_out_field}'] = _rank_vals.fillna(0).astype(int)")
         if top_n:
-            lines.append(f"    df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['RANKINDEX'] <= {top_n}].reset_index(drop=True)")
+            lines.append(f"    df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['{rank_out_field}'] <= {top_n}].reset_index(drop=True)")
     else:
-        lines.append(f"    df_{tx_safe}['RANKINDEX'] = range(1, len(df_{tx_safe}) + 1)")
+        lines.append(f"    df_{tx_safe}['{rank_out_field}'] = range(1, len(df_{tx_safe}) + 1)")
     source_dfs[tx.name] = f"df_{tx_safe}"

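The `_rank_vals` intermediate plus `fillna(0)` fixes a real crash path: `rank()` leaves NaN wherever the ranked value is NaN, and `.astype(int)` raises on non-finite values. A standalone pandas illustration:

```python
import pandas as pd

df = pd.DataFrame({'grp': ['a', 'a', 'b'], 'amt': [10.0, None, 7.0]})
ranks = df.groupby('grp')['amt'].rank(method='min', ascending=False)
print(ranks.tolist())                 # [1.0, nan, 1.0]
# ranks.astype(int) would raise: cannot convert non-finite values to integer
df['RANKINDEX'] = ranks.fillna(0).astype(int)
print(df['RANKINDEX'].tolist())       # [1, 0, 1]
```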
@@ -1087,7 +1159,7 @@ def _gen_sql_transform(lines, tx, tx_safe, input_df, source_dfs):
     source_dfs[tx.name] = f"df_{tx_safe}"


-def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map):
+def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs, transform_map, instance_map, session_overrides=None):
     tgt_safe = _safe_name(tgt_name)

     to_conns = connector_graph.get("to", {}).get(tgt_name, [])
@@ -1119,10 +1191,92 @@ def _generate_target_write(lines, tgt_name, tgt_def, connector_graph, source_dfs
     else:
         lines.append(f"    df_target_{tgt_safe} = {input_df}")

-    if tgt_def.database_type and tgt_def.database_type != "Flat File":
-        lines.append(f"    write_to_db(config, df_target_{tgt_safe}, '{tgt_def.name}', 'target')")
+    _emit_type_casting(lines, tgt_safe, tgt_def)
+
+    tgt_override = (session_overrides or {}).get(tgt_name, {})
+    tgt_conn = tgt_override.get("connection_name")
+
+    if tgt_override.get("output_file_directory") or tgt_override.get("output_filename"):
+        out_dir = tgt_override.get("output_file_directory", ".")
+        out_file = tgt_override.get("output_filename", tgt_def.name)
+        lines.append(f"    _tgt_path_{tgt_safe} = config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path',")
+        lines.append(f"        os.path.join('{out_dir}', '{out_file}'))")
+        if tgt_def.flatfile:
+            _emit_flatfile_write(lines, tgt_safe, tgt_def)
+        else:
+            lines.append(f"    write_file(df_target_{tgt_safe}, _tgt_path_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
+    elif tgt_def.database_type and tgt_def.database_type != "Flat File":
+        conn_label = tgt_conn or "target"
+        lines.append(f"    write_to_db(config, df_target_{tgt_safe}, '{tgt_def.name}', '{conn_label}')")
     elif tgt_def.flatfile:
         _emit_flatfile_write(lines, tgt_safe, tgt_def)
     else:
         lines.append(f"    write_file(df_target_{tgt_safe}, config.get('targets', {{}}).get('{tgt_def.name}', {{}}).get('file_path', '{tgt_def.name}'),")
         lines.append(f"        config.get('targets', {{}}).get('{tgt_def.name}', {{}}))")
+
+
+CAST_MAP = {
+    "bigint": ("int", "Int64"),
+    "integer": ("int", "Int32"),
+    "int": ("int", "Int32"),
+    "small integer": ("int", "Int16"),
+    "smallint": ("int", "Int16"),
+    "tinyint": ("int", "Int8"),
+    "numeric": ("float", "float64"),
+    "decimal": ("float", "float64"),
+    "float": ("float", "float64"),
+    "double": ("float", "float64"),
+    "real": ("float", "float32"),
+    "money": ("float", "float64"),
+    "smallmoney": ("float", "float64"),
+    "string": ("str", "object"),
+    "nstring": ("str", "object"),
+    "text": ("str", "object"),
+    "ntext": ("str", "object"),
+    "varchar": ("str", "object"),
+    "nvarchar": ("str", "object"),
+    "char": ("str", "object"),
+    "nchar": ("str", "object"),
+    "date/time": ("str", "datetime64[ns]"),
+    "datetime": ("str", "datetime64[ns]"),
+    "datetime2": ("str", "datetime64[ns]"),
+    "date": ("str", "datetime64[ns]"),
+    "timestamp": ("str", "datetime64[ns]"),
+    "bit": ("bool", "boolean"),
+    "boolean": ("bool", "boolean"),
+}
+
+
+def _emit_type_casting(lines, tgt_safe, tgt_def):
+    cast_ops = []
+    for fld in tgt_def.fields:
+        dt_key = fld.datatype.lower().strip()
+        if dt_key not in CAST_MAP:
+            continue
+        py_type, pd_dtype = CAST_MAP[dt_key]
+        if pd_dtype in ("datetime64[ns]",):
+            cast_ops.append((fld.name, "datetime", pd_dtype, fld.nullable == "NULL"))
+        elif pd_dtype in ("Int64", "Int32", "Int16", "Int8"):
+            cast_ops.append((fld.name, "int", pd_dtype, fld.nullable == "NULL"))
+        elif pd_dtype in ("float64", "float32"):
+            cast_ops.append((fld.name, "float", pd_dtype, fld.nullable == "NULL"))
+        elif pd_dtype == "boolean":
+            cast_ops.append((fld.name, "bool", pd_dtype, fld.nullable == "NULL"))
+
+    if not cast_ops:
+        return
+
+    lines.append(f"    # Type casting for target fields")
+    for col_name, cast_type, pd_dtype, nullable in cast_ops:
+        lines.append(f"    if '{col_name}' in df_target_{tgt_safe}.columns:")
+        if cast_type == "datetime":
+            lines.append(f"        df_target_{tgt_safe}['{col_name}'] = pd.to_datetime(df_target_{tgt_safe}['{col_name}'], errors='coerce')")
+        elif cast_type == "int":
+            if nullable:
+                lines.append(f"        df_target_{tgt_safe}['{col_name}'] = pd.to_numeric(df_target_{tgt_safe}['{col_name}'], errors='coerce').astype('{pd_dtype}')")
+            else:
+                lines.append(f"        df_target_{tgt_safe}['{col_name}'] = pd.to_numeric(df_target_{tgt_safe}['{col_name}'], errors='coerce').fillna(0).astype(int)")
+        elif cast_type == "float":
+            lines.append(f"        df_target_{tgt_safe}['{col_name}'] = pd.to_numeric(df_target_{tgt_safe}['{col_name}'], errors='coerce')")
+        elif cast_type == "bool":
+            lines.append(f"        df_target_{tgt_safe}['{col_name}'] = df_target_{tgt_safe}['{col_name}'].astype('{pd_dtype}')")