informatica-python 1.3.0__tar.gz → 1.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {informatica_python-1.3.0 → informatica_python-1.3.1}/PKG-INFO +1 -1
  2. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python/generators/helper_gen.py +19 -4
  3. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python/generators/mapping_gen.py +23 -9
  4. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python/generators/workflow_gen.py +4 -6
  5. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python.egg-info/PKG-INFO +1 -1
  6. {informatica_python-1.3.0 → informatica_python-1.3.1}/pyproject.toml +1 -1
  7. {informatica_python-1.3.0 → informatica_python-1.3.1}/README.md +0 -0
  8. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python/__init__.py +0 -0
  9. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python/cli.py +0 -0
  10. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python/converter.py +0 -0
  11. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python/generators/__init__.py +0 -0
  12. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python/generators/config_gen.py +0 -0
  13. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python/generators/error_log_gen.py +0 -0
  14. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python/generators/sql_gen.py +0 -0
  15. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python/models.py +0 -0
  16. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python/parser.py +0 -0
  17. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python/utils/__init__.py +0 -0
  18. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python/utils/datatype_map.py +0 -0
  19. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python/utils/expression_converter.py +0 -0
  20. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python.egg-info/SOURCES.txt +0 -0
  21. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python.egg-info/dependency_links.txt +0 -0
  22. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python.egg-info/entry_points.txt +0 -0
  23. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python.egg-info/requires.txt +0 -0
  24. {informatica_python-1.3.0 → informatica_python-1.3.1}/informatica_python.egg-info/top_level.txt +0 -0
  25. {informatica_python-1.3.0 → informatica_python-1.3.1}/setup.cfg +0 -0
  26. {informatica_python-1.3.0 → informatica_python-1.3.1}/tests/test_converter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.3.0
3
+ Version: 1.3.1
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  License-Expression: MIT
6
6
  Requires-Python: >=3.8
@@ -208,7 +208,11 @@ def _add_file_functions(lines, data_lib):
208
208
  lines.append(" delimiter = file_config.get('delimiter', ',')")
209
209
  lines.append(" header = file_config.get('header', True)")
210
210
  lines.append(" encoding = file_config.get('encoding', 'utf-8')")
211
- lines.append(" header_row = 0 if header else None")
211
+ lines.append(" header_row = file_config.get('header_row', 0 if header else None)")
212
+ lines.append(" skip_rows = file_config.get('skip_rows', 0)")
213
+ lines.append(" quotechar = file_config.get('quotechar', '\"')")
214
+ lines.append(" escapechar = file_config.get('escapechar', None)")
215
+ lines.append(" lineterminator = file_config.get('lineterminator', None)")
212
216
  lines.append("")
213
217
  lines.append(" logger.info(f'Reading file: {file_path} (ext={ext})')")
214
218
  lines.append("")
@@ -245,7 +249,17 @@ def _add_file_functions(lines, data_lib):
245
249
  lines.append(" return dd.read_csv(file_path, sep=delimiter, header=header_row)")
246
250
  else:
247
251
  lines.append(" if ext in ('.csv', '.dat', '.txt', ''):")
248
- lines.append(" return pd.read_csv(file_path, sep=delimiter, header=header_row, encoding=encoding)")
252
+ lines.append(" csv_kwargs = dict(sep=delimiter, header=header_row, encoding=encoding,")
253
+ lines.append(" quotechar=quotechar, escapechar=escapechar)")
254
+ lines.append(" if skip_rows:")
255
+ lines.append(" csv_kwargs['skiprows'] = skip_rows")
256
+ lines.append(" if lineterminator:")
257
+ lines.append(" csv_kwargs['lineterminator'] = lineterminator")
258
+ lines.append(" df = pd.read_csv(file_path, **csv_kwargs)")
259
+ lines.append(" if file_config.get('strip_trailing_blanks'):")
260
+ lines.append(" str_cols = df.select_dtypes(include=['object']).columns")
261
+ lines.append(" df[str_cols] = df[str_cols].apply(lambda c: c.str.rstrip())")
262
+ lines.append(" return df")
249
263
  lines.append(" elif ext in ('.xlsx', '.xls'):")
250
264
  lines.append(" return pd.read_excel(file_path, header=header_row)")
251
265
  lines.append(" elif ext == '.xml':")
@@ -271,6 +285,7 @@ def _add_file_functions(lines, data_lib):
271
285
  lines.append(" delimiter = file_config.get('delimiter', ',')")
272
286
  lines.append(" header = file_config.get('header', True)")
273
287
  lines.append(" encoding = file_config.get('encoding', 'utf-8')")
288
+ lines.append(" quotechar = file_config.get('quotechar', '\"')")
274
289
  lines.append("")
275
290
  lines.append(" os.makedirs(os.path.dirname(file_path) or '.', exist_ok=True)")
276
291
  lines.append(" logger.info(f'Writing file: {file_path}')")
@@ -290,7 +305,7 @@ def _add_file_functions(lines, data_lib):
290
305
  lines.append(" df.write_csv(file_path, separator=delimiter, has_header=header)")
291
306
  elif data_lib == "dask":
292
307
  lines.append(" if ext in ('.csv', '.dat', '.txt', ''):")
293
- lines.append(" df.compute().to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding)")
308
+ lines.append(" df.compute().to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding, quotechar=quotechar)")
294
309
  lines.append(" elif ext in ('.xlsx', '.xls'):")
295
310
  lines.append(" df.compute().to_excel(file_path, header=header, index=False)")
296
311
  lines.append(" elif ext == '.json':")
@@ -301,7 +316,7 @@ def _add_file_functions(lines, data_lib):
301
316
  lines.append(" df.compute().to_csv(file_path, sep=delimiter, header=header, index=False)")
302
317
  else:
303
318
  lines.append(" if ext in ('.csv', '.dat', '.txt', ''):")
304
- lines.append(" df.to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding)")
319
+ lines.append(" df.to_csv(file_path, sep=delimiter, header=header, index=False, encoding=encoding, quotechar=quotechar)")
305
320
  lines.append(" elif ext in ('.xlsx', '.xls'):")
306
321
  lines.append(" df.to_excel(file_path, header=header, index=False)")
307
322
  lines.append(" elif ext == '.json':")
@@ -151,9 +151,13 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
151
151
  inlined_transforms, inlined_connectors, mapplet_instance_names = _inline_mapplets(mapping, folder)
152
152
 
153
153
  all_transforms = list(mapping.transformations) + inlined_transforms
154
- all_connectors = [c for c in mapping.connectors
155
- if c.from_instance not in mapplet_instance_names
156
- and c.to_instance not in mapplet_instance_names] + inlined_connectors
154
+ if mapplet_instance_names:
155
+ kept_originals = [c for c in mapping.connectors
156
+ if c.from_instance not in mapplet_instance_names
157
+ and c.to_instance not in mapplet_instance_names]
158
+ all_connectors = kept_originals + inlined_connectors
159
+ else:
160
+ all_connectors = list(mapping.connectors)
157
161
 
158
162
  source_map = _build_source_map(mapping, folder)
159
163
  target_map = _build_target_map(mapping, folder)
@@ -996,21 +1000,31 @@ def _gen_rank_transform(lines, tx, tx_safe, input_df, source_dfs):
996
1000
 
997
1001
  ascending = top_bottom.upper() != "TOP"
998
1002
 
1003
+ rank_out_field = "RANKINDEX"
1004
+ for fld in tx.fields:
1005
+ if fld.name.upper() == "RANKINDEX" or "RANK" in fld.name.upper():
1006
+ pt = (fld.porttype or "").upper()
1007
+ if "OUTPUT" in pt and "INPUT" not in pt:
1008
+ rank_out_field = fld.name
1009
+ break
1010
+
999
1011
  lines.append(f" df_{tx_safe} = {input_df}.copy()")
1000
1012
  if rank_port and group_by_ports:
1001
1013
  lines.append(f" # Rank by '{rank_port}' within groups {group_by_ports}")
1002
- lines.append(f" df_{tx_safe}['RANKINDEX'] = df_{tx_safe}.groupby({group_by_ports})['{rank_port}'].rank(")
1014
+ lines.append(f" _rank_vals = df_{tx_safe}.groupby({group_by_ports})['{rank_port}'].rank(")
1003
1015
  lines.append(f" method='min', ascending={ascending}")
1004
- lines.append(f" ).astype(int)")
1016
+ lines.append(f" )")
1017
+ lines.append(f" df_{tx_safe}['{rank_out_field}'] = _rank_vals.fillna(0).astype(int)")
1005
1018
  if top_n:
1006
- lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['RANKINDEX'] <= {top_n}].reset_index(drop=True)")
1019
+ lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['{rank_out_field}'] <= {top_n}].reset_index(drop=True)")
1007
1020
  elif rank_port:
1008
1021
  lines.append(f" # Rank by '{rank_port}' (no group-by)")
1009
- lines.append(f" df_{tx_safe}['RANKINDEX'] = df_{tx_safe}['{rank_port}'].rank(method='min', ascending={ascending}).astype(int)")
1022
+ lines.append(f" _rank_vals = df_{tx_safe}['{rank_port}'].rank(method='min', ascending={ascending})")
1023
+ lines.append(f" df_{tx_safe}['{rank_out_field}'] = _rank_vals.fillna(0).astype(int)")
1010
1024
  if top_n:
1011
- lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['RANKINDEX'] <= {top_n}].reset_index(drop=True)")
1025
+ lines.append(f" df_{tx_safe} = df_{tx_safe}[df_{tx_safe}['{rank_out_field}'] <= {top_n}].reset_index(drop=True)")
1012
1026
  else:
1013
- lines.append(f" df_{tx_safe}['RANKINDEX'] = range(1, len(df_{tx_safe}) + 1)")
1027
+ lines.append(f" df_{tx_safe}['{rank_out_field}'] = range(1, len(df_{tx_safe}) + 1)")
1014
1028
  source_dfs[tx.name] = f"df_{tx_safe}"
1015
1029
 
1016
1030
 
@@ -147,13 +147,11 @@ def _generate_workflow_function(lines, wf: WorkflowDef, folder: FolderDef):
147
147
  fail_targets = []
148
148
  for link in wf.links:
149
149
  if link.from_instance == task.name:
150
- cond_text = (link.condition or "").strip()
151
- if cond_text and ("$" in cond_text or "SUCCEEDED" in cond_text.upper()
152
- or "TRUE" in cond_text.upper()):
153
- succ_targets.append(link.to_instance)
154
- elif cond_text and ("FAILED" in cond_text.upper()
155
- or "FALSE" in cond_text.upper()):
150
+ cond_text = (link.condition or "").strip().upper()
151
+ if "FAILED" in cond_text or "FALSE" in cond_text:
156
152
  fail_targets.append(link.to_instance)
153
+ elif "SUCCEEDED" in cond_text or "TRUE" in cond_text or cond_text:
154
+ succ_targets.append(link.to_instance)
157
155
  else:
158
156
  succ_targets.append(link.to_instance)
159
157
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.3.0
3
+ Version: 1.3.1
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  License-Expression: MIT
6
6
  Requires-Python: >=3.8
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "informatica-python"
7
- version = "1.3.0"
7
+ version = "1.3.1"
8
8
  description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
9
9
  readme = "README.md"
10
10
  license = "MIT"