informatica-python 1.5.0__tar.gz → 1.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {informatica_python-1.5.0 → informatica_python-1.5.1}/PKG-INFO +1 -1
  2. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/__init__.py +1 -1
  3. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/cli.py +1 -0
  4. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/converter.py +6 -4
  5. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/mapping_gen.py +18 -9
  6. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/workflow_gen.py +6 -1
  7. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/utils/expression_converter.py +16 -9
  8. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/PKG-INFO +1 -1
  9. {informatica_python-1.5.0 → informatica_python-1.5.1}/pyproject.toml +1 -1
  10. {informatica_python-1.5.0 → informatica_python-1.5.1}/tests/test_integration.py +22 -0
  11. {informatica_python-1.5.0 → informatica_python-1.5.1}/LICENSE +0 -0
  12. {informatica_python-1.5.0 → informatica_python-1.5.1}/README.md +0 -0
  13. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/__init__.py +0 -0
  14. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/config_gen.py +0 -0
  15. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/error_log_gen.py +0 -0
  16. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/helper_gen.py +0 -0
  17. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/generators/sql_gen.py +0 -0
  18. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/models.py +0 -0
  19. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/parser.py +0 -0
  20. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/utils/__init__.py +0 -0
  21. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/utils/datatype_map.py +0 -0
  22. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python/utils/lib_adapters.py +0 -0
  23. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/SOURCES.txt +0 -0
  24. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/dependency_links.txt +0 -0
  25. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/entry_points.txt +0 -0
  26. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/requires.txt +0 -0
  27. {informatica_python-1.5.0 → informatica_python-1.5.1}/informatica_python.egg-info/top_level.txt +0 -0
  28. {informatica_python-1.5.0 → informatica_python-1.5.1}/setup.cfg +0 -0
  29. {informatica_python-1.5.0 → informatica_python-1.5.1}/tests/test_converter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.5.0
3
+ Version: 1.5.1
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -7,7 +7,7 @@ Licensed under the MIT License.
7
7
 
8
8
  from informatica_python.converter import InformaticaConverter
9
9
 
10
- __version__ = "1.5.0"
10
+ __version__ = "1.5.1"
11
11
  __author__ = "Nick"
12
12
  __license__ = "MIT"
13
13
  __all__ = ["InformaticaConverter"]
@@ -66,6 +66,7 @@ def main():
66
66
  args.input_file,
67
67
  output_dir=args.output,
68
68
  output_zip=args.zip,
69
+ param_file=args.param_file,
69
70
  )
70
71
  print(f"Conversion complete! Output: {output_path}")
71
72
  print(f"Files generated:")
@@ -33,7 +33,8 @@ class InformaticaConverter:
33
33
  return self._powermart_to_dict(self.powermart)
34
34
 
35
35
  def convert(self, file_path: str, output_dir: str = "output",
36
- output_zip: Optional[str] = None) -> str:
36
+ output_zip: Optional[str] = None,
37
+ param_file: Optional[str] = None) -> str:
37
38
  self.powermart = self.parser.parse_file(file_path)
38
39
 
39
40
  if not self.powermart.repositories:
@@ -47,7 +48,7 @@ class InformaticaConverter:
47
48
  raise ValueError("No folder found in XML file")
48
49
 
49
50
  if len(all_folders) == 1:
50
- return self._convert_folder(all_folders[0], output_dir, output_zip)
51
+ return self._convert_folder(all_folders[0], output_dir, output_zip, param_file)
51
52
 
52
53
  result_path = output_dir if not output_zip else os.path.dirname(output_zip) or "."
53
54
  for folder in all_folders:
@@ -56,7 +57,7 @@ class InformaticaConverter:
56
57
  if output_zip:
57
58
  base, ext = os.path.splitext(output_zip)
58
59
  folder_zip = f"{base}_{folder.name}{ext}"
59
- self._convert_folder(folder, folder_dir, folder_zip)
60
+ self._convert_folder(folder, folder_dir, folder_zip, param_file)
60
61
  return result_path
61
62
 
62
63
  def convert_string(self, xml_string: str, output_dir: str = "output",
@@ -87,7 +88,8 @@ class InformaticaConverter:
87
88
  return result_path
88
89
 
89
90
  def _convert_folder(self, folder: FolderDef, output_dir: str,
90
- output_zip: Optional[str] = None) -> str:
91
+ output_zip: Optional[str] = None,
92
+ param_file: Optional[str] = None) -> str:
91
93
  files = {}
92
94
 
93
95
  files["helper_functions.py"] = generate_helper_functions(folder, self.data_lib)
@@ -283,7 +283,12 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
283
283
  lines.append("")
284
284
  lines.append("")
285
285
  lines.append("if __name__ == '__main__':")
286
- lines.append(" config = load_config()")
286
+ lines.append(" import argparse as _ap")
287
+ lines.append(" _parser = _ap.ArgumentParser()")
288
+ lines.append(" _parser.add_argument('--param-file', default=None)")
289
+ lines.append(" _parser.add_argument('--config', default='config.yml')")
290
+ lines.append(" _args = _parser.parse_args()")
291
+ lines.append(" config = load_config(_args.config, param_file=_args.param_file)")
287
292
  lines.append(f" run_{_safe_name(mapping.name)}(config)")
288
293
  lines.append("")
289
294
 
@@ -590,9 +595,9 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
590
595
  lines.append(f" # Transformation: {tx.name} (Type: {tx.type})")
591
596
 
592
597
  if tx_type == "expression":
593
- _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs)
598
+ _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
594
599
  elif tx_type == "filter":
595
- _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs)
600
+ _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
596
601
  elif tx_type in ("aggregator",):
597
602
  _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
598
603
  elif tx_type == "sorter":
@@ -630,8 +635,9 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
630
635
  lines.append("")
631
636
 
632
637
 
633
- def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs):
634
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
638
+ def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
639
+ copy_expr = lib_copy(data_lib, input_df)
640
+ lines.append(f" df_{tx_safe} = {copy_expr}")
635
641
  has_expressions = False
636
642
  for fld in tx.fields:
637
643
  if fld.expression and fld.expression.strip() and fld.expression.strip() != fld.name:
@@ -647,7 +653,7 @@ def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs):
647
653
  source_dfs[tx.name] = f"df_{tx_safe}"
648
654
 
649
655
 
650
- def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs):
656
+ def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
651
657
  filter_condition = ""
652
658
  for attr in tx.attributes:
653
659
  if attr.name == "Filter Condition":
@@ -655,9 +661,11 @@ def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs):
655
661
  if filter_condition:
656
662
  expr_vec = convert_filter_vectorized(filter_condition, input_df)
657
663
  lines.append(f" # Filter: {filter_condition}")
658
- lines.append(f" df_{tx_safe} = {input_df}[{expr_vec}].copy()")
664
+ copy_expr = lib_copy(data_lib, f"{input_df}[{expr_vec}]")
665
+ lines.append(f" df_{tx_safe} = {copy_expr}")
659
666
  else:
660
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
667
+ copy_expr = lib_copy(data_lib, input_df)
668
+ lines.append(f" df_{tx_safe} = {copy_expr}")
661
669
  source_dfs[tx.name] = f"df_{tx_safe}"
662
670
 
663
671
 
@@ -845,7 +853,8 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
845
853
  elif lookup_table:
846
854
  lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
847
855
  else:
848
- lines.append(f" df_lkp_{tx_safe} = pd.DataFrame()")
856
+ empty_expr = lib_empty_df(data_lib)
857
+ lines.append(f" df_lkp_{tx_safe} = {empty_expr}")
849
858
 
850
859
  input_keys, lookup_keys = parse_lookup_condition(lookup_condition)
851
860
 
@@ -51,7 +51,12 @@ def generate_workflow_code(folder: FolderDef) -> str:
51
51
 
52
52
  lines.append("")
53
53
  lines.append("if __name__ == '__main__':")
54
- lines.append(" config = load_config()")
54
+ lines.append(" import argparse as _ap")
55
+ lines.append(" _parser = _ap.ArgumentParser()")
56
+ lines.append(" _parser.add_argument('--param-file', default=None)")
57
+ lines.append(" _parser.add_argument('--config', default='config.yml')")
58
+ lines.append(" _args = _parser.parse_args()")
59
+ lines.append(" config = load_config(_args.config, param_file=_args.param_file)")
55
60
  lines.append(" success = run_workflow(config)")
56
61
  lines.append(" sys.exit(0 if success else 1)")
57
62
  lines.append("")
@@ -297,21 +297,28 @@ def _vectorize_value(val, df_var="df"):
297
297
 
298
298
  def _vectorize_condition(cond, df_var="df"):
299
299
  c = cond.strip()
300
+
300
301
  c = re.sub(r'\bISNULL\s*\(\s*([A-Za-z_]\w*)\s*\)',
301
302
  lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
302
- c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
303
- lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
304
303
  c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NOT\s+NULL\b',
305
304
  lambda m: f'{df_var}["{m.group(1)}"].notna()', c, flags=re.IGNORECASE)
306
- c = re.sub(r'(?<!["\w])([A-Za-z_][A-Za-z0-9_]*)(?!["\w(.])',
307
- lambda m: f'{df_var}["{m.group(1)}"]' if m.group(1) not in (
308
- 'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
309
- 'str', 'int', 'float',
310
- ) else m.group(1), c)
311
- c = re.sub(r'<>', '!=', c)
312
- c = re.sub(r'(?<![<>!=])=(?!=)', '==', c)
305
+ c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
306
+ lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
307
+
313
308
  c = re.sub(r'\bAND\b', ' & ', c, flags=re.IGNORECASE)
314
309
  c = re.sub(r'\bOR\b', ' | ', c, flags=re.IGNORECASE)
310
+ c = re.sub(r'\bNOT\s+', ' ~', c, flags=re.IGNORECASE)
311
+ c = re.sub(r'<>', '!=', c)
312
+ c = re.sub(r'(?<![<>!=])=(?!=)', '==', c)
313
+
314
+ skip_words = {
315
+ 'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
316
+ 'str', 'int', 'float', 'isna', 'notna', 'fillna',
317
+ }
318
+ df_base = re.escape(df_var)
319
+ c = re.sub(r'(?<!["\w])(?!' + df_base + r'\b)([A-Za-z_][A-Za-z0-9_]*)(?!["\w(.\[])',
320
+ lambda m: f'{df_var}["{m.group(1)}"]' if m.group(1) not in skip_words else m.group(1),
321
+ c)
315
322
  return c
316
323
 
317
324
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.5.0
3
+ Version: 1.5.1
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "informatica-python"
7
- version = "1.5.0"
7
+ version = "1.5.1"
8
8
  description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -202,15 +202,37 @@ class TestFilterVectorized:
202
202
  result = convert_filter_vectorized("VALUE > 50", "df_src")
203
203
  assert ">" in result
204
204
  assert "50" in result
205
+ assert 'df_src["VALUE"]' in result
206
+
207
+ def test_and_condition(self):
208
+ result = convert_filter_vectorized("A > 1 AND B < 2", "df")
209
+ assert "&" in result
210
+ assert 'df["A"]' in result
211
+ assert 'df["B"]' in result
212
+ assert "AND" not in result
213
+
214
+ def test_or_condition(self):
215
+ result = convert_filter_vectorized("STATUS = 'A' OR STATUS = 'B'", "df")
216
+ assert "|" in result
205
217
 
206
218
  def test_is_null_filter(self):
207
219
  result = convert_filter_vectorized("NAME IS NULL", "df_src")
208
220
  assert ".isna()" in result
221
+ assert 'df_src["NAME"]' in result
222
+
223
+ def test_is_not_null_filter(self):
224
+ result = convert_filter_vectorized("NAME IS NOT NULL", "df_src")
225
+ assert ".notna()" in result
209
226
 
210
227
  def test_empty_filter(self):
211
228
  assert convert_filter_vectorized("") == "True"
212
229
  assert convert_filter_vectorized(None) == "True"
213
230
 
231
+ def test_compound_iif(self):
232
+ result = convert_expression_vectorized("IIF(A > 1 AND B < 2, 1, 0)", "df")
233
+ assert "np.where" in result
234
+ assert "&" in result
235
+
214
236
 
215
237
  class TestLibAdapters:
216
238