informatica-python 1.5.0__tar.gz → 1.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {informatica_python-1.5.0 → informatica_python-1.5.2}/PKG-INFO +1 -1
  2. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/__init__.py +1 -1
  3. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/cli.py +1 -0
  4. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/converter.py +10 -4
  5. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/mapping_gen.py +18 -9
  6. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/workflow_gen.py +6 -1
  7. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/utils/expression_converter.py +54 -11
  8. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/PKG-INFO +1 -1
  9. {informatica_python-1.5.0 → informatica_python-1.5.2}/pyproject.toml +1 -1
  10. {informatica_python-1.5.0 → informatica_python-1.5.2}/tests/test_integration.py +31 -0
  11. {informatica_python-1.5.0 → informatica_python-1.5.2}/LICENSE +0 -0
  12. {informatica_python-1.5.0 → informatica_python-1.5.2}/README.md +0 -0
  13. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/__init__.py +0 -0
  14. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/config_gen.py +0 -0
  15. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/error_log_gen.py +0 -0
  16. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/helper_gen.py +0 -0
  17. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/generators/sql_gen.py +0 -0
  18. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/models.py +0 -0
  19. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/parser.py +0 -0
  20. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/utils/__init__.py +0 -0
  21. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/utils/datatype_map.py +0 -0
  22. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python/utils/lib_adapters.py +0 -0
  23. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/SOURCES.txt +0 -0
  24. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/dependency_links.txt +0 -0
  25. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/entry_points.txt +0 -0
  26. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/requires.txt +0 -0
  27. {informatica_python-1.5.0 → informatica_python-1.5.2}/informatica_python.egg-info/top_level.txt +0 -0
  28. {informatica_python-1.5.0 → informatica_python-1.5.2}/setup.cfg +0 -0
  29. {informatica_python-1.5.0 → informatica_python-1.5.2}/tests/test_converter.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.5.0
3
+ Version: 1.5.2
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -7,7 +7,7 @@ Licensed under the MIT License.
7
7
 
8
8
  from informatica_python.converter import InformaticaConverter
9
9
 
10
- __version__ = "1.5.0"
10
+ __version__ = "1.5.2"
11
11
  __author__ = "Nick"
12
12
  __license__ = "MIT"
13
13
  __all__ = ["InformaticaConverter"]
@@ -66,6 +66,7 @@ def main():
66
66
  args.input_file,
67
67
  output_dir=args.output,
68
68
  output_zip=args.zip,
69
+ param_file=args.param_file,
69
70
  )
70
71
  print(f"Conversion complete! Output: {output_path}")
71
72
  print(f"Files generated:")
@@ -33,7 +33,8 @@ class InformaticaConverter:
33
33
  return self._powermart_to_dict(self.powermart)
34
34
 
35
35
  def convert(self, file_path: str, output_dir: str = "output",
36
- output_zip: Optional[str] = None) -> str:
36
+ output_zip: Optional[str] = None,
37
+ param_file: Optional[str] = None) -> str:
37
38
  self.powermart = self.parser.parse_file(file_path)
38
39
 
39
40
  if not self.powermart.repositories:
@@ -47,7 +48,7 @@ class InformaticaConverter:
47
48
  raise ValueError("No folder found in XML file")
48
49
 
49
50
  if len(all_folders) == 1:
50
- return self._convert_folder(all_folders[0], output_dir, output_zip)
51
+ return self._convert_folder(all_folders[0], output_dir, output_zip, param_file)
51
52
 
52
53
  result_path = output_dir if not output_zip else os.path.dirname(output_zip) or "."
53
54
  for folder in all_folders:
@@ -56,7 +57,7 @@ class InformaticaConverter:
56
57
  if output_zip:
57
58
  base, ext = os.path.splitext(output_zip)
58
59
  folder_zip = f"{base}_{folder.name}{ext}"
59
- self._convert_folder(folder, folder_dir, folder_zip)
60
+ self._convert_folder(folder, folder_dir, folder_zip, param_file)
60
61
  return result_path
61
62
 
62
63
  def convert_string(self, xml_string: str, output_dir: str = "output",
@@ -87,7 +88,12 @@ class InformaticaConverter:
87
88
  return result_path
88
89
 
89
90
  def _convert_folder(self, folder: FolderDef, output_dir: str,
90
- output_zip: Optional[str] = None) -> str:
91
+ output_zip: Optional[str] = None,
92
+ param_file: Optional[str] = None) -> str:
93
+ if param_file:
94
+ from informatica_python.utils.expression_converter import parse_param_file
95
+ parse_param_file(param_file)
96
+
91
97
  files = {}
92
98
 
93
99
  files["helper_functions.py"] = generate_helper_functions(folder, self.data_lib)
@@ -283,7 +283,12 @@ def generate_mapping_code(mapping: MappingDef, folder: FolderDef,
283
283
  lines.append("")
284
284
  lines.append("")
285
285
  lines.append("if __name__ == '__main__':")
286
- lines.append(" config = load_config()")
286
+ lines.append(" import argparse as _ap")
287
+ lines.append(" _parser = _ap.ArgumentParser()")
288
+ lines.append(" _parser.add_argument('--param-file', default=None)")
289
+ lines.append(" _parser.add_argument('--config', default='config.yml')")
290
+ lines.append(" _args = _parser.parse_args()")
291
+ lines.append(" config = load_config(_args.config, param_file=_args.param_file)")
287
292
  lines.append(f" run_{_safe_name(mapping.name)}(config)")
288
293
  lines.append("")
289
294
 
@@ -590,9 +595,9 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
590
595
  lines.append(f" # Transformation: {tx.name} (Type: {tx.type})")
591
596
 
592
597
  if tx_type == "expression":
593
- _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs)
598
+ _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
594
599
  elif tx_type == "filter":
595
- _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs)
600
+ _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
596
601
  elif tx_type in ("aggregator",):
597
602
  _gen_aggregator_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib)
598
603
  elif tx_type == "sorter":
@@ -630,8 +635,9 @@ def _generate_transformation(lines, tx, connector_graph, source_dfs, transform_m
630
635
  lines.append("")
631
636
 
632
637
 
633
- def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs):
634
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
638
+ def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
639
+ copy_expr = lib_copy(data_lib, input_df)
640
+ lines.append(f" df_{tx_safe} = {copy_expr}")
635
641
  has_expressions = False
636
642
  for fld in tx.fields:
637
643
  if fld.expression and fld.expression.strip() and fld.expression.strip() != fld.name:
@@ -647,7 +653,7 @@ def _gen_expression_transform(lines, tx, tx_safe, input_df, source_dfs):
647
653
  source_dfs[tx.name] = f"df_{tx_safe}"
648
654
 
649
655
 
650
- def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs):
656
+ def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pandas"):
651
657
  filter_condition = ""
652
658
  for attr in tx.attributes:
653
659
  if attr.name == "Filter Condition":
@@ -655,9 +661,11 @@ def _gen_filter_transform(lines, tx, tx_safe, input_df, source_dfs):
655
661
  if filter_condition:
656
662
  expr_vec = convert_filter_vectorized(filter_condition, input_df)
657
663
  lines.append(f" # Filter: {filter_condition}")
658
- lines.append(f" df_{tx_safe} = {input_df}[{expr_vec}].copy()")
664
+ copy_expr = lib_copy(data_lib, f"{input_df}[{expr_vec}]")
665
+ lines.append(f" df_{tx_safe} = {copy_expr}")
659
666
  else:
660
- lines.append(f" df_{tx_safe} = {input_df}.copy()")
667
+ copy_expr = lib_copy(data_lib, input_df)
668
+ lines.append(f" df_{tx_safe} = {copy_expr}")
661
669
  source_dfs[tx.name] = f"df_{tx_safe}"
662
670
 
663
671
 
@@ -845,7 +853,8 @@ def _gen_lookup_transform(lines, tx, tx_safe, input_df, source_dfs, data_lib="pa
845
853
  elif lookup_table:
846
854
  lines.append(f" df_lkp_{tx_safe} = read_from_db(config, 'SELECT * FROM {lookup_table}', 'default')")
847
855
  else:
848
- lines.append(f" df_lkp_{tx_safe} = pd.DataFrame()")
856
+ empty_expr = lib_empty_df(data_lib)
857
+ lines.append(f" df_lkp_{tx_safe} = {empty_expr}")
849
858
 
850
859
  input_keys, lookup_keys = parse_lookup_condition(lookup_condition)
851
860
 
@@ -51,7 +51,12 @@ def generate_workflow_code(folder: FolderDef) -> str:
51
51
 
52
52
  lines.append("")
53
53
  lines.append("if __name__ == '__main__':")
54
- lines.append(" config = load_config()")
54
+ lines.append(" import argparse as _ap")
55
+ lines.append(" _parser = _ap.ArgumentParser()")
56
+ lines.append(" _parser.add_argument('--param-file', default=None)")
57
+ lines.append(" _parser.add_argument('--config', default='config.yml')")
58
+ lines.append(" _args = _parser.parse_args()")
59
+ lines.append(" config = load_config(_args.config, param_file=_args.param_file)")
55
60
  lines.append(" success = run_workflow(config)")
56
61
  lines.append(" sys.exit(0 if success else 1)")
57
62
  lines.append("")
@@ -295,26 +295,69 @@ def _vectorize_value(val, df_var="df"):
295
295
  return val
296
296
 
297
297
 
298
- def _vectorize_condition(cond, df_var="df"):
299
- c = cond.strip()
298
+ def _vectorize_simple(part, df_var):
299
+ c = part.strip()
300
+
300
301
  c = re.sub(r'\bISNULL\s*\(\s*([A-Za-z_]\w*)\s*\)',
301
302
  lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
302
- c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
303
- lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
304
303
  c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NOT\s+NULL\b',
305
304
  lambda m: f'{df_var}["{m.group(1)}"].notna()', c, flags=re.IGNORECASE)
306
- c = re.sub(r'(?<!["\w])([A-Za-z_][A-Za-z0-9_]*)(?!["\w(.])',
307
- lambda m: f'{df_var}["{m.group(1)}"]' if m.group(1) not in (
308
- 'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
309
- 'str', 'int', 'float',
310
- ) else m.group(1), c)
305
+ c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
306
+ lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
307
+
311
308
  c = re.sub(r'<>', '!=', c)
312
309
  c = re.sub(r'(?<![<>!=])=(?!=)', '==', c)
313
- c = re.sub(r'\bAND\b', ' & ', c, flags=re.IGNORECASE)
314
- c = re.sub(r'\bOR\b', ' | ', c, flags=re.IGNORECASE)
310
+
311
+ skip_words = {
312
+ 'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
313
+ 'str', 'int', 'float', 'isna', 'notna', 'fillna',
314
+ }
315
+ df_base = re.escape(df_var)
316
+ c = re.sub(r'(?<!["\w])(?!' + df_base + r'\b)([A-Za-z_][A-Za-z0-9_]*)(?!["\w(.\[])',
317
+ lambda m: f'{df_var}["{m.group(1)}"]' if m.group(1) not in skip_words else m.group(1),
318
+ c)
315
319
  return c
316
320
 
317
321
 
322
+ def _vectorize_condition(cond, df_var="df"):
323
+ c = cond.strip()
324
+
325
+ tokens = re.split(r'\b(AND|OR)\b', c, flags=re.IGNORECASE)
326
+
327
+ parts = []
328
+ ops = []
329
+ for tok in tokens:
330
+ stripped = tok.strip()
331
+ if stripped.upper() in ('AND', 'OR'):
332
+ ops.append('&' if stripped.upper() == 'AND' else '|')
333
+ elif stripped:
334
+ parts.append(stripped)
335
+
336
+ if not parts:
337
+ return "True"
338
+
339
+ vectorized = []
340
+ for part in parts:
341
+ negate = False
342
+ inner = part.strip()
343
+ if re.match(r'^NOT\s+', inner, flags=re.IGNORECASE):
344
+ negate = True
345
+ inner = re.sub(r'^NOT\s+', '', inner, flags=re.IGNORECASE).strip()
346
+ v = _vectorize_simple(inner, df_var)
347
+ if negate:
348
+ v = f"~({v})"
349
+ vectorized.append(v)
350
+
351
+ if len(vectorized) == 1:
352
+ return vectorized[0]
353
+
354
+ result_parts = [f"({vectorized[0]})"]
355
+ for i, op in enumerate(ops):
356
+ result_parts.append(f" {op} ")
357
+ result_parts.append(f"({vectorized[i + 1]})")
358
+ return "".join(result_parts)
359
+
360
+
318
361
  def convert_filter_expression(expr):
319
362
  if not expr or not expr.strip():
320
363
  return "True"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.5.0
3
+ Version: 1.5.2
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "informatica-python"
7
- version = "1.5.0"
7
+ version = "1.5.2"
8
8
  description = "Convert Informatica PowerCenter workflow XML to Python/PySpark code"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -202,15 +202,46 @@ class TestFilterVectorized:
202
202
  result = convert_filter_vectorized("VALUE > 50", "df_src")
203
203
  assert ">" in result
204
204
  assert "50" in result
205
+ assert 'df_src["VALUE"]' in result
206
+
207
+ def test_and_condition(self):
208
+ result = convert_filter_vectorized("A > 1 AND B < 2", "df")
209
+ assert "&" in result
210
+ assert 'df["A"]' in result
211
+ assert 'df["B"]' in result
212
+ assert "AND" not in result
213
+ assert "(df[" in result
214
+
215
+ def test_or_condition(self):
216
+ result = convert_filter_vectorized("STATUS = 'A' OR STATUS = 'B'", "df")
217
+ assert "|" in result
218
+ assert "(df[" in result
219
+
220
+ def test_not_condition(self):
221
+ result = convert_filter_vectorized("NOT A = 1", "df")
222
+ assert "~(" in result
223
+ assert 'df["A"]' in result
224
+ assert "==" in result
205
225
 
206
226
  def test_is_null_filter(self):
207
227
  result = convert_filter_vectorized("NAME IS NULL", "df_src")
208
228
  assert ".isna()" in result
229
+ assert 'df_src["NAME"]' in result
230
+
231
+ def test_is_not_null_filter(self):
232
+ result = convert_filter_vectorized("NAME IS NOT NULL", "df_src")
233
+ assert ".notna()" in result
209
234
 
210
235
  def test_empty_filter(self):
211
236
  assert convert_filter_vectorized("") == "True"
212
237
  assert convert_filter_vectorized(None) == "True"
213
238
 
239
+ def test_compound_iif(self):
240
+ result = convert_expression_vectorized("IIF(A > 1 AND B < 2, 1, 0)", "df")
241
+ assert "np.where" in result
242
+ assert "&" in result
243
+ assert "(" in result
244
+
214
245
 
215
246
  class TestLibAdapters:
216
247