informatica-python 1.9.6__py3-none-any.whl → 1.9.8__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -481,7 +481,12 @@ def _emit_flatfile_read(lines, var_name, src_def, indent=" ", file_path_overr
481
481
  if fc.get("fixed_width"):
482
482
  widths = []
483
483
  for fld in src_def.fields:
484
- widths.append(fld.precision if fld.precision else 10)
484
+ if fld.physical_length and fld.physical_length > 0:
485
+ widths.append(fld.physical_length)
486
+ elif fld.precision:
487
+ widths.append(fld.precision)
488
+ else:
489
+ widths.append(10)
485
490
  lines.append(f"{indent}df_{var_name} = pd.read_fwf(")
486
491
  lines.append(f"{indent} {default_path},")
487
492
  lines.append(f"{indent} widths={widths},")
@@ -18,6 +18,9 @@ class FieldDef:
18
18
  field_number: int = 0
19
19
  hidden: str = "NO"
20
20
  business_name: str = ""
21
+ offset: int = 0
22
+ physical_offset: int = 0
23
+ physical_length: int = 0
21
24
  field_attributes: List[Dict[str, str]] = field(default_factory=list)
22
25
 
23
26
 
@@ -417,6 +417,9 @@ class InformaticaParser:
417
417
  hidden=self._attr(elem, "HIDDEN", "NO"),
418
418
  business_name=self._attr(elem, "BUSINESSNAME"),
419
419
  description=self._attr(elem, "DESCRIPTION"),
420
+ offset=self._int_attr(elem, "OFFSET"),
421
+ physical_offset=self._int_attr(elem, "PHYSICALOFFSET"),
422
+ physical_length=self._int_attr(elem, "PHYSICALLENGTH"),
420
423
  )
421
424
  for fa in elem.findall("FIELDATTRIBUTE"):
422
425
  fld.field_attributes.append({
@@ -184,7 +184,9 @@ def convert_expression(expr):
184
184
  return cleaned
185
185
 
186
186
  if cleaned.startswith("'") and cleaned.endswith("'"):
187
- return cleaned
187
+ close_pos = cleaned.find("'", 1)
188
+ if close_pos == len(cleaned) - 1:
189
+ return cleaned
188
190
 
189
191
  converted = cleaned
190
192
 
@@ -428,7 +430,9 @@ def _vec_recursive(expr, df_var):
428
430
  return cleaned
429
431
 
430
432
  if cleaned.startswith("'") and cleaned.endswith("'"):
431
- return cleaned
433
+ close_pos = cleaned.find("'", 1)
434
+ if close_pos == len(cleaned) - 1:
435
+ return cleaned
432
436
 
433
437
  upper = cleaned.upper()
434
438
 
@@ -452,6 +456,17 @@ def _vec_recursive(expr, df_var):
452
456
  var_name = cleaned[2:]
453
457
  return f'get_variable("{var_name}")'
454
458
 
459
+ if re.match(r'^\$PM\w+$', cleaned):
460
+ var_name = cleaned[1:]
461
+ return f'resolve_builtin_variable("{var_name}")'
462
+
463
+ not_result = _find_func_call(cleaned, 'NOT')
464
+ if not_result and not_result[0] == 0 and not_result[1] == len(cleaned):
465
+ _, _, args = not_result
466
+ if len(args) >= 1:
467
+ inner = _vec_recursive(args[0], df_var)
468
+ return f'~({inner})'
469
+
455
470
  lkp_result = _find_func_call(cleaned, 'LKP')
456
471
  if lkp_result is None:
457
472
  lkp_match = re.match(r'^:LKP\.(\w+)\s*\(', cleaned, re.IGNORECASE)
@@ -666,6 +681,8 @@ def _vec_recursive(expr, df_var):
666
681
  if len(args) >= 2:
667
682
  fmt = _convert_infa_date_format(args[1])
668
683
  return f'{field_val}.dt.strftime("{fmt}")'
684
+ if any(op in field_val for op in (' + ', ' - ', ' * ', ' / ', ' % ')):
685
+ return f'({field_val}).astype(str)'
669
686
  return f'{field_val}.astype(str)'
670
687
 
671
688
  make_dt_result = _find_func_call(cleaned, 'MAKE_DATE_TIME')
@@ -866,8 +883,10 @@ def _vec_recursive(expr, df_var):
866
883
  v = _vec_recursive(p, df_var)
867
884
  if v.startswith("'") and v.endswith("'"):
868
885
  vec_parts.append(v)
869
- else:
886
+ elif v.startswith(df_var + '[') or v.startswith('pd.') or '.str.' in v:
870
887
  vec_parts.append(f'{v}.fillna(\'\').astype(str)')
888
+ else:
889
+ vec_parts.append(f'str({v})')
871
890
  return " + ".join(vec_parts)
872
891
 
873
892
  for func_name in sorted(INFA_FUNC_MAP.keys(), key=lambda x: -len(x)):
@@ -883,6 +902,7 @@ def _vec_recursive(expr, df_var):
883
902
  converted = re.sub(r':LKP\.(\w+)\s*\(', r'lookup_func("\1", ', converted)
884
903
 
885
904
  converted = re.sub(r'\$\$(\w+)', r'get_variable("\1")', converted)
905
+ converted = re.sub(r'\$(PM\w+)', r'resolve_builtin_variable("\1")', converted)
886
906
 
887
907
  converted = re.sub(r'\b([A-Za-z_][A-Za-z0-9_]*)\s*IS\s+NOT\s+NULL\b',
888
908
  lambda m: f'{df_var}["{m.group(1)}"].notna()', converted, flags=re.IGNORECASE)
@@ -895,8 +915,15 @@ def _vec_recursive(expr, df_var):
895
915
 
896
916
  converted = _convert_remaining_funcs(converted, df_var)
897
917
 
918
+ converted = re.sub(r'\bAND\b', ' & ', converted, flags=re.IGNORECASE)
919
+ converted = re.sub(r'\bOR\b', ' | ', converted, flags=re.IGNORECASE)
920
+ converted = re.sub(r'\bNOT\b', ' ~ ', converted, flags=re.IGNORECASE)
921
+ converted = re.sub(r'<>', '!=', converted)
922
+ converted = re.sub(r'(?<![<>!=])=(?!=)', '==', converted)
923
+
898
924
  skip_words = {
899
925
  'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd', 'get_variable',
926
+ 'resolve_builtin_variable',
900
927
  'str', 'int', 'float', 'bool', 'len', 'abs', 'round',
901
928
  'fillna', 'astype', 'isna', 'notna', 'where', 'errors', 'coerce',
902
929
  'lookup_func', 'expand', 'extract', 'regex', 'contains', 'replace',
@@ -904,11 +931,6 @@ def _vec_recursive(expr, df_var):
904
931
  }
905
932
  converted = _substitute_fields(converted, df_var, skip_words)
906
933
 
907
- converted = re.sub(r'\bAND\b', ' & ', converted, flags=re.IGNORECASE)
908
- converted = re.sub(r'\bOR\b', ' | ', converted, flags=re.IGNORECASE)
909
- converted = re.sub(r'\bNOT\b', ' ~', converted, flags=re.IGNORECASE)
910
- converted = re.sub(r'<>', '!=', converted)
911
- converted = re.sub(r'(?<![<>!=])=(?!=)', '==', converted)
912
934
  converted = re.sub(r'\berrors\s*==\s*(["\'])', r'errors=\1', converted)
913
935
  converted = re.sub(r'\bexpand\s*==\s*', 'expand=', converted)
914
936
  converted = re.sub(r'\bregex\s*==\s*', 'regex=', converted)
@@ -1041,6 +1063,8 @@ def _vectorize_simple(part, df_var):
1041
1063
  c = re.sub(r'\b([A-Za-z_]\w*)\s*IS\s+NULL\b',
1042
1064
  lambda m: f'{df_var}["{m.group(1)}"].isna()', c, flags=re.IGNORECASE)
1043
1065
 
1066
+ c = re.sub(r'\$(PM\w+)', r'resolve_builtin_variable("\1")', c)
1067
+
1044
1068
  c = re.sub(r'<>', '!=', c)
1045
1069
  c = re.sub(r'(?<![<>!=])=(?!=)', '==', c)
1046
1070
 
@@ -1048,8 +1072,13 @@ def _vectorize_simple(part, df_var):
1048
1072
  c = re.sub(r'\bTRUE\b', 'True', c, flags=re.IGNORECASE)
1049
1073
  c = re.sub(r'\bFALSE\b', 'False', c, flags=re.IGNORECASE)
1050
1074
 
1075
+ c = re.sub(r'\bAND\b', ' & ', c, flags=re.IGNORECASE)
1076
+ c = re.sub(r'\bOR\b', ' | ', c, flags=re.IGNORECASE)
1077
+ c = re.sub(r'\bNOT\b', ' ~ ', c, flags=re.IGNORECASE)
1078
+
1051
1079
  skip_words = {
1052
1080
  'True', 'False', 'None', 'and', 'or', 'not', 'np', 'pd',
1081
+ 'resolve_builtin_variable',
1053
1082
  'str', 'int', 'float', 'isna', 'notna', 'fillna',
1054
1083
  'get_variable', 'lookup_func', 'isin', 'eq',
1055
1084
  'expand', 'extract', 'astype', 'errors', 'coerce', 'regex',
@@ -1089,8 +1118,9 @@ def _split_condition_tokens(text):
1089
1118
  current.append(ch)
1090
1119
  elif depth == 0:
1091
1120
  rest = text[i:]
1092
- and_match = re.match(r'\bAND\b', rest, re.IGNORECASE)
1093
- or_match = re.match(r'\bOR\b', rest, re.IGNORECASE)
1121
+ prev_is_word = i > 0 and (text[i - 1].isalnum() or text[i - 1] == '_')
1122
+ and_match = re.match(r'\bAND\b', rest, re.IGNORECASE) if not prev_is_word else None
1123
+ or_match = re.match(r'\bOR\b', rest, re.IGNORECASE) if not prev_is_word else None
1094
1124
  if and_match:
1095
1125
  tokens.append(''.join(current).strip())
1096
1126
  current = []
@@ -1134,9 +1164,10 @@ def _vectorize_condition(cond, df_var="df"):
1134
1164
  for part in parts:
1135
1165
  negate = False
1136
1166
  inner = part.strip()
1137
- if re.match(r'^NOT\s+', inner, flags=re.IGNORECASE):
1167
+ not_match = re.match(r'^NOT\b\s*', inner, flags=re.IGNORECASE)
1168
+ if not_match:
1138
1169
  negate = True
1139
- inner = re.sub(r'^NOT\s+', '', inner, flags=re.IGNORECASE).strip()
1170
+ inner = inner[not_match.end():].strip()
1140
1171
 
1141
1172
  v = _vectorize_simple(inner, df_var)
1142
1173
  if negate:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: informatica-python
3
- Version: 1.9.6
3
+ Version: 1.9.8
4
4
  Summary: Convert Informatica PowerCenter workflow XML to Python/PySpark code
5
5
  Author: Nick
6
6
  License: MIT
@@ -1,23 +1,23 @@
1
1
  informatica_python/__init__.py,sha256=JFO8fVMClSWe0SR-CBseX4RaPyyC3rZBdxxjy47ZT5E,337
2
2
  informatica_python/cli.py,sha256=gFwg0O99vKM-OLO0HoHA4emd-6qrgjMNqa9T59e4e_s,2905
3
3
  informatica_python/converter.py,sha256=xCuWrYzDji0yN72D3QqOgZCVVM2j3k2_CvlGplCWxLU,22779
4
- informatica_python/models.py,sha256=G_C2WfQL-ykKjNj23m8vKFtLZYrQozp99HJzrLTKG1Y,17293
5
- informatica_python/parser.py,sha256=v0qoTlAi3RZ3IHN_5g5t6f66XzRpJIjpAfpyMzZ5cuA,45223
4
+ informatica_python/models.py,sha256=sZvVzYrEIRAfzV_HduN-qCeOAt5KZ_z7jzNTmPP3Oxs,17371
5
+ informatica_python/parser.py,sha256=RVxoT1j6QTer2RyeG-PCEyKaoZAQhFepRcrRdsEm6OM,45410
6
6
  informatica_python/generators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  informatica_python/generators/config_gen.py,sha256=4tqcNKTB06kyGZIiM4yl0q97q_i3zeCHXTjuE1dNFKY,5726
8
8
  informatica_python/generators/error_log_gen.py,sha256=2cc0rEcblydHkb9VAMXlrH7WdSQ-CNqAXcwVk3FYZeM,21319
9
9
  informatica_python/generators/helper_gen.py,sha256=lC30hyZn6RIkbo4e_6sbqdrCfmZHWaXdr-p0tmtfILc,82376
10
- informatica_python/generators/mapping_gen.py,sha256=a5UZCIoU5E5ff9Q8Nxp_m-6k8wZv2NTZL96o2hDddZo,72603
10
+ informatica_python/generators/mapping_gen.py,sha256=5wPS9t3OLpbo89gYsHMbVqCg9Jgfzmt13IqK4diOS2g,72781
11
11
  informatica_python/generators/sql_gen.py,sha256=O8Y-aJz9EyFJ0DXeuISRt5yKwC3wlp2K3B0BHrmxrXw,4872
12
12
  informatica_python/generators/workflow_gen.py,sha256=_uSlBg31ZRMhMlCYk4hWDRBPaBROrepD8_v3QGEWJxE,18089
13
13
  informatica_python/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  informatica_python/utils/datatype_map.py,sha256=iLOYg-iBKT4rMecGbrFkTpJj4yqs5S9HeBOTLUIWhX0,2809
15
- informatica_python/utils/expression_converter.py,sha256=SkkT2CyhIZzUms9TT4cEimZlxjOoVq96AQgGTrO_Lmc,46859
15
+ informatica_python/utils/expression_converter.py,sha256=ynprsvZGvavML3Y8C485GyjaoqQ-k67OESXHShafeTo,48244
16
16
  informatica_python/utils/lib_adapters.py,sha256=1ZtuMbgDg9Ukf-OF_EG1L_BeeR-6JQk8Kx3WwMfvNRU,6516
17
17
  informatica_python/utils/sql_dialect.py,sha256=_IHJbfu8a3mT_OvHpybgSfZKqz6mwVy5ItTKDRChqnU,5461
18
- informatica_python-1.9.6.dist-info/licenses/LICENSE,sha256=77RaRDdXgey1D90YZAjXqEQdBxWfvUQqLQX3pC1qjUE,1061
19
- informatica_python-1.9.6.dist-info/METADATA,sha256=4XrzBAs63VuLy6Wf_WsgmbGLWK9iaJeHjIZiyOb2PBw,26097
20
- informatica_python-1.9.6.dist-info/WHEEL,sha256=PovZm1ExVWmrRefZoXCfejlbKLnQI5SVIf1SWRV4QQI,97
21
- informatica_python-1.9.6.dist-info/entry_points.txt,sha256=030jjTrx-1oRRQ16HZz52rdcKS8R8_llnymsTUtn_Xc,67
22
- informatica_python-1.9.6.dist-info/top_level.txt,sha256=Dngg-WNteYi22XAJU2XKAQS8aZ52yM2LYC0tzxrlbVQ,19
23
- informatica_python-1.9.6.dist-info/RECORD,,
18
+ informatica_python-1.9.8.dist-info/licenses/LICENSE,sha256=77RaRDdXgey1D90YZAjXqEQdBxWfvUQqLQX3pC1qjUE,1061
19
+ informatica_python-1.9.8.dist-info/METADATA,sha256=YErpXHS5T-sSTEUwENMiaCWaYhj6xiQALyyLZACrc2g,26097
20
+ informatica_python-1.9.8.dist-info/WHEEL,sha256=PovZm1ExVWmrRefZoXCfejlbKLnQI5SVIf1SWRV4QQI,97
21
+ informatica_python-1.9.8.dist-info/entry_points.txt,sha256=030jjTrx-1oRRQ16HZz52rdcKS8R8_llnymsTUtn_Xc,67
22
+ informatica_python-1.9.8.dist-info/top_level.txt,sha256=Dngg-WNteYi22XAJU2XKAQS8aZ52yM2LYC0tzxrlbVQ,19
23
+ informatica_python-1.9.8.dist-info/RECORD,,