pydpm_xl 0.2.5rc3__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ This module provides a clean, abstracted interface for generating ASTs from DPM-
  without exposing internal complexity or version compatibility issues.
  """

- from typing import Dict, Any, Optional, List, Union
+ from typing import Dict, Any, Optional, List, Union, Tuple
  from pathlib import Path
  import json
  from datetime import datetime
@@ -48,6 +48,7 @@ class ASTGeneratorAPI:

  def __init__(self, database_path: Optional[str] = None,
  connection_url: Optional[str] = None,
+ pool_config: Optional[Dict[str, Any]] = None,
  compatibility_mode: str = "auto",
  enable_semantic_validation: bool = False):
  """
@@ -56,13 +57,19 @@ class ASTGeneratorAPI:
  Args:
  database_path: Optional path to SQLite data dictionary database
  connection_url: Optional SQLAlchemy connection URL for PostgreSQL
+ pool_config: Connection pool configuration for PostgreSQL/MySQL
  compatibility_mode: "auto", "3.1.0", "4.0.0", or "current"
  enable_semantic_validation: Enable semantic validation (requires database)
  """
  self.syntax_api = SyntaxAPI()
- self.semantic_api = SemanticAPI(database_path=database_path, connection_url=connection_url) if enable_semantic_validation else None
+ self.semantic_api = SemanticAPI(
+ database_path=database_path,
+ connection_url=connection_url,
+ pool_config=pool_config
+ ) if enable_semantic_validation else None
  self.database_path = database_path
  self.connection_url = connection_url
+ self.pool_config = pool_config
  self.compatibility_mode = compatibility_mode
  self.enable_semantic = enable_semantic_validation

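A minimal construction sketch for the new pool_config parameter, not part of the diff. The dict is forwarded untouched to SemanticAPI; the keys shown (pool_size, max_overflow) are assumptions based on common SQLAlchemy pool settings, since the diff does not show which keys are consumed.

    # Sketch: semantic validation against PostgreSQL with a pooled connection.
    # The pool_config keys below are assumed, not confirmed by the diff.
    generator = ASTGeneratorAPI(
        connection_url="postgresql://user:secret@localhost:5432/dpm",
        pool_config={"pool_size": 5, "max_overflow": 10},  # assumed keys
        enable_semantic_validation=True,
    )
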
@@ -139,24 +146,6 @@ class ASTGeneratorAPI:
  }
  }

- def parse_batch(self, expressions: List[str]) -> List[Dict[str, Any]]:
- """
- Parse multiple expressions efficiently.
-
- Args:
- expressions: List of DPM-XL expression strings
-
- Returns:
- List of parse results (same format as parse_expression)
- """
- results = []
- for i, expr in enumerate(expressions):
- result = self.parse_expression(expr)
- result['metadata']['batch_index'] = i
- results.append(result)
-
- return results
-
  def validate_expression(self, expression: str) -> Dict[str, Any]:
  """
  Validate expression syntax without full parsing.
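Since parse_batch() is removed in 0.2.7, callers can reproduce its behaviour with the surviving parse_expression() method. A migration sketch, rebuilt directly from the removed body:

    # Equivalent of the removed parse_batch(), using parse_expression().
    from typing import Any, Dict, List

    def parse_batch(generator, expressions: List[str]) -> List[Dict[str, Any]]:
        results = []
        for i, expr in enumerate(expressions):
            result = generator.parse_expression(expr)
            result["metadata"]["batch_index"] = i  # same key the old method set
            results.append(result)
        return results
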
@@ -333,43 +322,42 @@ class ASTGeneratorAPI:
  "data_populated": False,
  }

- def generate_complete_batch(
+ # ============================================================================
+ # Enriched AST Generation (requires database)
+ # ============================================================================
+
+ def _normalize_expressions_input(
  self,
- expressions: List[str],
- release_id: Optional[int] = None,
- ) -> List[Dict[str, Any]]:
+ expressions: Union[str, List[Tuple[str, str, Optional[str]]]]
+ ) -> List[Tuple[str, str, Optional[str]]]:
  """
- Generate complete ASTs for multiple expressions.
+ Normalize input to list of (expression, operation_code, precondition) tuples.
+
+ Supports:
+ - Single expression string: "expr" -> [("expr", "default_code", None)]
+ - List of tuples: [("expr1", "op1", "precond1"), ("expr2", "op2", None)]

  Args:
- expressions: List of DPM-XL expression strings
- release_id: Optional release ID to filter database lookups by specific release.
- If None, uses all available data (release-agnostic).
+ expressions: Either a single expression string or a list of tuples

  Returns:
- list: List of result dictionaries (same format as generate_complete_ast)
+ List of (expression, operation_code, precondition) tuples
  """
- results = []
- for i, expr in enumerate(expressions):
- result = self.generate_complete_ast(expr, release_id=release_id)
- result["batch_index"] = i
- results.append(result)
- return results
-
- # ============================================================================
- # Enriched AST Generation (requires database)
- # ============================================================================
+ if isinstance(expressions, str):
+ return [(expressions, "default_code", None)]
+ return expressions

  def generate_enriched_ast(
  self,
- expression: str,
- dpm_version: Optional[str] = None,
- operation_code: Optional[str] = None,
+ expressions: Union[str, List[Tuple[str, str, Optional[str]]]],
+ release_code: Optional[str] = None,
  table_context: Optional[Dict[str, Any]] = None,
- precondition: Optional[str] = None,
  release_id: Optional[int] = None,
  output_path: Optional[Union[str, Path]] = None,
  primary_module_vid: Optional[int] = None,
+ module_code: Optional[str] = None,
+ preferred_module_dependencies: Optional[List[str]] = None,
+ module_version_number: Optional[str] = None,
  ) -> Dict[str, Any]:
  """
  Generate enriched, engine-ready AST with framework structure (Level 3).
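The normalization rule introduced by _normalize_expressions_input can be summarized in a standalone sketch; the "default_code" fallback comes from the docstring above:

    from typing import List, Optional, Tuple, Union

    ExprTuple = Tuple[str, str, Optional[str]]

    def normalize(expressions: Union[str, List[ExprTuple]]) -> List[ExprTuple]:
        # Mirrors the new private helper: a bare string becomes a single
        # (expression, "default_code", None) tuple; lists pass through as-is.
        if isinstance(expressions, str):
            return [(expressions, "default_code", None)]
        return expressions

    assert normalize("{tF_01.00, r0010, c0010}") == [
        ("{tF_01.00, r0010, c0010}", "default_code", None)
    ]
    assert normalize([("e1", "op1", None)]) == [("e1", "op1", None)]
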
@@ -378,6 +366,9 @@ class ASTGeneratorAPI:
  framework structure with operations, variables, tables, and preconditions sections.
  This is the format required by business rule execution engines.

+ Supports both single expressions (for backward compatibility) and multiple
+ expression/operation/precondition tuples for generating scripts with multiple operations.
+
  **What you get:**
  - Everything from generate_complete_ast() PLUS:
  - Framework structure: operations, variables, tables, preconditions
@@ -392,13 +383,17 @@ class ASTGeneratorAPI:
  - Module exports with cross-module dependency tracking

  Args:
- expression: DPM-XL expression string
- dpm_version: DPM version code (e.g., "4.0", "4.1", "4.2")
- operation_code: Optional operation code (defaults to "default_code")
+ expressions: Either a single DPM-XL expression string (backward compatible),
+ or a list of tuples: [(expression, operation_code, precondition), ...].
+ Each tuple contains:
+ - expression (str): The DPM-XL expression (required)
+ - operation_code (str): The operation code (required)
+ - precondition (Optional[str]): Optional precondition reference (e.g., {v_F_44_04})
+ release_code: Optional release code (e.g., "4.0", "4.1", "4.2").
+ Mutually exclusive with release_id and module_version_number.
  table_context: Optional table context dict with keys: 'table', 'columns', 'rows', 'sheets', 'default', 'interval'
- precondition: Optional precondition variable reference (e.g., {v_F_44_04})
  release_id: Optional release ID to filter database lookups by specific release.
- If None, uses all available data (release-agnostic).
+ Mutually exclusive with release_code and module_version_number.
  output_path: Optional path (string or Path) to save the enriched_ast as JSON file.
  If provided, the enriched_ast will be automatically saved to this location.
  primary_module_vid: Optional module version ID of the module being exported.
@@ -406,6 +401,18 @@ class ASTGeneratorAPI:
  other modules will be identified and added to dependency_modules and
  cross_instance_dependencies fields. If None, cross-module detection uses
  the first table's module as the primary module.
+ module_code: Optional module code (e.g., "FINREP9") to specify the main module.
+ The main module's URL will be used as the root key of the output.
+ If provided, this takes precedence over primary_module_vid for determining
+ the main module.
+ preferred_module_dependencies: Optional list of module codes to prefer when
+ multiple dependency scopes are possible. If a table belongs to multiple modules,
+ the module in this list will be selected as the dependency.
+ module_version_number: Optional module version number (e.g., "4.1.0") to specify
+ which version of the module to use. Requires module_code to be specified.
+ Mutually exclusive with release_code and release_id.
+ If none of release_code, release_id, or module_version_number are provided,
+ the latest (active) module version is used.

  Returns:
  dict: {
@@ -414,51 +421,68 @@ class ASTGeneratorAPI:
  'error': str # Error message if failed
  }

+ Raises:
+ ValueError: If more than one of release_id, release_code, or module_version_number
+ are specified; if module_version_number is specified without module_code; or if
+ no operation scope belongs to the specified module.
+
  Example:
  >>> generator = ASTGeneratorAPI(database_path="data.db")
+ >>> # Single expression (backward compatible)
  >>> result = generator.generate_enriched_ast(
  ... "{tF_01.00, r0010, c0010}",
- ... dpm_version="4.2",
- ... operation_code="my_validation"
+ ... release_code="4.2",
  ... )
- >>> # result['enriched_ast'] contains framework structure ready for engines
  >>>
- >>> # For module exports with cross-module dependency tracking:
+ >>> # Multiple expressions with operations and preconditions
  >>> result = generator.generate_enriched_ast(
- ... "{tC_26.00, r030, c010} * {tC_01.00, r0015, c0010}",
- ... dpm_version="4.2",
- ... operation_code="v2814_m",
- ... primary_module_vid=123, # Module being exported
- ... release_id=42
+ ... [
+ ... ("{tF_01.00, r0010, c0010} = 0", "v1234_m", None),
+ ... ("{tF_01.00, r0020, c0010} > 0", "v1235_m", "{v_F_44_04}"),
+ ... ("{tF_01.00, r0030, c0010} >= 0", "v1236_m", "{v_F_44_04}"), # Same precondition, deduplicated
+ ... ],
+ ... release_code="4.2",
+ ... module_code="FINREP9",
  ... )
- >>> # result['enriched_ast']['dependency_modules'] contains external module info
- >>> # result['enriched_ast']['dependency_information']['cross_instance_dependencies']
- >>> # contains list of external module dependencies
  """
- try:
- # Generate complete AST first
- complete_result = self.generate_complete_ast(expression, release_id=release_id)
+ # Validate mutually exclusive parameters
+ version_params = [release_id, release_code, module_version_number]
+ if sum(p is not None for p in version_params) > 1:
+ raise ValueError(
+ "Specify a maximum of one of release_id, release_code, or module_version_number."
+ )

- if not complete_result["success"]:
- return {
- "success": False,
- "enriched_ast": None,
- "error": f"Failed to generate complete AST: {complete_result['error']}",
- }
+ # Validate module_version_number requires module_code
+ if module_version_number is not None and module_code is None:
+ raise ValueError(
+ "module_version_number requires module_code to be specified."
+ )

- complete_ast = complete_result["ast"]
- context = complete_result.get("context") or table_context
+ # Resolve version parameters to release_id
+ effective_release_id = release_id
+ effective_release_code = release_code

- # Enrich with framework structure
- enriched_ast = self._enrich_ast_with_metadata(
- ast_dict=complete_ast,
- expression=expression,
- context=context,
- dpm_version=dpm_version,
- operation_code=operation_code,
- precondition=precondition,
- release_id=release_id,
+ if release_code is not None:
+ effective_release_id = self._resolve_release_code(release_code)
+ elif module_version_number is not None:
+ # Resolve module_version_number to release_id
+ effective_release_id, effective_release_code = self._resolve_module_version(
+ module_code, module_version_number
+ )
+
+ # Normalize input to list of tuples
+ expression_tuples = self._normalize_expressions_input(expressions)
+
+ try:
+ # Enrich with framework structure for multiple expressions
+ enriched_ast = self._enrich_ast_with_metadata_multi(
+ expression_tuples=expression_tuples,
+ table_context=table_context,
+ release_code=effective_release_code,
+ release_id=effective_release_id,
  primary_module_vid=primary_module_vid,
+ module_code=module_code,
+ preferred_module_dependencies=preferred_module_dependencies,
  )

  # Save to file if output_path is provided
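The new parameter validation fails fast, before any parsing. A usage sketch (generator constructed as in the docstring example) showing the ValueError raised when two version selectors are combined:

    generator = ASTGeneratorAPI(database_path="data.db")
    try:
        generator.generate_enriched_ast(
            "{tF_01.00, r0010, c0010}",
            release_code="4.2",
            release_id=42,  # conflicts with release_code
        )
    except ValueError as exc:
        # "Specify a maximum of one of release_id, release_code, or module_version_number."
        print(exc)
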
@@ -732,11 +756,13 @@ class ASTGeneratorAPI:
  ast_dict: Dict[str, Any],
  expression: str,
  context: Optional[Dict[str, Any]],
- dpm_version: Optional[str] = None,
+ release_code: Optional[str] = None,
  operation_code: Optional[str] = None,
  precondition: Optional[str] = None,
  release_id: Optional[int] = None,
  primary_module_vid: Optional[int] = None,
+ module_code: Optional[str] = None,
+ preferred_module_dependencies: Optional[List[str]] = None,
  ) -> Dict[str, Any]:
  """
  Add framework structure (operations, variables, tables, preconditions) to complete AST.
@@ -747,7 +773,7 @@ class ASTGeneratorAPI:
  ast_dict: Complete AST dictionary
  expression: Original DPM-XL expression
  context: Context dict with table, rows, columns, sheets, default, interval
- dpm_version: DPM version code (e.g., "4.2")
+ release_code: Release code (e.g., "4.2")
  operation_code: Operation code (defaults to "default_code")
  precondition: Precondition variable reference (e.g., {v_F_44_04})
  release_id: Optional release ID to filter database lookups
@@ -766,25 +792,38 @@ class ASTGeneratorAPI:
  # Get current date for framework structure
  current_date = datetime.now().strftime("%Y-%m-%d")

+ # Detect primary module from the expression (or use provided module_code)
+ primary_module_info = self._get_primary_module_info(
+ expression=expression,
+ primary_module_vid=primary_module_vid,
+ release_id=release_id,
+ module_code=module_code,
+ )
+
  # Query database for release information
- release_info = self._get_release_info(dpm_version, engine)
+ release_info = self._get_release_info(release_code, engine)

- # Build module info
+ # Build module info using detected primary module or defaults
  module_info = {
- "module_code": "default",
- "module_version": "1.0.0",
- "framework_code": "default",
+ "module_code": primary_module_info.get("module_code", "default"),
+ "module_version": primary_module_info.get("module_version", "1.0.0"),
+ "framework_code": primary_module_info.get("framework_code", "default"),
  "dpm_release": {
  "release": release_info["release"],
  "publication_date": release_info["publication_date"],
  },
- "dates": {"from": "2001-01-01", "to": None},
+ "dates": {
+ "from": primary_module_info.get("from_date", "2001-01-01"),
+ "to": primary_module_info.get("to_date"),
+ },
  }

  # Add coordinates to AST data entries
  ast_with_coords = self._add_coordinates_to_ast(ast_dict, context)

  # Build operations section
+ # Use module's from_date for from_submission_date (fallback to current date)
+ submission_date = primary_module_info.get("from_date", current_date)
  operations = {
  operation_code: {
  "version_id": hash(expression) % 10000,
@@ -792,20 +831,70 @@ class ASTGeneratorAPI:
  "expression": expression,
  "root_operator_id": 24, # Default for now
  "ast": ast_with_coords,
- "from_submission_date": current_date,
+ "from_submission_date": submission_date,
  "severity": "Error",
  }
  }

  # Build variables section by extracting from the complete AST
- all_variables, variables_by_table = self._extract_variables_from_ast(ast_with_coords)
+ # This gives us the tables referenced in the expression
+ _, variables_by_table = self._extract_variables_from_ast(ast_with_coords)

- variables = all_variables
+ # Clean extra fields from data entries (after extraction, as it uses data_type)
+ self._clean_ast_data_entries(ast_with_coords)
+
+ all_variables = {}
  tables = {}

- # Build tables with their specific variables
- for table_code, table_variables in variables_by_table.items():
- tables[table_code] = {"variables": table_variables, "open_keys": {}}
+ # Get tables_with_modules to filter tables by primary module
+ tables_with_modules = primary_module_info.get("tables_with_modules", [])
+ primary_module_vid = primary_module_info.get("module_vid")
+
+ # Build mapping of table_code -> module_vid for filtering
+ table_to_module = {}
+ for table_info in tables_with_modules:
+ table_code = table_info.get("code", "")
+ module_vid = table_info.get("module_vid")
+ if table_code and module_vid:
+ table_to_module[table_code] = module_vid
+
+ # Initialize DataDictionaryAPI to query open keys and all variables
+ from py_dpm.api.dpm import DataDictionaryAPI
+ data_dict_api = DataDictionaryAPI(
+ database_path=self.database_path,
+ connection_url=self.connection_url
+ )
+
+ # Build tables with ALL variables from database (not just from expression)
+ # Only include tables belonging to the primary module
+ for table_code in variables_by_table.keys():
+ # Check if this table belongs to the primary module
+ table_module_vid = table_to_module.get(table_code)
+ if table_module_vid and table_module_vid != primary_module_vid:
+ # This table belongs to a different module, skip it for the main tables section
+ continue
+
+ # Get table version info to get table_vid
+ table_info = data_dict_api.get_table_version(table_code, release_id)
+
+ if table_info and table_info.get("table_vid"):
+ table_vid = table_info["table_vid"]
+ # Get ALL variables for this table from database
+ table_variables = data_dict_api.get_all_variables_for_table(table_vid)
+ else:
+ # Fallback to expression variables if table not found
+ table_variables = variables_by_table[table_code]
+
+ # Query open keys for this table
+ open_keys_list = data_dict_api.get_open_keys_for_table(table_code, release_id)
+ open_keys = {item["property_code"]: item["data_type_code"] for item in open_keys_list}
+
+ tables[table_code] = {"variables": table_variables, "open_keys": open_keys}
+
+ # Add table variables to all_variables
+ all_variables.update(table_variables)
+
+ data_dict_api.close()

  # Build preconditions
  preconditions = {}
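Note that version_id is still derived from hash(expression) % 10000. Python salts built-in str hashing per interpreter process (PYTHONHASHSEED), so this value is not stable across runs; a deterministic alternative sketch, not used by the package:

    import hashlib

    def stable_version_id(expression: str) -> int:
        # Same 0-9999 range as hash(expression) % 10000, but reproducible
        # across interpreter runs.
        digest = hashlib.sha256(expression.encode("utf-8")).digest()
        return int.from_bytes(digest[:4], "big") % 10000
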
@@ -816,32 +905,44 @@ class ASTGeneratorAPI:
  precondition=precondition,
  context=context,
  operation_code=operation_code,
- engine=engine,
+ release_id=release_id,
  )

  # Detect cross-module dependencies
+ # Use ALL variables from tables (not just expression variables)
+ full_variables_by_table = {
+ table_code: table_data["variables"]
+ for table_code, table_data in tables.items()
+ }
+ # Use module_vid from primary_module_info (may have been resolved from module_code)
+ resolved_primary_module_vid = primary_module_info.get("module_vid") or primary_module_vid
  dependency_modules, cross_instance_dependencies = self._detect_cross_module_dependencies(
  expression=expression,
- variables_by_table=variables_by_table,
- primary_module_vid=primary_module_vid,
+ variables_by_table=full_variables_by_table,
+ primary_module_vid=resolved_primary_module_vid,
  operation_code=operation_code,
  release_id=release_id,
+ preferred_module_dependencies=preferred_module_dependencies,
  )

  # Build dependency information
+ # intra_instance_validations should be empty for cross-module operations
+ # (operations that have cross_instance_dependencies)
+ is_cross_module = bool(cross_instance_dependencies)
  dependency_info = {
- "intra_instance_validations": [operation_code],
+ "intra_instance_validations": [] if is_cross_module else [operation_code],
  "cross_instance_dependencies": cross_instance_dependencies,
  }

  # Build complete structure
- namespace = "default_module"
+ # Use module URI as namespace if available, otherwise use "default_module"
+ namespace = primary_module_info.get("module_uri", "default_module")

  return {
  namespace: {
  **module_info,
  "operations": operations,
- "variables": variables,
+ "variables": all_variables,
  "tables": tables,
  "preconditions": preconditions,
  "precondition_variables": precondition_variables,
@@ -850,32 +951,591 @@ class ASTGeneratorAPI:
  }
  }

- def _get_release_info(self, dpm_version: Optional[str], engine) -> Dict[str, Any]:
+ def _enrich_ast_with_metadata_multi(
+ self,
+ expression_tuples: List[Tuple[str, str, Optional[str]]],
+ table_context: Optional[Dict[str, Any]],
+ release_code: Optional[str] = None,
+ release_id: Optional[int] = None,
+ primary_module_vid: Optional[int] = None,
+ module_code: Optional[str] = None,
+ preferred_module_dependencies: Optional[List[str]] = None,
+ ) -> Dict[str, Any]:
+ """
+ Add framework structure for multiple expressions (operations, variables, tables, preconditions).
+
+ This creates the engine-ready format with all metadata sections, aggregating
+ multiple expressions into a single script structure.
+
+ Args:
+ expression_tuples: List of (expression, operation_code, precondition) tuples
+ table_context: Context dict with table, rows, columns, sheets, default, interval
+ release_code: Release code (e.g., "4.2")
+ release_id: Optional release ID to filter database lookups
+ primary_module_vid: Module VID being exported (to identify external dependencies)
+ module_code: Optional module code to specify the main module
+ preferred_module_dependencies: Optional list of module codes to prefer for dependencies
+
+ Returns:
+ Dict with the enriched AST structure
+
+ Raises:
+ ValueError: If no operation scope belongs to the specified module
+ """
+ from py_dpm.dpm.utils import get_engine
+ from py_dpm.api.dpm import DataDictionaryAPI
+
+ # Initialize database connection
+ engine = get_engine(database_path=self.database_path, connection_url=self.connection_url)
+
+ # Get current date for framework structure
+ current_date = datetime.now().strftime("%Y-%m-%d")
+
+ # Aggregated structures
+ all_operations = {}
+ all_variables = {}
+ all_tables = {}
+ all_preconditions = {}
+ all_precondition_variables = {}
+ all_dependency_modules = {}
+ all_cross_instance_deps = []
+ all_intra_instance_ops = []
+
+ # Track processed preconditions to avoid duplicates
+ # Maps precondition string -> list of precondition keys generated from it
+ processed_preconditions: Dict[str, List[str]] = {}
+
+ # Track all tables with their modules for validation
+ all_tables_with_modules = []
+
+ # Flag to track if at least one operation belongs to the primary module
+ has_primary_module_operation = False
+
+ # Initialize DataDictionaryAPI once for all expressions
+ data_dict_api = DataDictionaryAPI(
+ database_path=self.database_path,
+ connection_url=self.connection_url
+ )
+
+ # Primary module info will be determined from the first expression or module_code
+ primary_module_info = None
+ namespace = None
+
+ try:
+ for idx, (expression, operation_code, precondition) in enumerate(expression_tuples):
+ # Generate complete AST for this expression
+ complete_result = self.generate_complete_ast(expression, release_id=release_id)
+
+ if not complete_result["success"]:
+ raise ValueError(
+ f"Failed to generate complete AST for expression {idx + 1} "
+ f"(operation '{operation_code}'): {complete_result['error']}"
+ )
+
+ complete_ast = complete_result["ast"]
+ context = complete_result.get("context") or table_context
+
+ # Get primary module info from first expression (or use module_code)
+ if primary_module_info is None:
+ primary_module_info = self._get_primary_module_info(
+ expression=expression,
+ primary_module_vid=primary_module_vid,
+ release_id=release_id,
+ module_code=module_code,
+ )
+ namespace = primary_module_info.get("module_uri", "default_module")
+
+ # Add coordinates to AST data entries
+ ast_with_coords = self._add_coordinates_to_ast(complete_ast, context)
+
+ # Build operation entry
+ submission_date = primary_module_info.get("from_date", current_date)
+ all_operations[operation_code] = {
+ "version_id": hash(expression) % 10000,
+ "code": operation_code,
+ "expression": expression,
+ "root_operator_id": 24,
+ "ast": ast_with_coords,
+ "from_submission_date": submission_date,
+ "severity": "Error",
+ }
+
+ # Extract variables from this expression's AST
+ _, variables_by_table = self._extract_variables_from_ast(ast_with_coords)
+
+ # Clean extra fields from data entries
+ self._clean_ast_data_entries(ast_with_coords)
+
+ # Get tables with modules for this expression
+ from py_dpm.api.dpm_xl.operation_scopes import OperationScopesAPI
+ scopes_api = OperationScopesAPI(
+ database_path=self.database_path,
+ connection_url=self.connection_url
+ )
+ tables_with_modules = scopes_api.get_tables_with_metadata_from_expression(
+ expression=expression,
+ release_id=release_id
+ )
+ all_tables_with_modules.extend(tables_with_modules)
+
+ # Build mapping of table_code -> module_vid
+ # Prefer the module VID that matches the detected primary module
+ table_to_module = {}
+ primary_module_code = primary_module_info.get("module_code")
+
+ # First pass: record mappings for tables belonging to the primary module (by code)
+ if primary_module_code:
+ for table_info in tables_with_modules:
+ table_code = table_info.get("code", "")
+ table_module_vid = table_info.get("module_vid")
+ table_module_code = table_info.get("module_code")
+ if (
+ table_code
+ and table_module_vid
+ and table_module_code == primary_module_code
+ ):
+ table_to_module[table_code] = table_module_vid
+
+ # Second pass: fill in any remaining tables with the first available module VID
+ for table_info in tables_with_modules:
+ table_code = table_info.get("code", "")
+ table_module_vid = table_info.get("module_vid")
+ if table_code and table_module_vid and table_code not in table_to_module:
+ table_to_module[table_code] = table_module_vid
+
+ resolved_primary_module_vid = primary_module_info.get("module_vid") or primary_module_vid
+
+ # Process tables from this expression
+ for table_code in variables_by_table.keys():
+ # Check if this table belongs to the primary module
+ table_module_vid = table_to_module.get(table_code)
+
+ if table_module_vid and table_module_vid != resolved_primary_module_vid:
+ # This table belongs to a different module, skip for main tables
+ continue
+
+ # Skip if we already have this table
+ if table_code in all_tables:
+ # Table already added, it passed the module filter before
+ has_primary_module_operation = True
+ continue
+
+ # Get table version info
+ table_info = data_dict_api.get_table_version(table_code, release_id)
+
+ if table_info and table_info.get("table_vid"):
+ table_vid = table_info["table_vid"]
+ table_variables = data_dict_api.get_all_variables_for_table(table_vid)
+ else:
+ table_variables = variables_by_table[table_code]
+
+ # Query open keys for this table
+ open_keys_list = data_dict_api.get_open_keys_for_table(table_code, release_id)
+ open_keys = {item["property_code"]: item["data_type_code"] for item in open_keys_list}
+
+ all_tables[table_code] = {"variables": table_variables, "open_keys": open_keys}
+ all_variables.update(table_variables)
+
+ # We successfully added a table that passed the module filter
+ # This means at least one operation references the primary module
+ has_primary_module_operation = True
+
+ # Handle precondition (deduplicate by precondition string)
+ if precondition and precondition not in processed_preconditions:
+ preconds, precond_vars = self._build_preconditions(
+ precondition=precondition,
+ context=context,
+ operation_code=operation_code,
+ release_id=release_id,
+ )
+ # Track which keys were generated for this precondition string
+ processed_preconditions[precondition] = list(preconds.keys())
+ # Merge preconditions
+ for precond_key, precond_data in preconds.items():
+ if precond_key not in all_preconditions:
+ all_preconditions[precond_key] = precond_data
+ else:
+ # Add this operation to affected_operations if not already there
+ if operation_code not in all_preconditions[precond_key]["affected_operations"]:
+ all_preconditions[precond_key]["affected_operations"].append(operation_code)
+ all_precondition_variables.update(precond_vars)
+ elif precondition and precondition in processed_preconditions:
+ # Precondition already processed, add this operation ONLY to the matching precondition(s)
+ matching_keys = processed_preconditions[precondition]
+ for precond_key in matching_keys:
+ if precond_key in all_preconditions:
+ if operation_code not in all_preconditions[precond_key]["affected_operations"]:
+ all_preconditions[precond_key]["affected_operations"].append(operation_code)
+
+ # Detect cross-module dependencies for this expression
+ full_variables_by_table = {
+ table_code: table_data["variables"]
+ for table_code, table_data in all_tables.items()
+ }
+ dep_modules, cross_deps = self._detect_cross_module_dependencies(
+ expression=expression,
+ variables_by_table=full_variables_by_table,
+ primary_module_vid=resolved_primary_module_vid,
+ operation_code=operation_code,
+ release_id=release_id,
+ preferred_module_dependencies=preferred_module_dependencies,
+ )
+
+ # Merge dependency modules (avoid table duplicates)
+ self._merge_dependency_modules(all_dependency_modules, dep_modules)
+
+ # Merge cross-instance dependencies (avoid duplicates)
+ self._merge_cross_instance_dependencies(all_cross_instance_deps, cross_deps)
+
+ # Track intra-instance operations
+ if not cross_deps:
+ all_intra_instance_ops.append(operation_code)
+
+ finally:
+ data_dict_api.close()
+
+ # Validate: at least one operation must belong to the primary module
+ if not has_primary_module_operation and module_code:
+ raise ValueError(
+ f"No operation scope belongs to the specified module '{module_code}'. "
+ "At least one expression must reference tables from the primary module."
+ )
+
+ # Query database for release information
+ release_info = self._get_release_info(release_code, engine)
+
+ # Build module info
+ module_info = {
+ "module_code": primary_module_info.get("module_code", "default"),
+ "module_version": primary_module_info.get("module_version", "1.0.0"),
+ "framework_code": primary_module_info.get("framework_code", "default"),
+ "dpm_release": {
+ "release": release_info["release"],
+ "publication_date": release_info["publication_date"],
+ },
+ "dates": {
+ "from": primary_module_info.get("from_date", "2001-01-01"),
+ "to": primary_module_info.get("to_date"),
+ },
+ }
+
+ # Build dependency information
+ dependency_info = {
+ "intra_instance_validations": all_intra_instance_ops,
+ "cross_instance_dependencies": all_cross_instance_deps,
+ }
+
+ return {
+ namespace: {
+ **module_info,
+ "operations": all_operations,
+ "variables": all_variables,
+ "tables": all_tables,
+ "preconditions": all_preconditions,
+ "precondition_variables": all_precondition_variables,
+ "dependency_information": dependency_info,
+ "dependency_modules": all_dependency_modules,
+ }
+ }
+
+ def _merge_dependency_modules(
+ self,
+ existing: Dict[str, Any],
+ new: Dict[str, Any]
+ ) -> None:
+ """
+ Merge new dependency_modules into existing, avoiding table duplicates.
+
+ Args:
+ existing: Existing dependency_modules dict (modified in place)
+ new: New dependency_modules dict to merge
+ """
+ for uri, module_data in new.items():
+ if uri not in existing:
+ existing[uri] = module_data
+ else:
+ # Merge tables (avoid duplicates)
+ for table_code, table_data in module_data.get("tables", {}).items():
+ if table_code not in existing[uri].get("tables", {}):
+ existing[uri].setdefault("tables", {})[table_code] = table_data
+ # Merge variables
+ existing[uri].setdefault("variables", {}).update(
+ module_data.get("variables", {})
+ )
+
+ def _merge_cross_instance_dependencies(
+ self,
+ existing: List[Dict[str, Any]],
+ new: List[Dict[str, Any]]
+ ) -> None:
+ """
+ Merge new cross_instance_dependencies into existing, avoiding duplicates.
+
+ Duplicates are identified by the set of module URIs involved.
+
+ Args:
+ existing: Existing list (modified in place)
+ new: New list to merge
+ """
+ def get_module_uris(dep: Dict[str, Any]) -> tuple:
+ """Extract sorted URIs from modules list for deduplication."""
+ modules = dep.get("modules", [])
+ uris = []
+ for m in modules:
+ if isinstance(m, dict):
+ uris.append(m.get("URI", ""))
+ else:
+ uris.append(str(m))
+ return tuple(sorted(uris))
+
+ # Build a set of existing module URI combinations for deduplication
+ existing_module_sets = set()
+ for dep in existing:
+ existing_module_sets.add(get_module_uris(dep))
+
+ for dep in new:
+ dep_uris = get_module_uris(dep)
+ if dep_uris not in existing_module_sets:
+ existing.append(dep)
+ existing_module_sets.add(dep_uris)
+ else:
+ # Merge affected_operations for existing dependency
+ for existing_dep in existing:
+ if get_module_uris(existing_dep) == dep_uris:
+ for op in dep.get("affected_operations", []):
+ if op not in existing_dep.get("affected_operations", []):
+ existing_dep.setdefault("affected_operations", []).append(op)
+ break
+
+ def _get_primary_module_info(
+ self,
+ expression: str,
+ primary_module_vid: Optional[int],
+ release_id: Optional[int],
+ module_code: Optional[str] = None,
+ ) -> Dict[str, Any]:
+ """
+ Detect and return metadata for the primary module from the expression.
+
+ Args:
+ expression: DPM-XL expression
+ primary_module_vid: Optional module VID (if known)
+ release_id: Optional release ID for filtering
+ module_code: Optional module code (e.g., "FINREP9") - takes precedence over
+ primary_module_vid if provided
+
+ Returns:
+ Dict with module_uri, module_code, module_version, framework_code,
+ from_date, to_date, module_vid
+ """
+ from py_dpm.api.dpm_xl.operation_scopes import OperationScopesAPI
+ from py_dpm.dpm.queries.explorer_queries import ExplorerQuery
+
+ default_info = {
+ "module_uri": "default_module",
+ "module_code": "default",
+ "module_version": "1.0.0",
+ "framework_code": "default",
+ "from_date": "2001-01-01",
+ "to_date": None,
+ "module_vid": None,
+ }
+
+ try:
+ scopes_api = OperationScopesAPI(
+ database_path=self.database_path,
+ connection_url=self.connection_url
+ )
+
+ # Get tables with module metadata from expression
+ tables_with_modules = scopes_api.get_tables_with_metadata_from_expression(
+ expression=expression,
+ release_id=release_id
+ )
+
+ if not tables_with_modules:
+ scopes_api.close()
+ return default_info
+
+ # Determine primary module
+ # Priority: module_code (param) > primary_module_vid > first table
+ primary_table = None
+
+ if module_code:
+ # Find table matching the provided module_code
+ for table_info in tables_with_modules:
+ if table_info.get("module_code") == module_code:
+ primary_table = table_info
+ break
+ elif primary_module_vid:
+ # Find table matching the provided module VID
+ for table_info in tables_with_modules:
+ if table_info.get("module_vid") == primary_module_vid:
+ primary_table = table_info
+ break
+
+ # If no match found, use first table
+ if not primary_table:
+ primary_table = tables_with_modules[0]
+
+ resolved_module_code = primary_table.get("module_code")
+ module_vid = primary_table.get("module_vid")
+
+ # Get module URI
+ try:
+ module_uri = ExplorerQuery.get_module_url(
+ scopes_api.session,
+ module_code=resolved_module_code,
+ release_id=release_id,
+ )
+ # Remove .json extension if present
+ if module_uri and module_uri.endswith(".json"):
+ module_uri = module_uri[:-5]
+ except Exception:
+ module_uri = "default_module"
+
+ # Get module version dates from scopes metadata
+ from_date = "2001-01-01"
+ to_date = None
+ scopes_metadata = scopes_api.get_scopes_with_metadata_from_expression(
+ expression=expression,
+ release_id=release_id
+ )
+ for scope_info in scopes_metadata:
+ for module in scope_info.module_versions:
+ if module.get("module_vid") == module_vid:
+ from_date = module.get("from_reference_date", from_date)
+ to_date = module.get("to_reference_date", to_date)
+ break
+
+ scopes_api.close()
+
+ return {
+ "module_uri": module_uri or "default_module",
+ "module_code": resolved_module_code or "default",
+ "module_version": primary_table.get("module_version", "1.0.0"),
+ "framework_code": resolved_module_code or "default", # Framework code typically matches module code
+ "from_date": str(from_date) if from_date else "2001-01-01",
+ "to_date": str(to_date) if to_date else None,
+ "module_vid": module_vid,
+ "tables_with_modules": tables_with_modules, # Include table-to-module mapping
+ }
+
+ except Exception as e:
+ import logging
+ logging.warning(f"Failed to detect primary module info: {e}")
+ return {**default_info, "tables_with_modules": []}
+
+ def _resolve_release_code(self, release_code: str) -> Optional[int]:
+ """
+ Resolve a release code (e.g., "4.2") to its release ID.
+
+ Args:
+ release_code: The release code string (e.g., "4.2")
+
+ Returns:
+ The release ID if found, None otherwise.
+ """
+ from py_dpm.dpm.utils import get_engine
+ from py_dpm.dpm.models import Release
+ from sqlalchemy.orm import sessionmaker
+
+ engine = get_engine(database_path=self.database_path, connection_url=self.connection_url)
+ Session = sessionmaker(bind=engine)
+ session = Session()
+
+ try:
+ release = (
+ session.query(Release)
+ .filter(Release.code == release_code)
+ .first()
+ )
+ if release:
+ return release.releaseid
+ return None
+ except Exception:
+ return None
+ finally:
+ session.close()
+
+ def _resolve_module_version(
+ self, module_code: str, module_version_number: str
+ ) -> Tuple[Optional[int], Optional[str]]:
+ """
+ Resolve a module version number to its release ID and release code.
+
+ Args:
+ module_code: The module code (e.g., "COREP_LR")
+ module_version_number: The module version number (e.g., "4.1.0")
+
+ Returns:
+ Tuple of (release_id, release_code) if found, (None, None) otherwise.
+ """
+ from py_dpm.dpm.utils import get_engine
+ from py_dpm.dpm.models import ModuleVersion, Release
+ from sqlalchemy.orm import sessionmaker
+
+ engine = get_engine(database_path=self.database_path, connection_url=self.connection_url)
+ Session = sessionmaker(bind=engine)
+ session = Session()
+
+ try:
+ # Find the module version by code and version number
+ module_version = (
+ session.query(ModuleVersion)
+ .filter(
+ ModuleVersion.code == module_code,
+ ModuleVersion.versionnumber == module_version_number
+ )
+ .first()
+ )
+ if not module_version:
+ raise ValueError(
+ f"Module version '{module_version_number}' not found for module '{module_code}'."
+ )
+
+ # Get the release code from the start release
+ release = (
+ session.query(Release)
+ .filter(Release.releaseid == module_version.startreleaseid)
+ .first()
+ )
+ release_code = release.code if release else None
+
+ return module_version.startreleaseid, release_code
+ finally:
+ session.close()
+
+ def _get_release_info(self, release_code: Optional[str], engine) -> Dict[str, Any]:
  """Get release information from database using SQLAlchemy."""
  from py_dpm.dpm.models import Release
  from sqlalchemy.orm import sessionmaker

+ def format_date(date_value) -> str:
+ """Format date whether it's a string or datetime object."""
+ if date_value is None:
+ return "2001-01-01"
+ if isinstance(date_value, str):
+ return date_value
+ # Assume it's a datetime-like object
+ return date_value.strftime("%Y-%m-%d")
+
  Session = sessionmaker(bind=engine)
  session = Session()

  try:
- if dpm_version:
+ if release_code:
  # Query for specific version
- version_float = float(dpm_version)
  release = (
  session.query(Release)
- .filter(Release.code == str(version_float))
+ .filter(Release.code == release_code)
  .first()
  )

  if release:
  return {
- "release": str(release.code) if release.code else dpm_version,
- "publication_date": (
- release.date.strftime("%Y-%m-%d")
- if release.date
- else "2001-01-01"
- ),
+ "release": str(release.code) if release.code else release_code,
+ "publication_date": format_date(release.date),
  }

  # Fallback: get latest released version
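The deduplication key used by the new _merge_cross_instance_dependencies helper is the sorted tuple of module URIs, so dependencies naming the same modules in a different order or element shape count as one. A toy illustration with made-up URIs:

    def module_uri_key(dep: dict) -> tuple:
        # Same normalization as the helper's get_module_uris()
        return tuple(sorted(
            m.get("URI", "") if isinstance(m, dict) else str(m)
            for m in dep.get("modules", [])
        ))

    a = {"modules": [{"URI": "http://example/mod_a"}, {"URI": "http://example/mod_b"}]}
    b = {"modules": ["http://example/mod_b", {"URI": "http://example/mod_a"}]}
    assert module_uri_key(a) == module_uri_key(b)  # order and shape are irrelevant
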
@@ -889,9 +1549,7 @@ class ASTGeneratorAPI:
  if release:
  return {
  "release": str(release.code) if release.code else "4.1",
- "publication_date": (
- release.date.strftime("%Y-%m-%d") if release.date else "2001-01-01"
- ),
+ "publication_date": format_date(release.date),
  }

  # Final fallback
@@ -958,47 +1616,133 @@ class ASTGeneratorAPI:
  precondition: Optional[str],
  context: Optional[Dict[str, Any]],
  operation_code: str,
- engine,
+ release_id: Optional[int] = None,
  ) -> tuple:
- """Build preconditions and precondition_variables sections."""
+ """Build preconditions and precondition_variables sections.
+
+ Handles both simple preconditions like {v_C_47.00} and compound
+ preconditions like {v_C_01.00} and {v_C_05.01} and {v_C_47.00}.
+
+ For compound preconditions, generates a full AST with BinOp nodes
+ for 'and' operators connecting PreconditionItem nodes.
+
+ Uses ExplorerQueryAPI to fetch actual variable_id and variable_vid
+ from the database based on variable codes.
+
+ Args:
+ precondition: Precondition string like "{v_C_01.00}" or
+ "{v_C_01.00} and {v_C_05.01} and {v_C_47.00}"
+ context: Optional context dict
+ operation_code: Operation code to associate with this precondition
+ release_id: Optional release ID for filtering variable versions
+ """
  import re
+ from py_dpm.api.dpm.explorer import ExplorerQueryAPI

  preconditions = {}
  precondition_variables = {}

- # Extract table code from precondition or context
- table_code = None
+ if not precondition:
+ return preconditions, precondition_variables

- if precondition:
- # Extract variable code from precondition reference like {v_F_44_04}
- match = re.match(r"\{v_([^}]+)\}", precondition)
- if match:
- table_code = match.group(1)
-
- if table_code:
- # Query database for actual variable ID and version
- table_info = self._get_table_info(table_code, engine)
-
- if table_info:
- precondition_var_id = table_info["table_vid"]
- version_id = table_info["table_vid"]
- precondition_code = f"p_{precondition_var_id}"
-
- preconditions[precondition_code] = {
- "ast": {
- "class_name": "PreconditionItem",
- "variable_id": precondition_var_id,
- "variable_code": table_code,
- },
- "affected_operations": [operation_code],
- "version_id": version_id,
- "code": precondition_code,
+ # Extract all variable codes from precondition (handles both simple and compound)
+ # Pattern matches {v_VARIABLE_CODE} references
+ var_matches = re.findall(r"\{v_([^}]+)\}", precondition)
+
+ if not var_matches:
+ return preconditions, precondition_variables
+
+ # Normalize variable codes (F_44_04 -> F_44.04)
+ variable_codes = [self._normalize_table_code(v) for v in var_matches]
+
+ # Batch lookup variable IDs from database (single query for efficiency)
+ explorer_api = ExplorerQueryAPI()
+ try:
+ variables_info = explorer_api.get_variables_by_codes(
+ variable_codes=variable_codes,
+ release_id=release_id,
+ )
+ finally:
+ explorer_api.close()
+
+ # Build variable infos list preserving order from precondition
+ var_infos = []
+ for var_code in variable_codes:
+ if var_code in variables_info:
+ info = variables_info[var_code]
+ var_infos.append({
+ "variable_code": var_code,
+ "variable_id": info["variable_id"],
+ "variable_vid": info["variable_vid"],
+ })
+ # Add to precondition_variables
+ precondition_variables[str(info["variable_vid"])] = "b"
+
+ if not var_infos:
+ return preconditions, precondition_variables
+
+ # Build the AST based on number of variables
+ if len(var_infos) == 1:
+ # Simple precondition - single PreconditionItem
+ info = var_infos[0]
+ precondition_code = f"p_{info['variable_vid']}"
+
+ preconditions[precondition_code] = {
+ "ast": {
+ "class_name": "PreconditionItem",
+ "variable_id": info["variable_id"],
+ "variable_code": info["variable_code"],
+ },
+ "affected_operations": [operation_code],
+ "version_id": info["variable_vid"],
+ "code": precondition_code,
+ }
+ else:
+ # Compound precondition - build BinOp tree with 'and' operators
+ # Create a unique key based on sorted variable VIDs
+ sorted_var_vids = sorted([info["variable_vid"] for info in var_infos])
+ precondition_code = "p_" + "_".join(str(vid) for vid in sorted_var_vids)
+
+ # Build AST: left-associative chain of BinOp 'and' nodes
+ # E.g., for [A, B, C]: ((A and B) and C)
+ ast = self._build_precondition_item_ast(var_infos[0])
+ for info in var_infos[1:]:
+ right_ast = self._build_precondition_item_ast(info)
+ ast = {
+ "class_name": "BinOp",
+ "op": "and",
+ "left": ast,
+ "right": right_ast,
  }

- precondition_variables[str(precondition_var_id)] = "b"
+ # Use the first variable's VID as version_id
+ preconditions[precondition_code] = {
+ "ast": ast,
+ "affected_operations": [operation_code],
+ "version_id": sorted_var_vids[0],
+ "code": precondition_code,
+ }

  return preconditions, precondition_variables

+ def _build_precondition_item_ast(self, var_info: Dict[str, Any]) -> Dict[str, Any]:
+ """Build a PreconditionItem AST node for a single variable."""
+ return {
+ "class_name": "PreconditionItem",
+ "variable_id": var_info["variable_id"],
+ "variable_code": var_info["variable_code"],
+ }
+
+ def _normalize_table_code(self, table_code: str) -> str:
+ """Normalize table/variable code format (e.g., F_44_04 -> F_44.04)."""
+ import re
+ # Handle format like C_01_00 -> C_01.00 or F_44_04 -> F_44.04
+ match = re.match(r"([A-Z]+)_(\d+)_(\d+)", table_code)
+ if match:
+ return f"{match.group(1)}_{match.group(2)}.{match.group(3)}"
+ # Already in correct format or different format
+ return table_code
+
  def _extract_variables_from_ast(self, ast_dict: Dict[str, Any]) -> tuple:
  """
@@ -1119,6 +1863,7 @@ class ASTGeneratorAPI:
  primary_module_vid: Optional[int],
  operation_code: str,
  release_id: Optional[int] = None,
+ preferred_module_dependencies: Optional[List[str]] = None,
  ) -> tuple:
  """
  Detect cross-module dependencies for a single expression.
@@ -1132,6 +1877,8 @@ class ASTGeneratorAPI:
  primary_module_vid: The module being exported (if known)
  operation_code: Current operation code
  release_id: Optional release ID for filtering
+ preferred_module_dependencies: Optional list of module codes to prefer when
+ a table belongs to multiple modules

  Returns:
  Tuple of (dependency_modules, cross_instance_dependencies)
@@ -1185,30 +1932,54 @@ class ASTGeneratorAPI:
  return ref or "T"

  # Helper to lookup variables for a table
- def get_table_variables(table_code: str) -> dict:
+ # For external module tables, fetch from database if not in variables_by_table
+ from py_dpm.api.dpm import DataDictionaryAPI
+ data_dict_api = DataDictionaryAPI(
+ database_path=self.database_path,
+ connection_url=self.connection_url
+ )
+
+ def get_table_variables(table_code: str, table_vid: int = None) -> dict:
  if not table_code:
  return {}
+ # First try from passed variables_by_table
  variables = variables_by_table.get(table_code)
  if not variables:
  variables = variables_by_table.get(f"t{table_code}", {})
+ # If still empty and table_vid is provided, fetch from database
+ if not variables and table_vid:
+ variables = data_dict_api.get_all_variables_for_table(table_vid)
  return variables or {}

  # Group external tables by module
+ # If preferred_module_dependencies is set, only include those modules
  external_modules = {}
+
+ # TEMPORARY WORKAROUND: Also collect primary module tables to add to dependency_modules
+ # This is conceptually wrong but required for current implementation.
+ # See /docs/dependency_modules_main_tables_workaround.md for how to revert this.
+ primary_module_tables = []
+
  for table_info in tables_with_modules:
  module_vid = table_info.get("module_vid")
  if module_vid == primary_module_vid:
- continue # Skip primary module
+ # Collect primary module tables for later inclusion in dependency_modules
+ primary_module_tables.append(table_info)
+ continue # Skip for now, will add later

- module_code = table_info.get("module_code")
- if not module_code:
+ ext_module_code = table_info.get("module_code")
+ if not ext_module_code:
+ continue
+
+ # If preferred_module_dependencies is set, only include preferred modules
+ if preferred_module_dependencies and ext_module_code not in preferred_module_dependencies:
  continue

  # Get module URI
  try:
  module_uri = ExplorerQuery.get_module_url(
  scopes_api.session,
- module_code=module_code,
+ module_code=ext_module_code,
  release_id=release_id,
  )
  if module_uri.endswith(".json"):
@@ -1235,13 +2006,29 @@ class ASTGeneratorAPI:

  # Add table and variables
  if table_code:
- table_variables = get_table_variables(table_code)
+ table_vid = table_info.get("table_vid")
+ table_variables = get_table_variables(table_code, table_vid)
  external_modules[module_uri]["tables"][table_code] = {
  "variables": table_variables,
  "open_keys": {}
  }
  external_modules[module_uri]["variables"].update(table_variables)

+ # TEMPORARY WORKAROUND: Add primary module tables to each dependency module entry
+ # This includes main module tables/variables in dependency_modules for cross-module validations
+ # See /docs/dependency_modules_main_tables_workaround.md for how to revert this.
+ for uri in external_modules:
+ for table_info in primary_module_tables:
+ table_code = table_info.get("code")
+ if table_code:
+ table_vid = table_info.get("table_vid")
+ table_variables = get_table_variables(table_code, table_vid)
+ external_modules[uri]["tables"][table_code] = {
+ "variables": table_variables,
+ "open_keys": {}
+ }
+ external_modules[uri]["variables"].update(table_variables)
+
  # Get date info from scopes metadata
  scopes_metadata = scopes_api.get_scopes_with_metadata_from_expression(
  expression=expression,
@@ -1284,6 +2071,8 @@ class ASTGeneratorAPI:
  "to_reference_date": str(to_date) if to_date else ""
  })

+ # Close data_dict_api before returning
+ data_dict_api.close()
  return dependency_modules, cross_instance_dependencies

  except Exception as e:
@@ -1295,53 +2084,96 @@ class ASTGeneratorAPI:
  def _add_coordinates_to_ast(
  self, ast_dict: Dict[str, Any], context: Optional[Dict[str, Any]]
  ) -> Dict[str, Any]:
- """Add x/y/z coordinates to data entries in AST."""
+ """
+ Add x/y/z coordinates to data entries in AST.
+
+ Coordinates are assigned based on:
+ - x: row position (1-indexed)
+ - y: column position (1-indexed)
+ - z: sheet position (1-indexed)
+
+ If context provides column/row/sheet lists, those are used for ordering.
+ Otherwise, the order is extracted from the data entries themselves.
+ """
  import copy

  def add_coords_to_node(node):
  if isinstance(node, dict):
  # Handle VarID nodes with data arrays
  if node.get("class_name") == "VarID" and "data" in node:
- # Get column information from context
- cols = []
- if context and "columns" in context and context["columns"]:
- cols = context["columns"]
-
- # Group data entries by row to assign coordinates correctly
- entries_by_row = {}
- for data_entry in node["data"]:
- row_code = data_entry.get("row", "")
- if row_code not in entries_by_row:
- entries_by_row[row_code] = []
- entries_by_row[row_code].append(data_entry)
-
- # Assign coordinates based on column order and row grouping
- rows = list(entries_by_row.keys())
- for x_index, row_code in enumerate(rows, 1):
- for data_entry in entries_by_row[row_code]:
- column_code = data_entry.get("column", "")
-
- # Find y coordinate based on column position in context
- y_index = 1 # default
- if cols and column_code in cols:
- y_index = cols.index(column_code) + 1
- elif cols:
- # Fallback to order in data
- row_columns = [
- entry.get("column", "")
- for entry in entries_by_row[row_code]
- ]
- if column_code in row_columns:
- y_index = row_columns.index(column_code) + 1
-
- # Always add y coordinate
- data_entry["y"] = y_index
-
- # Add x coordinate only if there are multiple rows
+ data_entries = node["data"]
+ if not data_entries:
+ return
+
+ # Get context lists (may be empty)
+ context_cols = []
+ context_rows = []
+ context_sheets = []
+ if context:
+ context_cols = context.get("columns") or []
+ context_rows = context.get("rows") or []
+ context_sheets = context.get("sheets") or []
+
+ # Extract unique rows, columns, sheets from data entries
+ # Use these if context doesn't provide them
+ data_rows = []
+ data_cols = []
+ data_sheets = []
+ seen_rows = set()
+ seen_cols = set()
+ seen_sheets = set()
+
+ for entry in data_entries:
+ row = entry.get("row", "")
+ col = entry.get("column", "")
+ sheet = entry.get("sheet", "")
+ if row and row not in seen_rows:
+ data_rows.append(row)
+ seen_rows.add(row)
+ if col and col not in seen_cols:
+ data_cols.append(col)
+ seen_cols.add(col)
+ if sheet and sheet not in seen_sheets:
+ data_sheets.append(sheet)
+ seen_sheets.add(sheet)
+
+ # Sort for consistent ordering
+ data_rows.sort()
+ data_cols.sort()
+ data_sheets.sort()
+
+ # Use context lists if provided, otherwise use extracted lists
+ rows = context_rows if context_rows else data_rows
+ cols = context_cols if context_cols else data_cols
+ sheets = context_sheets if context_sheets else data_sheets
+
+ # Assign coordinates to each data entry
+ for entry in data_entries:
+ row_code = entry.get("row", "")
+ col_code = entry.get("column", "")
+ sheet_code = entry.get("sheet", "")
+
+ # Calculate x coordinate (row position)
+ if rows and row_code in rows:
+ x_index = rows.index(row_code) + 1
+ # Only add x if there are multiple rows
  if len(rows) > 1:
- data_entry["x"] = x_index
-
- # TODO: Add z coordinate for sheets when needed
+ entry["x"] = x_index
+
+ # Calculate y coordinate (column position)
+ if cols and col_code in cols:
+ y_index = cols.index(col_code) + 1
+ entry["y"] = y_index
+ elif not cols:
+ # Default to 1 if no column info
+ entry["y"] = 1
+
+ # Calculate z coordinate (sheet position)
+ if sheets and sheet_code in sheets:
+ z_index = sheets.index(sheet_code) + 1
+ # Only add z if there are multiple sheets
+ if len(sheets) > 1:
+ entry["z"] = z_index

  # Recursively process child nodes
  for key, value in node.items():
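The rewritten coordinate logic condenses to: x/y/z are 1-indexed positions within the row/column/sheet lists, x and z are emitted only when more than one row or sheet exists, and y defaults to 1 when no column information is available. A simplified standalone sketch:

    def assign_coords(entries, rows, cols, sheets):
        # Stand-in for add_coords_to_node's per-entry loop, same rules.
        for e in entries:
            if e.get("row") in rows and len(rows) > 1:
                e["x"] = rows.index(e["row"]) + 1
            if cols and e.get("column") in cols:
                e["y"] = cols.index(e["column"]) + 1
            elif not cols:
                e["y"] = 1  # default when no column info
            if e.get("sheet") in sheets and len(sheets) > 1:
                e["z"] = sheets.index(e["sheet"]) + 1
        return entries

    entries = [{"row": "r0010", "column": "c0010"}, {"row": "r0020", "column": "c0020"}]
    assign_coords(entries, ["r0010", "r0020"], ["c0010", "c0020"], [])
    # -> x and y assigned on both entries; no z because there are no sheets
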
@@ -1356,6 +2188,44 @@ class ASTGeneratorAPI:
  add_coords_to_node(result)
  return result

+ def _clean_ast_data_entries(self, ast_dict: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Remove extra fields from data entries in the AST.
+
+ Keeps only the fields required by the engine:
+ - datapoint, operand_reference_id, y, column, x (if multiple rows), z (if multiple sheets)
+
+ Removes internal/debug fields:
+ - data_type, cell_code, table_code, table_vid, row
+ """
+ # Fields to keep in data entries
+ ALLOWED_FIELDS = {"datapoint", "operand_reference_id", "x", "y", "z", "column", "sheet"}
+
+ def clean_node(node):
+ if isinstance(node, dict):
+ # Handle VarID nodes with data arrays
+ if node.get("class_name") == "VarID" and "data" in node:
+ cleaned_data = []
+ for data_entry in node["data"]:
+ # Keep only allowed fields
+ cleaned_entry = {
+ k: v for k, v in data_entry.items() if k in ALLOWED_FIELDS
+ }
+ cleaned_data.append(cleaned_entry)
+ node["data"] = cleaned_data
+
+ # Recursively process child nodes
+ for key, value in node.items():
+ if isinstance(value, (dict, list)):
+ clean_node(value)
+ elif isinstance(node, list):
+ for item in node:
+ clean_node(item)
+
+ # Modify in place (ast_dict is already a copy from _add_coordinates_to_ast)
+ clean_node(ast_dict)
+ return ast_dict
+

  # Convenience functions for simple usage

@@ -1387,18 +2257,3 @@ def validate_expression(expression: str) -> bool:
  generator = ASTGeneratorAPI()
  result = generator.validate_expression(expression)
  return result['valid']
-
-
- def parse_batch(expressions: List[str], compatibility_mode: str = "auto") -> List[Dict[str, Any]]:
- """
- Simple function to parse multiple expressions.
-
- Args:
- expressions: List of DPM-XL expression strings
- compatibility_mode: Version compatibility mode
-
- Returns:
- List of parse results
- """
- generator = ASTGeneratorAPI(compatibility_mode=compatibility_mode)
- return generator.parse_batch(expressions)