focus-validator 1.0.0.dev1__tar.gz → 2.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. focus_validator-2.0.1/PKG-INFO +1316 -0
  2. focus_validator-2.0.1/README.md +1287 -0
  3. focus_validator-2.0.1/build.py +7 -0
  4. {focus_validator-1.0.0.dev1 → focus_validator-2.0.1}/focus_validator/__init__.py +0 -1
  5. focus_validator-2.0.1/focus_validator/config/logging.yaml +57 -0
  6. focus_validator-2.0.1/focus_validator/config_objects/__init__.py +11 -0
  7. focus_validator-2.0.1/focus_validator/config_objects/common.py +91 -0
  8. focus_validator-2.0.1/focus_validator/config_objects/focus_to_duckdb_converter.py +4463 -0
  9. focus_validator-2.0.1/focus_validator/config_objects/json_loader.py +230 -0
  10. focus_validator-2.0.1/focus_validator/config_objects/plan_builder.py +314 -0
  11. focus_validator-2.0.1/focus_validator/config_objects/rule.py +116 -0
  12. focus_validator-2.0.1/focus_validator/config_objects/rule_dependency_resolver.py +554 -0
  13. focus_validator-2.0.1/focus_validator/data_loaders/csv_data_loader.py +669 -0
  14. focus_validator-2.0.1/focus_validator/data_loaders/csv_data_loader_pandas_backup.py +309 -0
  15. focus_validator-2.0.1/focus_validator/data_loaders/data_loader.py +94 -0
  16. focus_validator-2.0.1/focus_validator/data_loaders/parquet_data_loader.py +286 -0
  17. {focus_validator-1.0.0.dev1 → focus_validator-2.0.1}/focus_validator/exceptions.py +10 -0
  18. focus_validator-2.0.1/focus_validator/main.py +393 -0
  19. focus_validator-2.0.1/focus_validator/outputter/outputter.py +31 -0
  20. focus_validator-2.0.1/focus_validator/outputter/outputter_console.py +132 -0
  21. {focus_validator-1.0.0.dev1 → focus_validator-2.0.1}/focus_validator/outputter/outputter_unittest.py +121 -4
  22. focus_validator-2.0.1/focus_validator/outputter/outputter_validation_graph.py +333 -0
  23. focus_validator-2.0.1/focus_validator/outputter/outputter_web.py +2792 -0
  24. focus_validator-2.0.1/focus_validator/rules/model-1.2.json +18578 -0
  25. focus_validator-2.0.1/focus_validator/rules/spec_rules.py +507 -0
  26. focus_validator-2.0.1/focus_validator/utils/download_currency_codes.py +39 -0
  27. focus_validator-2.0.1/focus_validator/utils/performance_logging.py +132 -0
  28. focus_validator-2.0.1/focus_validator/validator.py +494 -0
  29. focus_validator-2.0.1/pyproject.toml +94 -0
  30. focus_validator-1.0.0.dev1/PKG-INFO +0 -96
  31. focus_validator-1.0.0.dev1/README.md +0 -72
  32. focus_validator-1.0.0.dev1/build.py +0 -21
  33. focus_validator-1.0.0.dev1/focus_validator/config_objects/__init__.py +0 -11
  34. focus_validator-1.0.0.dev1/focus_validator/config_objects/common.py +0 -75
  35. focus_validator-1.0.0.dev1/focus_validator/config_objects/focus_to_pandera_schema_converter.py +0 -224
  36. focus_validator-1.0.0.dev1/focus_validator/config_objects/override.py +0 -14
  37. focus_validator-1.0.0.dev1/focus_validator/config_objects/rule.py +0 -114
  38. focus_validator-1.0.0.dev1/focus_validator/data_loaders/csv_data_loader.py +0 -9
  39. focus_validator-1.0.0.dev1/focus_validator/data_loaders/data_loader.py +0 -21
  40. focus_validator-1.0.0.dev1/focus_validator/data_loaders/parquet_data_loader.py +0 -9
  41. focus_validator-1.0.0.dev1/focus_validator/main.py +0 -77
  42. focus_validator-1.0.0.dev1/focus_validator/outputter/outputter.py +0 -17
  43. focus_validator-1.0.0.dev1/focus_validator/outputter/outputter_console.py +0 -105
  44. focus_validator-1.0.0.dev1/focus_validator/rules/.gitignore +0 -2
  45. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/AmortizedCost_IsDecimal.yaml +0 -3
  46. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/AmortizedCost_NotNull.yaml +0 -3
  47. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/AmortizedCost_Required.yaml +0 -3
  48. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/AvailabilityZone_IsString.yaml +0 -3
  49. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/AvailabilityZone_Nullable.yaml +0 -3
  50. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BilledCost_IsDecimal.yaml +0 -3
  51. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BilledCost_NotNull.yaml +0 -3
  52. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BilledCost_Required.yaml +0 -3
  53. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BilledCurrency_IsCurrencyCode.yaml +0 -3
  54. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BilledCurrency_NotNull.yaml +0 -3
  55. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BilledCurrency_Required.yaml +0 -3
  56. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BillingAccountId_IsString.yaml +0 -3
  57. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BillingAccountId_NotNull.yaml +0 -3
  58. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BillingAccountId_Required.yaml +0 -3
  59. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BillingAccountName_IsString.yaml +0 -3
  60. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BillingAccountName_Nullable.yaml +0 -3
  61. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BillingAccountName_Required.yaml +0 -3
  62. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BillingCurrency_IsCurrencyCode.yaml +0 -3
  63. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BillingCurrency_NotNull.yaml +0 -3
  64. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BillingCurrency_Required.yaml +0 -2
  65. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BillingPeriodEnd_IsDateTime.yaml +0 -3
  66. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BillingPeriodEnd_NotNull.yaml +0 -3
  67. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BillingPeriodEnd_Required.yaml +0 -3
  68. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BillingPeriodStart_IsDateTime.yaml +0 -3
  69. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BillingPeriodStart_NotNull.yaml +0 -3
  70. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/BillingPeriodStart_Required.yaml +0 -3
  71. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeCategory_IsString.yaml +0 -3
  72. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeCategory_NotNull.yaml +0 -3
  73. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeCategory_Required.yaml +0 -2
  74. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeClass_Enum.yaml +0 -4
  75. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeClass_IsString.yaml +0 -3
  76. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeClass_Nullable.yaml +0 -3
  77. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeClass_Required.yaml +0 -2
  78. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeDescription_IsString.yaml +0 -3
  79. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeDescription_NotNull.yaml +0 -3
  80. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeDescription_Nullable.yaml +0 -3
  81. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeDescription_Required.yaml +0 -2
  82. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeFrequency_Enum.yaml +0 -6
  83. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeFrequency_IsString.yaml +0 -3
  84. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeFrequency_NotNull.yaml +0 -3
  85. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeFrequency_Required.yaml +0 -2
  86. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargePeriodEnd_IsDateTime.yaml +0 -3
  87. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargePeriodEnd_NotNull.yaml +0 -3
  88. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargePeriodEnd_Required.yaml +0 -3
  89. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargePeriodStart_IsDateTime.yaml +0 -3
  90. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargePeriodStart_NotNull.yaml +0 -3
  91. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargePeriodStart_Required.yaml +0 -3
  92. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeSubcategory_Enum.yaml +0 -13
  93. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeSubcategory_IsString.yaml +0 -3
  94. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeSubcategory_NotNull.yaml +0 -3
  95. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeSubcategory_Required.yaml +0 -2
  96. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeType_Enum.yaml +0 -7
  97. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeType_IsString.yaml +0 -3
  98. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeType_NotNull.yaml +0 -3
  99. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ChargeType_Required.yaml +0 -2
  100. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountCategory_Enum.yaml +0 -5
  101. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountCategory_IsString.yaml +0 -3
  102. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountCategory_NotNull.yaml +0 -3
  103. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountCategory_Nullable.yaml +0 -3
  104. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountCategory_Required.yaml +0 -3
  105. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountId_IsString.yaml +0 -3
  106. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountId_Nullable.yaml +0 -3
  107. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountId_Required.yaml +0 -3
  108. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountName_IsString.yaml +0 -3
  109. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountName_Nullable.yaml +0 -3
  110. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountName_Required.yaml +0 -2
  111. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountStatus_Enum.yaml +0 -5
  112. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountStatus_IsString.yaml +0 -3
  113. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountStatus_Nullable.yaml +0 -3
  114. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountStatus_Required.yaml +0 -2
  115. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountType_IsString.yaml +0 -3
  116. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountType_Nullable.yaml +0 -3
  117. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/CommitmentDiscountType_Required.yaml +0 -2
  118. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ConsumedQuantity_IsDecimal.yaml +0 -3
  119. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ConsumedQuantity_Nullable.yaml +0 -3
  120. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ConsumedQuantity_Required.yaml +0 -2
  121. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ConsumedUnit_IsString.yaml +0 -3
  122. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ConsumedUnit_Nullable.yaml +0 -3
  123. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ConsumedUnit_Required.yaml +0 -2
  124. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ContractedCost_IsDecimal.yaml +0 -3
  125. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ContractedCost_NotNull.yaml +0 -3
  126. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ContractedCost_Required.yaml +0 -2
  127. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ContractedUnitPrice_IsDecimal.yaml +0 -3
  128. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ContractedUnitPrice_Nullable.yaml +0 -3
  129. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ContractedUnitPrice_Required.yaml +0 -2
  130. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/EffectiveCost_IsDecimal.yaml +0 -3
  131. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/EffectiveCost_NotNull.yaml +0 -3
  132. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/EffectiveCost_Required.yaml +0 -3
  133. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/InvoiceIssuer_IsString.yaml +0 -3
  134. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/InvoiceIssuer_NotNull.yaml +0 -3
  135. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/InvoiceIssuer_Required.yaml +0 -2
  136. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ListCost_IsDecimal.yaml +0 -3
  137. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ListCost_NotNull.yaml +0 -3
  138. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ListCost_Required.yaml +0 -3
  139. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ListUnitPrice_IsDecimal.yaml +0 -3
  140. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ListUnitPrice_Nullable.yaml +0 -3
  141. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ListUnitPrice_Required.yaml +0 -3
  142. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/PricingCategory_Enum.yaml +0 -7
  143. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/PricingCategory_Enum_1.0.yaml +0 -7
  144. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/PricingCategory_IsString.yaml +0 -3
  145. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/PricingCategory_Nullable.yaml +0 -3
  146. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/PricingCategory_Required.yaml +0 -2
  147. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/PricingQuantity_IsDecimal.yaml +0 -3
  148. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/PricingQuantity_Nullable.yaml +0 -3
  149. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/PricingQuantity_Required.yaml +0 -3
  150. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/PricingUnit_IsString.yaml +0 -3
  151. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/PricingUnit_Nullable.yaml +0 -3
  152. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/PricingUnit_Required.yaml +0 -2
  153. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/Provider_IsString.yaml +0 -3
  154. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/Provider_NotNull.yaml +0 -3
  155. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/Provider_Required.yaml +0 -3
  156. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/Publisher_IsString.yaml +0 -3
  157. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/Publisher_NotNull.yaml +0 -3
  158. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/Publisher_Required.yaml +0 -3
  159. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/RegionId_IsString.yaml +0 -3
  160. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/RegionId_Nullable.yaml +0 -3
  161. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/RegionId_Required.yaml +0 -2
  162. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/RegionName_IsString.yaml +0 -3
  163. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/RegionName_Nullable.yaml +0 -3
  164. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/RegionName_Required.yaml +0 -2
  165. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/Region_IsString.yaml +0 -3
  166. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/Region_NotNull.yaml +0 -3
  167. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/Region_Required.yaml +0 -3
  168. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ResourceID_IsString.yaml +0 -3
  169. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ResourceID_Nullable.yaml +0 -3
  170. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ResourceID_Required.yaml +0 -2
  171. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ResourceName_IsString.yaml +0 -3
  172. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ResourceName_Nullable.yaml +0 -3
  173. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ResourceName_Required.yaml +0 -3
  174. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ResourceType_IsString.yaml +0 -3
  175. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ResourceType_Nullable.yaml +0 -3
  176. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ResourceType_Required.yaml +0 -2
  177. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ServiceCategory_Enum.yaml +0 -23
  178. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ServiceCategory_IsString.yaml +0 -3
  179. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ServiceCategory_NotNull.yaml +0 -3
  180. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ServiceCategory_Required.yaml +0 -3
  181. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ServiceName_IsString.yaml +0 -3
  182. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ServiceName_NotNull.yaml +0 -3
  183. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/ServiceName_Required.yaml +0 -3
  184. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/SkuId_IsString.yaml +0 -3
  185. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/SkuId_Nullable.yaml +0 -3
  186. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/SkuId_Required.yaml +0 -2
  187. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/SkuPriceId_IsString.yaml +0 -3
  188. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/SkuPriceId_Nullable.yaml +0 -9
  189. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/SkuPriceId_Required.yaml +0 -2
  190. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/SubAccountId_IsString.yaml +0 -3
  191. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/SubAccountId_Nullable.yaml +0 -3
  192. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/SubAccountId_Required.yaml +0 -2
  193. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/SubAccountName_IsString.yaml +0 -3
  194. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/SubAccountName_Nullable.yaml +0 -3
  195. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/SubAccountName_Required.yaml +0 -2
  196. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/Tags_IsJSONObject.yaml +0 -3
  197. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/Tags_Nullable.yaml +0 -3
  198. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/Tags_Required.yaml +0 -3
  199. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/UsageQuantity_IsDecimal.yaml +0 -3
  200. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/UsageQuantity_Nullable.yaml +0 -3
  201. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/UsageQuantity_Required.yaml +0 -3
  202. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/UsageUnit_IsString.yaml +0 -3
  203. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/UsageUnit_Nullable.yaml +0 -3
  204. focus_validator-1.0.0.dev1/focus_validator/rules/base_rule_definitions/UsageUnit_Required.yaml +0 -2
  205. focus_validator-1.0.0.dev1/focus_validator/rules/checks.py +0 -115
  206. focus_validator-1.0.0.dev1/focus_validator/rules/spec_rules.py +0 -169
  207. focus_validator-1.0.0.dev1/focus_validator/rules/version_sets.yaml +0 -323
  208. focus_validator-1.0.0.dev1/focus_validator/utils/download_currency_codes.py +0 -29
  209. focus_validator-1.0.0.dev1/focus_validator/validator.py +0 -58
  210. focus_validator-1.0.0.dev1/pyproject.toml +0 -58
  211. {focus_validator-1.0.0.dev1 → focus_validator-2.0.1}/LICENSE +0 -0
  212. {focus_validator-1.0.0.dev1 → focus_validator-2.0.1}/focus_validator/data_loaders/__init__.py +0 -0
  213. {focus_validator-1.0.0.dev1 → focus_validator-2.0.1}/focus_validator/outputter/__init__.py +0 -0
  214. {focus_validator-1.0.0.dev1 → focus_validator-2.0.1}/focus_validator/rules/__init__.py +0 -0
  215. {focus_validator-1.0.0.dev1/focus_validator/utils → focus_validator-2.0.1/focus_validator/rules}/currency_codes.csv +0 -0
  216. {focus_validator-1.0.0.dev1 → focus_validator-2.0.1}/focus_validator/utils/__init__.py +0 -0
@@ -0,0 +1,1316 @@
1
+ Metadata-Version: 2.4
2
+ Name: focus_validator
3
+ Version: 2.0.1
4
+ Summary: FOCUS spec validator.
5
+ License-File: LICENSE
6
+ Requires-Python: >=3.12,<4.0
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Programming Language :: Python :: 3.12
9
+ Classifier: Programming Language :: Python :: 3.13
10
+ Classifier: Programming Language :: Python :: 3.14
11
+ Requires-Dist: ddt (>=1.7.1,<2.0.0)
12
+ Requires-Dist: duckdb (>=1.4.1,<2.0.0)
13
+ Requires-Dist: graphviz (>=0.21,<0.22)
14
+ Requires-Dist: multimethod (>=2.0,<2.1)
15
+ Requires-Dist: numpy (>=1.26,<2.0)
16
+ Requires-Dist: pandas (>=2,<3)
17
+ Requires-Dist: pandasql (>=0.7.3,<0.8.0)
18
+ Requires-Dist: pandera (>=0.26.1,<0.27.0)
19
+ Requires-Dist: polars (>=1.34.0,<2.0.0)
20
+ Requires-Dist: pyarrow
21
+ Requires-Dist: pydantic (>=2.12,<3.0)
22
+ Requires-Dist: pytz (>=2025.2,<2026.0)
23
+ Requires-Dist: pyyaml (>=6.0.3,<7.0.0)
24
+ Requires-Dist: requests
25
+ Requires-Dist: sqlglot (>=27.28.1,<28.0.0)
26
+ Requires-Dist: tabulate
27
+ Description-Content-Type: text/markdown
28
+
29
+ # FOCUS (FinOps Open Cost and Usage Specification) Validator
30
+
31
+ Validator resource for checking datasets against the [FOCUS](https://focus.finops.org) specification.
32
+
33
+ ## Overview
34
+
35
+ The FOCUS Validator is a comprehensive Python application designed to validate cloud cost and usage datasets against the FinOps Foundation's FOCUS specification. It provides a robust validation framework that can process large datasets, execute complex validation rules, and generate detailed reports about compliance with FOCUS standards.
36
+
37
+ ## Codebase Architecture
38
+
39
+ The FOCUS Validator follows a modular architecture with clear separation of concerns. The codebase is organized into several key components:
40
+
41
+ ### Core Architecture Overview
42
+
43
+ ```text
44
+ focus_validator/
45
+ ├── main.py # CLI entry point and application orchestration
46
+ ├── validator.py # Main validation orchestrator
47
+ ├── config/ # Configuration files (logging, etc.)
48
+ ├── config_objects/ # Core validation logic and rule definitions
49
+ ├── data_loaders/ # Data input handling (CSV, Parquet)
50
+ ├── outputter/ # Result output formatting (Console, XML, etc.)
51
+ ├── rules/ # Rule definitions and validation execution
52
+ └── utils/ # Utility functions (performance, currency codes)
53
+ ```
54
+
55
+ ### 1. Application Entry Point (`main.py`)
56
+
57
+ The main module serves as the CLI interface and application orchestrator:
58
+
59
+ - **Argument Parsing**: Comprehensive command-line interface with support for various validation options
60
+ - **Logging Setup**: Flexible logging configuration with YAML/INI support and multiple fallback strategies
61
+ - **Validation Orchestration**: Coordinates the entire validation workflow from data loading to result output
62
+ - **Visualization Support**: Optional generation of validation dependency graphs using Graphviz
63
+ - **Version Management**: Handles both local and remote FOCUS specification versions
64
+
65
+ **Key Features:**
66
+
67
+ - Support for applicability criteria filtering
68
+ - Remote specification downloading from GitHub
69
+ - Performance timing and logging
70
+ - Cross-platform file opening for visualizations
71
+
72
+ ### 2. Validation Orchestrator (`validator.py`)
73
+
74
+ The `Validator` class is the central coordinator that manages the validation process:
75
+
76
+ - **Data Loading**: Delegates to appropriate data loaders based on file type
77
+ - **Rule Loading**: Manages FOCUS specification rule loading and version handling
78
+ - **Validation Execution**: Orchestrates rule execution against loaded datasets
79
+ - **Result Management**: Coordinates output formatting and result persistence
80
+ - **Performance Monitoring**: Integrated performance logging using decorators
81
+
82
+ **Key Responsibilities:**
83
+
84
+ - Version compatibility checking (local vs remote specifications)
85
+ - Applicability criteria processing and filtering
86
+ - Data and rule loading coordination
87
+ - Validation result aggregation and output
88
+
89
+ ### 3. Config Objects (`config_objects/`)
90
+
91
+ This module contains the core validation logic and configuration management:
92
+
93
+ #### Rule Definition (`rule.py`)
94
+
95
+ - **ModelRule**: Pydantic model for FOCUS specification rules
96
+ - **ValidationCriteria**: Detailed validation requirements and conditions
97
+ - **CompositeCheck**: Support for complex AND/OR logic in rule dependencies
98
+ - **Status Management**: Rule lifecycle management (Active, Draft, etc.)
99
+
100
+ #### DuckDB Schema Conversion (`focus_to_duckdb_converter.py`)
101
+
102
+ The `FocusToDuckDBSchemaConverter` is the most sophisticated component of the validation engine. It implements a comprehensive SQL code generation framework with advanced pattern matching, error recovery, and performance optimization:
103
+
104
+ **Internal Architecture & Core Components:**
105
+
106
+ **Check Generator Registry System:**
107
+
108
+ ```python
109
+ CHECK_GENERATORS: Dict[str, Dict[str, Any]] = {
110
+ "ColumnPresent": {"generator": ColumnPresentCheckGenerator, "factory": lambda args: "ColumnName"},
111
+ "TypeString": {"generator": TypeStringCheckGenerator, "factory": lambda args: "ColumnName"},
112
+ "FormatDateTime": {"generator": FormatDateTimeGenerator, "factory": lambda args: "ColumnName"},
113
+ "CheckModelRule": {"generator": CheckModelRuleGenerator, "factory": lambda args: "ModelRuleId"},
114
+ "AND": {"generator": CompositeANDRuleGenerator, "factory": lambda args: "Items"},
115
+ "OR": {"generator": CompositeORRuleGenerator, "factory": lambda args: "Items"}
116
+ # ... 20+ total generators
117
+ }
118
+ ```
119
+
120
+ **Generator Architecture Patterns:**
121
+
122
+ - **Abstract Base Class**: `DuckDBCheckGenerator` defines the contract with `REQUIRED_KEYS`, `DEFAULTS`, and `FREEZE_PARAMS` class variables
123
+ - **Template Method Pattern**: Subclasses implement `generateSql()` for SQL generation and `getCheckType()` for type identification
124
+ - **Factory Pattern**: Registry system enables dynamic generator selection based on FOCUS rule function names
125
+ - **Parameter Validation**: Automatic validation of required parameters using Pydantic-style checking with `REQUIRED_KEYS`
126
+ - **Immutable Parameters**: `FREEZE_PARAMS` creates read-only parameter objects using `MappingProxyType` for thread safety
127
+
128
+ **SQL Generation Framework:**
129
+
130
+ **Template System Architecture:**
131
+
132
+ - **Dual Template Support**: Handles both `{table_name}` and `{{table_name}}` patterns for backward compatibility
133
+ - **Parameter Substitution**: Safe parameter injection using `_lit()` method with SQL injection prevention
134
+ - **Dynamic SQL Construction**: Runtime SQL assembly based on rule requirements and data types
135
+ - **Query Optimization**: Template caching and SQL query plan reuse for repeated validations
136
+
137
+ **`build_check()` - Advanced Check Construction Pipeline:**
138
+
139
+ **Multi-Phase Construction Process:**
140
+
141
+ ```text
142
+ # Detailed build_check() workflow
143
+ 1. Rule Type Analysis → Determine generator class from CHECK_GENERATORS registry
144
+ 2. Applicability Assessment → Evaluate rule inclusion via _should_include_rule()
145
+ 3. Parameter Extraction → Parse ValidationCriteria.requirement for generator parameters
146
+ 4. Generator Instantiation → Create generator with validated parameters and context
147
+ 5. SQL Generation → Execute generateSql() with template substitution
148
+ 6. Check Object Assembly → Wrap in DuckDBColumnCheck with metadata and execution context
149
+ 7. Nested Check Handling → Recursively process composite rule children
150
+ 8. Performance Optimization → Cache compiled checks for reuse
151
+ ```
152
+
153
+ **Technical Implementation Details:**
154
+
155
+ **Dynamic Rule Processing:**
156
+
157
+ - **Rule Type Detection**: Uses `rule.is_dynamic()` to identify rules requiring runtime data analysis
158
+ - **Skipped Check Generation**: Creates `SkippedDynamicCheck` objects for rules that cannot be statically validated
159
+ - **Metadata Preservation**: Maintains rule context and reasoning for debugging and reporting
160
+
161
+ **Applicability Filtering Engine:**
162
+
163
+ ```python
164
+ def _should_include_rule(self, rule, parent_edges=None) -> bool:
165
+ # Hierarchical applicability checking
166
+ 1. Check rule's own ApplicabilityCriteria against validated_applicability_criteria
167
+ 2. Traverse parent_edges to validate entire dependency chain
168
+ 3. Apply AND logic - rule included only if all criteria in chain are satisfied
169
+ 4. Generate SkippedNonApplicableCheck for filtered rules
170
+ ```
171
+
172
+ **Composite Rule Architecture:**
173
+
174
+ - **Recursive Descent Parsing**: Processes ValidationCriteria.requirement.Items recursively for nested AND/OR structures
175
+ - **Child Check Generation**: Each composite rule item becomes a separate DuckDBColumnCheck object
176
+ - **Logic Handler Assignment**: Assigns `operator.and_` or `operator.or_` functions for result aggregation
177
+ - **Dependency Context Propagation**: Passes parent results and edge context down to child generators
178
+
179
+ **`run_check()` - High-Performance Execution Engine:**
180
+
181
+ **Multi-Level Execution Strategy:**
182
+
183
+ ```python
184
+ # Comprehensive execution pipeline
185
+ 1. Connection Validation → Ensure DuckDB connection is active and prepared
186
+ 2. Check Type Dispatch → Route to appropriate execution path (skipped/leaf/composite)
187
+ 3. SQL Template Resolution → Substitute table names and parameters safely
188
+ 4. Query Execution → Execute with comprehensive error handling and recovery
189
+ 5. Result Processing → Parse DuckDB results and format for aggregation
190
+ 6. Error Analysis → Extract column names from error messages for intelligent reporting
191
+ 7. Nested Aggregation → Process composite rule children and apply logic operators
192
+ 8. Performance Metrics → Capture execution timing and resource usage
193
+ ```
194
+
195
+ **Advanced Error Recovery System:**
196
+
197
+ **Missing Column Detection:**
198
+
199
+ ```python
200
+ def _extract_missing_columns(err_msg: str) -> List[str]:
201
+ # Multi-pattern column extraction from DuckDB error messages
202
+ patterns = [
203
+ r'Column with name ([A-Za-z0-9_"]+) does not exist',
204
+ r'Binder Error: .*? column ([A-Za-z0-9_"]+)',
205
+ r'"([A-Za-z0-9_]+)" not found'
206
+ ]
207
+ # Returns sorted list of missing column names for precise error reporting
208
+ ```
209
+
210
+ **SQL Error Classification:**
211
+
212
+ - **Syntax Errors**: Template substitution failures and malformed SQL detection
213
+ - **Schema Mismatches**: Column type conflicts and missing table detection
214
+ - **Data Validation Errors**: Constraint violations and format validation failures
215
+ - **Performance Issues**: Timeout detection and resource exhaustion handling
216
+
217
+ **Composite Rule Execution Engine:**
218
+
219
+ **Upstream Dependency Short-Circuiting:**
220
+
221
+ ```python
222
+ # Advanced dependency failure handling
223
+ if upstream_failure_detected:
224
+ 1. Mark all child checks as failed with upstream reason
225
+ 2. Preserve failure context and dependency chain information
226
+ 3. Skip actual SQL execution to avoid cascading errors
227
+ 4. Generate detailed failure report with root cause analysis
228
+ ```
229
+
230
+ **Result Aggregation Logic:**
231
+
232
+ - **AND Logic**: `all(child_results)` - requires all children to pass
233
+ - **OR Logic**: `any(child_results)` - requires at least one child to pass
234
+ - **Failure Propagation**: Maintains detailed failure context including which specific children failed
235
+ - **Performance Optimization**: Early termination for AND (first failure) and OR (first success) operations
236
+
237
+ **Advanced SQL Generation Patterns:**
238
+
239
+ **Type-Specific Generators:**
240
+
241
+ **ColumnPresentCheckGenerator:**
242
+
243
+ ```sql
244
+ WITH col_check AS (
245
+ SELECT COUNT(*) AS found
246
+ FROM information_schema.columns
247
+ WHERE table_name = '{table_name}' AND column_name = '{column_name}'
248
+ )
249
+ SELECT
250
+ CASE WHEN found = 0 THEN 1 ELSE 0 END AS violations,
251
+ CASE WHEN found = 0 THEN '{error_message}' END AS error_message
252
+ FROM col_check
253
+ ```
254
+
255
+ **TypeStringCheckGenerator:**
256
+
257
+ ```sql
258
+ SELECT
259
+ COUNT(*) AS violations,
260
+ '{column_name} must be string type' AS error_message
261
+ FROM {table_name}
262
+ WHERE {column_name} IS NOT NULL
263
+ AND typeof({column_name}) != 'VARCHAR'
264
+ ```
265
+
266
+ **FormatDateTimeGenerator:**
267
+
268
+ ```sql
269
+ WITH datetime_violations AS (
270
+ SELECT COUNT(*) AS violations
271
+ FROM {table_name}
272
+ WHERE {column_name} IS NOT NULL
273
+ AND TRY_STRPTIME({column_name}, '{expected_format}') IS NULL
274
+ )
275
+ SELECT violations,
276
+ CASE WHEN violations > 0
277
+ THEN '{column_name} format violations: ' || violations || ' rows'
278
+ END AS error_message
279
+ FROM datetime_violations
280
+ ```
281
+
282
+ **Performance & Memory Optimization:**
283
+
284
+ **Connection Management:**
285
+
286
+ - **Connection Pooling**: Reuses DuckDB connections across multiple validations
287
+ - **Memory Monitoring**: Tracks memory usage for large dataset processing
288
+ - **Query Plan Caching**: DuckDB query plan reuse for repeated validation patterns
289
+ - **Parallel Execution**: Thread-safe generator design for concurrent validation
290
+
291
+ **Algorithmic Efficiency:**
292
+
293
+ - **Lazy Evaluation**: SQL queries only executed when results are needed
294
+ - **Batch Processing**: Groups similar validations for bulk execution
295
+ - **Result Streaming**: Processes large result sets without loading entire datasets into memory
296
+ - **Index Utilization**: Generates SQL that leverages DuckDB's columnar indices
297
+
298
+ **Extensibility Framework:**
299
+
300
+ **Custom Generator Development:**
301
+
302
+ ```python
303
+ class CustomCheckGenerator(DuckDBCheckGenerator):
304
+ REQUIRED_KEYS = {"ColumnName", "ThresholdValue"}
305
+ DEFAULTS = {"TolerancePercent": 0.1}
306
+
307
+ def generateSql(self) -> str:
308
+ # Custom SQL generation logic
309
+ return f"SELECT COUNT(*) FROM {{table_name}} WHERE {self.params.ColumnName} > {self.params.ThresholdValue}"
310
+
311
+ def getCheckType(self) -> str:
312
+ return "custom_threshold_check"
313
+ ```
314
+
315
+ **Registry Integration:**
316
+
317
+ - **Dynamic Registration**: New generators can be added to CHECK_GENERATORS at runtime
318
+ - **Parameter Validation**: Automatic validation using REQUIRED_KEYS and type checking
319
+ - **Factory Function**: Consistent parameter extraction across all generator types
320
+ - **Metadata Preservation**: Full context and provenance tracking for custom validations
321
+
322
+ #### Rule Dependency Resolution (`rule_dependency_resolver.py`)
323
+
324
+ The `RuleDependencyResolver` is the most sophisticated component in the validation engine, implementing advanced graph algorithms to analyze complex rule interdependencies and optimize execution paths:
325
+
326
+ **Internal Architecture & Data Structures:**
327
+
328
+ The resolver maintains three core data structures for efficient dependency management:
329
+
330
+ - **`dependency_graph`**: `Dict[str, Set[str]]` mapping rule_id → {dependent_rule_ids} representing forward edges
331
+ - **`reverse_graph`**: `Dict[str, List[str]]` mapping rule_id → [rules_that_depend_on_this] for backward traversal
332
+ - **`in_degree`**: `Dict[str, int]` tracking prerequisite counts for Kahn's algorithm implementation
333
+
334
+ **`buildDependencyGraph()` - Advanced Dependency Analysis:**
335
+
336
+ **Algorithm Implementation:**
337
+
338
+ ```python
339
+ # Pseudocode for dependency graph construction
340
+ 1. Filter rules by target prefix (e.g., "BilledCost*")
341
+ 2. Recursively collect transitive dependencies using BFS
342
+ 3. Build bidirectional graph structures for O(1) lookups
343
+ 4. Calculate in-degree counts for topological processing
344
+ 5. Validate graph integrity and detect potential cycles
345
+ ```
346
+
347
+ **Technical Details:**
348
+
349
+ - **Rule Filtering**: Supports prefix-based filtering (e.g., `target_rule_prefix="BilledCost"`) to process subsets of the full rule graph, essential for large FOCUS specifications with 200+ rules
350
+ - **Transitive Closure**: Uses `_collectAllDependencies()` with deque-based BFS to discover all direct and transitive child dependencies, ensuring composite rules include their nested components even when those components don't match the prefix filter
351
+ - **Graph Construction**: Builds forward and reverse adjacency lists simultaneously for O(1) dependency lookups during execution
352
+ - **Memory Optimization**: Uses `defaultdict(set)` and `defaultdict(list)` to minimize memory allocation overhead
353
+ - **Cycle Prevention**: Maintains processed sets to prevent infinite recursion during dependency discovery
354
+
355
+ **Composite Rule Propagation:**
356
+
357
+ - **Condition Inheritance**: Implements `_propagate_composite_conditions()` to push parent composite rule conditions down to child rules via private attributes
358
+ - **CheckModelRule Processing**: Analyzes ValidationCriteria.requirement.Items to identify referenced rules and propagate preconditions
359
+ - **Runtime Condition Evaluation**: Child rules inherit parent conditions and evaluate them dynamically during execution
360
+
361
+ **`getTopologicalOrder()` - Advanced Graph Algorithms:**
362
+
363
+ **Kahn's Algorithm Implementation:**
364
+
365
+ ```python
366
+ # Enhanced Kahn's algorithm with cycle detection
367
+ 1. Initialize in-degree counts from dependency graph
368
+ 2. Queue all zero in-degree nodes (no prerequisites)
369
+ 3. Process nodes level by level, updating dependent in-degrees
370
+ 4. Detect cycles when remaining nodes > 0 after processing
371
+ 5. Handle circular dependencies by appending remaining nodes
372
+ ```
373
+
374
+ **Cycle Detection & Analysis:**
375
+
376
+ - **Tarjan's SCC Algorithm**: Implements `_tarjan_scc()` for strongly connected component detection with O(V+E) complexity
377
+ - **Cycle Visualization**: `_export_scc_dot()` generates Graphviz DOT files for each strongly connected component, enabling visual debugging of complex cycles
378
+ - **Simple Cycle Reconstruction**: `_find_simple_cycle()` uses DFS with path tracking to identify specific circular dependency chains
379
+ - **Detailed Cycle Logging**: Provides comprehensive cycle analysis including component sizes, adjacency matrices, and example cycle paths
380
+
381
+ **Advanced Debugging & Instrumentation:**
382
+
383
+ **Graph Analytics:**
384
+
385
+ - **`_log_graph_snapshot()`**: Captures comprehensive graph metrics (node count, edge count, zero in-degree nodes) with sampling
386
+ - **`_trace_node()`**: Implements bounded DFS to trace dependency chains up to configurable depth for debugging blocked nodes
387
+ - **`_dump_blockers()`**: Analyzes remaining nodes after topological sort to identify specific blocking dependencies
388
+
389
+ **Performance Monitoring:**
390
+
391
+ - **Edge Counting**: Tracks total dependency relationships for complexity analysis
392
+ - **Zero In-Degree Analysis**: Identifies entry points (rules with no prerequisite dependencies) for parallel execution opportunities
393
+ - **Blocking Analysis**: Detailed reporting of rules that cannot be scheduled due to unsatisfied prerequisites
394
+
395
+ **`build_plan_and_schedule()` - Execution Plan Optimization:**
396
+
397
+ **PlanBuilder Integration:**
398
+
399
+ ```python
400
+ # Advanced execution planning workflow
401
+ 1. Create PlanBuilder with filtered relevant rules
402
+ 2. Build parent-preserving forest from entry points
403
+ 3. Compile to layered ValidationPlan for parallel execution
404
+ 4. Apply runtime context and edge predicates
405
+ 5. Generate deterministic scheduling with tie-breaking
406
+ ```
407
+
408
+ **Technical Implementation:**
409
+
410
+ - **Forest Construction**: Uses `builder.build_forest(roots)` to create parent-preserving execution trees that maintain rule relationships
411
+ - **Compilation Pipeline**: Transforms abstract plan graph into concrete ValidationPlan with index-based node references for O(1) lookups
412
+ - **Edge Context Propagation**: Maintains EdgeCtx objects throughout planning to preserve dependency reasoning and conditional activation
413
+ - **Execution Context**: Supports runtime `exec_ctx` parameter for dynamic rule filtering and conditional execution paths
414
+
415
+ **Memory & Performance Optimizations:**
416
+
417
+ **Data Structure Efficiency:**
418
+
419
+ - **Deque-Based Processing**: Uses `collections.deque` for BFS traversals to minimize memory reallocation
420
+ - **Set Operations**: Leverages Python set operations for O(1) membership testing and efficient union/intersection operations
421
+ - **Processed Tracking**: Maintains processed sets to prevent redundant work during recursive dependency discovery
422
+
423
+ **Algorithmic Complexity:**
424
+
425
+ - **Graph Construction**: O(V + E) where V = rules, E = dependencies
426
+ - **Topological Sort**: O(V + E) with Kahn's algorithm
427
+ - **SCC Detection**: O(V + E) with Tarjan's algorithm
428
+ - **Memory Usage**: O(V + E) for graph storage with minimal overhead
429
+
430
+ **Applicability Criteria Integration:**
431
+
432
+ **Dynamic Rule Filtering:**
433
+
434
+ - **Criteria Validation**: Supports `validated_applicability_criteria` parameter for runtime rule inclusion/exclusion
435
+ - **Hierarchical Processing**: Always includes rules in dependency graph but marks them for potential skipping during execution
436
+ - **SkippedNonApplicableCheck Generation**: Defers actual filtering to execution phase via converter's applicability checking
437
+
438
+ #### Plan Building (`plan_builder.py`)
439
+
440
+ - **Execution Planning**: Creates layered execution plans optimized for parallel processing
441
+ - **Edge Context Management**: Tracks why dependencies exist with conditional activation predicates
442
+ - **Topological Scheduling**: Implements Kahn's algorithm with deterministic tie-breaking for consistent execution order
443
+ - **Parent Preservation**: Maintains parent-child relationships throughout the planning process for result aggregation
444
+
445
+ #### JSON Loading (`json_loader.py`)
446
+
447
+ - **Specification Parsing**: Loads and parses FOCUS JSON rule definitions
448
+ - **Version Management**: Handles multiple FOCUS specification versions
449
+ - **Remote Downloading**: Supports fetching specifications from GitHub releases
450
+
451
+ ### 4. Data Loaders (`data_loaders/`)
452
+
453
+ Extensible data loading framework supporting multiple file formats:
454
+
455
+ #### Base Data Loader (`data_loader.py`)
456
+
457
+ - **Format Detection**: Automatic detection of file formats based on extension
458
+ - **Performance Monitoring**: Integrated loading performance tracking
459
+ - **Error Handling**: Comprehensive error handling and logging
460
+ - **Memory Management**: Efficient handling of large datasets
461
+
462
+ #### Format-Specific Loaders
463
+
464
+ - **CSV Loader** (`csv_data_loader.py`): Optimized CSV parsing with configurable options
465
+ - **Parquet Loader** (`parquet_data_loader.py`): High-performance Parquet file processing
466
+
467
+ **Key Features:**
468
+
469
+ - Automatic file type detection
470
+ - Performance monitoring and logging
471
+ - Memory-efficient processing for large datasets
472
+ - Extensible architecture for additional formats
473
+
474
+ #### CSV Data Loading with Schema Inference
475
+
476
+ The CSV data loader uses Polars with `infer_schema_length=10000` to analyze the first 10,000 rows for data type detection. This provides a good balance between accuracy and performance for most datasets.
477
+
478
+ **Understanding Schema Inference:**
479
+
480
+ The `infer_schema_length` parameter determines how many rows Polars examines to infer column data types:
481
+
482
+ - **Default: 10,000 rows** - Suitable for most FOCUS datasets
483
+ - **Higher values** - More accurate type detection but slower loading
484
+ - **Lower values** - Faster loading but may miss type patterns
485
+
486
+ **Handling Datasets with Many Null Values:**
487
+
488
+ If any column in your dataset contains only null values within the first 10,000 rows, Polars has no sample values from which to infer that column's type, and you may encounter data type inference issues. Here are several solutions:
489
+
490
+ ##### Option 1: Specify Column Types Explicitly
491
+
492
+ When loading CSV data programmatically, provide explicit column types:
493
+
494
+ ```python
495
+ from focus_validator.data_loaders.csv_data_loader import CSVDataLoader
496
+ import polars as pl
497
+
498
+ # Define column types for problematic columns
499
+ column_types = {
500
+ "BilledCost": pl.Float64(),
501
+ "EffectiveCost": pl.Float64(),
502
+ "ListCost": pl.Float64(),
503
+ "BillingPeriodStart": pl.Datetime("us", "UTC"),
504
+ "BillingPeriodEnd": pl.Datetime("us", "UTC"),
505
+ "AccountId": pl.Utf8(),
506
+ "ResourceId": pl.Utf8(),
507
+ # Add other columns as needed
508
+ }
509
+
510
+ loader = CSVDataLoader("your_data.csv", column_types=column_types)
511
+ df = loader.load()
512
+ ```
513
+
514
+ ##### Option 2: Clean Your Data
515
+
516
+ Pre-process your CSV file to move non-null data towards the beginning:
517
+
518
+ ```bash
519
+ # Sort CSV by a column that has early non-null values
520
+ head -1 your_data.csv > header.csv
521
+ tail -n +2 your_data.csv | sort -t',' -k1,1 > sorted_data.csv
522
+ cat header.csv sorted_data.csv > cleaned_data.csv
523
+ ```
524
+
525
+ ##### Option 3: Increase Schema Inference Length
526
+
527
+ For very large datasets with sparse data, you can increase the inference length by extending the CSV loader or by reading the file directly with Polars using a larger `infer_schema_length`, for example:
528
+
529
+ ```python
530
+ import polars as pl
531
+
532
+ # Load with increased inference length
533
+ df = pl.read_csv(
534
+ "your_data.csv",
535
+ infer_schema_length=50000, # Analyze first 50,000 rows
536
+ try_parse_dates=True,
537
+ null_values=["", "NULL", "INVALID", "INVALID_COST", "BAD_DATE"]
538
+ )
539
+ ```
540
+
541
+ ##### Option 4: Two-Pass Loading
542
+
543
+ For extremely challenging datasets, use a two-pass approach:
544
+
545
+ ```python
546
+ import polars as pl
547
+
548
+ # First pass: scan the entire file to understand data patterns
549
+ schema_sample = pl.scan_csv("your_data.csv").sample(n=10000).collect()
550
+
551
+ # Analyze the sample to determine appropriate types
552
+ inferred_types = {}
553
+ for col in schema_sample.columns:
554
+ non_null_values = schema_sample[col].drop_nulls()
555
+ if len(non_null_values) > 0:
556
+ # Determine appropriate type based on sample
557
+ if non_null_values.dtype == pl.Utf8:
558
+ # Try to infer if it should be numeric or datetime
559
+ try:
560
+ pl.Series(non_null_values.to_list()).cast(pl.Float64)
561
+ inferred_types[col] = pl.Float64()
562
+ except:
563
+ try:
564
+ pl.Series(non_null_values.to_list()).str.strptime(pl.Datetime)
565
+ inferred_types[col] = pl.Datetime("us", "UTC")
566
+ except:
567
+ inferred_types[col] = pl.Utf8()
568
+
569
+ # Second pass: load with determined schema
570
+ df = pl.read_csv("your_data.csv", schema_overrides=inferred_types)
571
+ ```
572
+
573
+ **Common Signs of Schema Inference Issues:**
574
+
575
+ - Numeric columns loaded as strings (Utf8) when they should be Float64
576
+ - Date columns not parsed as datetime types
577
+ - Validation failures for type-checking rules
578
+ - Unexpected null value counts in validation output
579
+
580
+ **Best Practices for Large Datasets:**
581
+
582
+ 1. **Validate Your Data First**: Use tools like `head`, `tail`, and `wc -l` to understand your data structure
583
+ 2. **Check for Empty Rows**: Ensure the beginning of your file contains representative data
584
+ 3. **Use Consistent Null Representations**: Stick to standard null values like empty strings or "NULL"
585
+ 4. **Test with Sample Data**: Validate schema inference with a smaller sample before processing full datasets
586
+ 5. **Monitor Loading Logs**: The CSV loader provides detailed logging about type conversion attempts and failures
587
+
588
+ **Performance Considerations:**
589
+
590
+ - Higher `infer_schema_length` values increase loading time but improve accuracy
591
+ - For very large files (>1GB), consider processing in chunks or using Parquet format instead
592
+ - The CSV loader includes automatic fallback mechanisms for problematic columns
593
+
594
+ ### 5. Validation Rules Engine (`rules/`)
595
+
596
+ The core validation execution engine:
597
+
598
+ #### Specification Rules (`spec_rules.py`)
599
+
600
+ - **Rule Loading**: Manages loading of FOCUS specification rules from JSON
601
+ - **Version Management**: Handles multiple FOCUS versions and compatibility
602
+ - **Validation Execution**: Orchestrates rule execution against datasets using DuckDB
603
+ - **Result Aggregation**: Collects and organizes validation results
604
+ - **Remote Specification Support**: Downloads and caches remote specifications
605
+
606
+ **Validation Process:**
607
+
608
+ 1. Load FOCUS specification from JSON files or remote sources
609
+ 2. Parse rules and build dependency graph
610
+ 3. Convert rules to executable DuckDB SQL queries
611
+ 4. Execute validation queries against dataset
612
+ 5. Aggregate results and generate comprehensive reports
613
+
614
+ ### 6. Output Formatters (`outputter/`)
615
+
616
+ Flexible output system supporting multiple formats:
617
+
618
+ #### Base Outputter (`outputter.py`)
619
+
620
+ - **Format Selection**: Factory pattern for output format selection
621
+ - **Result Processing**: Standardized result processing and formatting
622
+
623
+ #### Format-Specific Outputters
624
+
625
+ - **Console Outputter** (`outputter_console.py`): Human-readable terminal output
626
+ - **XML/JUnit Outputter** (`outputter_unittest.py`): CI/CD compatible XML reports
627
+ - **Validation Graph Outputter** (`outputter_validation_graph.py`): Graphviz visualizations
628
+
629
+ ### 7. Utility Functions (`utils/`)
630
+
631
+ Supporting utilities for specialized functionality:
632
+
633
+ - **Performance Logging** (`performance_logging.py`): Decorator-based performance monitoring
634
+ - **Currency Code Downloads** (`download_currency_codes.py`): Dynamic currency validation support
635
+
636
+ ### Data Flow Architecture
637
+
638
+ The validation process follows this high-level data flow:
639
+
640
+ 1. **Input Processing**: CLI arguments parsed and configuration loaded
641
+ 2. **Data Loading**: Dataset loaded using appropriate data loader (CSV/Parquet)
642
+ 3. **Rule Loading**: FOCUS specification rules loaded and parsed from JSON
643
+ 4. **Plan Building**: Validation execution plan built considering rule dependencies
644
+ 5. **SQL Generation**: Rules converted to optimized DuckDB SQL queries
645
+ 6. **Validation Execution**: SQL queries executed against dataset using DuckDB engine
646
+ 7. **Result Aggregation**: Validation results collected and organized
647
+ 8. **Output Generation**: Results formatted and output using selected formatter
648
+ 9. **Optional Visualization**: Dependency graph and results visualized using Graphviz
649
+
650
+ ### Key Design Principles
651
+
652
+ 1. **Modularity**: Clear separation of concerns with pluggable components
653
+ 2. **Performance**: Optimized for large datasets using DuckDB and efficient algorithms
654
+ 3. **Extensibility**: Easy to add new data formats, output formats, and validation rules
655
+ 4. **Reliability**: Comprehensive error handling and logging throughout
656
+ 5. **Standards Compliance**: Full adherence to FOCUS specification requirements
657
+ 6. **Developer Experience**: Rich logging, performance monitoring, and debugging support
658
+
659
+ ### Technology Stack
660
+
661
+ - **Core Language**: Python 3.9+ with type hints and modern language features
662
+ - **Data Processing**: DuckDB for high-performance SQL-based validation
663
+ - **Data Formats**: Pandas/Polars for CSV/Parquet processing
664
+ - **Configuration**: Pydantic for type-safe configuration management
665
+ - **CLI**: argparse for comprehensive command-line interface
666
+ - **Visualization**: Graphviz for validation dependency graphs
667
+ - **Testing**: pytest with comprehensive test coverage
668
+ - **Code Quality**: Black, isort, flake8, mypy for code formatting and quality
669
+
670
+ ### Testing Architecture
671
+
672
+ The FOCUS Validator maintains high code quality through comprehensive testing:
673
+
674
+ #### Test Organization
675
+
676
+ - **Unit Tests**: Component-level testing for individual modules
677
+ - **Integration Tests**: End-to-end validation workflow testing
678
+ - **Performance Tests**: Large dataset processing validation
679
+ - **Configuration Tests**: Rule loading and version compatibility testing
680
+
681
+ #### Test Coverage Strategy
682
+
683
+ - **Data Loaders**: 100% coverage ensuring reliable data ingestion
684
+ - **Config Objects**: 97%+ coverage for core validation logic
685
+ - **Outputters**: 85%+ coverage across all output formats
686
+ - **Overall Project**: 70% coverage with targeted improvement areas
687
+
688
+ #### Quality Assurance Tools
689
+
690
+ - **pytest**: Primary testing framework with fixture support
691
+ - **pytest-cov**: Coverage reporting and analysis
692
+ - **Black**: Automated code formatting
693
+ - **isort**: Import organization and sorting
694
+ - **flake8**: Code style and complexity checking
695
+ - **mypy**: Static type checking and validation
696
+ - **pre-commit**: Git hook integration for quality checks
697
+
698
+ ### Development Workflow
699
+
700
+ #### Local Development Setup
701
+
702
+ 1. **Environment Management**: Poetry-based dependency management
703
+ 2. **Code Quality**: Automated formatting and linting on commit
704
+ 3. **Performance Monitoring**: Built-in performance logging and profiling
705
+ 4. **Testing**: Makefile targets for module-specific and comprehensive testing
706
+
707
+ #### Continuous Integration
708
+
709
+ The project uses GitHub Actions for automated quality assurance:
710
+
711
+ - **Linting Pipeline**: mypy, black, isort, flake8 validation
712
+ - **Test Execution**: Full test suite with coverage reporting
713
+ - **Performance Validation**: Memory and execution time monitoring
714
+ - **Multi-Platform Testing**: Cross-platform compatibility verification
715
+
716
+ #### Code Organization Principles
717
+
718
+ - **Type Safety**: Comprehensive type hints throughout codebase
719
+ - **Error Handling**: Structured exception hierarchy with detailed logging
720
+ - **Performance Focus**: Optimized for large dataset processing
721
+ - **Extensibility**: Plugin architecture for new formats and outputs
722
+ - **Documentation**: Inline documentation and comprehensive README
723
+
724
+ ## Environment Setup
725
+
726
+ ### Prerequisites
727
+
728
+ - Python 3.9+
729
+ - Poetry (Package & Dependency Manager)
730
+
731
+ ### Installation
732
+
733
+ #### 1. Install Poetry
734
+
735
+ If you haven't installed Poetry yet, you can do it by running:
736
+
737
+ ```bash
738
+ curl -sSL https://install.python-poetry.org | python3 -
739
+ ```
740
+
741
+ For alternative installation methods or more information about Poetry, please refer to
742
+ the [official documentation](https://python-poetry.org/docs/).
743
+
744
+ #### 2. Clone the repository
745
+
746
+ ```bash
747
+ git clone https://github.com/finopsfoundation/focus-spec-validator.git
748
+ cd focus-spec-validator
749
+ ```
750
+
751
+ #### 3. Install dependencies
752
+
753
+ Using Poetry, you can install the project's dependencies with:
754
+
755
+ ```bash
756
+ poetry install
757
+ ```
758
+
759
+ ## Usage
760
+
761
+ Activate the virtual environment (See: [poetry-plugin-shell](https://github.com/python-poetry/poetry-plugin-shell)):
762
+
763
+ ```bash
764
+ poetry shell
765
+ ```
766
+
767
+ ### Preferred Usage: Web Output
768
+
769
+ The **recommended way** to run FOCUS validations is using the **web output type**, which generates an interactive HTML report with advanced filtering, rule grouping, and detailed validation insights:
770
+
771
+ ```bash
772
+ poetry run focus-validator --data-file tests/samples/focus_sample_10000.csv --validate-version 1.2 --applicability-criteria ALL --output-type web --output-destination '10000_sample_validate.html'
773
+ ```
774
+
775
+ This command:
776
+
777
+ - Validates a dataset against FOCUS version 1.2
778
+ - Includes all applicable validation rules (`--applicability-criteria ALL`)
779
+ - Generates an interactive web report saved as `10000_sample_validate.html`
780
+ - Provides filtering by rule status (PASS/FAILED/SKIPPED), entity types, and rule functions
781
+ - Shows both entity-centric and rule-centric views of validation results
782
+
783
+ ### Additional Usage Options
784
+
785
+ For help and more options:
786
+
787
+ ```bash
788
+ focus-validator --help
789
+ ```
790
+
791
+ For console output or other formats:
792
+
793
+ ```bash
794
+ # Console output (simple text format)
795
+ focus-validator --data-file your_data.csv --validate-version 1.2
796
+
797
+ # XML output for CI/CD integration
798
+ focus-validator --data-file your_data.csv --validate-version 1.2 --output-type unittest --output-destination results.xml
799
+ ```
800
+
801
+ ## Explain Mode
802
+
803
+ The FOCUS Validator includes a powerful **Explain Mode** that allows you to inspect validation rules and their underlying SQL logic without executing actual validation or requiring input data. This feature is invaluable for understanding FOCUS specification requirements, debugging validation logic, and learning how rules are implemented.
804
+
805
+ ### What is Explain Mode?
806
+
807
+ Explain Mode generates comprehensive explanations for all validation rules in a FOCUS specification version, including:
808
+
809
+ - **Rule Metadata**: Rule ID, type, check method, and generator information
810
+ - **MustSatisfy Requirements**: The human-readable requirement that each rule validates
811
+ - **SQL Queries**: Complete, formatted SQL queries that implement the validation logic
812
+ - **Rule Relationships**: Hierarchy and dependencies between composite and child rules
813
+ - **Condition Logic**: Row-level conditions and filters applied during validation
814
+
815
+ ### How to Use Explain Mode
816
+
817
+ #### Basic Usage
818
+
819
+ ```bash
820
+ # Explain all rules for FOCUS 1.2 with CostAndUsage dataset
821
+ focus-validator --validate-version 1.2 --focus-dataset CostAndUsage --explain-mode
822
+ ```
823
+
824
+ #### With Applicability Criteria
825
+
826
+ ```bash
827
+ # Show all rules including those with specific applicability requirements
828
+ focus-validator --validate-version 1.2 --focus-dataset CostAndUsage --explain-mode --applicability-criteria ALL
829
+
830
+ # Show rules for specific applicability criteria
831
+ focus-validator --validate-version 1.2 --focus-dataset CostAndUsage --explain-mode --applicability-criteria "AVAILABILITY_ZONE_SUPPORTED,MULTIPLE_SUB_ACCOUNT_TYPES_SUPPORTED"
832
+ ```
833
+
834
+ #### View Available Applicability Criteria
835
+
836
+ ```bash
837
+ # List all applicability criteria for a FOCUS version
838
+ focus-validator --validate-version 1.2 --show-applicability-criteria
839
+ ```
840
+
841
+ ### Explain Mode Output Format
842
+
843
+ The output is organized alphabetically by rule ID and includes comprehensive information for each rule:
844
+
845
+ #### Example Output
846
+
847
+ ```text
848
+ === SQL Explanations for 578 rules ===
849
+
850
+ 📋 AvailabilityZone-C-001-M
851
+ Type: leaf
852
+ Check: type_string
853
+ Generator: TypeStringCheckGenerator
854
+ MustSatisfy: AvailabilityZone MUST be of type String.
855
+ SQL:
856
+ WITH invalid AS (
857
+ SELECT 1
858
+ FROM focus_data
859
+ WHERE AvailabilityZone IS NOT NULL AND typeof(AvailabilityZone) != 'VARCHAR'
860
+ )
861
+ SELECT COUNT(*) AS violations,
862
+ CASE WHEN COUNT(*) > 0
863
+ THEN 'AvailabilityZone MUST be of type VARCHAR (string).'
864
+ END AS error_message
865
+ FROM invalid
866
+
867
+ 📋 BilledCost-C-000-M
868
+ Type: composite
869
+ Check: composite
870
+ Generator: CompositeANDRuleGenerator
871
+ MustSatisfy: The BilledCost column adheres to the following requirements:
872
+ Children: 7 rules
873
+ - BilledCost-C-000-M -> BilledCost-C-001-M (reference, model_rule_reference)
874
+ - BilledCost-C-000-M -> BilledCost-C-002-M (reference, model_rule_reference)
875
+ - BilledCost-C-000-M -> BilledCost-C-003-M (reference, model_rule_reference)
876
+ - BilledCost-C-000-M -> BilledCost-C-004-M (reference, model_rule_reference)
877
+ - BilledCost-C-000-M -> BilledCost-C-005-C (reference, model_rule_reference)
878
+ - BilledCost-C-000-M -> BilledCost-C-006-M (reference, model_rule_reference)
879
+ - BilledCost-C-000-M -> BilledCost-C-007-M (reference, model_rule_reference)
880
+
881
+ 📋 RegionId-C-000-C
882
+ Type: skipped
883
+ Check: None
884
+ Generator: None due to non-applicable rule
885
+ MustSatisfy: RegionId is RECOMMENDED to be present in a FOCUS dataset when the provider supports deploying resources or services within regions.
886
+ ```
887
+
888
+ #### Output Components Explained
889
+
890
+ **📋 Rule Header**: The rule ID; rules are listed in alphabetical order
891
+
892
+ **Type**:
893
+
894
+ - `leaf` - Individual validation rule with SQL query
895
+ - `composite` - Rule that combines multiple child rules (AND/OR logic)
896
+ - `reference` - Rule that references another rule's outcome
897
+ - `skipped` - Rule that cannot be executed (dynamic, non-applicable, or missing dependencies)
898
+
899
+ **Check**: The validation method used:
900
+
901
+ - `type_string` - Validates column data type is string
902
+ - `column_presence` - Validates column exists in dataset
903
+ - `format_datetime` - Validates datetime format compliance
904
+ - `composite` - Combines multiple child rule results
905
+ - `model_rule_reference` - References another rule's result
906
+
907
+ **Generator**: The code generator that creates the validation logic:
908
+
909
+ - `TypeStringCheckGenerator` - Generates SQL for type validation
910
+ - `ColumnPresentCheckGenerator` - Generates SQL for column existence
911
+ - `CompositeANDRuleGenerator` - Combines child rules with AND logic
912
+ - `CompositeORRuleGenerator` - Combines child rules with OR logic
913
+ - `None due to dynamic rule` - Rule requires runtime data analysis
914
+ - `None due to non-applicable rule` - Rule doesn't apply to current criteria
915
+
916
+ **MustSatisfy**: Human-readable description of what the rule validates, taken directly from the FOCUS specification
917
+
918
+ **SQL**: Complete, formatted SQL query that implements the validation (for leaf rules only)
919
+
920
+ **Children**: For composite rules, shows all child rules with their types and references
921
+
922
+ **Condition**: Row-level conditions applied during validation (when present)
923
+
924
+ ### Rule Types and Categories
925
+
926
+ #### Leaf Rules
927
+
928
+ Individual validation rules that execute SQL queries against the dataset. These represent the core validation logic.
929
+
930
+ **Examples:**
931
+
932
+ - Column presence validation
933
+ - Data type checking
934
+ - Format validation (dates, currencies, etc.)
935
+ - Value constraint checking
936
+ - Pattern matching validation
937
+
938
+ #### Composite Rules
939
+
940
+ Rules that combine multiple child rules using logical operators:
941
+
942
+ - **AND Rules** (`CompositeANDRuleGenerator`): All child rules must pass
943
+ - **OR Rules** (`CompositeORRuleGenerator`): At least one child rule must pass
944
+
945
+ #### Reference Rules
946
+
947
+ Rules that mirror the outcome of another rule without executing additional SQL.
948
+
949
+ #### Skipped Rules
950
+
951
+ Rules that cannot be executed for various reasons:
952
+
953
+ - **Dynamic Rules**: Require runtime data analysis to determine validation logic
954
+ - **Non-Applicable Rules**: Don't apply based on current applicability criteria
955
+ - **Missing Dependencies**: Reference unavailable components or data
956
+
957
+ ### Understanding Applicability Criteria
958
+
959
+ Some FOCUS rules only apply under specific conditions (e.g., when a provider supports availability zones). The `--applicability-criteria` option controls which rules are included:
960
+
961
+ - **Default (none specified)**: Shows only universally applicable rules
962
+ - **`ALL`**: Shows all rules including those with specific requirements
963
+ - **Specific criteria**: Shows rules for particular provider capabilities
964
+
965
+ Use `--show-applicability-criteria` to see available criteria for a FOCUS version.
966
+
967
+ ### SQL Query Analysis
968
+
969
+ The SQL queries in explain mode show exactly how each validation is implemented:
970
+
971
+ #### Common SQL Patterns
972
+
973
+ **Column Presence Check:**
974
+
975
+ ```sql
976
+ WITH col_check AS (
977
+ SELECT COUNT(*) AS found
978
+ FROM information_schema.columns
979
+ WHERE table_name = 'focus_data' AND column_name = 'ColumnName'
980
+ )
981
+ SELECT CASE WHEN found = 0 THEN 1 ELSE 0 END AS violations,
982
+ CASE WHEN found = 0 THEN 'Column "ColumnName" MUST be present in the table.' END AS error_message
983
+ FROM col_check
984
+ ```
985
+
986
+ **Type Validation:**
987
+
988
+ ```sql
989
+ SELECT COUNT(*) AS violations,
990
+ 'ColumnName MUST be of type VARCHAR (string).' AS error_message
991
+ FROM focus_data
992
+ WHERE ColumnName IS NOT NULL AND typeof(ColumnName) != 'VARCHAR'
993
+ ```
994
+
995
+ **Format Validation:**
996
+
997
+ ```sql
998
+ WITH invalid AS (
999
+ SELECT ColumnName::TEXT AS value
1000
+ FROM focus_data
1001
+ WHERE ColumnName IS NOT NULL
1002
+ AND NOT (ColumnName::TEXT ~ '^[pattern]$')
1003
+ )
1004
+ SELECT COUNT(*) AS violations,
1005
+ CASE WHEN COUNT(*) > 0 THEN 'Format validation message' END AS error_message
1006
+ FROM invalid
1007
+ ```
1008
+
1009
+ ### Use Cases for Explain Mode
1010
+
1011
+ #### 1. Learning FOCUS Specification
1012
+
1013
+ - Understand what each rule validates
1014
+ - See the relationship between rules
1015
+ - Learn validation requirements for each column
1016
+
1017
+ #### 2. Debugging Validation Issues
1018
+
1019
+ - Inspect SQL logic for failing rules
1020
+ - Understand why certain rules are skipped
1021
+ - Analyze composite rule dependencies
1022
+
1023
+ #### 3. Implementation Reference
1024
+
1025
+ - Use SQL patterns for custom validation tools
1026
+ - Understand FOCUS rule implementation details
1027
+ - Reference validation logic for documentation
1028
+
1029
+ #### 4. Rule Development
1030
+
1031
+ - Validate new rule implementations
1032
+ - Test rule logic without full datasets
1033
+ - Debug rule generation and SQL creation
1034
+
1035
+ ### SQL Transpilation for Database Migration
1036
+
1037
+ The FOCUS Validator includes a powerful **SQL Transpilation** feature that allows you to see how DuckDB validation queries would appear in different SQL dialects. This is invaluable for database migration planning, cross-platform compatibility analysis, and understanding how validation logic translates across different database systems.
1038
+
1039
+ #### Using the --transpile Option
1040
+
1041
+ The `--transpile` option can only be used with `--explain-mode` and allows you to specify a target SQL dialect:
1042
+
1043
+ ```bash
1044
+ # Transpile validation queries to PostgreSQL
1045
+ focus-validator --validate-version 1.2 --focus-dataset CostAndUsage --explain-mode --transpile postgres
1046
+
1047
+ # Transpile to MySQL
1048
+ focus-validator --validate-version 1.2 --focus-dataset CostAndUsage --explain-mode --transpile mysql
1049
+
1050
+ # Transpile to BigQuery
1051
+ focus-validator --validate-version 1.2 --focus-dataset CostAndUsage --explain-mode --transpile bigquery
1052
+
1053
+ # Transpile to Snowflake
1054
+ focus-validator --validate-version 1.2 --focus-dataset CostAndUsage --explain-mode --transpile snowflake
1055
+ ```
1056
+
1057
+ #### Supported SQL Dialects
1058
+
1059
+ The transpiler supports 18+ SQL dialects including:
1060
+
1061
+ - **PostgreSQL**: `postgres` - Enterprise-grade RDBMS
1062
+ - **MySQL**: `mysql` - Popular open-source database
1063
+ - **BigQuery**: `bigquery` - Google Cloud data warehouse
1064
+ - **Snowflake**: `snowflake` - Cloud data platform
1065
+ - **Microsoft SQL Server**: `tsql` - Enterprise database system
1066
+ - **Oracle**: `oracle` - Enterprise database platform
1067
+ - **SQLite**: `sqlite` - Lightweight embedded database
1068
+ - **Amazon Redshift**: `redshift` - AWS data warehouse
1069
+ - **Databricks**: `databricks` - Unified analytics platform
1070
+ - **Apache Spark**: `spark` - Distributed computing engine
1071
+ - **ClickHouse**: `clickhouse` - Columnar database
1072
+ - **Teradata**: `teradata` - Enterprise data warehouse
1073
+ - **Hive**: `hive` - Hadoop data warehouse
1074
+ - **Presto/Trino**: `presto` - Distributed SQL engine
1075
+ - **Apache Drill**: `drill` - Schema-free SQL engine
1076
+ - **StarRocks**: `starrocks` - MPP database
1077
+ - **DuckDB**: `duckdb` - Analytical database (original format)
1078
+ - **ANSI SQL**: `ansi` - Standard SQL compliance
1079
+
1080
+ #### Transpilation Output
1081
+
1082
+ When using `--transpile`, the SQL queries in explain mode are automatically converted to the target dialect:
1083
+
1084
+ ```text
1085
+ === SQL Explanations for 578 rules (transpiled to postgres) ===
1086
+
1087
+ 📋 AvailabilityZone-C-001-M
1088
+ Type: leaf
1089
+ Check: type_string
1090
+ Generator: TypeStringCheckGenerator
1091
+ MustSatisfy: AvailabilityZone MUST be of type String.
1092
+ SQL (PostgreSQL):
1093
+ WITH invalid AS (
1094
+ SELECT 1
1095
+ FROM focus_data
1096
+ WHERE "AvailabilityZone" IS NOT NULL AND pg_typeof("AvailabilityZone") != 'text'
1097
+ )
1098
+ SELECT COUNT(*) AS violations,
1099
+ CASE WHEN COUNT(*) > 0
1100
+ THEN 'AvailabilityZone MUST be of type VARCHAR (string).'
1101
+ END AS error_message
1102
+ FROM invalid
1103
+ ```
1104
+
1105
+ #### Key Transpilation Features
1106
+
1107
+ **Dialect-Specific Syntax**: Automatically converts DuckDB syntax to target dialect conventions:
1108
+
1109
+ - **Function Names**: `typeof()` → `pg_typeof()` (PostgreSQL), `type()` (BigQuery)
1110
+ - **Type Names**: `VARCHAR` → `TEXT` (PostgreSQL), `STRING` (BigQuery)
1111
+ - **Identifiers**: Automatic quoting and escaping based on dialect requirements
1112
+ - **Date/Time Functions**: Converts temporal functions to dialect-specific equivalents
1113
+ - **String Operations**: Adapts regex and string manipulation functions
1114
+
1115
+ **Error Handling**: Robust fallback mechanism for complex queries:
1116
+
1117
+ - **Graceful Degradation**: Shows original DuckDB SQL if transpilation fails
1118
+ - **Error Logging**: Detailed error messages for debugging transpilation issues
1119
+ - **Partial Success**: Successfully transpiled queries are shown even if others fail
1120
+
1121
+ **Performance Optimization**: Efficient transpilation without data processing:
1122
+
1123
+ - **Parse-Only Mode**: Analyzes SQL structure without executing queries
1124
+ - **Cached Results**: Reuses transpilation results for similar query patterns
1125
+ - **Fast Execution**: Negligible impact on explain mode performance
1126
+
1127
+ #### Use Cases for SQL Transpilation
1128
+
1129
+ ##### Database Migration Planning
1130
+
1131
+ Understand how FOCUS validation logic would need to be adapted when migrating from DuckDB to other database systems:
1132
+
1133
+ ```bash
1134
+ # See how validations would work in PostgreSQL environment
1135
+ focus-validator --explain-mode --transpile postgres --filter-rules "BilledCost*"
1136
+
1137
+ # Compare BigQuery compatibility for cloud migration
1138
+ focus-validator --explain-mode --transpile bigquery --filter-rules "EffectiveCost*"
1139
+ ```
1140
+
1141
+ ##### Cross-Platform Compatibility Analysis
1142
+
1143
+ Validate that your FOCUS implementation can work across multiple database platforms:
1144
+
1145
+ ```bash
1146
+ # Check MySQL compatibility for web application backends
1147
+ focus-validator --explain-mode --transpile mysql
1148
+
1149
+ # Verify Snowflake compatibility for data warehouse implementations
1150
+ focus-validator --explain-mode --transpile snowflake
1151
+ ```
1152
+
1153
+ ##### Multi-Database Validation Implementation
1154
+
1155
+ Use transpiled SQL as a reference for implementing FOCUS validation in different database environments:
1156
+
1157
+ ```bash
1158
+ # Generate SQL for Oracle-based validation systems
1159
+ focus-validator --explain-mode --transpile oracle > oracle_validation_queries.sql
1160
+
1161
+ # Create Spark-compatible validation logic
1162
+ focus-validator --explain-mode --transpile spark > spark_validation_logic.sql
1163
+ ```
1164
+
1165
+ ##### Educational and Learning Purposes
1166
+
1167
+ Learn how SQL patterns differ across database systems:
1168
+
1169
+ ```bash
1170
+ # Compare how type checking differs between databases
1171
+ focus-validator --explain-mode --transpile postgres --filter-rules "*-C-001-M" | grep "typeof"
1172
+ focus-validator --explain-mode --transpile mysql --filter-rules "*-C-001-M" | grep "typeof"
1173
+ ```
1174
+
1175
+ #### Error Handling and Limitations
1176
+
1177
+ **Unsupported Dialect Handling**: If an invalid dialect is specified, the tool provides helpful feedback:
1178
+
1179
+ ```bash
1180
+ focus-validator --explain-mode --transpile invalid_dialect
1181
+ # Error: Unsupported dialect 'invalid_dialect'. Available dialects: postgres, mysql, bigquery, snowflake, ...
1182
+ ```
1183
+
1184
+ **Complex Query Limitations**: Some advanced DuckDB features may not have direct equivalents in all dialects:
1185
+
1186
+ - Complex window functions
1187
+ - Advanced temporal operations
1188
+ - Specialized analytical functions
1189
+ - Custom DuckDB extensions
1190
+
1191
+ When transpilation isn't possible, the original DuckDB SQL is shown with a note about the limitation.
1192
+
1193
+ **Fallback Behavior**: The transpiler prioritizes showing useful information:
1194
+
1195
+ 1. **Successful Transpilation**: Shows converted SQL in target dialect
1196
+ 2. **Partial Failure**: Shows original SQL with transpilation error note
1197
+ 3. **Complete Failure**: Shows original DuckDB SQL without modification
1198
+
1199
+ #### Combining with Other Explain Mode Features
1200
+
1201
+ The `--transpile` option works seamlessly with all other explain mode features:
1202
+
1203
+ ```bash
1204
+ # Transpile with applicability criteria
1205
+ focus-validator --explain-mode --transpile postgres --applicability-criteria ALL
1206
+
1207
+ # Transpile specific rule groups
1208
+ focus-validator --explain-mode --transpile bigquery --filter-rules "BilledCost*,EffectiveCost*"
1209
+
1210
+ # Transpile and save to file for analysis
1211
+ focus-validator --explain-mode --transpile snowflake > snowflake_validation_queries.sql
1212
+ ```
1213
+
1214
+ ### Performance and Output Management
1215
+
1216
+ Explain mode is designed for fast execution since it doesn't process actual data:
1217
+
1218
+ - **Fast Execution**: No data loading or SQL execution against datasets
1219
+ - **Complete Coverage**: Analyzes all rules in the specification (500+ rules for FOCUS 1.2)
1220
+ - **Alphabetical Ordering**: Predictable rule ordering for easy navigation
1221
+ - **Detailed Output**: Comprehensive information for each rule
1222
+ - **Transpilation Speed**: SQL conversion adds minimal overhead to explain mode execution
1223
+
1224
+ The output can be extensive (500+ rules), so consider using shell tools for navigation:
1225
+
1226
+ ```bash
1227
+ # Search for specific rules with transpilation
1228
+ focus-validator --validate-version 1.2 --focus-dataset CostAndUsage --explain-mode --transpile postgres | grep "BilledCost"
1229
+
1230
+ # Page through transpiled output
1231
+ focus-validator --validate-version 1.2 --focus-dataset CostAndUsage --explain-mode --transpile mysql | less
1232
+
1233
+ # Save transpiled queries to file for analysis
1234
+ focus-validator --validate-version 1.2 --focus-dataset CostAndUsage --explain-mode --transpile bigquery > bigquery_validation_queries.sql
1235
+ ```
1236
+
1237
+ ## Running Tests
1238
+
1239
+ ### Basic Testing
1240
+
1241
+ Run all tests:
1242
+
1243
+ ```bash
1244
+ poetry run pytest
1245
+ ```
1246
+
1247
+ Run tests with verbose output:
1248
+
1249
+ ```bash
1250
+ poetry run pytest -v
1251
+ ```
1252
+
1253
+ ### Test Coverage
1254
+
1255
+ Generate coverage report:
1256
+
1257
+ ```bash
1258
+ # Terminal coverage report
1259
+ make coverage
1260
+
1261
+ # HTML coverage report (opens in browser)
1262
+ make coverage-html
1263
+
1264
+ # Comprehensive coverage report (HTML + XML + terminal)
1265
+ make coverage-report
1266
+ ```
1267
+
1268
+ ### Module-Specific Testing
1269
+
1270
+ Run tests for specific modules:
1271
+
1272
+ ```bash
1273
+ # Data loaders only
1274
+ make test-data-loaders
1275
+
1276
+ # Config objects only
1277
+ make test-config-objects
1278
+
1279
+ # Output formatters only
1280
+ make test-outputter
1281
+ ```
1282
+
1283
+ ### Coverage by Module
1284
+
1285
+ ```bash
1286
+ # Coverage for data loaders (100% coverage)
1287
+ make coverage-data-loaders
1288
+
1289
+ # Coverage for config objects (97%+ coverage)
1290
+ make coverage-config-objects
1291
+
1292
+ # Coverage for outputters (85%+ coverage)
1293
+ make coverage-outputter
1294
+ ```
1295
+
1296
+ #### Current Test Coverage: 70% overall (257 tests passing)
1297
+
1298
+ - **Data Loaders**: 100% coverage (50 tests)
1299
+ - **Config Objects**: 97%+ coverage (120 tests)
1300
+ - **Outputters**: 85%+ coverage (80 tests)
1301
+ - **Core Components**: 89%+ coverage (7 tests)
1302
+
1303
+ See `COVERAGE_REPORT.md` for detailed coverage analysis and improvement recommendations.
1304
+
1305
+ If you are running on a legacy CPU and the tests crash inside the polars library, run the following command (locally only):
1306
+
1307
+ ```bash
1308
+ poetry add polars-lts-cpu
1309
+ ```
1310
+
1311
+ This aligns the polars build with your system hardware. The resulting dependency change (`pyproject.toml` and `poetry.lock`) should NOT be committed back into the repository.
1312
+
1313
+ ## License
1314
+
1315
+ This project is licensed under the MIT License - see the `LICENSE` file for details.
1316
+