dataface 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (455) hide show
  1. d3_format/__init__.py +14 -0
  2. d3_format/errors.py +19 -0
  3. d3_format/format.py +551 -0
  4. d3_format/spec.py +159 -0
  5. dataface/DATAFACE_SYNTAX.md +1135 -0
  6. dataface/__init__.py +93 -0
  7. dataface/_docs_site.py +20 -0
  8. dataface/_install_hint.py +26 -0
  9. dataface/agent_api/__init__.py +79 -0
  10. dataface/agent_api/_init_templates/__init__.py +0 -0
  11. dataface/agent_api/_init_templates/agents_dft_snippet.md +26 -0
  12. dataface/agent_api/_init_templates/dataface.yml +15 -0
  13. dataface/agent_api/_init_templates/faces-dataface.yml +144 -0
  14. dataface/agent_api/_init_templates/index.md +24 -0
  15. dataface/agent_api/_paths.py +118 -0
  16. dataface/agent_api/_project_agents_md.py +43 -0
  17. dataface/agent_api/_session_store.py +486 -0
  18. dataface/agent_api/_state.py +28 -0
  19. dataface/agent_api/chat.py +221 -0
  20. dataface/agent_api/dashboards.py +257 -0
  21. dataface/agent_api/describe.py +366 -0
  22. dataface/agent_api/describe_query.py +120 -0
  23. dataface/agent_api/docs/__init__.py +25 -0
  24. dataface/agent_api/docs/_loader.py +292 -0
  25. dataface/agent_api/docs/yaml-reference.md +2757 -0
  26. dataface/agent_api/file_refs.py +118 -0
  27. dataface/agent_api/init.py +126 -0
  28. dataface/agent_api/inspect.py +128 -0
  29. dataface/agent_api/mcp_install.py +170 -0
  30. dataface/agent_api/query.py +274 -0
  31. dataface/agent_api/schema.py +658 -0
  32. dataface/agent_api/schema_search.py +284 -0
  33. dataface/agent_api/search.py +270 -0
  34. dataface/agent_api/skill_install.py +141 -0
  35. dataface/agent_api/skill_render.py +90 -0
  36. dataface/agent_api/skills.py +293 -0
  37. dataface/agent_api/surface_aliases.yaml +128 -0
  38. dataface/agent_api/validate.py +175 -0
  39. dataface/agent_api/validate_query.py +84 -0
  40. dataface/ai/__init__.py +39 -0
  41. dataface/ai/agent.py +139 -0
  42. dataface/ai/context.py +45 -0
  43. dataface/ai/events.py +62 -0
  44. dataface/ai/external_mcp.py +610 -0
  45. dataface/ai/generate_sql.py +96 -0
  46. dataface/ai/llm.py +403 -0
  47. dataface/ai/mcp/__init__.py +51 -0
  48. dataface/ai/mcp/server.py +289 -0
  49. dataface/ai/memories.py +85 -0
  50. dataface/ai/prompts.py +177 -0
  51. dataface/ai/schema_context.py +138 -0
  52. dataface/ai/skills/before-after-comparison/SKILL.md +102 -0
  53. dataface/ai/skills/before-after-comparison/examples/before-after-comparison.yml +24 -0
  54. dataface/ai/skills/dashboard-build/SKILL.md +212 -0
  55. dataface/ai/skills/dashboard-build/examples/_smoke.yml +15 -0
  56. dataface/ai/skills/dashboard-design/SKILL.md +182 -0
  57. dataface/ai/skills/dashboard-review/SKILL.md +113 -0
  58. dataface/ai/skills/dashboard-structural-review/SKILL.md +173 -0
  59. dataface/ai/skills/dashboard-visual-review/SKILL.md +139 -0
  60. dataface/ai/skills/dataface-mcp-setup/SKILL.md +177 -0
  61. dataface/ai/skills/dataface-troubleshooting/SKILL.md +225 -0
  62. dataface/ai/skills/drill-down-link/SKILL.md +112 -0
  63. dataface/ai/skills/drill-down-link/examples/drill-down-link.yml +27 -0
  64. dataface/ai/skills/faceted-small-multiples/SKILL.md +116 -0
  65. dataface/ai/skills/faceted-small-multiples/examples/faceted-small-multiples.yml +33 -0
  66. dataface/ai/skills/filter-bar-with-variables/SKILL.md +105 -0
  67. dataface/ai/skills/filter-bar-with-variables/examples/filter-bar-with-variables.yml +49 -0
  68. dataface/ai/skills/kpi-row/SKILL.md +101 -0
  69. dataface/ai/skills/kpi-row/examples/kpi-row.yml +55 -0
  70. dataface/ai/skills/report-design/SKILL.md +184 -0
  71. dataface/ai/skills/single-metric-bignum/SKILL.md +90 -0
  72. dataface/ai/skills/single-metric-bignum/examples/single-metric-bignum.yml +27 -0
  73. dataface/ai/skills/table-heavy-ops-dashboard/SKILL.md +114 -0
  74. dataface/ai/skills/table-heavy-ops-dashboard/examples/table-heavy-ops-dashboard.yml +48 -0
  75. dataface/ai/skills/time-series-trend/SKILL.md +93 -0
  76. dataface/ai/skills/time-series-trend/examples/time-series-trend.yml +26 -0
  77. dataface/ai/skills/top-n-with-detail/SKILL.md +98 -0
  78. dataface/ai/skills/top-n-with-detail/examples/top-n-with-detail.yml +45 -0
  79. dataface/ai/skills/two-by-two-grid-overview/SKILL.md +78 -0
  80. dataface/ai/skills/two-by-two-grid-overview/examples/two-by-two-grid-overview.yml +64 -0
  81. dataface/ai/tool_schemas.py +132 -0
  82. dataface/ai/tools/__init__.py +312 -0
  83. dataface/ai/yaml_utils.py +57 -0
  84. dataface/cli/__init__.py +3 -0
  85. dataface/cli/_console.py +48 -0
  86. dataface/cli/_error_format.py +83 -0
  87. dataface/cli/_extras.py +190 -0
  88. dataface/cli/_json_output.py +8 -0
  89. dataface/cli/_parsing.py +17 -0
  90. dataface/cli/_version_info.py +56 -0
  91. dataface/cli/commands/__init__.py +3 -0
  92. dataface/cli/commands/_agent_input.py +205 -0
  93. dataface/cli/commands/_agent_server.py +115 -0
  94. dataface/cli/commands/chat.py +645 -0
  95. dataface/cli/commands/describe.py +107 -0
  96. dataface/cli/commands/docs.py +131 -0
  97. dataface/cli/commands/extension.py +179 -0
  98. dataface/cli/commands/init.py +240 -0
  99. dataface/cli/commands/inspect.py +94 -0
  100. dataface/cli/commands/mcp_init.py +167 -0
  101. dataface/cli/commands/query.py +386 -0
  102. dataface/cli/commands/render.py +291 -0
  103. dataface/cli/commands/schema.py +411 -0
  104. dataface/cli/commands/search.py +49 -0
  105. dataface/cli/commands/serve.py +114 -0
  106. dataface/cli/commands/skills.py +133 -0
  107. dataface/cli/commands/skills_init.py +161 -0
  108. dataface/cli/commands/validate.py +63 -0
  109. dataface/cli/main.py +1501 -0
  110. dataface/core/__init__.py +75 -0
  111. dataface/core/compile/__init__.py +244 -0
  112. dataface/core/compile/_jinja_helpers.py +78 -0
  113. dataface/core/compile/channel.py +222 -0
  114. dataface/core/compile/chart_focus.py +101 -0
  115. dataface/core/compile/chart_resolved.py +169 -0
  116. dataface/core/compile/chart_type_detection.py +489 -0
  117. dataface/core/compile/chart_update.py +261 -0
  118. dataface/core/compile/colors.py +64 -0
  119. dataface/core/compile/compiler.py +904 -0
  120. dataface/core/compile/config.py +823 -0
  121. dataface/core/compile/custom_chart_types.py +208 -0
  122. dataface/core/compile/data_table_attachment.py +1287 -0
  123. dataface/core/compile/detect.py +110 -0
  124. dataface/core/compile/errors.py +302 -0
  125. dataface/core/compile/filter_injection.py +319 -0
  126. dataface/core/compile/introspection.py +527 -0
  127. dataface/core/compile/jinja.py +511 -0
  128. dataface/core/compile/labels_env.py +52 -0
  129. dataface/core/compile/markdown.py +154 -0
  130. dataface/core/compile/meta.py +388 -0
  131. dataface/core/compile/models/__init__.py +0 -0
  132. dataface/core/compile/models/chart/__init__.py +0 -0
  133. dataface/core/compile/models/chart/authored.py +2137 -0
  134. dataface/core/compile/models/chart/compiled.py +398 -0
  135. dataface/core/compile/models/config.py +347 -0
  136. dataface/core/compile/models/face/__init__.py +0 -0
  137. dataface/core/compile/models/face/authored.py +659 -0
  138. dataface/core/compile/models/face/compiled.py +522 -0
  139. dataface/core/compile/models/factories.py +201 -0
  140. dataface/core/compile/models/markers.py +40 -0
  141. dataface/core/compile/models/palette.py +36 -0
  142. dataface/core/compile/models/primitives.py +415 -0
  143. dataface/core/compile/models/query/__init__.py +0 -0
  144. dataface/core/compile/models/query/authored.py +246 -0
  145. dataface/core/compile/models/query/compiled.py +710 -0
  146. dataface/core/compile/models/refs.py +137 -0
  147. dataface/core/compile/models/source.py +611 -0
  148. dataface/core/compile/models/style/__init__.py +0 -0
  149. dataface/core/compile/models/style/authored.py +481 -0
  150. dataface/core/compile/models/style/compiled.py +3399 -0
  151. dataface/core/compile/models/style/merged.py +1682 -0
  152. dataface/core/compile/models/theme.py +362 -0
  153. dataface/core/compile/models/variable/__init__.py +0 -0
  154. dataface/core/compile/models/variable/authored.py +254 -0
  155. dataface/core/compile/models/vega_lite/__init__.py +0 -0
  156. dataface/core/compile/models/vega_lite/config.py +510 -0
  157. dataface/core/compile/models/vega_lite/contracts.py +171 -0
  158. dataface/core/compile/normalize_charts.py +494 -0
  159. dataface/core/compile/normalize_layout.py +1000 -0
  160. dataface/core/compile/normalize_queries.py +297 -0
  161. dataface/core/compile/normalize_variables.py +489 -0
  162. dataface/core/compile/normalizer.py +543 -0
  163. dataface/core/compile/palette.py +1100 -0
  164. dataface/core/compile/parameterized.py +658 -0
  165. dataface/core/compile/parser.py +228 -0
  166. dataface/core/compile/schema.py +20 -0
  167. dataface/core/compile/schema_renderers/__init__.py +0 -0
  168. dataface/core/compile/schema_renderers/json_schema.py +163 -0
  169. dataface/core/compile/schema_renderers/prompt.py +152 -0
  170. dataface/core/compile/schema_renderers/vscode_schema.py +301 -0
  171. dataface/core/compile/sizing.py +2126 -0
  172. dataface/core/compile/sources.py +518 -0
  173. dataface/core/compile/sql_authoring_lint.py +56 -0
  174. dataface/core/compile/style_cascade.py +471 -0
  175. dataface/core/compile/typography.py +299 -0
  176. dataface/core/compile/validator.py +301 -0
  177. dataface/core/compile/variables.py +53 -0
  178. dataface/core/compile/vega_config.py +98 -0
  179. dataface/core/compile/vega_lite/__init__.py +6 -0
  180. dataface/core/compile/vega_lite/validation.py +95 -0
  181. dataface/core/compile/yaml_error_formatter.py +838 -0
  182. dataface/core/connections.py +38 -0
  183. dataface/core/dashboard.py +358 -0
  184. dataface/core/defaults/default_config.yml +101 -0
  185. dataface/core/defaults/palettes/categorical/category-10-dark.yml +32 -0
  186. dataface/core/defaults/palettes/categorical/category-10-light.yml +43 -0
  187. dataface/core/defaults/palettes/categorical/category-10.yml +31 -0
  188. dataface/core/defaults/palettes/categorical/category-6-tonal-blue.yml +22 -0
  189. dataface/core/defaults/palettes/categorical/category-6-tonal-brown.yml +29 -0
  190. dataface/core/defaults/palettes/categorical/category-6-tonal-green.yml +20 -0
  191. dataface/core/defaults/palettes/categorical/category-6-tonal-orange.yml +21 -0
  192. dataface/core/defaults/palettes/categorical/category-6-tonal-purple.yml +20 -0
  193. dataface/core/defaults/palettes/categorical/editorial-10-dark.yml +32 -0
  194. dataface/core/defaults/palettes/categorical/editorial-10.yml +40 -0
  195. dataface/core/defaults/palettes/categorical/hero-6.yml +17 -0
  196. dataface/core/defaults/palettes/categorical/single-blue.yml +11 -0
  197. dataface/core/defaults/palettes/categorical/tableau.yml +20 -0
  198. dataface/core/defaults/palettes/data/xkcd_colors.json +3803 -0
  199. dataface/core/defaults/palettes/diverging/blue-red.yml +25 -0
  200. dataface/core/defaults/palettes/diverging/coolwarm.yml +24 -0
  201. dataface/core/defaults/palettes/diverging/crimson-green.yml +23 -0
  202. dataface/core/defaults/palettes/diverging/orange-teal.yml +23 -0
  203. dataface/core/defaults/palettes/diverging/sunset.yml +24 -0
  204. dataface/core/defaults/palettes/scaffold/dft-creams.yml +38 -0
  205. dataface/core/defaults/palettes/scaffold/dft-grays.yml +53 -0
  206. dataface/core/defaults/palettes/sequential/amber.yml +22 -0
  207. dataface/core/defaults/palettes/sequential/blue.yml +22 -0
  208. dataface/core/defaults/palettes/sequential/brown.yml +22 -0
  209. dataface/core/defaults/palettes/sequential/gray.yml +22 -0
  210. dataface/core/defaults/palettes/sequential/green.yml +22 -0
  211. dataface/core/defaults/palettes/sequential/purple.yml +22 -0
  212. dataface/core/defaults/palettes/sequential/rust.yml +22 -0
  213. dataface/core/defaults/palettes/sequential/teal.yml +22 -0
  214. dataface/core/defaults/palettes/tone/negative.yml +32 -0
  215. dataface/core/defaults/palettes/tone/positive.yml +22 -0
  216. dataface/core/defaults/palettes/tone/warning.yml +22 -0
  217. dataface/core/defaults/themes/_base.yaml +786 -0
  218. dataface/core/defaults/themes/bi.yaml +16 -0
  219. dataface/core/defaults/themes/carbong100.yaml +41 -0
  220. dataface/core/defaults/themes/cream.yaml +122 -0
  221. dataface/core/defaults/themes/dark.yaml +40 -0
  222. dataface/core/defaults/themes/diagnostics-title-angle-extreme.yaml +9 -0
  223. dataface/core/defaults/themes/diagnostics-title-baseline-extreme.yaml +9 -0
  224. dataface/core/defaults/themes/diagnostics-title-baseline.yaml +24 -0
  225. dataface/core/defaults/themes/diagnostics-title-center.yaml +8 -0
  226. dataface/core/defaults/themes/diagnostics-title-color-extreme.yaml +24 -0
  227. dataface/core/defaults/themes/diagnostics-title-font-extreme.yaml +25 -0
  228. dataface/core/defaults/themes/diagnostics-title-left.yaml +8 -0
  229. dataface/core/defaults/themes/diagnostics-title-offset-extreme.yaml +9 -0
  230. dataface/core/defaults/themes/diagnostics-title-size-extreme.yaml +24 -0
  231. dataface/core/defaults/themes/diagnostics-title-weight-extreme.yaml +24 -0
  232. dataface/core/defaults/themes/editorial.yaml +147 -0
  233. dataface/core/defaults/themes/light.yaml +30 -0
  234. dataface/core/defaults/themes/looker.yaml +17 -0
  235. dataface/core/defaults/themes/stark.yaml +134 -0
  236. dataface/core/errors/__init__.py +67 -0
  237. dataface/core/errors/codes_compile.py +56 -0
  238. dataface/core/errors/codes_execute.py +177 -0
  239. dataface/core/errors/codes_render.py +106 -0
  240. dataface/core/errors/codes_unknown.py +15 -0
  241. dataface/core/errors/hints.py +74 -0
  242. dataface/core/errors/registry.py +42 -0
  243. dataface/core/errors/structured.py +92 -0
  244. dataface/core/execute/__init__.py +91 -0
  245. dataface/core/execute/adapters/__init__.py +49 -0
  246. dataface/core/execute/adapters/adapter_registry.py +400 -0
  247. dataface/core/execute/adapters/base.py +245 -0
  248. dataface/core/execute/adapters/csv_adapter.py +239 -0
  249. dataface/core/execute/adapters/dbt_adapter.py +283 -0
  250. dataface/core/execute/adapters/dbt_adapter_factory.py +212 -0
  251. dataface/core/execute/adapters/dbt_macro_loader.py +95 -0
  252. dataface/core/execute/adapters/dbt_utils.py +150 -0
  253. dataface/core/execute/adapters/http_adapter.py +224 -0
  254. dataface/core/execute/adapters/metricflow_adapter.py +94 -0
  255. dataface/core/execute/adapters/schema_resolver_adapter.py +144 -0
  256. dataface/core/execute/adapters/sql_adapter.py +710 -0
  257. dataface/core/execute/adapters/values_adapter.py +58 -0
  258. dataface/core/execute/batch.py +744 -0
  259. dataface/core/execute/cache_backend.py +135 -0
  260. dataface/core/execute/cache_keys.py +66 -0
  261. dataface/core/execute/dbt_jinja.py +21 -0
  262. dataface/core/execute/dialects/__init__.py +121 -0
  263. dataface/core/execute/dialects/athena.py +75 -0
  264. dataface/core/execute/dialects/base.py +302 -0
  265. dataface/core/execute/dialects/bigquery.py +38 -0
  266. dataface/core/execute/dialects/databricks.py +68 -0
  267. dataface/core/execute/dialects/duckdb.py +35 -0
  268. dataface/core/execute/dialects/mysql.py +68 -0
  269. dataface/core/execute/dialects/postgres.py +39 -0
  270. dataface/core/execute/dialects/redshift.py +12 -0
  271. dataface/core/execute/dialects/snowflake.py +51 -0
  272. dataface/core/execute/dialects/sqlserver.py +92 -0
  273. dataface/core/execute/duckdb_cache.py +712 -0
  274. dataface/core/execute/duckdb_config.py +26 -0
  275. dataface/core/execute/errors.py +213 -0
  276. dataface/core/execute/executor.py +1249 -0
  277. dataface/core/execute/parallel.py +162 -0
  278. dataface/core/execute/setup_sql.py +58 -0
  279. dataface/core/execute/source_registry.py +72 -0
  280. dataface/core/execute/source_resolver.py +255 -0
  281. dataface/core/execute/sql_guard.py +387 -0
  282. dataface/core/execute/sql_literals.py +199 -0
  283. dataface/core/fonts.py +52 -0
  284. dataface/core/inspect/__init__.py +32 -0
  285. dataface/core/inspect/cache_factory.py +98 -0
  286. dataface/core/inspect/db_types.py +162 -0
  287. dataface/core/inspect/dbt_schema.py +96 -0
  288. dataface/core/inspect/defaults.yml +37 -0
  289. dataface/core/inspect/fanout_risk.py +109 -0
  290. dataface/core/inspect/manifest_utils.py +77 -0
  291. dataface/core/inspect/partials/categorical.yml +40 -0
  292. dataface/core/inspect/partials/date.yml +40 -0
  293. dataface/core/inspect/partials/numeric.yml +55 -0
  294. dataface/core/inspect/partition_types.py +38 -0
  295. dataface/core/inspect/query_validator.py +975 -0
  296. dataface/core/inspect/renderer.py +354 -0
  297. dataface/core/inspect/resolver.py +808 -0
  298. dataface/core/inspect/search.py +461 -0
  299. dataface/core/inspect/sources/__init__.py +32 -0
  300. dataface/core/inspect/sources/dbt.py +738 -0
  301. dataface/core/inspect/sources/duckdb_utils.py +66 -0
  302. dataface/core/inspect/templates/__init__.py +1 -0
  303. dataface/core/inspect/templates/categorical_column.yml +196 -0
  304. dataface/core/inspect/templates/charts.yml +109 -0
  305. dataface/core/inspect/templates/date_column.yml +248 -0
  306. dataface/core/inspect/templates/model.yml +138 -0
  307. dataface/core/inspect/templates/numeric_column.yml +261 -0
  308. dataface/core/inspect/templates/quality.yml +80 -0
  309. dataface/core/inspect/templates/string_column.yml +263 -0
  310. dataface/core/project_roots.py +165 -0
  311. dataface/core/render/__init__.py +87 -0
  312. dataface/core/render/board_links.py +176 -0
  313. dataface/core/render/chart/__init__.py +27 -0
  314. dataface/core/render/chart/arc_attached_table.py +251 -0
  315. dataface/core/render/chart/artifacts.py +16 -0
  316. dataface/core/render/chart/callout.py +225 -0
  317. dataface/core/render/chart/decisions.py +358 -0
  318. dataface/core/render/chart/geo.py +700 -0
  319. dataface/core/render/chart/kpi.py +916 -0
  320. dataface/core/render/chart/labels.py +76 -0
  321. dataface/core/render/chart/pipeline.py +818 -0
  322. dataface/core/render/chart/presentation.py +36 -0
  323. dataface/core/render/chart/profile.py +3438 -0
  324. dataface/core/render/chart/render_single.py +347 -0
  325. dataface/core/render/chart/renderers.py +193 -0
  326. dataface/core/render/chart/rendering.py +565 -0
  327. dataface/core/render/chart/serialization.py +90 -0
  328. dataface/core/render/chart/spark.py +496 -0
  329. dataface/core/render/chart/spark_bar.py +370 -0
  330. dataface/core/render/chart/spec_builders.py +154 -0
  331. dataface/core/render/chart/standard_renderer.py +2645 -0
  332. dataface/core/render/chart/table.py +2957 -0
  333. dataface/core/render/chart/table_support.py +1452 -0
  334. dataface/core/render/chart/tick_values.py +66 -0
  335. dataface/core/render/chart/time_unit_detect.py +809 -0
  336. dataface/core/render/chart/title_overflow.py +157 -0
  337. dataface/core/render/chart/type_inference.py +122 -0
  338. dataface/core/render/chart/validation.py +99 -0
  339. dataface/core/render/chart/vega_lite.py +125 -0
  340. dataface/core/render/chart/vega_lite_types.py +268 -0
  341. dataface/core/render/chart/vl_field_maps.py +346 -0
  342. dataface/core/render/chart_interactivity.py +24 -0
  343. dataface/core/render/control_registry.py +287 -0
  344. dataface/core/render/converters/__init__.py +24 -0
  345. dataface/core/render/converters/chart.py +276 -0
  346. dataface/core/render/converters/html.py +98 -0
  347. dataface/core/render/converters/pdf.py +40 -0
  348. dataface/core/render/converters/png.py +41 -0
  349. dataface/core/render/errors.py +144 -0
  350. dataface/core/render/face_api.py +160 -0
  351. dataface/core/render/faces.py +1194 -0
  352. dataface/core/render/font_measurement.py +48 -0
  353. dataface/core/render/font_support.py +197 -0
  354. dataface/core/render/fonts/DFTSansTabular-Regular.ttf +0 -0
  355. dataface/core/render/fonts/DFTSansTabular-Regular.woff2 +0 -0
  356. dataface/core/render/fonts/DFTSerifOldstyleProportional-Regular.ttf +0 -0
  357. dataface/core/render/fonts/DFTSerifOldstyleTabular-Regular.ttf +0 -0
  358. dataface/core/render/fonts/InterVariable.ttf +0 -0
  359. dataface/core/render/fonts/InterVariable.woff2 +0 -0
  360. dataface/core/render/fonts/NOTO_COLOR_EMOJI_LICENSE.txt +93 -0
  361. dataface/core/render/fonts/NOTO_EMOJI_LICENSE.txt +93 -0
  362. dataface/core/render/fonts/NotoColorEmoji-Regular.ttf +0 -0
  363. dataface/core/render/fonts/NotoColorEmoji-Regular.woff2 +0 -0
  364. dataface/core/render/fonts/NotoEmoji-Regular.ttf +0 -0
  365. dataface/core/render/fonts/NotoEmoji-Regular.woff2 +0 -0
  366. dataface/core/render/fonts/SOURCE_CODE_PRO_LICENSE.txt +93 -0
  367. dataface/core/render/fonts/SOURCE_SERIF_4_LICENSE.txt +98 -0
  368. dataface/core/render/fonts/SourceCodePro-Regular.ttf +0 -0
  369. dataface/core/render/fonts/SourceSerif4-Regular.ttf +0 -0
  370. dataface/core/render/fonts/_emoji_font_face.css +43 -0
  371. dataface/core/render/fonts/source-serif-4-variable-latin.woff2 +0 -0
  372. dataface/core/render/format_utils.py +329 -0
  373. dataface/core/render/geo_defaults.yml +28 -0
  374. dataface/core/render/json_format.py +146 -0
  375. dataface/core/render/layout_sizing.py +865 -0
  376. dataface/core/render/layouts.py +541 -0
  377. dataface/core/render/markdown_defaults.yml +16 -0
  378. dataface/core/render/missing_vars_prompt.py +79 -0
  379. dataface/core/render/placeholder.py +389 -0
  380. dataface/core/render/render_result.py +14 -0
  381. dataface/core/render/renderer.py +467 -0
  382. dataface/core/render/script_embedding.py +16 -0
  383. dataface/core/render/svg_utils.py +212 -0
  384. dataface/core/render/template_loader.py +69 -0
  385. dataface/core/render/templates/controls/_styles.css +606 -0
  386. dataface/core/render/templates/controls/checkbox.html +16 -0
  387. dataface/core/render/templates/controls/date.html +16 -0
  388. dataface/core/render/templates/controls/number.html +19 -0
  389. dataface/core/render/templates/controls/readonly.html +9 -0
  390. dataface/core/render/templates/controls/select.html +21 -0
  391. dataface/core/render/templates/controls/slider.html +22 -0
  392. dataface/core/render/templates/controls/text.html +16 -0
  393. dataface/core/render/templates/scripts/chart_interactivity.js +191 -0
  394. dataface/core/render/templates/scripts/variables.js +976 -0
  395. dataface/core/render/templates/svg/grid_pattern.svg +3 -0
  396. dataface/core/render/templates/svg/styles.css +51 -0
  397. dataface/core/render/terminal.py +311 -0
  398. dataface/core/render/terminal_charts.py +563 -0
  399. dataface/core/render/terminal_defaults.yml +2 -0
  400. dataface/core/render/terminal_layouts.py +299 -0
  401. dataface/core/render/terminal_text.py +31 -0
  402. dataface/core/render/text/__init__.py +1 -0
  403. dataface/core/render/text/case.py +113 -0
  404. dataface/core/render/text_format.py +129 -0
  405. dataface/core/render/utils.py +106 -0
  406. dataface/core/render/variable_controls.py +946 -0
  407. dataface/core/render/variable_input_refinement.py +140 -0
  408. dataface/core/render/warnings/__init__.py +15 -0
  409. dataface/core/render/warnings/bar_color_1_to_1_with_x.py +80 -0
  410. dataface/core/render/warnings/base.py +44 -0
  411. dataface/core/render/warnings/fanout_risk.py +15 -0
  412. dataface/core/render/warnings/from_query_diagnostic.py +56 -0
  413. dataface/core/render/warnings/missing_join_predicate.py +13 -0
  414. dataface/core/render/warnings/query_parse_error.py +14 -0
  415. dataface/core/render/warnings/query_returned_zero_rows.py +42 -0
  416. dataface/core/render/warnings/reaggregation.py +14 -0
  417. dataface/core/render/warnings/registry.py +45 -0
  418. dataface/core/render/warnings/suppression.py +46 -0
  419. dataface/core/render/warnings/temporal_single_point.py +63 -0
  420. dataface/core/render/warnings/unreferenced_chart.py +15 -0
  421. dataface/core/render/warnings/y_encoding_mostly_null.py +76 -0
  422. dataface/core/render/yaml_format.py +167 -0
  423. dataface/core/resolve_face.py +195 -0
  424. dataface/core/schema/__init__.py +0 -0
  425. dataface/core/schema/guidance.py +151 -0
  426. dataface/core/scoped_paths.py +59 -0
  427. dataface/core/serve/__init__.py +14 -0
  428. dataface/core/serve/bootstrap.py +39 -0
  429. dataface/core/serve/embedded.py +57 -0
  430. dataface/core/serve/port.py +129 -0
  431. dataface/core/serve/server.py +938 -0
  432. dataface/core/serve/templates/__init__.py +0 -0
  433. dataface/core/serve/templates/directory.yml +6 -0
  434. dataface/core/serve/templates/error.html.j2 +217 -0
  435. dataface/core/utils.py +121 -0
  436. dataface/core/validate.py +64 -0
  437. dataface/integrations/__init__.py +0 -0
  438. dataface/integrations/highlighting.py +351 -0
  439. dataface/integrations/markdown.py +537 -0
  440. dataface/py.typed +0 -0
  441. dataface-0.1.2.dist-info/METADATA +375 -0
  442. dataface-0.1.2.dist-info/RECORD +455 -0
  443. dataface-0.1.2.dist-info/WHEEL +4 -0
  444. dataface-0.1.2.dist-info/entry_points.txt +2 -0
  445. dataface-0.1.2.dist-info/licenses/LICENSE +202 -0
  446. mdsvg/__init__.py +168 -0
  447. mdsvg/fonts.py +656 -0
  448. mdsvg/images.py +299 -0
  449. mdsvg/parser.py +629 -0
  450. mdsvg/playground.py +284 -0
  451. mdsvg/py.typed +2 -0
  452. mdsvg/renderer.py +1623 -0
  453. mdsvg/style.py +355 -0
  454. mdsvg/types.py +200 -0
  455. mdsvg/utils.py +86 -0
@@ -0,0 +1,975 @@
1
+ """Deterministic query validator using SQLGlot AST analysis.
2
+
3
+ Parses SQL and detects structural issues that indicate likely query bugs:
4
+
5
+ - **missing_join_predicate**: Cross joins or comma-separated tables without
6
+ an explicit join predicate — usually an accidental cartesian product.
7
+ - **fanout_risk**: Aggregation over a joined query where aggregate expressions
8
+ reference columns from multiple tables, use unqualified columns with 2+
9
+ tables in scope, or COUNT(*) with joins — the structural signal for
10
+ double-counting / aggregate inflation.
11
+ - **reaggregation**: Outer query applies an aggregate function to a column
12
+ that is already aggregate-derived in a subquery or CTE — e.g. SUM of a
13
+ SUM, AVG of an AVG. Uses propagation of aggregate lineage through nested
14
+ scopes to detect these patterns.
15
+ - **parse_error**: SQL that SQLGlot cannot parse.
16
+
17
+ Schema context (grain, primary keys) is optional. Structural checks work
18
+ without it; metadata refines severity and adds repair guidance.
19
+
20
+ Relationship context (multiplicity, fanout factor) is optional. When
21
+ available, it calibrates fanout_risk severity and grounds recommendations
22
+ in known join metadata.
23
+
24
+ Pure functions — no DB queries, no side effects.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import re
30
+ from dataclasses import dataclass, field
31
+ from typing import Literal, overload
32
+
33
+ import sqlglot
34
+ from sqlglot import exp
35
+
36
# Pattern for -- dft:ignore [code1 code2 ...]
# Matching is case-insensitive and tolerates whitespace between "--" and the
# marker. Group 1 captures whatever follows the marker up to the end of the
# line (it may be empty).
_DFT_IGNORE_RE = re.compile(r"--\s*dft:ignore\b\s*(.*)", re.IGNORECASE)

# Codes that must never be suppressed.
# Currently only "parse_error" is listed.
_UNSUPPRESSIBLE_CODES = frozenset({"parse_error"})
41
+
42
+ from dataface.core.inspect.fanout_risk import HIGH_FANOUT_THRESHOLD
43
+
44
# Minimum confidence to trust a relationship hint for severity changes.
_MIN_CALIBRATION_CONFIDENCE = 0.75

# Multiplicity flip table for direction normalization.
# Maps a multiplicity label to the label that describes the same relationship
# with its two sides swapped; the symmetric labels map to themselves.
_FLIP_MULTIPLICITY = {
    "one-to-many": "many-to-one",
    "many-to-one": "one-to-many",
    "one-to-one": "one-to-one",
    "many-to-many": "many-to-many",
}
54
+
55
+ # ---------------------------------------------------------------------------
56
+ # Data model
57
+ # ---------------------------------------------------------------------------
58
+
59
+
60
@dataclass(frozen=True)
class QueryDiagnostic:
    """Immutable record describing one finding produced by query validation.

    ``code`` identifies the kind of finding and ``severity`` its level;
    ``message`` is always present, while the remaining fields are optional
    extras that default to ``None`` (or an empty tuple for ``evidence``).
    """

    code: Literal[
        "missing_join_predicate", "fanout_risk", "parse_error", "reaggregation"
    ]
    severity: Literal["error", "warning", "info"]
    message: str
    detail: str | None = None
    recommendation: str | None = None
    confidence: float | None = None
    evidence: tuple[str, ...] = ()

    def to_dict(self) -> dict[str, str | float | list[str] | None]:
        """Return the diagnostic as a plain dict keyed by field name.

        Keys appear in field-declaration order; ``evidence`` is copied into a
        new list, every other value is passed through unchanged.
        """
        passthrough = (
            "code",
            "severity",
            "message",
            "detail",
            "recommendation",
            "confidence",
        )
        payload: dict[str, str | float | list[str] | None] = {
            name: getattr(self, name) for name in passthrough
        }
        # The tuple is converted so the result holds a list, not a tuple.
        payload["evidence"] = list(self.evidence)
        return payload
84
+
85
+
86
@dataclass(frozen=True)
class TableContext:
    """Per-table schema metadata: grain and primary-key column names.

    Both lists default to empty, so a table can be described with whatever
    subset of information is known.
    """

    # Column names describing the table's grain.
    grain_columns: list[str] = field(default_factory=list)
    # Column names forming the table's primary key.
    primary_key_columns: list[str] = field(default_factory=list)
92
+
93
+
94
@dataclass(frozen=True)
class SchemaContext:
    """Optional schema information used to enrich diagnostics.

    Holds one ``TableContext`` per table, keyed by a string; an instance
    created without arguments carries an empty mapping.
    """

    # Mapping of table key -> that table's context.
    tables: dict[str, TableContext] = field(default_factory=dict)
99
+
100
+
101
@dataclass(frozen=True)
class RelationshipHint:
    """A known relationship between two tables, used to calibrate severity.

    All five fields are required; the instance is immutable once built.
    """

    # The two related tables, in the direction the multiplicity is stated.
    left_table: str
    right_table: str
    # One of: "one-to-one" | "one-to-many" | "many-to-one" | "many-to-many"
    multiplicity: str
    fanout_factor: float
    # Expected to lie in the range 0.0–1.0.
    confidence: float
110
+
111
+
112
@dataclass(frozen=True)
class RelationshipContext:
    """Collection of relationship hints for validator severity calibration.

    Defaults to an empty tuple of hints when constructed without arguments.
    """

    # Known table-to-table relationships.
    hints: tuple[RelationshipHint, ...] = ()
117
+
118
+
119
+ # ---------------------------------------------------------------------------
120
+ # AST helpers
121
+ # ---------------------------------------------------------------------------
122
+
123
+
124
def _source_name(node: exp.Table | exp.Subquery) -> str:
    """Return the name a FROM/JOIN source is referred to by.

    The alias wins when one is set. Without an alias, a table falls back to
    its own name and a subquery falls back to the ``"<subquery>"`` placeholder.
    """
    fallback = "<subquery>" if isinstance(node, exp.Subquery) else node.name
    return node.alias or fallback
132
+
133
+
134
def _direct_from_sources(select: exp.Select) -> list[exp.Table | exp.Subquery]:
    """Collect the table/subquery sources listed directly in the FROM clause.

    Only the immediate children of the FROM node are inspected; nothing is
    descended into. Returns an empty list when no FROM node is found.
    """
    from_clause = select.find(exp.From)
    if not from_clause:
        return []

    def _is_direct_source(node: exp.Expression) -> bool:
        # Subqueries sitting directly under FROM are always sources.
        if isinstance(node, exp.Subquery):
            return True
        if not isinstance(node, exp.Table):
            return False
        # A table counts unless its enclosing subquery is itself one of the
        # FROM clause's own children.
        enclosing = node.find_ancestor(exp.Subquery)
        return enclosing is None or enclosing not in from_clause.iter_expressions()

    return [
        node for node in from_clause.iter_expressions() if _is_direct_source(node)
    ]
152
+
153
+
154
def _direct_join_sources(select: exp.Select) -> list[exp.Table | exp.Subquery]:
    """Collect the table/subquery sources introduced by JOIN clauses.

    Every JOIN found under ``select`` is considered, except those that have a
    subquery ancestor. For each remaining JOIN, its immediate subquery
    children and its immediate table children (with no subquery ancestor)
    are returned, in traversal order.
    """
    # NOTE(review): the filter only looks for an enclosing exp.Subquery. Joins
    # nested under other constructs (e.g. a CTE body) are presumably not
    # excluded by it, and all joins appear to be skipped when `select` itself
    # sits inside a subquery — confirm this matches what callers expect.
    collected: list[exp.Table | exp.Subquery] = []
    for join_node in select.find_all(exp.Join):
        if join_node.find_ancestor(exp.Subquery):
            continue
        collected.extend(
            operand
            for operand in join_node.iter_expressions()
            if isinstance(operand, exp.Subquery)
            or (
                isinstance(operand, exp.Table)
                and operand.find_ancestor(exp.Subquery) is None
            )
        )
    return collected
169
+
170
+
171
def _build_alias_map(select: exp.Select) -> dict[str, str]:
    """Build a lookup from source name (alias, else table name) to table name.

    Covers the direct FROM and JOIN sources of ``select``; subquery sources
    are left out. When two tables share a source name the later one wins.
    """
    direct_sources = _direct_from_sources(select) + _direct_join_sources(select)
    return {
        _source_name(table): table.name
        for table in direct_sources
        if isinstance(table, exp.Table)
    }
178
+
179
+
180
def _has_join(select: exp.Select) -> bool:
    """Return True when ``select`` has at least one direct JOIN source."""
    joined = _direct_join_sources(select)
    return len(joined) > 0
183
+
184
+
185
def _has_comma_join(select: exp.Select) -> bool:
    """Return True when the FROM clause lists two or more direct sources."""
    direct = _direct_from_sources(select)
    return len(direct) >= 2
188
+
189
+
190
def _find_cross_joins(select: exp.Select) -> list[tuple[str, str]]:
    """Find direct joins without a predicate and return (left_name, right_name).

    A join is reported when it belongs to ``select`` itself, has neither an
    ON nor a USING clause, is not a NATURAL join, and its kind is either
    ``CROSS`` or unspecified.

    ``left_name`` is always the first direct FROM source, so for chained
    joins the left side is an approximation.
    """
    pairs: list[tuple[str, str]] = []
    from_sources = _direct_from_sources(select)
    if not from_sources:
        return pairs

    left_name = _source_name(from_sources[0])

    for join in select.find_all(exp.Join):
        # find_all() also yields joins of nested queries (subqueries, CTE
        # bodies, EXISTS bodies); only joins owned by this SELECT count.
        if join.find_ancestor(exp.Select) is not select:
            continue
        if join.args.get("on") or join.args.get("using"):
            continue
        # NATURAL JOIN has implicit predicates — not a cross join
        if (join.args.get("method") or "").upper() == "NATURAL":
            continue
        kind = (join.args.get("kind") or "").upper()
        if kind not in ("CROSS", ""):
            continue
        # The joined source is an immediate child of the Join node.
        for child in join.iter_expressions():
            if isinstance(child, (exp.Table, exp.Subquery)):
                pairs.append((left_name, _source_name(child)))
                break
    return pairs
217
+
218
+
219
def _get_aggregate_functions(select: exp.Select) -> list[exp.AggFunc]:
    """Collect aggregate calls from the SELECT list and this SELECT's HAVING.

    Aggregates are gathered from every projection expression (including
    anything nested inside it) and from the HAVING clause attached directly
    to ``select``.
    """
    aggs: list[exp.AggFunc] = []
    for expr in select.expressions:
        aggs.extend(expr.find_all(exp.AggFunc))
    # Read HAVING from this SELECT's own args. A subtree search would pick up
    # the HAVING of a nested CTE or subquery when the outer SELECT has none.
    having = select.args.get("having")
    if having:
        aggs.extend(having.find_all(exp.AggFunc))
    return aggs
228
+
229
+
230
def _column_table_ref(col: exp.Column) -> str | None:
    """Return the column's table qualifier, or None when it has none."""
    qualifier = col.table
    return qualifier or None
233
+
234
+
235
+ def _resolve_table(ref: str, alias_map: dict[str, str]) -> str:
236
+ """Resolve a table reference through the alias map."""
237
+ return alias_map.get(ref, ref)
238
+
239
+
240
+ # ---------------------------------------------------------------------------
241
+ # Relationship hint matching and calibration
242
+ # ---------------------------------------------------------------------------
243
+
244
+
245
def _find_matching_hint(
    query_tables: set[str],
    from_tables: set[str],
    hints: tuple[RelationshipHint, ...],
) -> tuple[RelationshipHint, str, bool] | None:
    """Select the single riskiest relationship hint that applies to the query.

    A hint applies when both of its tables occur in ``query_tables``
    (compared case-insensitively). Its multiplicity is flipped through
    ``_FLIP_MULTIPLICITY`` when only the hint's right table is in
    ``from_tables``, i.e. the hint is stated in the opposite direction to
    the query. When both or neither side is in FROM, nothing is flipped.

    Candidates are ranked by calibrated severity (error > warning > info);
    between equally risky candidates the one with the higher confidence
    wins, and the earlier hint is kept on an exact tie.

    Only one hint is returned, so in queries joining three or more tables
    only one table pair ends up calibrated.

    Returns:
        ``(hint, normalized_multiplicity, was_flipped)``, or None when no
        hint applies.
    """
    present = {name.lower() for name in query_tables}
    in_from = {name.lower() for name in from_tables}
    rank_of = {"error": 2, "warning": 1, "info": 0}

    winner: tuple[RelationshipHint, str, bool] | None = None
    winner_rank: int = -1  # Higher = riskier
    for hint in hints:
        lhs = hint.left_table.lower()
        rhs = hint.right_table.lower()
        if not (lhs in present and rhs in present):
            continue

        reversed_direction = rhs in in_from and lhs not in in_from
        effective = hint.multiplicity
        if reversed_direction:
            effective = _FLIP_MULTIPLICITY.get(effective, effective)

        level = rank_of.get(
            _severity_for_multiplicity(effective, hint.confidence, hint.fanout_factor),
            1,
        )
        if level < winner_rank:
            continue
        if (
            level == winner_rank
            and winner is not None
            and not (hint.confidence > winner[0].confidence)
        ):
            # Same risk but no better confidence: keep the earlier hint.
            continue
        winner = (hint, effective, reversed_direction)
        winner_rank = level
    return winner
288
+
289
+
290
def _severity_for_multiplicity(
    mult: str,
    confidence: float,
    fanout_factor: float,
) -> Literal["error", "warning", "info"]:
    """Map a relationship's multiplicity, confidence and fanout to a severity.

    Rules, applied in order:
    - confidence below ``_MIN_CALIBRATION_CONFIDENCE`` → "warning"
    - one-to-one or many-to-one → "info"
    - many-to-many → "error"
    - one-to-many with fanout above ``HIGH_FANOUT_THRESHOLD`` → "error"
    - anything else → "warning"
    """
    if confidence < _MIN_CALIBRATION_CONFIDENCE:
        return "warning"
    if mult in ("one-to-one", "many-to-one"):
        return "info"
    if mult == "many-to-many":
        return "error"
    high_fanout = mult == "one-to-many" and fanout_factor > HIGH_FANOUT_THRESHOLD
    return "error" if high_fanout else "warning"
307
+
308
+
309
+ def _message_for_multiplicity(
310
+ mult: str,
311
+ from_table: str,
312
+ join_table: str,
313
+ fanout_factor: float,
314
+ severity: Literal["error", "warning", "info"],
315
+ ) -> tuple[str, str]:
316
+ """Return (message, recommendation) grounded in known multiplicity.
317
+
318
+ ``from_table`` / ``join_table`` are query-oriented: the FROM-clause side
319
+ and the JOIN-clause side after direction normalization.
320
+ """
321
+ lt, rt = from_table, join_table
322
+ ff = fanout_factor
323
+ if mult == "one-to-one":
324
+ return (
325
+ "Aggregation over joined tables — 1:1 join confirmed, no inflation risk",
326
+ "Join is 1:1 — aggregation is safe. "
327
+ "Verify the 1:1 assumption holds as data evolves.",
328
+ )
329
+ if mult == "many-to-one":
330
+ return (
331
+ "Aggregation over joined tables — N:1 join, no row multiplication",
332
+ f"Join {lt} → {rt} is N:1 (dimension lookup). "
333
+ f"Aggregates on {lt} are safe. "
334
+ f"Aggregates on {rt} columns may repeat values — "
335
+ f"verify they are grouping keys, not measures.",
336
+ )
337
+ if mult == "many-to-many":
338
+ if severity == "error":
339
+ return (
340
+ "N:M join with aggregation — results almost certainly inflated",
341
+ f"Use a bridge table or pre-aggregate each side to the correct "
342
+ f"grain before joining {lt} ↔ {rt}.",
343
+ )
344
+ return (
345
+ "Possible N:M join with aggregation — review join relationship",
346
+ f"Verify the join relationship between {lt} and {rt}. "
347
+ f"If N:M, pre-aggregate each side before joining.",
348
+ )
349
+ if mult == "one-to-many":
350
+ if severity == "error":
351
+ return (
352
+ f"1:N join with aggregation and high fanout "
353
+ f"({ff:.1f}x) — results likely inflated",
354
+ f"Pre-aggregate {rt} to the {lt} grain before joining "
355
+ f"({ff:.1f}x average row multiplication).",
356
+ )
357
+ return (
358
+ f"1:N join with aggregation ({ff:.1f}x fanout) "
359
+ f"— verify aggregate correctness",
360
+ f"Pre-aggregate {rt} to the {lt} join key grain before joining, "
361
+ f"or verify aggregation handles the {ff:.1f}x row expansion correctly.",
362
+ )
363
+ # Unknown multiplicity
364
+ return (
365
+ "Aggregation over joined tables may inflate results",
366
+ "Pre-aggregate each table to the join key grain "
367
+ "before joining, or verify the join is 1:1 / N:1.",
368
+ )
369
+
370
+
371
def _calibrate_fanout(
    hint: RelationshipHint,
    mult: str,
    flipped: bool,
    detail_parts: list[str],
) -> QueryDiagnostic:
    """Build a ``fanout_risk`` diagnostic calibrated by a matched hint.

    Args:
        hint: The relationship hint that matched the query.
        mult: The hint's multiplicity, already normalized to the query's
            join direction.
        flipped: Whether the hint's left/right tables are reversed relative
            to the query; when True the table names are swapped for display.
        detail_parts: Detail sentences, joined with ``". "``. An empty list
            yields ``detail=None``.
    """
    # Orient table names to match the query's join direction.
    if flipped:
        from_table, join_table = hint.right_table, hint.left_table
    else:
        from_table, join_table = hint.left_table, hint.right_table

    evidence_text = (
        f"Relationship: {from_table} ↔ {join_table} "
        f"({mult}, {hint.fanout_factor:.1f}x fanout, "
        f"confidence {hint.confidence:.0%})"
    )
    # NOTE(review): evidence is passed as a one-element tuple, exactly as
    # before — presumably QueryDiagnostic.evidence is a tuple of strings;
    # confirm against its definition.
    evidence = (evidence_text,)

    severity = _severity_for_multiplicity(mult, hint.confidence, hint.fanout_factor)
    message, recommendation = _message_for_multiplicity(
        mult, from_table, join_table, hint.fanout_factor, severity
    )
    detail = ". ".join(detail_parts) if detail_parts else None
    return QueryDiagnostic(
        code="fanout_risk",
        severity=severity,
        message=message,
        detail=detail,
        recommendation=recommendation,
        confidence=hint.confidence,
        evidence=evidence,
    )
399
+
400
+
401
+ # ---------------------------------------------------------------------------
402
+ # Diagnostic detectors
403
+ # ---------------------------------------------------------------------------
404
+
405
+
406
def _detect_missing_join_predicates(
    select: exp.Select,
) -> list[QueryDiagnostic]:
    """Report joins without a predicate as ``missing_join_predicate`` errors.

    Emits one diagnostic when the FROM clause lists several direct sources
    (comma join), then one diagnostic per pair returned by
    ``_find_cross_joins``.
    """
    findings: list[QueryDiagnostic] = []

    # Comma joins: multiple direct sources in FROM
    from_sources = _direct_from_sources(select)
    if len(from_sources) > 1:
        joined_names = ", ".join(_source_name(src) for src in from_sources)
        findings.append(
            QueryDiagnostic(
                code="missing_join_predicate",
                severity="error",
                message=f"Implicit cross join between {joined_names} — "
                f"no explicit join predicate",
                detail=f"Tables {joined_names} appear in FROM without "
                f"a JOIN ... ON clause. This produces a cartesian product.",
                recommendation="Use explicit JOIN with ON clause to specify "
                "the join relationship.",
            )
        )

    # Explicit CROSS JOINs
    findings.extend(
        QueryDiagnostic(
            code="missing_join_predicate",
            severity="error",
            message=f"CROSS JOIN between {left} and {right} — no join predicate",
            detail=f"CROSS JOIN produces a cartesian product of {left} × {right}.",
            recommendation="If intentional, document why. Otherwise, "
            "use JOIN with ON clause.",
        )
        for left, right in _find_cross_joins(select)
    )
    return findings
443
+
444
+
445
def _detect_fanout_risk(
    select: exp.Select,
    schema_context: SchemaContext | None,
    relationship_context: RelationshipContext | None,
) -> list[QueryDiagnostic]:
    """Detect aggregation over joined tables (fanout risk).

    Nothing is reported unless the query has a direct join (explicit or
    comma join) and at least one aggregate function in SELECT or HAVING.

    Structural signals, any of which triggers a finding:
    - ``COUNT(*)`` is used,
    - aggregates reference columns through two or more distinct source
      refs (aliases, so self-joins count),
    - aggregates contain unqualified columns while two or more sources are
      in scope.

    With a structural signal, a matching relationship hint replaces the
    generic warning with a calibrated diagnostic that keeps the collected
    detail text. Without a hint the generic warning is emitted.

    Without a structural signal, a finding is produced only when a hint
    matches and its normalized multiplicity is neither one-to-one nor
    many-to-one.

    Returns:
        A list holding zero or one ``fanout_risk`` diagnostic.
    """
    if not (_has_join(select) or _has_comma_join(select)):
        return []

    aggs = _get_aggregate_functions(select)
    if not aggs:
        return []

    alias_map = _build_alias_map(select)
    table_count = len(_direct_from_sources(select)) + len(_direct_join_sources(select))

    def _matched_hint() -> tuple[RelationshipHint, str, bool] | None:
        # Match on resolved table names only (not aliases) so that short
        # aliases like 'o' or 'c' cannot produce phantom matches. The FROM
        # tables drive direction normalization.
        if not relationship_context or not relationship_context.hints:
            return None
        direct_from_tables = {
            src.name
            for src in _direct_from_sources(select)
            if isinstance(src, exp.Table)
        }
        return _find_matching_hint(
            set(alias_map.values()), direct_from_tables, relationship_context.hints
        )

    # COUNT(*) — always risky with joins
    has_count_star = False
    for agg in aggs:
        if isinstance(agg, exp.Count) and isinstance(agg.this, exp.Star):
            has_count_star = True
            break

    # Source refs (aliases) decide multi-table detection so that self-joins
    # (e1.salary, e2.salary → same table) are still flagged. Resolved table
    # names are used for detail messages only.
    agg_source_refs: set[str] = set()
    agg_resolved_tables: set[str] = set()
    has_unqualified_agg_cols = False
    for agg in aggs:
        for col in agg.find_all(exp.Column):
            ref = _column_table_ref(col)
            if not ref:
                has_unqualified_agg_cols = True
                continue
            agg_source_refs.add(ref)
            agg_resolved_tables.add(_resolve_table(ref, alias_map))

    multi_table_agg = len(agg_source_refs) > 1
    # Unqualified columns in aggregates with 2+ tables = ambiguous ownership
    ambiguous_agg = has_unqualified_agg_cols and table_count >= 2

    # --- Relationship-triggered detection path ---
    # No structural signal, but a hint may still mark the join as risky
    # (1:N or N:M). This covers a single-table aggregate inflated by a 1:N
    # join, e.g. SUM(o.amount) with JOIN line_items.
    if not (has_count_star or multi_table_agg or ambiguous_agg):
        match = _matched_hint()
        if match is None:
            return []
        hint, mult, flipped = match
        # Only fire for risky multiplicities — 1:1 and N:1 are safe.
        if mult in ("one-to-one", "many-to-one"):
            return []
        return [
            _calibrate_fanout(
                hint,
                mult,
                flipped,
                [
                    "Single-table aggregation after row-multiplying join — "
                    "aggregate values are inflated by the join fanout"
                ],
            )
        ]

    # --- Structural path: assemble the detail text ---
    detail_parts: list[str] = []
    if has_count_star:
        detail_parts.append("COUNT(*) is inflated by row multiplication from JOIN")
    if multi_table_agg:
        if len(agg_resolved_tables) == 1:
            # Several aliases resolving to one table: a self-join.
            tname = next(iter(agg_resolved_tables))
            aliases = sorted(agg_source_refs)
            detail_parts.append(
                f"Aggregate expressions reference columns from "
                f"aliases {', '.join(aliases)} (all from table {tname})"
            )
        else:
            detail_parts.append(
                f"Aggregate expressions reference columns from tables: "
                f"{', '.join(sorted(agg_resolved_tables))}"
            )
    if ambiguous_agg:
        detail_parts.append(
            "Unqualified columns in aggregate expressions with multiple "
            "tables in scope — column ownership is ambiguous"
        )
    if schema_context:
        for tname in sorted(agg_resolved_tables):
            tctx = schema_context.tables.get(tname)
            if tctx and tctx.grain_columns:
                detail_parts.append(
                    f"Table '{tname}' has grain: {', '.join(tctx.grain_columns)}"
                )

    # --- Relationship-based severity calibration ---
    match = _matched_hint()
    if match is not None:
        hint, mult, flipped = match
        # The calibrated finding subsumes the generic structural warning and
        # carries the detail parts collected above.
        return [_calibrate_fanout(hint, mult, flipped, detail_parts)]

    # No relationship context or no matching hint — generic structural finding
    return [
        QueryDiagnostic(
            code="fanout_risk",
            severity="warning",
            message="Aggregation over joined tables may inflate results",
            detail=". ".join(detail_parts) if detail_parts else None,
            recommendation="Pre-aggregate each table to the join key grain "
            "before joining, or verify the join is 1:1 / N:1.",
        )
    ]
599
+
600
+
601
+ # ---------------------------------------------------------------------------
602
+ # Propagation-backed re-aggregation detection
603
+ # ---------------------------------------------------------------------------
604
+
605
+
606
def _select_output_columns(select: exp.Select) -> dict[str, bool]:
    """Describe a SELECT's output columns as ``{name: is_aggregate_derived}``.

    Keys are lower-cased output names: the alias for aliased projections,
    the column name for bare column references. Other unaliased
    expressions and ``SELECT *`` are not tracked.

    An aliased projection is aggregate-derived when it contains an
    aggregate function and no window node. An aggregate inside a window is
    evaluated per row, not collapsed. Names that also appear as plain
    columns in GROUP BY are always reported as non-aggregate.
    """
    outputs: dict[str, bool] = {}
    for projection in select.expressions:
        if isinstance(projection, exp.Alias) and projection.alias:
            body = projection.this
            contains_agg = bool(body.find(exp.AggFunc))
            contains_window = bool(body.find(exp.Window))
            outputs[projection.alias.lower()] = contains_agg and not contains_window
        elif isinstance(projection, exp.Column):
            outputs[projection.name.lower()] = False
        # Anything else (including SELECT *) cannot be tracked — skip.

    # Group-by keys are never aggregate-derived even if aliased from one
    group = select.args.get("group")
    if group:
        for key_expr in group.expressions:
            if not isinstance(key_expr, exp.Column):
                continue
            key = key_expr.name.lower()
            if key in outputs:
                outputs[key] = False
    return outputs
641
+
642
+
643
def _propagate_aggregate_columns(
    parsed: exp.Expression,
) -> dict[str, dict[str, bool]]:
    """Build ``{source_name: {column_name: is_aggregate_derived}}``.

    Covers the CTEs of the first WITH clause found in ``parsed`` and, when
    ``parsed`` is a SELECT, its direct inline subqueries in FROM/JOIN. Keys
    are lower-cased aliases; a subquery without an alias is stored under
    ``"<subquery>"``.
    """
    agg_map: dict[str, dict[str, bool]] = {}

    def _register(key: str, body: exp.Select) -> None:
        # Columns merely passed through from a source already in agg_map
        # inherit that source's aggregate status.
        columns = _select_output_columns(body)
        _inherit_aggregate_lineage(body, columns, agg_map)
        agg_map[key] = columns

    # CTEs — handled in declaration order so later CTEs can inherit from
    # earlier ones.
    with_ = parsed.find(exp.With)
    if with_:
        for cte in with_.expressions:
            if not isinstance(cte, exp.CTE):
                continue
            cte_name = cte.alias
            cte_body = cte.find(exp.Select)
            if cte_name and cte_body:
                _register(cte_name.lower(), cte_body)

    # Inline subqueries in FROM / JOIN of the main SELECT
    if isinstance(parsed, exp.Select):
        for src in _direct_from_sources(parsed) + _direct_join_sources(parsed):
            if not isinstance(src, exp.Subquery):
                continue
            sub_name = (src.alias or "<subquery>").lower()
            sub_body = src.find(exp.Select)
            if sub_body:
                _register(sub_name, sub_body)

    return agg_map
682
+
683
+
684
def _inherit_aggregate_lineage(
    select: exp.Select,
    outputs: dict[str, bool],
    agg_map: dict[str, dict[str, bool]],
) -> None:
    """Propagate aggregate-derived status through pass-through columns.

    When ``select`` reads from a source already present in ``agg_map`` and
    projects one of its columns unchanged (bare, or as ``col AS alias``),
    the output inherits the upstream aggregate status. ``outputs`` is
    updated in place; entries already marked aggregate are left alone.

    Column resolution:
    - A qualified column (``t.col``) is looked up only in the source it
      names. If that source is not tracked in ``agg_map``, for example a
      plain table, nothing is inherited.
    - An unqualified column is looked up in every tracked source.

    Limitations:
    - ``SELECT *`` pass-throughs are not tracked (same as ``_select_output_columns``).
    """
    # Build source ref (alias or name) → agg_map key for this SELECT's
    # direct FROM and JOIN tables.
    source_keys: dict[str, str] = {}
    for src in _direct_from_sources(select) + _direct_join_sources(select):
        if isinstance(src, exp.Table):
            name = (src.alias or src.name).lower()
            table_name = src.name.lower()
            if table_name in agg_map:
                source_keys[name] = table_name

    if not source_keys:
        return

    def _upstream_is_aggregate(col: exp.Column) -> bool:
        """Tell whether ``col`` is aggregate-derived in its upstream source."""
        col_name = col.name.lower()
        ref = (col.table or "").lower()
        if ref:
            # An explicit qualifier pins the column to one source. Falling
            # back to other sources would taint a column that merely shares
            # its name with an aggregate column elsewhere.
            if ref not in source_keys:
                return False
            candidates = [source_keys[ref]]
        else:
            candidates = list(source_keys.values())
        return any(agg_map.get(key, {}).get(col_name) for key in candidates)

    for expr in select.expressions:
        if isinstance(expr, exp.Column):
            out_name, column = expr.name.lower(), expr
        elif isinstance(expr, exp.Alias) and isinstance(expr.this, exp.Column):
            # Only a plain column under the alias counts as a pass-through.
            out_name, column = expr.alias.lower(), expr.this
        else:
            continue
        # Already marked as aggregate by _select_output_columns — skip
        if outputs.get(out_name):
            continue
        if _upstream_is_aggregate(column):
            outputs[out_name] = True
744
+
745
+
746
def _detect_reaggregation(
    select: exp.Select, parsed: exp.Expression
) -> list[QueryDiagnostic]:
    """Detect re-aggregation: an outer aggregate over an aggregate-derived column.

    Aggregate lineage is propagated through the CTEs and inline subqueries
    of ``parsed``. Every aggregate in ``select`` (SELECT list and HAVING)
    is then checked for columns that are aggregate-derived in their source.

    Column resolution:
    - A qualified column is checked only against the source it names.
      Columns qualified with a source that is not aggregate-tracked, such
      as a plain table, are ignored.
    - An unqualified column is flagged only when it is aggregate-derived in
      every tracked source that exposes it.

    Returns:
        One ``reaggregation`` warning per distinct (source, column) pair.
    """
    agg_map = _propagate_aggregate_columns(parsed)
    if not agg_map:
        return []

    # Map each direct source ref (alias or name) to its agg_map key. CTE
    # references appear as Table nodes, so they are resolved by table name.
    source_names: dict[str, str] = {}
    for src in _direct_from_sources(select) + _direct_join_sources(select):
        name = _source_name(src).lower()
        if isinstance(src, exp.Table):
            table_name = src.name.lower()
            if table_name in agg_map:
                source_names[name] = table_name
        elif isinstance(src, exp.Subquery):
            if name in agg_map:
                source_names[name] = name

    if not source_names:
        return []

    # Find aggregate functions in the outer SELECT and HAVING
    outer_aggs = _get_aggregate_functions(select)
    if not outer_aggs:
        return []

    diags: list[QueryDiagnostic] = []
    seen: set[tuple[str, str]] = set()

    for agg in outer_aggs:
        outer_func = type(agg).__name__.upper()
        for col in agg.find_all(exp.Column):
            col_name = col.name.lower()
            table_ref = (col.table or "").lower()
            if table_ref:
                # Qualified column — belongs to exactly the named source.
                # If that source is not aggregate-tracked, the column cannot
                # be aggregate-derived. Do not fall back to other sources
                # that happen to expose the same column name.
                if table_ref not in source_names:
                    continue
                candidate_sources = [source_names[table_ref]]
            else:
                # Unqualified column — only flag if aggregate-derived in ALL
                # sources that contain it (ambiguous ownership → skip)
                containing = [
                    s for s in source_names.values() if col_name in agg_map.get(s, {})
                ]
                if not containing or not all(agg_map[s][col_name] for s in containing):
                    continue
                candidate_sources = containing

            for src_name in candidate_sources:
                key = (src_name, col_name)
                if not agg_map.get(src_name, {}).get(col_name) or key in seen:
                    continue
                seen.add(key)
                diags.append(
                    QueryDiagnostic(
                        code="reaggregation",
                        severity="warning",
                        message=(
                            f"{outer_func}({col.name}) re-aggregates an "
                            f"already aggregate-derived column"
                        ),
                        detail=(
                            f"Column '{col.name}' is produced by an aggregate "
                            f"in source '{src_name}'. Applying {outer_func} "
                            f"on top is likely a statistical error."
                        ),
                        recommendation=(
                            "Review whether the outer aggregation is correct. "
                            "Summing a pre-summed column or averaging an "
                            "already-averaged column usually produces "
                            "incorrect results."
                        ),
                    )
                )
    return diags
831
+
832
+
833
+ # ---------------------------------------------------------------------------
834
+ # Public API
835
+ # ---------------------------------------------------------------------------
836
+
837
+
838
def parse_inline_suppressions(sql: str) -> set[str]:
    """Collect diagnostic codes suppressed via ``-- dft:ignore`` comments.

    Supported forms:
    - ``-- dft:ignore fanout_risk`` — suppress one code
    - ``-- dft:ignore fanout_risk reaggregation`` — suppress several
    - ``-- dft:ignore`` — suppress everything (adds ``"*"``)

    The SQL is scanned line by line with ``_DFT_IGNORE_RE``. The scan does
    not distinguish comments from string literals, so a matching pattern
    inside a string literal is honoured too.
    """
    codes: set[str] = set()
    for raw_line in sql.splitlines():
        hit = _DFT_IGNORE_RE.search(raw_line.strip())
        if hit is None:
            continue
        listed = hit.group(1).split()
        # No codes after the marker means a blanket suppression.
        codes.update(listed or ["*"])
    return codes
860
+
861
+
862
def _apply_suppression(
    diags: list[QueryDiagnostic],
    suppressed_codes: set[str],
) -> tuple[list[QueryDiagnostic], list[QueryDiagnostic]]:
    """Partition diagnostics into ``(active, suppressed)``.

    A diagnostic is suppressed when its code is listed in
    ``suppressed_codes`` or when the set contains the blanket marker
    ``"*"``. Codes in ``_UNSUPPRESSIBLE_CODES`` always stay active. With an
    empty ``suppressed_codes`` the input list is returned as the active
    list unchanged.
    """
    if not suppressed_codes:
        return diags, []

    suppress_all = "*" in suppressed_codes
    kept: list[QueryDiagnostic] = []
    muted: list[QueryDiagnostic] = []
    for diag in diags:
        code = diag.code
        is_muted = code not in _UNSUPPRESSIBLE_CODES and (
            suppress_all or code in suppressed_codes
        )
        (muted if is_muted else kept).append(diag)
    return kept, muted
883
+
884
+
885
@overload
def validate_query(
    sql: str,
    *,
    dialect: str | None = ...,
    schema_context: SchemaContext | None = ...,
    relationship_context: RelationshipContext | None = ...,
    suppress: set[str] | None = ...,
    return_suppressed: Literal[False] = ...,
) -> list[QueryDiagnostic]: ...


@overload
def validate_query(
    sql: str,
    *,
    dialect: str | None = ...,
    schema_context: SchemaContext | None = ...,
    relationship_context: RelationshipContext | None = ...,
    suppress: set[str] | None = ...,
    return_suppressed: Literal[True],
) -> tuple[list[QueryDiagnostic], list[QueryDiagnostic]]: ...


def validate_query(
    sql: str,
    *,
    dialect: str | None = None,
    schema_context: SchemaContext | None = None,
    relationship_context: RelationshipContext | None = None,
    suppress: set[str] | None = None,
    return_suppressed: bool = False,
) -> list[QueryDiagnostic] | tuple[list[QueryDiagnostic], list[QueryDiagnostic]]:
    """Validate a SQL query and return structural diagnostics.

    Args:
        sql: SQL query string to validate.
        dialect: Optional SQLGlot dialect name (e.g. "duckdb", "bigquery").
        schema_context: Optional schema metadata to enrich diagnostics.
        relationship_context: Optional relationship metadata for severity
            calibration. When provided, fanout_risk findings are refined
            using known multiplicity, fanout factor, and confidence.
        suppress: Optional set of diagnostic codes to suppress externally
            (e.g. from YAML ``ignore`` or ``meta.yaml`` lint config). These
            are merged with codes found in ``-- dft:ignore`` comments.
        return_suppressed: If True, return a tuple of
            (active_diagnostics, suppressed_diagnostics).

    Returns:
        List of QueryDiagnostic findings (or tuple if return_suppressed=True).
        Empty list means no issues found. Empty input and SQL that cannot
        be tokenized or parsed yield a single ``parse_error`` diagnostic,
        which is never suppressed. Statements other than SELECT are not
        validated and yield no findings.
    """
    if not sql or not sql.strip():
        result = [
            QueryDiagnostic(
                code="parse_error",
                severity="error",
                message="Empty SQL query",
            )
        ]
        return (result, []) if return_suppressed else result

    # Collect suppression codes from SQL-inline comments + external callers
    suppressed_codes = parse_inline_suppressions(sql)
    if suppress:
        suppressed_codes.update(suppress)

    try:
        parsed = sqlglot.parse_one(sql, read=dialect)
    except (sqlglot.errors.ParseError, sqlglot.errors.TokenError) as e:
        # TokenError is raised by the tokenizer (e.g. an unterminated string
        # literal) before parsing starts. It is reported like a parse error
        # so malformed SQL never escapes as an exception.
        result = [
            QueryDiagnostic(
                code="parse_error",
                severity="error",
                message=f"SQL parse error: {e}",
            )
        ]
        return (result, []) if return_suppressed else result

    if not isinstance(parsed, exp.Select):
        # Only validate SELECT statements for now
        return ([], []) if return_suppressed else []

    diags: list[QueryDiagnostic] = []
    diags.extend(_detect_missing_join_predicates(parsed))
    diags.extend(_detect_fanout_risk(parsed, schema_context, relationship_context))
    diags.extend(_detect_reaggregation(parsed, parsed))

    active, suppressed_diags = _apply_suppression(diags, suppressed_codes)
    if return_suppressed:
        return active, suppressed_diags
    return active