dataface 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- d3_format/__init__.py +14 -0
- d3_format/errors.py +19 -0
- d3_format/format.py +551 -0
- d3_format/spec.py +159 -0
- dataface/DATAFACE_SYNTAX.md +1135 -0
- dataface/__init__.py +93 -0
- dataface/_docs_site.py +20 -0
- dataface/_install_hint.py +26 -0
- dataface/agent_api/__init__.py +79 -0
- dataface/agent_api/_init_templates/__init__.py +0 -0
- dataface/agent_api/_init_templates/agents_dft_snippet.md +26 -0
- dataface/agent_api/_init_templates/dataface.yml +15 -0
- dataface/agent_api/_init_templates/faces-dataface.yml +144 -0
- dataface/agent_api/_init_templates/index.md +24 -0
- dataface/agent_api/_paths.py +118 -0
- dataface/agent_api/_project_agents_md.py +43 -0
- dataface/agent_api/_session_store.py +486 -0
- dataface/agent_api/_state.py +28 -0
- dataface/agent_api/chat.py +221 -0
- dataface/agent_api/dashboards.py +257 -0
- dataface/agent_api/describe.py +366 -0
- dataface/agent_api/describe_query.py +120 -0
- dataface/agent_api/docs/__init__.py +25 -0
- dataface/agent_api/docs/_loader.py +292 -0
- dataface/agent_api/docs/yaml-reference.md +2757 -0
- dataface/agent_api/file_refs.py +118 -0
- dataface/agent_api/init.py +126 -0
- dataface/agent_api/inspect.py +128 -0
- dataface/agent_api/mcp_install.py +170 -0
- dataface/agent_api/query.py +274 -0
- dataface/agent_api/schema.py +658 -0
- dataface/agent_api/schema_search.py +284 -0
- dataface/agent_api/search.py +270 -0
- dataface/agent_api/skill_install.py +141 -0
- dataface/agent_api/skill_render.py +90 -0
- dataface/agent_api/skills.py +293 -0
- dataface/agent_api/surface_aliases.yaml +128 -0
- dataface/agent_api/validate.py +175 -0
- dataface/agent_api/validate_query.py +84 -0
- dataface/ai/__init__.py +39 -0
- dataface/ai/agent.py +139 -0
- dataface/ai/context.py +45 -0
- dataface/ai/events.py +62 -0
- dataface/ai/external_mcp.py +610 -0
- dataface/ai/generate_sql.py +96 -0
- dataface/ai/llm.py +403 -0
- dataface/ai/mcp/__init__.py +51 -0
- dataface/ai/mcp/server.py +289 -0
- dataface/ai/memories.py +85 -0
- dataface/ai/prompts.py +177 -0
- dataface/ai/schema_context.py +138 -0
- dataface/ai/skills/before-after-comparison/SKILL.md +102 -0
- dataface/ai/skills/before-after-comparison/examples/before-after-comparison.yml +24 -0
- dataface/ai/skills/dashboard-build/SKILL.md +212 -0
- dataface/ai/skills/dashboard-build/examples/_smoke.yml +15 -0
- dataface/ai/skills/dashboard-design/SKILL.md +182 -0
- dataface/ai/skills/dashboard-review/SKILL.md +113 -0
- dataface/ai/skills/dashboard-structural-review/SKILL.md +173 -0
- dataface/ai/skills/dashboard-visual-review/SKILL.md +139 -0
- dataface/ai/skills/dataface-mcp-setup/SKILL.md +177 -0
- dataface/ai/skills/dataface-troubleshooting/SKILL.md +225 -0
- dataface/ai/skills/drill-down-link/SKILL.md +112 -0
- dataface/ai/skills/drill-down-link/examples/drill-down-link.yml +27 -0
- dataface/ai/skills/faceted-small-multiples/SKILL.md +116 -0
- dataface/ai/skills/faceted-small-multiples/examples/faceted-small-multiples.yml +33 -0
- dataface/ai/skills/filter-bar-with-variables/SKILL.md +105 -0
- dataface/ai/skills/filter-bar-with-variables/examples/filter-bar-with-variables.yml +49 -0
- dataface/ai/skills/kpi-row/SKILL.md +101 -0
- dataface/ai/skills/kpi-row/examples/kpi-row.yml +55 -0
- dataface/ai/skills/report-design/SKILL.md +184 -0
- dataface/ai/skills/single-metric-bignum/SKILL.md +90 -0
- dataface/ai/skills/single-metric-bignum/examples/single-metric-bignum.yml +27 -0
- dataface/ai/skills/table-heavy-ops-dashboard/SKILL.md +114 -0
- dataface/ai/skills/table-heavy-ops-dashboard/examples/table-heavy-ops-dashboard.yml +48 -0
- dataface/ai/skills/time-series-trend/SKILL.md +93 -0
- dataface/ai/skills/time-series-trend/examples/time-series-trend.yml +26 -0
- dataface/ai/skills/top-n-with-detail/SKILL.md +98 -0
- dataface/ai/skills/top-n-with-detail/examples/top-n-with-detail.yml +45 -0
- dataface/ai/skills/two-by-two-grid-overview/SKILL.md +78 -0
- dataface/ai/skills/two-by-two-grid-overview/examples/two-by-two-grid-overview.yml +64 -0
- dataface/ai/tool_schemas.py +132 -0
- dataface/ai/tools/__init__.py +312 -0
- dataface/ai/yaml_utils.py +57 -0
- dataface/cli/__init__.py +3 -0
- dataface/cli/_console.py +48 -0
- dataface/cli/_error_format.py +83 -0
- dataface/cli/_extras.py +190 -0
- dataface/cli/_json_output.py +8 -0
- dataface/cli/_parsing.py +17 -0
- dataface/cli/_version_info.py +56 -0
- dataface/cli/commands/__init__.py +3 -0
- dataface/cli/commands/_agent_input.py +205 -0
- dataface/cli/commands/_agent_server.py +115 -0
- dataface/cli/commands/chat.py +645 -0
- dataface/cli/commands/describe.py +107 -0
- dataface/cli/commands/docs.py +131 -0
- dataface/cli/commands/extension.py +179 -0
- dataface/cli/commands/init.py +240 -0
- dataface/cli/commands/inspect.py +94 -0
- dataface/cli/commands/mcp_init.py +167 -0
- dataface/cli/commands/query.py +386 -0
- dataface/cli/commands/render.py +291 -0
- dataface/cli/commands/schema.py +411 -0
- dataface/cli/commands/search.py +49 -0
- dataface/cli/commands/serve.py +114 -0
- dataface/cli/commands/skills.py +133 -0
- dataface/cli/commands/skills_init.py +161 -0
- dataface/cli/commands/validate.py +63 -0
- dataface/cli/main.py +1501 -0
- dataface/core/__init__.py +75 -0
- dataface/core/compile/__init__.py +244 -0
- dataface/core/compile/_jinja_helpers.py +78 -0
- dataface/core/compile/channel.py +222 -0
- dataface/core/compile/chart_focus.py +101 -0
- dataface/core/compile/chart_resolved.py +169 -0
- dataface/core/compile/chart_type_detection.py +489 -0
- dataface/core/compile/chart_update.py +261 -0
- dataface/core/compile/colors.py +64 -0
- dataface/core/compile/compiler.py +904 -0
- dataface/core/compile/config.py +823 -0
- dataface/core/compile/custom_chart_types.py +208 -0
- dataface/core/compile/data_table_attachment.py +1287 -0
- dataface/core/compile/detect.py +110 -0
- dataface/core/compile/errors.py +302 -0
- dataface/core/compile/filter_injection.py +319 -0
- dataface/core/compile/introspection.py +527 -0
- dataface/core/compile/jinja.py +511 -0
- dataface/core/compile/labels_env.py +52 -0
- dataface/core/compile/markdown.py +154 -0
- dataface/core/compile/meta.py +388 -0
- dataface/core/compile/models/__init__.py +0 -0
- dataface/core/compile/models/chart/__init__.py +0 -0
- dataface/core/compile/models/chart/authored.py +2137 -0
- dataface/core/compile/models/chart/compiled.py +398 -0
- dataface/core/compile/models/config.py +347 -0
- dataface/core/compile/models/face/__init__.py +0 -0
- dataface/core/compile/models/face/authored.py +659 -0
- dataface/core/compile/models/face/compiled.py +522 -0
- dataface/core/compile/models/factories.py +201 -0
- dataface/core/compile/models/markers.py +40 -0
- dataface/core/compile/models/palette.py +36 -0
- dataface/core/compile/models/primitives.py +415 -0
- dataface/core/compile/models/query/__init__.py +0 -0
- dataface/core/compile/models/query/authored.py +246 -0
- dataface/core/compile/models/query/compiled.py +710 -0
- dataface/core/compile/models/refs.py +137 -0
- dataface/core/compile/models/source.py +611 -0
- dataface/core/compile/models/style/__init__.py +0 -0
- dataface/core/compile/models/style/authored.py +481 -0
- dataface/core/compile/models/style/compiled.py +3399 -0
- dataface/core/compile/models/style/merged.py +1682 -0
- dataface/core/compile/models/theme.py +362 -0
- dataface/core/compile/models/variable/__init__.py +0 -0
- dataface/core/compile/models/variable/authored.py +254 -0
- dataface/core/compile/models/vega_lite/__init__.py +0 -0
- dataface/core/compile/models/vega_lite/config.py +510 -0
- dataface/core/compile/models/vega_lite/contracts.py +171 -0
- dataface/core/compile/normalize_charts.py +494 -0
- dataface/core/compile/normalize_layout.py +1000 -0
- dataface/core/compile/normalize_queries.py +297 -0
- dataface/core/compile/normalize_variables.py +489 -0
- dataface/core/compile/normalizer.py +543 -0
- dataface/core/compile/palette.py +1100 -0
- dataface/core/compile/parameterized.py +658 -0
- dataface/core/compile/parser.py +228 -0
- dataface/core/compile/schema.py +20 -0
- dataface/core/compile/schema_renderers/__init__.py +0 -0
- dataface/core/compile/schema_renderers/json_schema.py +163 -0
- dataface/core/compile/schema_renderers/prompt.py +152 -0
- dataface/core/compile/schema_renderers/vscode_schema.py +301 -0
- dataface/core/compile/sizing.py +2126 -0
- dataface/core/compile/sources.py +518 -0
- dataface/core/compile/sql_authoring_lint.py +56 -0
- dataface/core/compile/style_cascade.py +471 -0
- dataface/core/compile/typography.py +299 -0
- dataface/core/compile/validator.py +301 -0
- dataface/core/compile/variables.py +53 -0
- dataface/core/compile/vega_config.py +98 -0
- dataface/core/compile/vega_lite/__init__.py +6 -0
- dataface/core/compile/vega_lite/validation.py +95 -0
- dataface/core/compile/yaml_error_formatter.py +838 -0
- dataface/core/connections.py +38 -0
- dataface/core/dashboard.py +358 -0
- dataface/core/defaults/default_config.yml +101 -0
- dataface/core/defaults/palettes/categorical/category-10-dark.yml +32 -0
- dataface/core/defaults/palettes/categorical/category-10-light.yml +43 -0
- dataface/core/defaults/palettes/categorical/category-10.yml +31 -0
- dataface/core/defaults/palettes/categorical/category-6-tonal-blue.yml +22 -0
- dataface/core/defaults/palettes/categorical/category-6-tonal-brown.yml +29 -0
- dataface/core/defaults/palettes/categorical/category-6-tonal-green.yml +20 -0
- dataface/core/defaults/palettes/categorical/category-6-tonal-orange.yml +21 -0
- dataface/core/defaults/palettes/categorical/category-6-tonal-purple.yml +20 -0
- dataface/core/defaults/palettes/categorical/editorial-10-dark.yml +32 -0
- dataface/core/defaults/palettes/categorical/editorial-10.yml +40 -0
- dataface/core/defaults/palettes/categorical/hero-6.yml +17 -0
- dataface/core/defaults/palettes/categorical/single-blue.yml +11 -0
- dataface/core/defaults/palettes/categorical/tableau.yml +20 -0
- dataface/core/defaults/palettes/data/xkcd_colors.json +3803 -0
- dataface/core/defaults/palettes/diverging/blue-red.yml +25 -0
- dataface/core/defaults/palettes/diverging/coolwarm.yml +24 -0
- dataface/core/defaults/palettes/diverging/crimson-green.yml +23 -0
- dataface/core/defaults/palettes/diverging/orange-teal.yml +23 -0
- dataface/core/defaults/palettes/diverging/sunset.yml +24 -0
- dataface/core/defaults/palettes/scaffold/dft-creams.yml +38 -0
- dataface/core/defaults/palettes/scaffold/dft-grays.yml +53 -0
- dataface/core/defaults/palettes/sequential/amber.yml +22 -0
- dataface/core/defaults/palettes/sequential/blue.yml +22 -0
- dataface/core/defaults/palettes/sequential/brown.yml +22 -0
- dataface/core/defaults/palettes/sequential/gray.yml +22 -0
- dataface/core/defaults/palettes/sequential/green.yml +22 -0
- dataface/core/defaults/palettes/sequential/purple.yml +22 -0
- dataface/core/defaults/palettes/sequential/rust.yml +22 -0
- dataface/core/defaults/palettes/sequential/teal.yml +22 -0
- dataface/core/defaults/palettes/tone/negative.yml +32 -0
- dataface/core/defaults/palettes/tone/positive.yml +22 -0
- dataface/core/defaults/palettes/tone/warning.yml +22 -0
- dataface/core/defaults/themes/_base.yaml +786 -0
- dataface/core/defaults/themes/bi.yaml +16 -0
- dataface/core/defaults/themes/carbong100.yaml +41 -0
- dataface/core/defaults/themes/cream.yaml +122 -0
- dataface/core/defaults/themes/dark.yaml +40 -0
- dataface/core/defaults/themes/diagnostics-title-angle-extreme.yaml +9 -0
- dataface/core/defaults/themes/diagnostics-title-baseline-extreme.yaml +9 -0
- dataface/core/defaults/themes/diagnostics-title-baseline.yaml +24 -0
- dataface/core/defaults/themes/diagnostics-title-center.yaml +8 -0
- dataface/core/defaults/themes/diagnostics-title-color-extreme.yaml +24 -0
- dataface/core/defaults/themes/diagnostics-title-font-extreme.yaml +25 -0
- dataface/core/defaults/themes/diagnostics-title-left.yaml +8 -0
- dataface/core/defaults/themes/diagnostics-title-offset-extreme.yaml +9 -0
- dataface/core/defaults/themes/diagnostics-title-size-extreme.yaml +24 -0
- dataface/core/defaults/themes/diagnostics-title-weight-extreme.yaml +24 -0
- dataface/core/defaults/themes/editorial.yaml +147 -0
- dataface/core/defaults/themes/light.yaml +30 -0
- dataface/core/defaults/themes/looker.yaml +17 -0
- dataface/core/defaults/themes/stark.yaml +134 -0
- dataface/core/errors/__init__.py +67 -0
- dataface/core/errors/codes_compile.py +56 -0
- dataface/core/errors/codes_execute.py +177 -0
- dataface/core/errors/codes_render.py +106 -0
- dataface/core/errors/codes_unknown.py +15 -0
- dataface/core/errors/hints.py +74 -0
- dataface/core/errors/registry.py +42 -0
- dataface/core/errors/structured.py +92 -0
- dataface/core/execute/__init__.py +91 -0
- dataface/core/execute/adapters/__init__.py +49 -0
- dataface/core/execute/adapters/adapter_registry.py +400 -0
- dataface/core/execute/adapters/base.py +245 -0
- dataface/core/execute/adapters/csv_adapter.py +239 -0
- dataface/core/execute/adapters/dbt_adapter.py +283 -0
- dataface/core/execute/adapters/dbt_adapter_factory.py +212 -0
- dataface/core/execute/adapters/dbt_macro_loader.py +95 -0
- dataface/core/execute/adapters/dbt_utils.py +150 -0
- dataface/core/execute/adapters/http_adapter.py +224 -0
- dataface/core/execute/adapters/metricflow_adapter.py +94 -0
- dataface/core/execute/adapters/schema_resolver_adapter.py +144 -0
- dataface/core/execute/adapters/sql_adapter.py +710 -0
- dataface/core/execute/adapters/values_adapter.py +58 -0
- dataface/core/execute/batch.py +744 -0
- dataface/core/execute/cache_backend.py +135 -0
- dataface/core/execute/cache_keys.py +66 -0
- dataface/core/execute/dbt_jinja.py +21 -0
- dataface/core/execute/dialects/__init__.py +121 -0
- dataface/core/execute/dialects/athena.py +75 -0
- dataface/core/execute/dialects/base.py +302 -0
- dataface/core/execute/dialects/bigquery.py +38 -0
- dataface/core/execute/dialects/databricks.py +68 -0
- dataface/core/execute/dialects/duckdb.py +35 -0
- dataface/core/execute/dialects/mysql.py +68 -0
- dataface/core/execute/dialects/postgres.py +39 -0
- dataface/core/execute/dialects/redshift.py +12 -0
- dataface/core/execute/dialects/snowflake.py +51 -0
- dataface/core/execute/dialects/sqlserver.py +92 -0
- dataface/core/execute/duckdb_cache.py +712 -0
- dataface/core/execute/duckdb_config.py +26 -0
- dataface/core/execute/errors.py +213 -0
- dataface/core/execute/executor.py +1249 -0
- dataface/core/execute/parallel.py +162 -0
- dataface/core/execute/setup_sql.py +58 -0
- dataface/core/execute/source_registry.py +72 -0
- dataface/core/execute/source_resolver.py +255 -0
- dataface/core/execute/sql_guard.py +387 -0
- dataface/core/execute/sql_literals.py +199 -0
- dataface/core/fonts.py +52 -0
- dataface/core/inspect/__init__.py +32 -0
- dataface/core/inspect/cache_factory.py +98 -0
- dataface/core/inspect/db_types.py +162 -0
- dataface/core/inspect/dbt_schema.py +96 -0
- dataface/core/inspect/defaults.yml +37 -0
- dataface/core/inspect/fanout_risk.py +109 -0
- dataface/core/inspect/manifest_utils.py +77 -0
- dataface/core/inspect/partials/categorical.yml +40 -0
- dataface/core/inspect/partials/date.yml +40 -0
- dataface/core/inspect/partials/numeric.yml +55 -0
- dataface/core/inspect/partition_types.py +38 -0
- dataface/core/inspect/query_validator.py +975 -0
- dataface/core/inspect/renderer.py +354 -0
- dataface/core/inspect/resolver.py +808 -0
- dataface/core/inspect/search.py +461 -0
- dataface/core/inspect/sources/__init__.py +32 -0
- dataface/core/inspect/sources/dbt.py +738 -0
- dataface/core/inspect/sources/duckdb_utils.py +66 -0
- dataface/core/inspect/templates/__init__.py +1 -0
- dataface/core/inspect/templates/categorical_column.yml +196 -0
- dataface/core/inspect/templates/charts.yml +109 -0
- dataface/core/inspect/templates/date_column.yml +248 -0
- dataface/core/inspect/templates/model.yml +138 -0
- dataface/core/inspect/templates/numeric_column.yml +261 -0
- dataface/core/inspect/templates/quality.yml +80 -0
- dataface/core/inspect/templates/string_column.yml +263 -0
- dataface/core/project_roots.py +165 -0
- dataface/core/render/__init__.py +87 -0
- dataface/core/render/board_links.py +176 -0
- dataface/core/render/chart/__init__.py +27 -0
- dataface/core/render/chart/arc_attached_table.py +251 -0
- dataface/core/render/chart/artifacts.py +16 -0
- dataface/core/render/chart/callout.py +225 -0
- dataface/core/render/chart/decisions.py +358 -0
- dataface/core/render/chart/geo.py +700 -0
- dataface/core/render/chart/kpi.py +916 -0
- dataface/core/render/chart/labels.py +76 -0
- dataface/core/render/chart/pipeline.py +818 -0
- dataface/core/render/chart/presentation.py +36 -0
- dataface/core/render/chart/profile.py +3438 -0
- dataface/core/render/chart/render_single.py +347 -0
- dataface/core/render/chart/renderers.py +193 -0
- dataface/core/render/chart/rendering.py +565 -0
- dataface/core/render/chart/serialization.py +90 -0
- dataface/core/render/chart/spark.py +496 -0
- dataface/core/render/chart/spark_bar.py +370 -0
- dataface/core/render/chart/spec_builders.py +154 -0
- dataface/core/render/chart/standard_renderer.py +2645 -0
- dataface/core/render/chart/table.py +2957 -0
- dataface/core/render/chart/table_support.py +1452 -0
- dataface/core/render/chart/tick_values.py +66 -0
- dataface/core/render/chart/time_unit_detect.py +809 -0
- dataface/core/render/chart/title_overflow.py +157 -0
- dataface/core/render/chart/type_inference.py +122 -0
- dataface/core/render/chart/validation.py +99 -0
- dataface/core/render/chart/vega_lite.py +125 -0
- dataface/core/render/chart/vega_lite_types.py +268 -0
- dataface/core/render/chart/vl_field_maps.py +346 -0
- dataface/core/render/chart_interactivity.py +24 -0
- dataface/core/render/control_registry.py +287 -0
- dataface/core/render/converters/__init__.py +24 -0
- dataface/core/render/converters/chart.py +276 -0
- dataface/core/render/converters/html.py +98 -0
- dataface/core/render/converters/pdf.py +40 -0
- dataface/core/render/converters/png.py +41 -0
- dataface/core/render/errors.py +144 -0
- dataface/core/render/face_api.py +160 -0
- dataface/core/render/faces.py +1194 -0
- dataface/core/render/font_measurement.py +48 -0
- dataface/core/render/font_support.py +197 -0
- dataface/core/render/fonts/DFTSansTabular-Regular.ttf +0 -0
- dataface/core/render/fonts/DFTSansTabular-Regular.woff2 +0 -0
- dataface/core/render/fonts/DFTSerifOldstyleProportional-Regular.ttf +0 -0
- dataface/core/render/fonts/DFTSerifOldstyleTabular-Regular.ttf +0 -0
- dataface/core/render/fonts/InterVariable.ttf +0 -0
- dataface/core/render/fonts/InterVariable.woff2 +0 -0
- dataface/core/render/fonts/NOTO_COLOR_EMOJI_LICENSE.txt +93 -0
- dataface/core/render/fonts/NOTO_EMOJI_LICENSE.txt +93 -0
- dataface/core/render/fonts/NotoColorEmoji-Regular.ttf +0 -0
- dataface/core/render/fonts/NotoColorEmoji-Regular.woff2 +0 -0
- dataface/core/render/fonts/NotoEmoji-Regular.ttf +0 -0
- dataface/core/render/fonts/NotoEmoji-Regular.woff2 +0 -0
- dataface/core/render/fonts/SOURCE_CODE_PRO_LICENSE.txt +93 -0
- dataface/core/render/fonts/SOURCE_SERIF_4_LICENSE.txt +98 -0
- dataface/core/render/fonts/SourceCodePro-Regular.ttf +0 -0
- dataface/core/render/fonts/SourceSerif4-Regular.ttf +0 -0
- dataface/core/render/fonts/_emoji_font_face.css +43 -0
- dataface/core/render/fonts/source-serif-4-variable-latin.woff2 +0 -0
- dataface/core/render/format_utils.py +329 -0
- dataface/core/render/geo_defaults.yml +28 -0
- dataface/core/render/json_format.py +146 -0
- dataface/core/render/layout_sizing.py +865 -0
- dataface/core/render/layouts.py +541 -0
- dataface/core/render/markdown_defaults.yml +16 -0
- dataface/core/render/missing_vars_prompt.py +79 -0
- dataface/core/render/placeholder.py +389 -0
- dataface/core/render/render_result.py +14 -0
- dataface/core/render/renderer.py +467 -0
- dataface/core/render/script_embedding.py +16 -0
- dataface/core/render/svg_utils.py +212 -0
- dataface/core/render/template_loader.py +69 -0
- dataface/core/render/templates/controls/_styles.css +606 -0
- dataface/core/render/templates/controls/checkbox.html +16 -0
- dataface/core/render/templates/controls/date.html +16 -0
- dataface/core/render/templates/controls/number.html +19 -0
- dataface/core/render/templates/controls/readonly.html +9 -0
- dataface/core/render/templates/controls/select.html +21 -0
- dataface/core/render/templates/controls/slider.html +22 -0
- dataface/core/render/templates/controls/text.html +16 -0
- dataface/core/render/templates/scripts/chart_interactivity.js +191 -0
- dataface/core/render/templates/scripts/variables.js +976 -0
- dataface/core/render/templates/svg/grid_pattern.svg +3 -0
- dataface/core/render/templates/svg/styles.css +51 -0
- dataface/core/render/terminal.py +311 -0
- dataface/core/render/terminal_charts.py +563 -0
- dataface/core/render/terminal_defaults.yml +2 -0
- dataface/core/render/terminal_layouts.py +299 -0
- dataface/core/render/terminal_text.py +31 -0
- dataface/core/render/text/__init__.py +1 -0
- dataface/core/render/text/case.py +113 -0
- dataface/core/render/text_format.py +129 -0
- dataface/core/render/utils.py +106 -0
- dataface/core/render/variable_controls.py +946 -0
- dataface/core/render/variable_input_refinement.py +140 -0
- dataface/core/render/warnings/__init__.py +15 -0
- dataface/core/render/warnings/bar_color_1_to_1_with_x.py +80 -0
- dataface/core/render/warnings/base.py +44 -0
- dataface/core/render/warnings/fanout_risk.py +15 -0
- dataface/core/render/warnings/from_query_diagnostic.py +56 -0
- dataface/core/render/warnings/missing_join_predicate.py +13 -0
- dataface/core/render/warnings/query_parse_error.py +14 -0
- dataface/core/render/warnings/query_returned_zero_rows.py +42 -0
- dataface/core/render/warnings/reaggregation.py +14 -0
- dataface/core/render/warnings/registry.py +45 -0
- dataface/core/render/warnings/suppression.py +46 -0
- dataface/core/render/warnings/temporal_single_point.py +63 -0
- dataface/core/render/warnings/unreferenced_chart.py +15 -0
- dataface/core/render/warnings/y_encoding_mostly_null.py +76 -0
- dataface/core/render/yaml_format.py +167 -0
- dataface/core/resolve_face.py +195 -0
- dataface/core/schema/__init__.py +0 -0
- dataface/core/schema/guidance.py +151 -0
- dataface/core/scoped_paths.py +59 -0
- dataface/core/serve/__init__.py +14 -0
- dataface/core/serve/bootstrap.py +39 -0
- dataface/core/serve/embedded.py +57 -0
- dataface/core/serve/port.py +129 -0
- dataface/core/serve/server.py +938 -0
- dataface/core/serve/templates/__init__.py +0 -0
- dataface/core/serve/templates/directory.yml +6 -0
- dataface/core/serve/templates/error.html.j2 +217 -0
- dataface/core/utils.py +121 -0
- dataface/core/validate.py +64 -0
- dataface/integrations/__init__.py +0 -0
- dataface/integrations/highlighting.py +351 -0
- dataface/integrations/markdown.py +537 -0
- dataface/py.typed +0 -0
- dataface-0.1.2.dist-info/METADATA +375 -0
- dataface-0.1.2.dist-info/RECORD +455 -0
- dataface-0.1.2.dist-info/WHEEL +4 -0
- dataface-0.1.2.dist-info/entry_points.txt +2 -0
- dataface-0.1.2.dist-info/licenses/LICENSE +202 -0
- mdsvg/__init__.py +168 -0
- mdsvg/fonts.py +656 -0
- mdsvg/images.py +299 -0
- mdsvg/parser.py +629 -0
- mdsvg/playground.py +284 -0
- mdsvg/py.typed +2 -0
- mdsvg/renderer.py +1623 -0
- mdsvg/style.py +355 -0
- mdsvg/types.py +200 -0
- mdsvg/utils.py +86 -0
|
@@ -0,0 +1,738 @@
|
|
|
1
|
+
"""Cold-start `SchemaSource` composing a dbt-core adapter + lazy manifest.
|
|
2
|
+
|
|
3
|
+
The dbt-core adapter (built by ``build_adapter()`` with macros loaded per
|
|
4
|
+
the bootstrap-dbt-macro-manifest task) answers live questions:
|
|
5
|
+
``list_schemas``, ``list_relations``, ``get_columns_in_relation``,
|
|
6
|
+
``get_column_schema_from_query``. The manifest contributes only what is
|
|
7
|
+
*explicitly declared* — descriptions, declared types, listed tests, tags,
|
|
8
|
+
owner, and ``relationships:`` tests.
|
|
9
|
+
|
|
10
|
+
No inference. No naming heuristics. No FK guessing from ``<x>_id → <x>``.
|
|
11
|
+
If the manifest doesn't carry the field explicitly, it's absent — empty is
|
|
12
|
+
the honest answer.
|
|
13
|
+
|
|
14
|
+
Cross-table derivations (reverse-FK roll-up, M2M two-hop, lineage walks)
|
|
15
|
+
live in the Phase 4 resolver, not here.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import re
|
|
21
|
+
from collections import deque
|
|
22
|
+
from datetime import datetime, timezone
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import Any
|
|
25
|
+
|
|
26
|
+
from dbt_common.exceptions.base import DbtRuntimeError
|
|
27
|
+
|
|
28
|
+
from dataface.core.execute.adapters.dbt_utils import load_dbt_manifest
|
|
29
|
+
from dataface.core.inspect.partition_types import (
|
|
30
|
+
PartitionEntry,
|
|
31
|
+
PartitionType,
|
|
32
|
+
TablePartitions,
|
|
33
|
+
)
|
|
34
|
+
from dataface.core.inspect.sources.duckdb_utils import duckdb_resolve_database
|
|
35
|
+
|
|
36
|
+
# dbt attaches these kwargs to every generic test as internal plumbing. They
# are not user-facing test config, so they are stripped from the manifest
# test kwargs.
_INTERNAL_TEST_KWARGS: frozenset[str] = frozenset(("column_name", "model"))

# Only these unique-id prefixes denote actual warehouse tables. Every other
# prefix (test.*, exposure.*, metric.*, semantic_model.*, unit_test.*,
# saved_query.*, analysis.*) is a dbt metadata node rather than a table and
# must be filtered out before lineage neighbors are returned.
_TABLE_UID_PREFIXES: tuple[str, ...] = (
    "model.",
    "source.",
    "seed.",
    "snapshot.",
)

# `kwargs.to` is written as "ref('users')" or "source('raw', 'users')".
# Lineage walks are owned by the Phase 4 resolver; these patterns exist only
# to pull out the target table name that pairs with `kwargs.field`.
_REF_RE = re.compile(r"ref\(\s*['\"]([^'\"]+)['\"]\s*\)")
_SOURCE_RE = re.compile(
    r"source\(\s*['\"]([^'\"]+)['\"]\s*,\s*['\"]([^'\"]+)['\"]\s*\)"
)

# Identifiers that fully match this pattern are returned unquoted by
# `_snowflake_identifier`; anything else gets double-quoted there.
_SNOWFLAKE_IDENTIFIER_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_$]*")

# Upper-cased column types that `_bq_partition_type` classifies as "time".
_BQ_TIME_PARTITION_TYPES = frozenset(("DATE", "DATETIME", "TIMESTAMP"))
# BigQuery integer-range partitioning only supports INTEGER/INT64 columns.
_BQ_RANGE_PARTITION_TYPES = frozenset(("INT64", "INTEGER"))
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _sql_string_literal(value: str) -> str:
    """Quote ``value`` as a SQL string literal.

    Every embedded single quote is doubled, then the whole text is wrapped
    in single quotes.
    """
    escaped = value.replace("'", "''")
    return f"'{escaped}'"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _bq_identifier(value: str | None) -> str:
    """Validate a BigQuery identifier and hand it back unchanged.

    Args:
        value: Candidate identifier; may be ``None``.

    Returns:
        ``value`` itself, unmodified (no quoting is added here).

    Raises:
        ValueError: If ``value`` is ``None`` or contains a backtick.
    """
    # NOTE(review): backticks are presumably rejected because callers wrap
    # the result in backticks themselves — confirm against call sites.
    if value is not None and "`" not in value:
        return value
    raise ValueError(f"Unsupported BigQuery identifier: {value!r}")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _snowflake_identifier(value: str | None) -> str:
    """Render ``value`` as a Snowflake identifier, quoting only when needed.

    Args:
        value: Candidate identifier; may be ``None``.

    Returns:
        ``value`` unchanged when it fully matches
        ``_SNOWFLAKE_IDENTIFIER_RE``; otherwise ``value`` wrapped in double
        quotes with each embedded double quote doubled.

    Raises:
        ValueError: If ``value`` is ``None``, empty, or contains a NUL
            character.
    """
    unusable = value is None or value == "" or "\x00" in value
    if unusable:
        raise ValueError(f"Unsupported Snowflake database identifier: {value!r}")
    if not _SNOWFLAKE_IDENTIFIER_RE.fullmatch(value):
        escaped = value.replace('"', '""')
        return '"' + escaped + '"'
    return value
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _json_datetime(value: datetime) -> str:
    """Serialize an aware datetime as a UTC ISO-8601 string ending in ``Z``.

    Args:
        value: Datetime to serialize; must carry a usable UTC offset.

    Returns:
        The instant converted to UTC and ISO-formatted, with the
        ``+00:00`` offset replaced by ``Z``.

    Raises:
        ValueError: If ``value`` has no ``tzinfo`` or its ``utcoffset()``
            is ``None``.
    """
    is_aware = value.tzinfo is not None and value.utcoffset() is not None
    if not is_aware:
        raise ValueError("last_modified must be timezone-aware")
    utc_text = value.astimezone(timezone.utc).isoformat()
    return utc_text.replace("+00:00", "Z")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _bq_partition_type(column: str, data_type: str | None) -> PartitionType:
    """Classify a BigQuery partition column as ``"time"`` or ``"range"``.

    Args:
        column: Column name; used only in the missing-type error message.
        data_type: Column type name, compared case-insensitively.

    Returns:
        ``"time"`` when the upper-cased type is in
        ``_BQ_TIME_PARTITION_TYPES``, ``"range"`` when it is in
        ``_BQ_RANGE_PARTITION_TYPES``.

    Raises:
        ValueError: If ``data_type`` is ``None`` or is in neither set.
    """
    if data_type is None:
        raise ValueError(f"Missing BigQuery partition column type for {column!r}")
    type_name = data_type.upper()
    is_time = type_name in _BQ_TIME_PARTITION_TYPES
    if not is_time and type_name not in _BQ_RANGE_PARTITION_TYPES:
        raise ValueError(f"Unsupported BigQuery partition column type: {data_type!r}")
    return "time" if is_time else "range"
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class DbtSchemaSource:
|
|
96
|
+
"""`SchemaSource` composing a live dbt adapter + manifest reader."""
|
|
97
|
+
|
|
98
|
+
name = "dbt"
|
|
99
|
+
generated_at: datetime | None = None # live source — no cache build time
|
|
100
|
+
|
|
101
|
+
def __init__(
|
|
102
|
+
self,
|
|
103
|
+
adapter: Any,
|
|
104
|
+
project_root: Path,
|
|
105
|
+
*,
|
|
106
|
+
db_path: str | None = None,
|
|
107
|
+
) -> None:
|
|
108
|
+
self._adapter = adapter
|
|
109
|
+
self._project_root = Path(project_root)
|
|
110
|
+
# db_path is the configured file path for DuckDB sources; None for
|
|
111
|
+
# in-memory or non-DuckDB. Used by _resolve_duckdb_database to read
|
|
112
|
+
# PRAGMA database_list once and cache the attach name.
|
|
113
|
+
self._db_path = db_path
|
|
114
|
+
self._manifest_loaded = False
|
|
115
|
+
self._manifest: dict[str, Any] | None = None
|
|
116
|
+
# ``(schema_lower, table_lower) -> (uid, model_node)``. Built once
|
|
117
|
+
# from the manifest so list_tables doesn't walk every node per
|
|
118
|
+
# relation. ``None`` means "haven't built it yet."
|
|
119
|
+
self._model_index: dict[tuple[str, str], tuple[str, dict[str, Any]]] | None = (
|
|
120
|
+
None
|
|
121
|
+
)
|
|
122
|
+
# Per-schema relation cache. Avoids the N+1 in level-4 wildcard
|
|
123
|
+
# queries: the resolver iterates expanded targets and each
|
|
124
|
+
# ``profile_table(schema, t)`` would otherwise re-list the whole
|
|
125
|
+
# schema. Instance-scoped — fresh on every resolver call cycle
|
|
126
|
+
# (the resolver builds DbtSchemaSource per-source per-call).
|
|
127
|
+
self._relations_by_schema: dict[str, list[Any]] = {}
|
|
128
|
+
# Lazy (schema_lower, name_lower) → unique_id index for lineage
|
|
129
|
+
# resolution. Covers models, seeds, snapshots from nodes and
|
|
130
|
+
# sources from the sources section. None = not yet built.
|
|
131
|
+
self._uid_index: dict[tuple[str, str], str] | None = None
|
|
132
|
+
# Resolved DuckDB attach name — None means "not yet resolved".
|
|
133
|
+
# Only populated for DuckDB adapters; stays None for all others.
|
|
134
|
+
self._duckdb_database: str | None = None
|
|
135
|
+
self._duckdb_database_resolved: bool = False
|
|
136
|
+
|
|
137
|
+
@property
|
|
138
|
+
def has_manifest(self) -> bool:
|
|
139
|
+
"""Is a manifest available with at least one node? (Not "did it
|
|
140
|
+
contribute to this call.") Lazy: forces the load.
|
|
141
|
+
|
|
142
|
+
Per-call manifest contribution is decided in the resolver by
|
|
143
|
+
inspecting whether the response actually carries manifest-only keys
|
|
144
|
+
(description / tags / declared_type / tests / relationships / etc.),
|
|
145
|
+
because a request for a table that exists in the warehouse but is
|
|
146
|
+
absent from the manifest must report ``sources_consulted=["dbt_adapter"]``,
|
|
147
|
+
not ``["dbt_adapter", "dbt_manifest"]``.
|
|
148
|
+
"""
|
|
149
|
+
manifest = self._load_manifest()
|
|
150
|
+
return bool(manifest and manifest.get("nodes"))
|
|
151
|
+
|
|
152
|
+
# ---- SchemaSource methods ---------------------------------------------
|
|
153
|
+
|
|
154
|
+
def _credentials_database(self) -> str | None:
|
|
155
|
+
"""Return the database name from adapter credentials, or None if absent.
|
|
156
|
+
|
|
157
|
+
Snowflake requires a database argument for list_schemas / list_relations.
|
|
158
|
+
DuckDB uses a path-based attach (see _resolve_duckdb_database); its
|
|
159
|
+
credentials carry no database field, so this returns None for DuckDB.
|
|
160
|
+
"""
|
|
161
|
+
config = getattr(self._adapter, "config", None)
|
|
162
|
+
creds = getattr(config, "credentials", None)
|
|
163
|
+
return getattr(creds, "database", None)
|
|
164
|
+
|
|
165
|
+
def list_schemas(self) -> dict[str, Any] | None:
    """List the schemas the adapter reports, without duplicates.

    Returns ``{"schemas": {name: {}}}`` in first-seen order, or ``None``
    when the adapter reports no non-empty schema names.
    """
    database = self._credentials_database()
    with self._adapter.connection_named("dft_schema_list_schemas"):
        reported = list(self._adapter.list_schemas(database))
    # dbt's list_schemas may return duplicates across databases (e.g. DuckDB
    # returns 'main' multiple times when the same DB is attached more than
    # once). dict.fromkeys de-duplicates while keeping first-seen order;
    # falsy names are dropped.
    unique = dict.fromkeys(name for name in reported if name)
    if not unique:
        return None
    return {"schemas": {name: {} for name in unique}}
|
|
179
|
+
|
|
180
|
+
def _resolve_duckdb_database(self) -> str | None:
|
|
181
|
+
"""Return the DuckDB attach name for this adapter, resolved once.
|
|
182
|
+
|
|
183
|
+
Returns None for non-DuckDB adapters. For DuckDB, reads
|
|
184
|
+
``PRAGMA database_list`` and caches the result — one round-trip
|
|
185
|
+
per DbtSchemaSource instance, amortized across all ``_get_relations``
|
|
186
|
+
calls in one resolver cycle.
|
|
187
|
+
"""
|
|
188
|
+
if self._duckdb_database_resolved:
|
|
189
|
+
return self._duckdb_database
|
|
190
|
+
self._duckdb_database_resolved = True
|
|
191
|
+
if self._adapter.type() != "duckdb":
|
|
192
|
+
return None
|
|
193
|
+
with self._adapter.connection_named("dft_schema_resolve_db"):
|
|
194
|
+
self._duckdb_database = duckdb_resolve_database(
|
|
195
|
+
self._adapter, self._db_path
|
|
196
|
+
)
|
|
197
|
+
return self._duckdb_database
|
|
198
|
+
|
|
199
|
+
def _get_relations(self, schema: str) -> list[Any]:
|
|
200
|
+
"""Return the relations in ``schema``; populate the per-schema cache.
|
|
201
|
+
|
|
202
|
+
Single source of truth for the warehouse round-trip. Both
|
|
203
|
+
``list_tables`` and ``profile_table`` route through here so a
|
|
204
|
+
wildcard level-4 walk only pays one ``list_relations`` per schema.
|
|
205
|
+
|
|
206
|
+
For file-backed DuckDB the database argument must be the attach name
|
|
207
|
+
(e.g. 'dundersign'), not None — passing None returns an empty list.
|
|
208
|
+
``_resolve_duckdb_database`` reads PRAGMA database_list once per
|
|
209
|
+
source per cycle to get the correct name.
|
|
210
|
+
"""
|
|
211
|
+
cached = self._relations_by_schema.get(schema)
|
|
212
|
+
if cached is not None:
|
|
213
|
+
return cached
|
|
214
|
+
database = self._resolve_duckdb_database() or self._credentials_database()
|
|
215
|
+
with self._adapter.connection_named("dft_schema_list_relations"):
|
|
216
|
+
relations = list(self._adapter.list_relations(database, schema))
|
|
217
|
+
self._relations_by_schema[schema] = relations
|
|
218
|
+
return relations
|
|
219
|
+
|
|
220
|
+
def list_tables(self, schema: str) -> dict[str, Any] | None:
    """Summarize every relation in ``schema``.

    Returns ``{"tables": {identifier: summary}}`` where each summary holds
    the relation ``kind`` plus whatever ``_apply_table_manifest`` adds when
    a manifest model matches. Returns ``None`` when the schema has no
    relations.
    """
    found = self._get_relations(schema)
    if not found:
        return None
    tables: dict[str, dict[str, Any]] = {}
    for relation in found:
        identifier = relation.identifier
        info: dict[str, Any] = {"kind": _relation_kind(relation)}
        match = self._lookup_model(schema=schema, table=identifier)
        if match is not None:
            # match is (unique_id, node); only the node feeds the summary.
            self._apply_table_manifest(info, match[1])
        tables[identifier] = info
    return {"tables": tables}
|
|
233
|
+
|
|
234
|
+
def profile_table(
    self, schema: str, table: str, lineage_depth: int = 1
) -> dict[str, Any] | None:
    """Build the full profile for one table.

    Combines the adapter's view of the relation (kind, columns and their
    actual types, partitions, last-modified) with manifest data when a
    model matches (column descriptions, tests, relationships, table-level
    fields) and lineage neighbours up to ``lineage_depth`` hops.

    Returns ``None`` when ``schema`` has no relation whose identifier
    equals ``table``.
    """
    relation = None
    for candidate in self._get_relations(schema):
        if candidate.identifier == table:
            relation = candidate
            break
    if relation is None:
        return None

    with self._adapter.connection_named("dft_schema_profile_table"):
        warehouse_columns = list(self._adapter.get_columns_in_relation(relation))

    model_entry = self._lookup_model(schema=schema, table=table)
    if model_entry:
        node_uid, node = model_entry[0], model_entry[1]
    else:
        node_uid, node = None, None
    declared_columns = (node or {}).get("columns") or {}
    if node:
        column_tests, column_rels = self._collect_column_tests(node_uid, node)
    else:
        column_tests, column_rels = {}, {}

    columns: dict[str, dict[str, Any]] = {}
    for column in warehouse_columns:
        key = column.name
        details: dict[str, Any] = {"actual_type": _column_dtype(column)}
        declared = declared_columns.get(key)
        if isinstance(declared, dict):
            _apply_column_manifest(details, declared)
        attached_tests = column_tests.get(key)
        if attached_tests:
            details["tests"] = attached_tests
        attached_rels = column_rels.get(key)
        if attached_rels:
            details["relationships"] = attached_rels
        columns[key] = details

    # Lineage walks start from the broader uid index (covers
    # seeds/snapshots/sources) rather than the model-only index.
    lineage_root = self._lookup_uid(schema=schema, table=table)
    upstream = self._lineage_neighbors(lineage_root, "parent_map", lineage_depth)
    downstream = self._lineage_neighbors(lineage_root, "child_map", lineage_depth)

    partitions = self._fetch_partitions(relation)
    last_modified = self._fetch_last_modified(relation)

    profile: dict[str, Any] = {
        "kind": _relation_kind(relation),
        "table_exists": True,
        "columns": columns,
        "upstream": upstream,
        "downstream": downstream,
        "partitions": partitions.model_dump(mode="json", exclude_none=True),
    }
    if last_modified is not None:
        profile["last_modified"] = _json_datetime(last_modified)
    if node is not None:
        self._apply_table_manifest(profile, node)
    return profile
|
|
289
|
+
|
|
290
|
+
def _fetch_partitions(self, relation: Any) -> TablePartitions:
|
|
291
|
+
adapter_type = self._adapter.type()
|
|
292
|
+
if adapter_type == "bigquery":
|
|
293
|
+
return self._fetch_bq_partitions(relation)
|
|
294
|
+
if adapter_type == "snowflake":
|
|
295
|
+
return self._fetch_snowflake_partitions(relation)
|
|
296
|
+
return TablePartitions(type="none", supported=False)
|
|
297
|
+
|
|
298
|
+
def _fetch_bq_partitions(self, relation: Any) -> TablePartitions:
    """Fetch BigQuery partition metadata via INFORMATION_SCHEMA.

    get_partitions_metadata uses the legacy $__PARTITIONS_SUMMARY__ table,
    which lacks total_rows, total_logical_bytes, and the partition column
    name. INFORMATION_SCHEMA.PARTITIONS has all three, and
    INFORMATION_SCHEMA.COLUMNS filtered on is_partitioning_column='YES'
    gives the column name — so two queries are issued on one connection.

    Result shape:
    - no partitioning column and no partition rows -> ``unpartitioned``
    - partition rows but no partitioning column    -> ``ingestion``
    - otherwise the type comes from ``_bq_partition_type``.
    """
    project = _bq_identifier(relation.database)
    dataset = _bq_identifier(relation.schema)
    table_name = relation.identifier
    info_schema = f"`{project}`.`{dataset}`.INFORMATION_SCHEMA"
    col_sql = (
        "SELECT column_name, data_type"
        f" FROM {info_schema}.COLUMNS"
        f" WHERE table_name = {_sql_string_literal(table_name)}"
        " AND is_partitioning_column = 'YES'"
        " LIMIT 1"
    )
    parts_sql = (
        "SELECT partition_id, total_rows, total_logical_bytes, last_modified_time"
        f" FROM {info_schema}.PARTITIONS"
        f" WHERE table_name = {_sql_string_literal(table_name)}"
        " AND partition_id NOT IN ('__NULL__', '__STREAMING_UNPARTITIONED__')"
        " ORDER BY partition_id DESC"
    )

    with self._adapter.connection_named("dft_schema_bq_partitions"):
        _, col_result = self._adapter.execute(col_sql, fetch=True)
        if col_result.rows:
            partition_col = str(col_result.rows[0][0])
        else:
            partition_col = None
        if col_result.rows:
            partition_data_type = str(col_result.rows[0][1])
        else:
            partition_data_type = None
        _, parts_result = self._adapter.execute(parts_sql, fetch=True)

    def _optional_int(value: Any) -> int | None:
        # NULL metadata stays None rather than being coerced.
        return None if value is None else int(value)

    header = list(parts_result.column_names)
    entries = []
    for raw_row in parts_result.rows:
        record = dict(zip(header, raw_row, strict=False))
        modified = record.get("last_modified_time")
        entries.append(
            PartitionEntry(
                partition_id=str(record.get("partition_id", "")),
                row_count=_optional_int(record.get("total_rows")),
                size_bytes=_optional_int(record.get("total_logical_bytes")),
                # Only genuine datetimes are passed through.
                last_modified=modified if isinstance(modified, datetime) else None,
            )
        )

    if partition_col is None:
        if not entries:
            return TablePartitions(type="unpartitioned", entries=[], supported=True)
        p_type: PartitionType = "ingestion"
    else:
        p_type = _bq_partition_type(partition_col, partition_data_type)
    return TablePartitions(
        column=partition_col,
        type=p_type,
        entries=entries,
        supported=True,
    )
|
|
358
|
+
|
|
359
|
+
def _fetch_snowflake_partitions(self, relation: Any) -> TablePartitions:
    """Fetch the Snowflake clustering key via INFORMATION_SCHEMA.TABLES.

    Snowflake uses clustering keys (not partitions) for scan pruning.
    CLUSTERING_KEY is a string like "LINEAR(col1, col2)" or an expression.
    The raw expression is surfaced (whitespace-stripped) as ``column`` so
    nested commas and quoted identifiers do not get corrupted by
    best-effort parsing.

    A missing row or an empty key is reported as ``unpartitioned``.
    """
    database = _snowflake_identifier(relation.database)
    schema_name = relation.schema
    table_name = relation.identifier
    sql = (
        "SELECT CLUSTERING_KEY"
        f" FROM {database}.INFORMATION_SCHEMA.TABLES"
        f" WHERE TABLE_SCHEMA = {_sql_string_literal(schema_name)}"
        f" AND TABLE_NAME = {_sql_string_literal(table_name)}"
        " LIMIT 1"
    )
    with self._adapter.connection_named("dft_schema_snowflake_partitions"):
        _, result = self._adapter.execute(sql, fetch=True)

    raw_key = result.rows[0][0] if result.rows else None
    if not raw_key:
        return TablePartitions(type="unpartitioned", supported=True)
    return TablePartitions(
        type="clustering",
        column=str(raw_key).strip(),
        entries=[],
        supported=True,
    )
|
|
388
|
+
|
|
389
|
+
def _fetch_last_modified(self, relation: Any) -> datetime | None:
|
|
390
|
+
try:
|
|
391
|
+
with self._adapter.connection_named("dft_schema_freshness"):
|
|
392
|
+
_, freshness = self._adapter.calculate_freshness_from_metadata(relation)
|
|
393
|
+
except (NotImplementedError, DbtRuntimeError):
|
|
394
|
+
# NotImplementedError: adapter explicitly declares no support.
|
|
395
|
+
# DbtRuntimeError: dbt-duckdb raises it with "macro not implemented"
|
|
396
|
+
# for get_relation_last_modified — same semantic: freshness not supported.
|
|
397
|
+
return None
|
|
398
|
+
max_loaded_at = freshness.get("max_loaded_at") if freshness else None
|
|
399
|
+
if isinstance(max_loaded_at, datetime) and max_loaded_at.year == 1:
|
|
400
|
+
return None
|
|
401
|
+
return max_loaded_at if isinstance(max_loaded_at, datetime) else None
|
|
402
|
+
|
|
403
|
+
def describe_query(self, sql: str) -> dict[str, Any] | None:
    """Describe the result columns of ``sql``.

    Asks the adapter for the query's column schema and returns
    ``{"columns": {name: {"actual_type": dtype}}}``, or ``None`` when the
    adapter reports no columns.
    """
    with self._adapter.connection_named("dft_schema_describe_query"):
        described = list(self._adapter.get_column_schema_from_query(sql))
    if not described:
        return None
    columns = {}
    for column in described:
        columns[column.name] = {"actual_type": _column_dtype(column)}
    return {"columns": columns}
|
|
409
|
+
|
|
410
|
+
# ---- Lineage helpers ---------------------------------------------------
|
|
411
|
+
|
|
412
|
+
def _build_uid_index(self) -> dict[tuple[str, str], str]:
|
|
413
|
+
"""Build (schema_lower, name_lower) → unique_id for all table nodes.
|
|
414
|
+
|
|
415
|
+
Covers models, seeds, snapshots from manifest["nodes"] and raw
|
|
416
|
+
warehouse sources from manifest["sources"]. Sources use "identifier"
|
|
417
|
+
(warehouse table name) rather than "name" (the source alias).
|
|
418
|
+
"""
|
|
419
|
+
index: dict[tuple[str, str], str] = {}
|
|
420
|
+
manifest = self._load_manifest()
|
|
421
|
+
if not manifest:
|
|
422
|
+
return index
|
|
423
|
+
for uid, node in manifest.get("nodes", {}).items():
|
|
424
|
+
rt = node.get("resource_type")
|
|
425
|
+
if rt not in {"model", "seed", "snapshot"}:
|
|
426
|
+
continue
|
|
427
|
+
schema = (node.get("schema") or "").lower()
|
|
428
|
+
name = (
|
|
429
|
+
node.get("identifier") or node.get("alias") or node.get("name") or ""
|
|
430
|
+
).lower()
|
|
431
|
+
if schema and name:
|
|
432
|
+
index[(schema, name)] = uid
|
|
433
|
+
for uid, node in manifest.get("sources", {}).items():
|
|
434
|
+
schema = (node.get("schema") or "").lower()
|
|
435
|
+
identifier = (node.get("identifier") or node.get("name") or "").lower()
|
|
436
|
+
if schema and identifier:
|
|
437
|
+
index[(schema, identifier)] = uid
|
|
438
|
+
return index
|
|
439
|
+
|
|
440
|
+
def _lookup_uid(self, schema: str, table: str) -> str | None:
|
|
441
|
+
if self._uid_index is None:
|
|
442
|
+
self._uid_index = self._build_uid_index()
|
|
443
|
+
return self._uid_index.get((schema.lower(), table.lower()))
|
|
444
|
+
|
|
445
|
+
def _resolve_uid_ref(self, uid: str) -> dict[str, Any] | None:
|
|
446
|
+
"""Resolve a unique_id to a lineage ref dict {model_name, schema, table, kind}.
|
|
447
|
+
|
|
448
|
+
``model_name`` is the dbt symbol (what appears in ``ref()`` /
|
|
449
|
+
``source()``). ``table`` is the warehouse table name, which may differ
|
|
450
|
+
when a model sets ``alias`` / ``identifier`` or a source sets
|
|
451
|
+
``identifier``.
|
|
452
|
+
"""
|
|
453
|
+
manifest = self._load_manifest()
|
|
454
|
+
if not manifest:
|
|
455
|
+
return None
|
|
456
|
+
if uid.startswith("source."):
|
|
457
|
+
node = manifest.get("sources", {}).get(uid)
|
|
458
|
+
if not isinstance(node, dict):
|
|
459
|
+
return None
|
|
460
|
+
schema = node.get("schema") or ""
|
|
461
|
+
model_name = node.get("name") or ""
|
|
462
|
+
table = node.get("identifier") or model_name
|
|
463
|
+
kind = "source"
|
|
464
|
+
else:
|
|
465
|
+
node = manifest.get("nodes", {}).get(uid)
|
|
466
|
+
if not isinstance(node, dict):
|
|
467
|
+
return None
|
|
468
|
+
schema = node.get("schema") or ""
|
|
469
|
+
model_name = node.get("name") or ""
|
|
470
|
+
table = node.get("identifier") or node.get("alias") or model_name
|
|
471
|
+
kind = "ref"
|
|
472
|
+
if not schema or not model_name:
|
|
473
|
+
return None
|
|
474
|
+
return {
|
|
475
|
+
"model_name": model_name,
|
|
476
|
+
"schema": schema,
|
|
477
|
+
"table": table,
|
|
478
|
+
"kind": kind,
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
def _lineage_neighbors(
|
|
482
|
+
self,
|
|
483
|
+
uid: str | None,
|
|
484
|
+
edge_map_name: str,
|
|
485
|
+
depth: int,
|
|
486
|
+
) -> list[dict[str, Any]]:
|
|
487
|
+
"""BFS over parent_map or child_map up to ``depth`` hops.
|
|
488
|
+
|
|
489
|
+
Filters out non-table uid prefixes (test.*, exposure.*, metric.*,
|
|
490
|
+
semantic_model.*, unit_test.*, saved_query.*, analysis.*). Returns
|
|
491
|
+
a list of resolved lineage ref dicts, deduped via visited set.
|
|
492
|
+
"""
|
|
493
|
+
if uid is None:
|
|
494
|
+
return []
|
|
495
|
+
manifest = self._load_manifest()
|
|
496
|
+
if not manifest:
|
|
497
|
+
return []
|
|
498
|
+
edge_map: dict[str, list[str]] = manifest.get(edge_map_name, {})
|
|
499
|
+
result: list[dict[str, Any]] = []
|
|
500
|
+
visited: set[str] = {uid}
|
|
501
|
+
queue: deque[tuple[str, int]] = deque([(uid, 0)])
|
|
502
|
+
while queue:
|
|
503
|
+
current_uid, d = queue.popleft()
|
|
504
|
+
for neighbor_uid in edge_map.get(current_uid, []):
|
|
505
|
+
if neighbor_uid in visited:
|
|
506
|
+
continue
|
|
507
|
+
visited.add(neighbor_uid)
|
|
508
|
+
if not any(neighbor_uid.startswith(p) for p in _TABLE_UID_PREFIXES):
|
|
509
|
+
continue
|
|
510
|
+
ref = self._resolve_uid_ref(neighbor_uid)
|
|
511
|
+
if ref is not None:
|
|
512
|
+
result.append(ref)
|
|
513
|
+
if d + 1 < depth:
|
|
514
|
+
queue.append((neighbor_uid, d + 1))
|
|
515
|
+
return result
|
|
516
|
+
|
|
517
|
+
# ---- Manifest helpers --------------------------------------------------
|
|
518
|
+
|
|
519
|
+
def _load_manifest(self) -> dict[str, Any] | None:
|
|
520
|
+
if not self._manifest_loaded:
|
|
521
|
+
self._manifest = load_dbt_manifest(self._project_root)
|
|
522
|
+
self._manifest_loaded = True
|
|
523
|
+
return self._manifest
|
|
524
|
+
|
|
525
|
+
def _build_model_index(self) -> dict[tuple[str, str], tuple[str, dict[str, Any]]]:
|
|
526
|
+
index: dict[tuple[str, str], tuple[str, dict[str, Any]]] = {}
|
|
527
|
+
manifest = self._load_manifest()
|
|
528
|
+
if not manifest:
|
|
529
|
+
return index
|
|
530
|
+
for uid, node in manifest.get("nodes", {}).items():
|
|
531
|
+
if node.get("resource_type") != "model":
|
|
532
|
+
continue
|
|
533
|
+
schema = (node.get("schema") or "").lower()
|
|
534
|
+
table = (node.get("alias") or node.get("name") or "").lower()
|
|
535
|
+
if not schema or not table:
|
|
536
|
+
continue
|
|
537
|
+
index[(schema, table)] = (uid, node)
|
|
538
|
+
return index
|
|
539
|
+
|
|
540
|
+
def _lookup_model(
|
|
541
|
+
self, schema: str, table: str
|
|
542
|
+
) -> tuple[str, dict[str, Any]] | None:
|
|
543
|
+
if self._model_index is None:
|
|
544
|
+
self._model_index = self._build_model_index()
|
|
545
|
+
return self._model_index.get((schema.lower(), table.lower()))
|
|
546
|
+
|
|
547
|
+
def _collect_column_tests(
|
|
548
|
+
self,
|
|
549
|
+
model_uid: str | None,
|
|
550
|
+
model_node: dict[str, Any],
|
|
551
|
+
) -> tuple[dict[str, list[dict[str, Any]]], dict[str, list[dict[str, str]]]]:
|
|
552
|
+
"""Return (tests_by_column, relationships_by_column).
|
|
553
|
+
|
|
554
|
+
Walks ``manifest.nodes`` for generic-test entries attached to the
|
|
555
|
+
given model and groups them per column. ``relationships:`` tests
|
|
556
|
+
are promoted into the structured relationships list and *not*
|
|
557
|
+
also echoed in ``tests:`` — one canonical surfacing.
|
|
558
|
+
"""
|
|
559
|
+
tests_by_column: dict[str, list[dict[str, Any]]] = {}
|
|
560
|
+
rels_by_column: dict[str, list[dict[str, str]]] = {}
|
|
561
|
+
manifest = self._load_manifest()
|
|
562
|
+
if not manifest:
|
|
563
|
+
return tests_by_column, rels_by_column
|
|
564
|
+
|
|
565
|
+
model_name = model_node.get("name", "")
|
|
566
|
+
file_key_target = f"models.{model_name}"
|
|
567
|
+
|
|
568
|
+
for node in manifest.get("nodes", {}).values():
|
|
569
|
+
if node.get("resource_type") != "test":
|
|
570
|
+
continue
|
|
571
|
+
attached = node.get("attached_node")
|
|
572
|
+
file_key = node.get("file_key_name")
|
|
573
|
+
if not (
|
|
574
|
+
(
|
|
575
|
+
attached is not None
|
|
576
|
+
and model_uid is not None
|
|
577
|
+
and attached == model_uid
|
|
578
|
+
)
|
|
579
|
+
or (file_key == file_key_target and model_name)
|
|
580
|
+
):
|
|
581
|
+
continue
|
|
582
|
+
|
|
583
|
+
metadata = node.get("test_metadata") or {}
|
|
584
|
+
test_name = metadata.get("name")
|
|
585
|
+
if not test_name:
|
|
586
|
+
continue
|
|
587
|
+
raw_kwargs = metadata.get("kwargs") or {}
|
|
588
|
+
column_name = node.get("column_name") or raw_kwargs.get("column_name")
|
|
589
|
+
if not column_name:
|
|
590
|
+
# Table-level tests aren't surfaced in this Phase 2 — they
|
|
591
|
+
# have no clear column to attach to. Phase 3 may add them
|
|
592
|
+
# to a table-level `tests` field.
|
|
593
|
+
continue
|
|
594
|
+
|
|
595
|
+
if test_name == "relationships":
|
|
596
|
+
rel = _parse_relationships_target(raw_kwargs)
|
|
597
|
+
if rel is not None:
|
|
598
|
+
rels_by_column.setdefault(column_name, []).append(rel)
|
|
599
|
+
continue
|
|
600
|
+
|
|
601
|
+
clean_kwargs = {
|
|
602
|
+
k: v for k, v in raw_kwargs.items() if k not in _INTERNAL_TEST_KWARGS
|
|
603
|
+
}
|
|
604
|
+
tests_by_column.setdefault(column_name, []).append(
|
|
605
|
+
{"name": test_name, "kwargs": clean_kwargs}
|
|
606
|
+
)
|
|
607
|
+
|
|
608
|
+
return tests_by_column, rels_by_column
|
|
609
|
+
|
|
610
|
+
@staticmethod
|
|
611
|
+
def _apply_table_manifest(out: dict[str, Any], node: dict[str, Any]) -> None:
|
|
612
|
+
description = node.get("description")
|
|
613
|
+
if isinstance(description, str) and description:
|
|
614
|
+
out["description"] = description
|
|
615
|
+
tags = node.get("tags")
|
|
616
|
+
if isinstance(tags, list) and tags:
|
|
617
|
+
out["tags"] = list(tags)
|
|
618
|
+
meta = node.get("meta") or {}
|
|
619
|
+
owner = meta.get("owner")
|
|
620
|
+
if owner:
|
|
621
|
+
out["owner"] = owner
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
def _column_dtype(col: Any) -> str:
|
|
625
|
+
"""Read a dbt-core Column's canonical ``dtype`` attribute."""
|
|
626
|
+
return str(col.dtype)
|
|
627
|
+
|
|
628
|
+
|
|
629
|
+
def _relation_kind(relation: Any) -> str:
|
|
630
|
+
kind = getattr(relation, "type", None)
|
|
631
|
+
return str(kind) if kind else "table"
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
def _apply_column_manifest(entry: dict[str, Any], manifest_col: dict[str, Any]) -> None:
|
|
635
|
+
description = manifest_col.get("description")
|
|
636
|
+
if isinstance(description, str) and description:
|
|
637
|
+
entry["description"] = description
|
|
638
|
+
declared_type = manifest_col.get("data_type")
|
|
639
|
+
if declared_type:
|
|
640
|
+
entry["declared_type"] = declared_type
|
|
641
|
+
tags = manifest_col.get("tags")
|
|
642
|
+
if isinstance(tags, list) and tags:
|
|
643
|
+
entry["tags"] = list(tags)
|
|
644
|
+
granularity = manifest_col.get("granularity")
|
|
645
|
+
if granularity:
|
|
646
|
+
entry["granularity"] = granularity
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
def extract_all_relationships(
    manifest: dict[str, Any] | None,
) -> list[dict[str, str]]:
    """Collect every forward FK declared by a ``relationships:`` test.

    Returns a list of ``{from_table, from_column, to_table, to_column}``
    dicts — one per test that names a valid target. Empty when no manifest.

    Pure: takes a manifest dict and returns a list. The resolver uses this
    directly (without building a dbt adapter) so cache-hit short-circuits
    can still surface ``referenced_by`` / ``linked_via`` without paying for
    a warehouse-connection-bearing adapter build.
    """
    if not manifest:
        return []

    nodes = manifest.get("nodes", {})

    # uid -> model name (alias preferred), so ``attached_node`` can be
    # resolved to the source table. Tests without ``attached_node`` fall
    # back to ``file_key_name = "models.<name>"``.
    uid_to_name: dict[str, str] = {
        uid: label
        for uid, node in nodes.items()
        if node.get("resource_type") == "model"
        and (label := node.get("alias") or node.get("name"))
    }

    relationships: list[dict[str, str]] = []
    for node in nodes.values():
        if node.get("resource_type") != "test":
            continue
        metadata = node.get("test_metadata") or {}
        if metadata.get("name") != "relationships":
            continue
        test_kwargs = metadata.get("kwargs") or {}
        target = _parse_relationships_target(test_kwargs)
        if target is None:
            continue
        from_column = node.get("column_name") or test_kwargs.get("column_name")
        if not from_column:
            continue
        from_table = _resolve_attached_table(node, uid_to_name)
        if not from_table:
            continue
        relationships.append(
            {
                "from_table": from_table,
                "from_column": from_column,
                "to_table": target["to_table"],
                "to_column": target["to_column"],
            }
        )
    return relationships
|
|
699
|
+
|
|
700
|
+
|
|
701
|
+
def _resolve_attached_table(
|
|
702
|
+
test_node: dict[str, Any], uid_to_name: dict[str, str]
|
|
703
|
+
) -> str | None:
|
|
704
|
+
"""Return the source-table name for a generic-test node.
|
|
705
|
+
|
|
706
|
+
Tests carry either ``attached_node`` (a model uid) or
|
|
707
|
+
``file_key_name`` (``"models.<name>"``). We prefer ``attached_node``
|
|
708
|
+
because it survives renames; ``file_key_name`` is the fallback for
|
|
709
|
+
older manifest shapes that don't stamp it.
|
|
710
|
+
"""
|
|
711
|
+
attached = test_node.get("attached_node")
|
|
712
|
+
if attached and attached in uid_to_name:
|
|
713
|
+
return uid_to_name[attached]
|
|
714
|
+
file_key = test_node.get("file_key_name")
|
|
715
|
+
if isinstance(file_key, str) and file_key.startswith("models."):
|
|
716
|
+
return file_key[len("models.") :]
|
|
717
|
+
return None
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
def _parse_relationships_target(kwargs: dict[str, Any]) -> dict[str, str] | None:
|
|
721
|
+
"""Extract ``{to_table, to_column}`` from a relationships-test kwargs.
|
|
722
|
+
|
|
723
|
+
``kwargs.to`` is the Jinja string a user wrote (``ref('users')`` or
|
|
724
|
+
``source('raw', 'users')``); ``kwargs.field`` is the target column.
|
|
725
|
+
Returns ``None`` if the kwargs aren't shaped like an explicit FK
|
|
726
|
+
declaration — we don't guess.
|
|
727
|
+
"""
|
|
728
|
+
to_str = kwargs.get("to")
|
|
729
|
+
field = kwargs.get("field")
|
|
730
|
+
if not isinstance(to_str, str) or not isinstance(field, str) or not field:
|
|
731
|
+
return None
|
|
732
|
+
m = _REF_RE.match(to_str.strip())
|
|
733
|
+
if m:
|
|
734
|
+
return {"to_table": m.group(1), "to_column": field}
|
|
735
|
+
m = _SOURCE_RE.match(to_str.strip())
|
|
736
|
+
if m:
|
|
737
|
+
return {"to_table": m.group(2), "to_column": field}
|
|
738
|
+
return None
|