dataface 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- d3_format/__init__.py +14 -0
- d3_format/errors.py +19 -0
- d3_format/format.py +551 -0
- d3_format/spec.py +159 -0
- dataface/DATAFACE_SYNTAX.md +1135 -0
- dataface/__init__.py +93 -0
- dataface/_docs_site.py +20 -0
- dataface/_install_hint.py +26 -0
- dataface/agent_api/__init__.py +79 -0
- dataface/agent_api/_init_templates/__init__.py +0 -0
- dataface/agent_api/_init_templates/agents_dft_snippet.md +26 -0
- dataface/agent_api/_init_templates/dataface.yml +15 -0
- dataface/agent_api/_init_templates/faces-dataface.yml +144 -0
- dataface/agent_api/_init_templates/index.md +24 -0
- dataface/agent_api/_paths.py +118 -0
- dataface/agent_api/_project_agents_md.py +43 -0
- dataface/agent_api/_session_store.py +486 -0
- dataface/agent_api/_state.py +28 -0
- dataface/agent_api/chat.py +221 -0
- dataface/agent_api/dashboards.py +257 -0
- dataface/agent_api/describe.py +366 -0
- dataface/agent_api/describe_query.py +120 -0
- dataface/agent_api/docs/__init__.py +25 -0
- dataface/agent_api/docs/_loader.py +292 -0
- dataface/agent_api/docs/yaml-reference.md +2757 -0
- dataface/agent_api/file_refs.py +118 -0
- dataface/agent_api/init.py +126 -0
- dataface/agent_api/inspect.py +128 -0
- dataface/agent_api/mcp_install.py +170 -0
- dataface/agent_api/query.py +274 -0
- dataface/agent_api/schema.py +658 -0
- dataface/agent_api/schema_search.py +284 -0
- dataface/agent_api/search.py +270 -0
- dataface/agent_api/skill_install.py +141 -0
- dataface/agent_api/skill_render.py +90 -0
- dataface/agent_api/skills.py +293 -0
- dataface/agent_api/surface_aliases.yaml +128 -0
- dataface/agent_api/validate.py +175 -0
- dataface/agent_api/validate_query.py +84 -0
- dataface/ai/__init__.py +39 -0
- dataface/ai/agent.py +139 -0
- dataface/ai/context.py +45 -0
- dataface/ai/events.py +62 -0
- dataface/ai/external_mcp.py +610 -0
- dataface/ai/generate_sql.py +96 -0
- dataface/ai/llm.py +403 -0
- dataface/ai/mcp/__init__.py +51 -0
- dataface/ai/mcp/server.py +289 -0
- dataface/ai/memories.py +85 -0
- dataface/ai/prompts.py +177 -0
- dataface/ai/schema_context.py +138 -0
- dataface/ai/skills/before-after-comparison/SKILL.md +102 -0
- dataface/ai/skills/before-after-comparison/examples/before-after-comparison.yml +24 -0
- dataface/ai/skills/dashboard-build/SKILL.md +212 -0
- dataface/ai/skills/dashboard-build/examples/_smoke.yml +15 -0
- dataface/ai/skills/dashboard-design/SKILL.md +182 -0
- dataface/ai/skills/dashboard-review/SKILL.md +113 -0
- dataface/ai/skills/dashboard-structural-review/SKILL.md +173 -0
- dataface/ai/skills/dashboard-visual-review/SKILL.md +139 -0
- dataface/ai/skills/dataface-mcp-setup/SKILL.md +177 -0
- dataface/ai/skills/dataface-troubleshooting/SKILL.md +225 -0
- dataface/ai/skills/drill-down-link/SKILL.md +112 -0
- dataface/ai/skills/drill-down-link/examples/drill-down-link.yml +27 -0
- dataface/ai/skills/faceted-small-multiples/SKILL.md +116 -0
- dataface/ai/skills/faceted-small-multiples/examples/faceted-small-multiples.yml +33 -0
- dataface/ai/skills/filter-bar-with-variables/SKILL.md +105 -0
- dataface/ai/skills/filter-bar-with-variables/examples/filter-bar-with-variables.yml +49 -0
- dataface/ai/skills/kpi-row/SKILL.md +101 -0
- dataface/ai/skills/kpi-row/examples/kpi-row.yml +55 -0
- dataface/ai/skills/report-design/SKILL.md +184 -0
- dataface/ai/skills/single-metric-bignum/SKILL.md +90 -0
- dataface/ai/skills/single-metric-bignum/examples/single-metric-bignum.yml +27 -0
- dataface/ai/skills/table-heavy-ops-dashboard/SKILL.md +114 -0
- dataface/ai/skills/table-heavy-ops-dashboard/examples/table-heavy-ops-dashboard.yml +48 -0
- dataface/ai/skills/time-series-trend/SKILL.md +93 -0
- dataface/ai/skills/time-series-trend/examples/time-series-trend.yml +26 -0
- dataface/ai/skills/top-n-with-detail/SKILL.md +98 -0
- dataface/ai/skills/top-n-with-detail/examples/top-n-with-detail.yml +45 -0
- dataface/ai/skills/two-by-two-grid-overview/SKILL.md +78 -0
- dataface/ai/skills/two-by-two-grid-overview/examples/two-by-two-grid-overview.yml +64 -0
- dataface/ai/tool_schemas.py +132 -0
- dataface/ai/tools/__init__.py +312 -0
- dataface/ai/yaml_utils.py +57 -0
- dataface/cli/__init__.py +3 -0
- dataface/cli/_console.py +48 -0
- dataface/cli/_error_format.py +83 -0
- dataface/cli/_extras.py +190 -0
- dataface/cli/_json_output.py +8 -0
- dataface/cli/_parsing.py +17 -0
- dataface/cli/_version_info.py +56 -0
- dataface/cli/commands/__init__.py +3 -0
- dataface/cli/commands/_agent_input.py +205 -0
- dataface/cli/commands/_agent_server.py +115 -0
- dataface/cli/commands/chat.py +645 -0
- dataface/cli/commands/describe.py +107 -0
- dataface/cli/commands/docs.py +131 -0
- dataface/cli/commands/extension.py +179 -0
- dataface/cli/commands/init.py +240 -0
- dataface/cli/commands/inspect.py +94 -0
- dataface/cli/commands/mcp_init.py +167 -0
- dataface/cli/commands/query.py +386 -0
- dataface/cli/commands/render.py +291 -0
- dataface/cli/commands/schema.py +411 -0
- dataface/cli/commands/search.py +49 -0
- dataface/cli/commands/serve.py +114 -0
- dataface/cli/commands/skills.py +133 -0
- dataface/cli/commands/skills_init.py +161 -0
- dataface/cli/commands/validate.py +63 -0
- dataface/cli/main.py +1501 -0
- dataface/core/__init__.py +75 -0
- dataface/core/compile/__init__.py +244 -0
- dataface/core/compile/_jinja_helpers.py +78 -0
- dataface/core/compile/channel.py +222 -0
- dataface/core/compile/chart_focus.py +101 -0
- dataface/core/compile/chart_resolved.py +169 -0
- dataface/core/compile/chart_type_detection.py +489 -0
- dataface/core/compile/chart_update.py +261 -0
- dataface/core/compile/colors.py +64 -0
- dataface/core/compile/compiler.py +904 -0
- dataface/core/compile/config.py +823 -0
- dataface/core/compile/custom_chart_types.py +208 -0
- dataface/core/compile/data_table_attachment.py +1287 -0
- dataface/core/compile/detect.py +110 -0
- dataface/core/compile/errors.py +302 -0
- dataface/core/compile/filter_injection.py +319 -0
- dataface/core/compile/introspection.py +527 -0
- dataface/core/compile/jinja.py +511 -0
- dataface/core/compile/labels_env.py +52 -0
- dataface/core/compile/markdown.py +154 -0
- dataface/core/compile/meta.py +388 -0
- dataface/core/compile/models/__init__.py +0 -0
- dataface/core/compile/models/chart/__init__.py +0 -0
- dataface/core/compile/models/chart/authored.py +2137 -0
- dataface/core/compile/models/chart/compiled.py +398 -0
- dataface/core/compile/models/config.py +347 -0
- dataface/core/compile/models/face/__init__.py +0 -0
- dataface/core/compile/models/face/authored.py +659 -0
- dataface/core/compile/models/face/compiled.py +522 -0
- dataface/core/compile/models/factories.py +201 -0
- dataface/core/compile/models/markers.py +40 -0
- dataface/core/compile/models/palette.py +36 -0
- dataface/core/compile/models/primitives.py +415 -0
- dataface/core/compile/models/query/__init__.py +0 -0
- dataface/core/compile/models/query/authored.py +246 -0
- dataface/core/compile/models/query/compiled.py +710 -0
- dataface/core/compile/models/refs.py +137 -0
- dataface/core/compile/models/source.py +611 -0
- dataface/core/compile/models/style/__init__.py +0 -0
- dataface/core/compile/models/style/authored.py +481 -0
- dataface/core/compile/models/style/compiled.py +3399 -0
- dataface/core/compile/models/style/merged.py +1682 -0
- dataface/core/compile/models/theme.py +362 -0
- dataface/core/compile/models/variable/__init__.py +0 -0
- dataface/core/compile/models/variable/authored.py +254 -0
- dataface/core/compile/models/vega_lite/__init__.py +0 -0
- dataface/core/compile/models/vega_lite/config.py +510 -0
- dataface/core/compile/models/vega_lite/contracts.py +171 -0
- dataface/core/compile/normalize_charts.py +494 -0
- dataface/core/compile/normalize_layout.py +1000 -0
- dataface/core/compile/normalize_queries.py +297 -0
- dataface/core/compile/normalize_variables.py +489 -0
- dataface/core/compile/normalizer.py +543 -0
- dataface/core/compile/palette.py +1100 -0
- dataface/core/compile/parameterized.py +658 -0
- dataface/core/compile/parser.py +228 -0
- dataface/core/compile/schema.py +20 -0
- dataface/core/compile/schema_renderers/__init__.py +0 -0
- dataface/core/compile/schema_renderers/json_schema.py +163 -0
- dataface/core/compile/schema_renderers/prompt.py +152 -0
- dataface/core/compile/schema_renderers/vscode_schema.py +301 -0
- dataface/core/compile/sizing.py +2126 -0
- dataface/core/compile/sources.py +518 -0
- dataface/core/compile/sql_authoring_lint.py +56 -0
- dataface/core/compile/style_cascade.py +471 -0
- dataface/core/compile/typography.py +299 -0
- dataface/core/compile/validator.py +301 -0
- dataface/core/compile/variables.py +53 -0
- dataface/core/compile/vega_config.py +98 -0
- dataface/core/compile/vega_lite/__init__.py +6 -0
- dataface/core/compile/vega_lite/validation.py +95 -0
- dataface/core/compile/yaml_error_formatter.py +838 -0
- dataface/core/connections.py +38 -0
- dataface/core/dashboard.py +358 -0
- dataface/core/defaults/default_config.yml +101 -0
- dataface/core/defaults/palettes/categorical/category-10-dark.yml +32 -0
- dataface/core/defaults/palettes/categorical/category-10-light.yml +43 -0
- dataface/core/defaults/palettes/categorical/category-10.yml +31 -0
- dataface/core/defaults/palettes/categorical/category-6-tonal-blue.yml +22 -0
- dataface/core/defaults/palettes/categorical/category-6-tonal-brown.yml +29 -0
- dataface/core/defaults/palettes/categorical/category-6-tonal-green.yml +20 -0
- dataface/core/defaults/palettes/categorical/category-6-tonal-orange.yml +21 -0
- dataface/core/defaults/palettes/categorical/category-6-tonal-purple.yml +20 -0
- dataface/core/defaults/palettes/categorical/editorial-10-dark.yml +32 -0
- dataface/core/defaults/palettes/categorical/editorial-10.yml +40 -0
- dataface/core/defaults/palettes/categorical/hero-6.yml +17 -0
- dataface/core/defaults/palettes/categorical/single-blue.yml +11 -0
- dataface/core/defaults/palettes/categorical/tableau.yml +20 -0
- dataface/core/defaults/palettes/data/xkcd_colors.json +3803 -0
- dataface/core/defaults/palettes/diverging/blue-red.yml +25 -0
- dataface/core/defaults/palettes/diverging/coolwarm.yml +24 -0
- dataface/core/defaults/palettes/diverging/crimson-green.yml +23 -0
- dataface/core/defaults/palettes/diverging/orange-teal.yml +23 -0
- dataface/core/defaults/palettes/diverging/sunset.yml +24 -0
- dataface/core/defaults/palettes/scaffold/dft-creams.yml +38 -0
- dataface/core/defaults/palettes/scaffold/dft-grays.yml +53 -0
- dataface/core/defaults/palettes/sequential/amber.yml +22 -0
- dataface/core/defaults/palettes/sequential/blue.yml +22 -0
- dataface/core/defaults/palettes/sequential/brown.yml +22 -0
- dataface/core/defaults/palettes/sequential/gray.yml +22 -0
- dataface/core/defaults/palettes/sequential/green.yml +22 -0
- dataface/core/defaults/palettes/sequential/purple.yml +22 -0
- dataface/core/defaults/palettes/sequential/rust.yml +22 -0
- dataface/core/defaults/palettes/sequential/teal.yml +22 -0
- dataface/core/defaults/palettes/tone/negative.yml +32 -0
- dataface/core/defaults/palettes/tone/positive.yml +22 -0
- dataface/core/defaults/palettes/tone/warning.yml +22 -0
- dataface/core/defaults/themes/_base.yaml +786 -0
- dataface/core/defaults/themes/bi.yaml +16 -0
- dataface/core/defaults/themes/carbong100.yaml +41 -0
- dataface/core/defaults/themes/cream.yaml +122 -0
- dataface/core/defaults/themes/dark.yaml +40 -0
- dataface/core/defaults/themes/diagnostics-title-angle-extreme.yaml +9 -0
- dataface/core/defaults/themes/diagnostics-title-baseline-extreme.yaml +9 -0
- dataface/core/defaults/themes/diagnostics-title-baseline.yaml +24 -0
- dataface/core/defaults/themes/diagnostics-title-center.yaml +8 -0
- dataface/core/defaults/themes/diagnostics-title-color-extreme.yaml +24 -0
- dataface/core/defaults/themes/diagnostics-title-font-extreme.yaml +25 -0
- dataface/core/defaults/themes/diagnostics-title-left.yaml +8 -0
- dataface/core/defaults/themes/diagnostics-title-offset-extreme.yaml +9 -0
- dataface/core/defaults/themes/diagnostics-title-size-extreme.yaml +24 -0
- dataface/core/defaults/themes/diagnostics-title-weight-extreme.yaml +24 -0
- dataface/core/defaults/themes/editorial.yaml +147 -0
- dataface/core/defaults/themes/light.yaml +30 -0
- dataface/core/defaults/themes/looker.yaml +17 -0
- dataface/core/defaults/themes/stark.yaml +134 -0
- dataface/core/errors/__init__.py +67 -0
- dataface/core/errors/codes_compile.py +56 -0
- dataface/core/errors/codes_execute.py +177 -0
- dataface/core/errors/codes_render.py +106 -0
- dataface/core/errors/codes_unknown.py +15 -0
- dataface/core/errors/hints.py +74 -0
- dataface/core/errors/registry.py +42 -0
- dataface/core/errors/structured.py +92 -0
- dataface/core/execute/__init__.py +91 -0
- dataface/core/execute/adapters/__init__.py +49 -0
- dataface/core/execute/adapters/adapter_registry.py +400 -0
- dataface/core/execute/adapters/base.py +245 -0
- dataface/core/execute/adapters/csv_adapter.py +239 -0
- dataface/core/execute/adapters/dbt_adapter.py +283 -0
- dataface/core/execute/adapters/dbt_adapter_factory.py +212 -0
- dataface/core/execute/adapters/dbt_macro_loader.py +95 -0
- dataface/core/execute/adapters/dbt_utils.py +150 -0
- dataface/core/execute/adapters/http_adapter.py +224 -0
- dataface/core/execute/adapters/metricflow_adapter.py +94 -0
- dataface/core/execute/adapters/schema_resolver_adapter.py +144 -0
- dataface/core/execute/adapters/sql_adapter.py +710 -0
- dataface/core/execute/adapters/values_adapter.py +58 -0
- dataface/core/execute/batch.py +744 -0
- dataface/core/execute/cache_backend.py +135 -0
- dataface/core/execute/cache_keys.py +66 -0
- dataface/core/execute/dbt_jinja.py +21 -0
- dataface/core/execute/dialects/__init__.py +121 -0
- dataface/core/execute/dialects/athena.py +75 -0
- dataface/core/execute/dialects/base.py +302 -0
- dataface/core/execute/dialects/bigquery.py +38 -0
- dataface/core/execute/dialects/databricks.py +68 -0
- dataface/core/execute/dialects/duckdb.py +35 -0
- dataface/core/execute/dialects/mysql.py +68 -0
- dataface/core/execute/dialects/postgres.py +39 -0
- dataface/core/execute/dialects/redshift.py +12 -0
- dataface/core/execute/dialects/snowflake.py +51 -0
- dataface/core/execute/dialects/sqlserver.py +92 -0
- dataface/core/execute/duckdb_cache.py +712 -0
- dataface/core/execute/duckdb_config.py +26 -0
- dataface/core/execute/errors.py +213 -0
- dataface/core/execute/executor.py +1249 -0
- dataface/core/execute/parallel.py +162 -0
- dataface/core/execute/setup_sql.py +58 -0
- dataface/core/execute/source_registry.py +72 -0
- dataface/core/execute/source_resolver.py +255 -0
- dataface/core/execute/sql_guard.py +387 -0
- dataface/core/execute/sql_literals.py +199 -0
- dataface/core/fonts.py +52 -0
- dataface/core/inspect/__init__.py +32 -0
- dataface/core/inspect/cache_factory.py +98 -0
- dataface/core/inspect/db_types.py +162 -0
- dataface/core/inspect/dbt_schema.py +96 -0
- dataface/core/inspect/defaults.yml +37 -0
- dataface/core/inspect/fanout_risk.py +109 -0
- dataface/core/inspect/manifest_utils.py +77 -0
- dataface/core/inspect/partials/categorical.yml +40 -0
- dataface/core/inspect/partials/date.yml +40 -0
- dataface/core/inspect/partials/numeric.yml +55 -0
- dataface/core/inspect/partition_types.py +38 -0
- dataface/core/inspect/query_validator.py +975 -0
- dataface/core/inspect/renderer.py +354 -0
- dataface/core/inspect/resolver.py +808 -0
- dataface/core/inspect/search.py +461 -0
- dataface/core/inspect/sources/__init__.py +32 -0
- dataface/core/inspect/sources/dbt.py +738 -0
- dataface/core/inspect/sources/duckdb_utils.py +66 -0
- dataface/core/inspect/templates/__init__.py +1 -0
- dataface/core/inspect/templates/categorical_column.yml +196 -0
- dataface/core/inspect/templates/charts.yml +109 -0
- dataface/core/inspect/templates/date_column.yml +248 -0
- dataface/core/inspect/templates/model.yml +138 -0
- dataface/core/inspect/templates/numeric_column.yml +261 -0
- dataface/core/inspect/templates/quality.yml +80 -0
- dataface/core/inspect/templates/string_column.yml +263 -0
- dataface/core/project_roots.py +165 -0
- dataface/core/render/__init__.py +87 -0
- dataface/core/render/board_links.py +176 -0
- dataface/core/render/chart/__init__.py +27 -0
- dataface/core/render/chart/arc_attached_table.py +251 -0
- dataface/core/render/chart/artifacts.py +16 -0
- dataface/core/render/chart/callout.py +225 -0
- dataface/core/render/chart/decisions.py +358 -0
- dataface/core/render/chart/geo.py +700 -0
- dataface/core/render/chart/kpi.py +916 -0
- dataface/core/render/chart/labels.py +76 -0
- dataface/core/render/chart/pipeline.py +818 -0
- dataface/core/render/chart/presentation.py +36 -0
- dataface/core/render/chart/profile.py +3438 -0
- dataface/core/render/chart/render_single.py +347 -0
- dataface/core/render/chart/renderers.py +193 -0
- dataface/core/render/chart/rendering.py +565 -0
- dataface/core/render/chart/serialization.py +90 -0
- dataface/core/render/chart/spark.py +496 -0
- dataface/core/render/chart/spark_bar.py +370 -0
- dataface/core/render/chart/spec_builders.py +154 -0
- dataface/core/render/chart/standard_renderer.py +2645 -0
- dataface/core/render/chart/table.py +2957 -0
- dataface/core/render/chart/table_support.py +1452 -0
- dataface/core/render/chart/tick_values.py +66 -0
- dataface/core/render/chart/time_unit_detect.py +809 -0
- dataface/core/render/chart/title_overflow.py +157 -0
- dataface/core/render/chart/type_inference.py +122 -0
- dataface/core/render/chart/validation.py +99 -0
- dataface/core/render/chart/vega_lite.py +125 -0
- dataface/core/render/chart/vega_lite_types.py +268 -0
- dataface/core/render/chart/vl_field_maps.py +346 -0
- dataface/core/render/chart_interactivity.py +24 -0
- dataface/core/render/control_registry.py +287 -0
- dataface/core/render/converters/__init__.py +24 -0
- dataface/core/render/converters/chart.py +276 -0
- dataface/core/render/converters/html.py +98 -0
- dataface/core/render/converters/pdf.py +40 -0
- dataface/core/render/converters/png.py +41 -0
- dataface/core/render/errors.py +144 -0
- dataface/core/render/face_api.py +160 -0
- dataface/core/render/faces.py +1194 -0
- dataface/core/render/font_measurement.py +48 -0
- dataface/core/render/font_support.py +197 -0
- dataface/core/render/fonts/DFTSansTabular-Regular.ttf +0 -0
- dataface/core/render/fonts/DFTSansTabular-Regular.woff2 +0 -0
- dataface/core/render/fonts/DFTSerifOldstyleProportional-Regular.ttf +0 -0
- dataface/core/render/fonts/DFTSerifOldstyleTabular-Regular.ttf +0 -0
- dataface/core/render/fonts/InterVariable.ttf +0 -0
- dataface/core/render/fonts/InterVariable.woff2 +0 -0
- dataface/core/render/fonts/NOTO_COLOR_EMOJI_LICENSE.txt +93 -0
- dataface/core/render/fonts/NOTO_EMOJI_LICENSE.txt +93 -0
- dataface/core/render/fonts/NotoColorEmoji-Regular.ttf +0 -0
- dataface/core/render/fonts/NotoColorEmoji-Regular.woff2 +0 -0
- dataface/core/render/fonts/NotoEmoji-Regular.ttf +0 -0
- dataface/core/render/fonts/NotoEmoji-Regular.woff2 +0 -0
- dataface/core/render/fonts/SOURCE_CODE_PRO_LICENSE.txt +93 -0
- dataface/core/render/fonts/SOURCE_SERIF_4_LICENSE.txt +98 -0
- dataface/core/render/fonts/SourceCodePro-Regular.ttf +0 -0
- dataface/core/render/fonts/SourceSerif4-Regular.ttf +0 -0
- dataface/core/render/fonts/_emoji_font_face.css +43 -0
- dataface/core/render/fonts/source-serif-4-variable-latin.woff2 +0 -0
- dataface/core/render/format_utils.py +329 -0
- dataface/core/render/geo_defaults.yml +28 -0
- dataface/core/render/json_format.py +146 -0
- dataface/core/render/layout_sizing.py +865 -0
- dataface/core/render/layouts.py +541 -0
- dataface/core/render/markdown_defaults.yml +16 -0
- dataface/core/render/missing_vars_prompt.py +79 -0
- dataface/core/render/placeholder.py +389 -0
- dataface/core/render/render_result.py +14 -0
- dataface/core/render/renderer.py +467 -0
- dataface/core/render/script_embedding.py +16 -0
- dataface/core/render/svg_utils.py +212 -0
- dataface/core/render/template_loader.py +69 -0
- dataface/core/render/templates/controls/_styles.css +606 -0
- dataface/core/render/templates/controls/checkbox.html +16 -0
- dataface/core/render/templates/controls/date.html +16 -0
- dataface/core/render/templates/controls/number.html +19 -0
- dataface/core/render/templates/controls/readonly.html +9 -0
- dataface/core/render/templates/controls/select.html +21 -0
- dataface/core/render/templates/controls/slider.html +22 -0
- dataface/core/render/templates/controls/text.html +16 -0
- dataface/core/render/templates/scripts/chart_interactivity.js +191 -0
- dataface/core/render/templates/scripts/variables.js +976 -0
- dataface/core/render/templates/svg/grid_pattern.svg +3 -0
- dataface/core/render/templates/svg/styles.css +51 -0
- dataface/core/render/terminal.py +311 -0
- dataface/core/render/terminal_charts.py +563 -0
- dataface/core/render/terminal_defaults.yml +2 -0
- dataface/core/render/terminal_layouts.py +299 -0
- dataface/core/render/terminal_text.py +31 -0
- dataface/core/render/text/__init__.py +1 -0
- dataface/core/render/text/case.py +113 -0
- dataface/core/render/text_format.py +129 -0
- dataface/core/render/utils.py +106 -0
- dataface/core/render/variable_controls.py +946 -0
- dataface/core/render/variable_input_refinement.py +140 -0
- dataface/core/render/warnings/__init__.py +15 -0
- dataface/core/render/warnings/bar_color_1_to_1_with_x.py +80 -0
- dataface/core/render/warnings/base.py +44 -0
- dataface/core/render/warnings/fanout_risk.py +15 -0
- dataface/core/render/warnings/from_query_diagnostic.py +56 -0
- dataface/core/render/warnings/missing_join_predicate.py +13 -0
- dataface/core/render/warnings/query_parse_error.py +14 -0
- dataface/core/render/warnings/query_returned_zero_rows.py +42 -0
- dataface/core/render/warnings/reaggregation.py +14 -0
- dataface/core/render/warnings/registry.py +45 -0
- dataface/core/render/warnings/suppression.py +46 -0
- dataface/core/render/warnings/temporal_single_point.py +63 -0
- dataface/core/render/warnings/unreferenced_chart.py +15 -0
- dataface/core/render/warnings/y_encoding_mostly_null.py +76 -0
- dataface/core/render/yaml_format.py +167 -0
- dataface/core/resolve_face.py +195 -0
- dataface/core/schema/__init__.py +0 -0
- dataface/core/schema/guidance.py +151 -0
- dataface/core/scoped_paths.py +59 -0
- dataface/core/serve/__init__.py +14 -0
- dataface/core/serve/bootstrap.py +39 -0
- dataface/core/serve/embedded.py +57 -0
- dataface/core/serve/port.py +129 -0
- dataface/core/serve/server.py +938 -0
- dataface/core/serve/templates/__init__.py +0 -0
- dataface/core/serve/templates/directory.yml +6 -0
- dataface/core/serve/templates/error.html.j2 +217 -0
- dataface/core/utils.py +121 -0
- dataface/core/validate.py +64 -0
- dataface/integrations/__init__.py +0 -0
- dataface/integrations/highlighting.py +351 -0
- dataface/integrations/markdown.py +537 -0
- dataface/py.typed +0 -0
- dataface-0.1.2.dist-info/METADATA +375 -0
- dataface-0.1.2.dist-info/RECORD +455 -0
- dataface-0.1.2.dist-info/WHEEL +4 -0
- dataface-0.1.2.dist-info/entry_points.txt +2 -0
- dataface-0.1.2.dist-info/licenses/LICENSE +202 -0
- mdsvg/__init__.py +168 -0
- mdsvg/fonts.py +656 -0
- mdsvg/images.py +299 -0
- mdsvg/parser.py +629 -0
- mdsvg/playground.py +284 -0
- mdsvg/py.typed +2 -0
- mdsvg/renderer.py +1623 -0
- mdsvg/style.py +355 -0
- mdsvg/types.py +200 -0
- mdsvg/utils.py +86 -0
|
@@ -0,0 +1,808 @@
|
|
|
1
|
+
"""Layered schema resolver — composes SuperSchemaSource + DbtSchemaSource.
|
|
2
|
+
|
|
3
|
+
The resolver is the one entry point for the ``schema`` verb. Per-target
|
|
4
|
+
it picks between cache (warm) and dbt-source (cold), expands wildcards,
|
|
5
|
+
assembles the hierarchical named-dict tree, and stamps the response
|
|
6
|
+
with a single ``_meta`` footer.
|
|
7
|
+
|
|
8
|
+
The cascade is **per target**, not global. For every individual table /
|
|
9
|
+
column the resolver consults:
|
|
10
|
+
|
|
11
|
+
1. ``SuperSchemaSource.profile_table(...)`` — if the cache has it,
|
|
12
|
+
use the rich profile (stats, distributions, semantic types).
|
|
13
|
+
2. ``DbtSchemaSource.profile_table(...)`` — fall through when the
|
|
14
|
+
cache misses, returning the honest cold-start view (column types
|
|
15
|
+
from the adapter; descriptions / declared types / tests when the
|
|
16
|
+
manifest contributed).
|
|
17
|
+
|
|
18
|
+
This means a partially-populated cache still gets dbt's bare schema for
|
|
19
|
+
the unprofiled tables instead of silently dropping them — the level-3 /
|
|
20
|
+
level-4 asymmetry the initiative was filed to fix.
|
|
21
|
+
|
|
22
|
+
``_meta.sources_consulted`` reports exactly which layers actually
|
|
23
|
+
contributed to *this* response: ``["super_schema"]`` if everything came
|
|
24
|
+
from the cache, ``["dbt_adapter"]`` if everything came from dbt without
|
|
25
|
+
a manifest, ``["dbt_adapter", "dbt_manifest"]`` if the manifest also
|
|
26
|
+
contributed, and the union when a response mixed cache + dbt for
|
|
27
|
+
different tables. ``cache_built_at`` is set whenever ``super_schema`` is
|
|
28
|
+
in the list — anchored to the cache file's ``generated_at``.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
from __future__ import annotations
|
|
32
|
+
|
|
33
|
+
import fnmatch
|
|
34
|
+
from collections.abc import Callable
|
|
35
|
+
from datetime import datetime, timezone
|
|
36
|
+
from pathlib import Path
|
|
37
|
+
from typing import TYPE_CHECKING, Any
|
|
38
|
+
|
|
39
|
+
from dataface.core.compile.errors import DatafaceError
|
|
40
|
+
from dataface.core.errors.codes_execute import (
|
|
41
|
+
DF_EXECUTE_SOURCE_NOT_FOUND,
|
|
42
|
+
DF_EXECUTE_SOURCE_NOT_FOUND_EMPTY,
|
|
43
|
+
)
|
|
44
|
+
from dataface.core.execute.adapters.dbt_adapter_factory import build_adapter
|
|
45
|
+
from dataface.core.execute.adapters.dbt_utils import load_dbt_manifest
|
|
46
|
+
from dataface.core.inspect.sources.dbt import DbtSchemaSource, extract_all_relationships
|
|
47
|
+
|
|
48
|
+
if TYPE_CHECKING:
|
|
49
|
+
# SuperSchemaSource lives in the private dataface-super-schema package.
|
|
50
|
+
# Import only for type-checking; at runtime it is injected by the caller.
|
|
51
|
+
from dataface.core.execute.adapters import AdapterRegistry
|
|
52
|
+
from dataface_super_schema.inspect.sources.super_schema import SuperSchemaSource
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
AdapterFactory = Callable[[dict[str, Any]], Any]
|
|
56
|
+
SourceLayer = str # "super_schema" / "dbt_adapter" / "dbt_manifest"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _schema_adapter_factory(cfg: dict[str, Any]) -> Any:
    """Default adapter factory used by the schema resolver.

    Forwards ``cfg`` to ``build_adapter`` with ``read_only=True`` so that
    file-backed DuckDB is opened read-only on schema cold paths.
    """
    read_only_adapter = build_adapter(cfg, read_only=True)
    return read_only_adapter
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _now_utc() -> datetime:
|
|
65
|
+
return datetime.now(timezone.utc)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _expand_targets(spec: str | None, candidates: list[str]) -> list[str]:
|
|
69
|
+
"""Expand a wildcard / comma-list / glob spec against a list of candidates."""
|
|
70
|
+
if spec is None or spec == "*":
|
|
71
|
+
return list(candidates)
|
|
72
|
+
if "," in spec:
|
|
73
|
+
wanted = [s.strip() for s in spec.split(",") if s.strip()]
|
|
74
|
+
cand_set = set(candidates)
|
|
75
|
+
return [w for w in wanted if w in cand_set]
|
|
76
|
+
if any(ch in spec for ch in "*?["):
|
|
77
|
+
return [c for c in candidates if fnmatch.fnmatch(c, spec)]
|
|
78
|
+
return [spec] if spec in candidates else []
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def is_exact_target(spec: str | None) -> bool:
    """Tell whether ``spec`` names exactly one target.

    A spec is exact when it is not ``None``, contains no comma (so it is not
    a list) and contains none of the glob characters ``*``, ``?`` or ``[``.
    """
    if spec is None:
        return False
    if "," in spec:
        return False
    return all(marker not in spec for marker in "*?[")
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _table_universe(
|
|
86
|
+
cache_tables: dict[str, dict[str, Any]],
|
|
87
|
+
dbt_tables: dict[str, dict[str, Any]],
|
|
88
|
+
) -> list[str]:
|
|
89
|
+
"""Return cache-first table names, deduped across cache and dbt."""
|
|
90
|
+
seen: set[str] = set()
|
|
91
|
+
universe: list[str] = []
|
|
92
|
+
for name in cache_tables:
|
|
93
|
+
if name not in seen:
|
|
94
|
+
seen.add(name)
|
|
95
|
+
universe.append(name)
|
|
96
|
+
for name in dbt_tables:
|
|
97
|
+
if name not in seen:
|
|
98
|
+
seen.add(name)
|
|
99
|
+
universe.append(name)
|
|
100
|
+
return universe
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class LayeredSchemaResolver:
|
|
104
|
+
"""Composes an optional SuperSchemaSource (cache) + DbtSchemaSource (live).
|
|
105
|
+
|
|
106
|
+
``cache`` is optional (default ``None``). When absent the resolver is
|
|
107
|
+
dbt-only: all schema data comes from the dbt adapter + manifest with no
|
|
108
|
+
warm-cache enrichment. OSS installs without ``dataface-super-schema``
|
|
109
|
+
pass no cache; Cloud/IDE installs pass a ``SuperSchemaSource`` instance.
|
|
110
|
+
"""
|
|
111
|
+
|
|
112
|
+
def __init__(
|
|
113
|
+
self,
|
|
114
|
+
*,
|
|
115
|
+
cache: SuperSchemaSource | None = None,
|
|
116
|
+
adapter_registry: AdapterRegistry,
|
|
117
|
+
project_root: Path,
|
|
118
|
+
adapter_factory: AdapterFactory | None = None,
|
|
119
|
+
) -> None:
|
|
120
|
+
self.cache = cache
|
|
121
|
+
self.adapter_registry = adapter_registry
|
|
122
|
+
self.project_root = Path(project_root)
|
|
123
|
+
self._adapter_factory = adapter_factory or _schema_adapter_factory
|
|
124
|
+
self._dbt_sources: dict[str, DbtSchemaSource] = {}
|
|
125
|
+
self._source_lookup_cache: dict[str, dict[str, Any]] | None = None
|
|
126
|
+
# Cached manifest-relationship walk. Loaded directly from the
|
|
127
|
+
# ``target/manifest.json`` file so cache-hit short-circuits don't
|
|
128
|
+
# have to build a dbt adapter to surface ``referenced_by`` /
|
|
129
|
+
# ``linked_via``. ``None`` means "haven't checked yet"; an empty
|
|
130
|
+
# list means "checked, no manifest" (tests reuse the cache).
|
|
131
|
+
self._manifest_relationships: list[dict[str, str]] | None = None
|
|
132
|
+
|
|
133
|
+
# ---- Public entries -----------------------------------------------------
|
|
134
|
+
|
|
135
|
+
def list_schemas(self, source: str) -> dict[str, Any]:
    """Level 2: enumerate the schemas of one source.

    The cache, when present and able to answer, supplies per-schema summary
    entries; dbt supplies the schema names. Cache entries are merged on top
    of the dbt enumeration, and the result is wrapped by
    ``_build_envelope`` together with the layers that contributed.

    When the cache contributed, dbt is obtained through ``_try_dbt_for``
    (whose result may be ``None``); otherwise ``_dbt_for`` is used. Per the
    original contract, an unconfigured source name surfaces as
    ``DatafaceError`` (DF-EXECUTE-SOURCE-NOT-FOUND or
    DF-EXECUTE-SOURCE-NOT-FOUND-EMPTY) so the verb wrapper can report
    ``success=False``.
    """
    entry_for_source = self._source_entry(source)
    layers: set[SourceLayer] = set()
    built_at = None

    # Cache layer (optional).
    cached = None if self.cache is None else self.cache.list_schemas()
    if cached is not None:
        layers.add("super_schema")
        built_at = self.cache.generated_at  # type: ignore[union-attr]

    # Live dbt layer: lenient lookup when the cache already answered,
    # strict lookup when dbt is the only contributor.
    if cached is not None:
        dbt_source = self._try_dbt_for(source)
    else:
        dbt_source = self._dbt_for(source)
    live = None if dbt_source is None else dbt_source.list_schemas()

    # dbt names go in first so they fix the ordering; cache rows then
    # enrich existing entries or append schemas dbt did not list.
    merged: dict[str, dict[str, Any]] = {}
    if live is not None:
        layers.add("dbt_adapter")
        for schema_name in live["schemas"]:
            merged.setdefault(schema_name, {})
    if cached is not None:
        for schema_name, summary in cached["schemas"].items():
            merged.setdefault(schema_name, {}).update(summary)

    payload = {source: dict(entry_for_source)}
    if merged:
        payload[source]["schemas"] = merged
    return _build_envelope(
        payload,
        sources_consulted=_ordered(layers),
        cache_built_at=built_at if "super_schema" in layers else None,
    )
|
|
179
|
+
|
|
180
|
+
def list_tables(self, source: str, schema: str) -> dict[str, Any]:
    """Level 3: lean table summaries for ``schema`` in ``source``.

    Delegates to ``_tabled_walk`` with no table or column spec and with
    the cache enabled. Each target is tried in the cache first and falls
    through to dbt for anything the cache doesn't know about; the response
    shape is the same whichever layers contributed.
    """
    walk_kwargs: dict[str, Any] = {
        "source": source,
        "schema_spec": schema,
        "table_spec": None,
        "column_spec": None,
        "fresh": False,
    }
    return self._tabled_walk(**walk_kwargs)
|
|
194
|
+
|
|
195
|
+
def profile_table(
    self,
    source: str,
    schema: str,
    table: str,
    *,
    fresh: bool = False,
    lineage_depth: int = 1,
) -> dict[str, Any]:
    """Level 4: full profile of ``table``.

    Delegates to ``_tabled_walk``. Per target the resolver tries the cache
    first and falls through to dbt; ``fresh=True`` skips the cache
    entirely. ``lineage_depth`` is forwarded unchanged.
    """
    walk_kwargs: dict[str, Any] = {
        "source": source,
        "schema_spec": schema,
        "table_spec": table,
        "column_spec": None,
        "fresh": fresh,
        "lineage_depth": lineage_depth,
    }
    return self._tabled_walk(**walk_kwargs)
|
|
217
|
+
|
|
218
|
+
def profile_column(
    self,
    source: str,
    schema: str,
    table: str,
    column: str,
) -> dict[str, Any]:
    """Profile ``table`` restricted to the columns selected by ``column``.

    Delegates to ``_tabled_walk`` with ``column`` as the column spec and
    the cache enabled (``fresh=False``); ``lineage_depth`` is left at the
    walker's default.
    """
    walk_kwargs: dict[str, Any] = {
        "source": source,
        "schema_spec": schema,
        "table_spec": table,
        "column_spec": column,
        "fresh": False,
    }
    return self._tabled_walk(**walk_kwargs)
|
|
232
|
+
|
|
233
|
+
# ---- DbtSchemaSource factory (D5) ---------------------------------------
|
|
234
|
+
|
|
235
|
+
def _dbt_for(self, source_name: str) -> DbtSchemaSource:
|
|
236
|
+
"""Build (and cache) the dbt source for ``source_name``.
|
|
237
|
+
|
|
238
|
+
Raises ``DatafaceError`` (DF-EXECUTE-SOURCE-NOT-FOUND or
|
|
239
|
+
DF-EXECUTE-SOURCE-NOT-FOUND-EMPTY) if the registry can't resolve
|
|
240
|
+
the source config, or whatever ``build_adapter`` raises (typically
|
|
241
|
+
``ImportError`` for a missing dbt-<warehouse> package). Callers
|
|
242
|
+
that need cache-only degradation should call ``_try_dbt_for`` and
|
|
243
|
+
check for ``None``.
|
|
244
|
+
"""
|
|
245
|
+
if source_name not in self._dbt_sources:
|
|
246
|
+
cfg = self.adapter_registry.resolve_source_config(source_name)
|
|
247
|
+
adapter = self._adapter_factory(cfg)
|
|
248
|
+
# Pass db_path so DbtSchemaSource can resolve the correct DuckDB
|
|
249
|
+
# attach name via PRAGMA database_list (needed for file-backed DuckDB
|
|
250
|
+
# where list_relations(None, schema) returns an empty list).
|
|
251
|
+
db_path = (
|
|
252
|
+
cfg.get("path") if cfg.get("type", "").lower() == "duckdb" else None
|
|
253
|
+
)
|
|
254
|
+
self._dbt_sources[source_name] = DbtSchemaSource(
|
|
255
|
+
adapter=adapter, project_root=self.project_root, db_path=db_path
|
|
256
|
+
)
|
|
257
|
+
return self._dbt_sources[source_name]
|
|
258
|
+
|
|
259
|
+
def _try_dbt_for(self, source_name: str) -> DbtSchemaSource | None:
|
|
260
|
+
"""Like ``_dbt_for`` but returns ``None`` on the build-time
|
|
261
|
+
failure we can recover from: ``ImportError`` when the
|
|
262
|
+
dbt-<dialect> package isn't installed. Runtime warehouse-connection
|
|
263
|
+
errors from ``adapter.list_*`` are *not* caught here — they propagate
|
|
264
|
+
and the verb wrapper turns them into ``success=False`` envelopes.
|
|
265
|
+
"""
|
|
266
|
+
try:
|
|
267
|
+
return self._dbt_for(source_name)
|
|
268
|
+
except ImportError:
|
|
269
|
+
return None
|
|
270
|
+
|
|
271
|
+
def _all_manifest_relationships(self) -> list[dict[str, str]]:
|
|
272
|
+
"""Every forward FK declared in this project's dbt manifest.
|
|
273
|
+
|
|
274
|
+
Loaded once per resolver instance, directly from
|
|
275
|
+
``target/manifest.json`` — no dbt adapter required. This is what
|
|
276
|
+
lets the cache-hit short-circuit attach ``referenced_by`` /
|
|
277
|
+
``linked_via`` without paying for an adapter build.
|
|
278
|
+
"""
|
|
279
|
+
if self._manifest_relationships is None:
|
|
280
|
+
self._manifest_relationships = extract_all_relationships(
|
|
281
|
+
load_dbt_manifest(self.project_root)
|
|
282
|
+
)
|
|
283
|
+
return self._manifest_relationships
|
|
284
|
+
|
|
285
|
+
# ---- Walkers ------------------------------------------------------------
|
|
286
|
+
|
|
287
|
+
def _tabled_walk(
    self,
    source: str,
    schema_spec: str,
    table_spec: str | None,
    column_spec: str | None,
    fresh: bool,
    lineage_depth: int = 1,
) -> dict[str, Any]:
    """Walk schemas → tables → optional columns, per-target cache-then-dbt.

    Fast path: when the cache may be used and both the schema and table
    specs name one exact target, a cache hit is answered directly, with
    manifest-derived cross-table fields attached and no adapter build.
    If the cache misses, or the column filter leaves nothing, control
    falls through to the general walk below.
    """
    entry_for_source = self._source_entry(source)

    exact_cached_lookup = (
        self.cache is not None
        and not fresh
        and table_spec is not None
        and is_exact_target(schema_spec)
        and is_exact_target(table_spec)
    )
    if exact_cached_lookup:
        hit = self.cache.profile_table(schema=schema_spec, table=table_spec)
        if hit is not None:
            for lineage_key in ("upstream", "downstream"):
                hit.setdefault(lineage_key, [])
            narrowed: dict[str, Any] | None = hit
            if column_spec is not None:
                narrowed = _filter_to_columns(hit, column_spec)
            if narrowed is not None:
                fast_layers: set[SourceLayer] = {"super_schema"}
                relationships = self._all_manifest_relationships()
                if _attach_cross_table(narrowed, table_spec, relationships):
                    fast_layers.add("dbt_manifest")
                fast_payload = {source: dict(entry_for_source)}
                fast_payload[source]["schemas"] = {
                    schema_spec: {"tables": {table_spec: narrowed}}
                }
                return _build_envelope(
                    fast_payload,
                    sources_consulted=_ordered(fast_layers),
                    cache_built_at=self.cache.generated_at,
                )

    # General walk: expand the schema spec against the cache + dbt universe.
    candidate_schemas = self._enumerate_schemas(source, fresh)
    layers: set[SourceLayer] = set()
    resolved: dict[str, Any] = {}
    for schema_name in _expand_targets(schema_spec, candidate_schemas):
        entry, entry_layers = self._walk_schema(
            source=source,
            schema=schema_name,
            table_spec=table_spec,
            column_spec=column_spec,
            fresh=fresh,
            lineage_depth=lineage_depth,
        )
        layers.update(entry_layers)
        if entry:
            resolved[schema_name] = entry

    built_at = None
    if "super_schema" in layers:
        built_at = self.cache.generated_at  # type: ignore[union-attr]

    payload = {source: dict(entry_for_source)}
    if resolved:
        payload[source]["schemas"] = resolved
    return _build_envelope(
        payload,
        sources_consulted=_ordered(layers),
        cache_built_at=built_at,
    )
|
|
357
|
+
|
|
358
|
+
def _enumerate_schemas(self, source: str, fresh: bool) -> list[str]:
|
|
359
|
+
"""Build the universe of candidate schema names for the walk.
|
|
360
|
+
|
|
361
|
+
Cache + dbt union — either alone might be incomplete (cache is a
|
|
362
|
+
snapshot; dbt is the live warehouse). When the cache has data we
|
|
363
|
+
treat dbt as best-effort for adapter build errors. When the cache
|
|
364
|
+
is empty, we require dbt and let registry / build errors propagate.
|
|
365
|
+
Schema enumeration only builds candidates; ``sources_consulted`` is
|
|
366
|
+
stamped by the layer that contributes returned table / column leaves.
|
|
367
|
+
"""
|
|
368
|
+
names: list[str] = []
|
|
369
|
+
seen: set[str] = set()
|
|
370
|
+
|
|
371
|
+
cache_contrib = (
|
|
372
|
+
None if (fresh or self.cache is None) else self.cache.list_schemas()
|
|
373
|
+
)
|
|
374
|
+
if cache_contrib is not None:
|
|
375
|
+
for n in cache_contrib["schemas"]:
|
|
376
|
+
if n not in seen:
|
|
377
|
+
seen.add(n)
|
|
378
|
+
names.append(n)
|
|
379
|
+
|
|
380
|
+
dbt = (
|
|
381
|
+
self._try_dbt_for(source)
|
|
382
|
+
if cache_contrib is not None
|
|
383
|
+
else self._dbt_for(source)
|
|
384
|
+
)
|
|
385
|
+
dbt_contrib = dbt.list_schemas() if dbt is not None else None
|
|
386
|
+
if dbt_contrib is not None:
|
|
387
|
+
for n in dbt_contrib["schemas"]:
|
|
388
|
+
if n not in seen:
|
|
389
|
+
seen.add(n)
|
|
390
|
+
names.append(n)
|
|
391
|
+
return names
|
|
392
|
+
|
|
393
|
+
def _walk_schema(
    self,
    source: str,
    schema: str,
    table_spec: str | None,
    column_spec: str | None,
    fresh: bool,
    lineage_depth: int = 1,
) -> tuple[dict[str, Any], set[SourceLayer]]:
    """Resolve tables for one schema. Returns (schema_entry, layers_used).

    Level 3 reads each layer's table list once, then dispatches lean
    summaries from those dicts. Level 4 still profiles each matched table
    because dbt has to return per-relation columns. ``schema_entry`` is
    empty when no table resolved.
    """
    layer_data = self._schema_table_layers(
        source=source, schema=schema, fresh=fresh
    )
    cached_tables, adapter_tables, adapter_source, manifest_available = layer_data

    tables, layers = self._dispatch_schema_targets(
        schema=schema,
        table_spec=table_spec,
        column_spec=column_spec,
        fresh=fresh,
        universe=_table_universe(cached_tables, adapter_tables),
        cache_tables=cached_tables,
        dbt_tables=adapter_tables,
        dbt=adapter_source,
        dbt_has_manifest=manifest_available,
        lineage_depth=lineage_depth,
    )

    entry: dict[str, Any] = {"tables": tables} if tables else {}
    return entry, layers
|
|
429
|
+
|
|
430
|
+
def _schema_table_layers(self, source: str, schema: str, fresh: bool) -> tuple[
|
|
431
|
+
dict[str, dict[str, Any]],
|
|
432
|
+
dict[str, dict[str, Any]],
|
|
433
|
+
DbtSchemaSource | None,
|
|
434
|
+
bool,
|
|
435
|
+
]:
|
|
436
|
+
cache_list = None
|
|
437
|
+
cache_tables_dict: dict[str, dict[str, Any]] = {}
|
|
438
|
+
if not fresh and self.cache is not None:
|
|
439
|
+
cache_list = self.cache.list_tables(schema=schema)
|
|
440
|
+
if cache_list is not None:
|
|
441
|
+
cache_tables_dict = dict(cache_list["tables"])
|
|
442
|
+
dbt = (
|
|
443
|
+
self._try_dbt_for(source)
|
|
444
|
+
if cache_list is not None
|
|
445
|
+
else self._dbt_for(source)
|
|
446
|
+
)
|
|
447
|
+
dbt_tables_dict: dict[str, dict[str, Any]] = {}
|
|
448
|
+
dbt_has_manifest = False
|
|
449
|
+
if dbt is not None:
|
|
450
|
+
dbt_list = dbt.list_tables(schema=schema)
|
|
451
|
+
if dbt_list is not None:
|
|
452
|
+
dbt_tables_dict = dict(dbt_list["tables"])
|
|
453
|
+
dbt_has_manifest = dbt.has_manifest
|
|
454
|
+
|
|
455
|
+
return cache_tables_dict, dbt_tables_dict, dbt, dbt_has_manifest
|
|
456
|
+
|
|
457
|
+
def _dispatch_schema_targets(
|
|
458
|
+
self,
|
|
459
|
+
schema: str,
|
|
460
|
+
table_spec: str | None,
|
|
461
|
+
column_spec: str | None,
|
|
462
|
+
fresh: bool,
|
|
463
|
+
universe: list[str],
|
|
464
|
+
cache_tables: dict[str, dict[str, Any]],
|
|
465
|
+
dbt_tables: dict[str, dict[str, Any]],
|
|
466
|
+
dbt: DbtSchemaSource | None,
|
|
467
|
+
dbt_has_manifest: bool,
|
|
468
|
+
lineage_depth: int = 1,
|
|
469
|
+
) -> tuple[dict[str, Any], set[SourceLayer]]:
|
|
470
|
+
used: set[SourceLayer] = set()
|
|
471
|
+
tables_dict: dict[str, Any] = {}
|
|
472
|
+
if table_spec is None:
|
|
473
|
+
for tname in universe:
|
|
474
|
+
summary, layers = self._lean_summary(
|
|
475
|
+
table=tname,
|
|
476
|
+
cache_tables=cache_tables,
|
|
477
|
+
dbt_tables=dbt_tables,
|
|
478
|
+
dbt_has_manifest=dbt_has_manifest,
|
|
479
|
+
)
|
|
480
|
+
if summary is None:
|
|
481
|
+
continue
|
|
482
|
+
used.update(layers)
|
|
483
|
+
tables_dict[tname] = summary
|
|
484
|
+
else:
|
|
485
|
+
for tname in _expand_targets(table_spec, universe):
|
|
486
|
+
profile, layers = self._full_profile(
|
|
487
|
+
schema=schema,
|
|
488
|
+
table=tname,
|
|
489
|
+
fresh=fresh,
|
|
490
|
+
dbt=dbt,
|
|
491
|
+
dbt_has_manifest=dbt_has_manifest,
|
|
492
|
+
lineage_depth=lineage_depth,
|
|
493
|
+
)
|
|
494
|
+
if profile is None:
|
|
495
|
+
continue
|
|
496
|
+
if column_spec is not None:
|
|
497
|
+
profile = _filter_to_columns(profile, column_spec)
|
|
498
|
+
if profile is None and layers == {"super_schema"}:
|
|
499
|
+
profile, layers = self._full_profile(
|
|
500
|
+
schema=schema,
|
|
501
|
+
table=tname,
|
|
502
|
+
fresh=True,
|
|
503
|
+
dbt=dbt,
|
|
504
|
+
dbt_has_manifest=dbt_has_manifest,
|
|
505
|
+
lineage_depth=lineage_depth,
|
|
506
|
+
)
|
|
507
|
+
if profile is not None:
|
|
508
|
+
profile = _filter_to_columns(profile, column_spec)
|
|
509
|
+
if profile is None:
|
|
510
|
+
continue
|
|
511
|
+
used.update(layers)
|
|
512
|
+
tables_dict[tname] = profile
|
|
513
|
+
return tables_dict, used
|
|
514
|
+
|
|
515
|
+
@staticmethod
|
|
516
|
+
def _lean_summary(
|
|
517
|
+
table: str,
|
|
518
|
+
cache_tables: dict[str, dict[str, Any]],
|
|
519
|
+
dbt_tables: dict[str, dict[str, Any]],
|
|
520
|
+
dbt_has_manifest: bool,
|
|
521
|
+
) -> tuple[dict[str, Any] | None, set[SourceLayer]]:
|
|
522
|
+
"""Pick a level-3 summary from the per-schema lookups, cache first.
|
|
523
|
+
|
|
524
|
+
Returns ``(None, set())`` when no layer has the table (cross-cutting
|
|
525
|
+
non-match — caller drops the row from the response).
|
|
526
|
+
"""
|
|
527
|
+
cached = cache_tables.get(table)
|
|
528
|
+
if cached is not None:
|
|
529
|
+
return cached, {"super_schema"}
|
|
530
|
+
summary = dbt_tables.get(table)
|
|
531
|
+
if summary is None:
|
|
532
|
+
return None, set()
|
|
533
|
+
layers: set[SourceLayer] = {"dbt_adapter"}
|
|
534
|
+
if dbt_has_manifest and _summary_has_manifest_contribution(summary):
|
|
535
|
+
layers.add("dbt_manifest")
|
|
536
|
+
return summary, layers
|
|
537
|
+
|
|
538
|
+
def _full_profile(
|
|
539
|
+
self,
|
|
540
|
+
schema: str,
|
|
541
|
+
table: str,
|
|
542
|
+
fresh: bool,
|
|
543
|
+
dbt: DbtSchemaSource | None,
|
|
544
|
+
dbt_has_manifest: bool,
|
|
545
|
+
lineage_depth: int = 1,
|
|
546
|
+
) -> tuple[dict[str, Any] | None, set[SourceLayer]]:
|
|
547
|
+
"""Return a level-4 profile and the layers used to build it.
|
|
548
|
+
|
|
549
|
+
Cache first per target. Falls through to ``dbt.profile_table`` —
|
|
550
|
+
the per-table query is unavoidable for level 4 because the cache
|
|
551
|
+
only stores rich profiles for previously-inspected tables; dbt's
|
|
552
|
+
adapter must still answer per relation.
|
|
553
|
+
|
|
554
|
+
After the source returns the profile, we attach manifest-derived
|
|
555
|
+
cross-table fields (``referenced_by``, ``linked_via``) when the
|
|
556
|
+
manifest is available — even on cache hits, since the manifest is
|
|
557
|
+
the authority for declared FKs and the cache may pre-date the
|
|
558
|
+
``relationships:`` tests.
|
|
559
|
+
"""
|
|
560
|
+
layers: set[SourceLayer]
|
|
561
|
+
profile: dict[str, Any] | None
|
|
562
|
+
all_rels = self._all_manifest_relationships()
|
|
563
|
+
if not fresh and self.cache is not None:
|
|
564
|
+
cached = self.cache.profile_table(schema=schema, table=table)
|
|
565
|
+
if cached is not None:
|
|
566
|
+
cached.setdefault("upstream", [])
|
|
567
|
+
cached.setdefault("downstream", [])
|
|
568
|
+
profile, layers = cached, {"super_schema"}
|
|
569
|
+
if _attach_cross_table(profile, table, all_rels):
|
|
570
|
+
layers.add("dbt_manifest")
|
|
571
|
+
return profile, layers
|
|
572
|
+
if dbt is None:
|
|
573
|
+
return None, set()
|
|
574
|
+
profile = dbt.profile_table(
|
|
575
|
+
schema=schema, table=table, lineage_depth=lineage_depth
|
|
576
|
+
)
|
|
577
|
+
if profile is None:
|
|
578
|
+
return None, set()
|
|
579
|
+
layers = {"dbt_adapter"}
|
|
580
|
+
if dbt_has_manifest:
|
|
581
|
+
_attach_cross_table(profile, table, all_rels)
|
|
582
|
+
if _profile_has_manifest_contribution(profile):
|
|
583
|
+
layers.add("dbt_manifest")
|
|
584
|
+
return profile, layers
|
|
585
|
+
|
|
586
|
+
# ---- Source entry helpers ----------------------------------------------
|
|
587
|
+
|
|
588
|
+
def _source_entries(self) -> dict[str, dict[str, Any]]:
|
|
589
|
+
if self._source_lookup_cache is None:
|
|
590
|
+
self._source_lookup_cache = {}
|
|
591
|
+
for raw in self.adapter_registry.list_sql_sources():
|
|
592
|
+
name = raw["name"]
|
|
593
|
+
self._source_lookup_cache[name] = {
|
|
594
|
+
k: v for k, v in raw.items() if k != "name"
|
|
595
|
+
}
|
|
596
|
+
return self._source_lookup_cache
|
|
597
|
+
|
|
598
|
+
def _source_entry(self, source: str) -> dict[str, Any]:
|
|
599
|
+
entries = self._source_entries()
|
|
600
|
+
entry = entries.get(source)
|
|
601
|
+
if entry is None:
|
|
602
|
+
available = ", ".join(sorted(entries.keys()))
|
|
603
|
+
if available:
|
|
604
|
+
raise DatafaceError.from_code(
|
|
605
|
+
DF_EXECUTE_SOURCE_NOT_FOUND, source=source, available=available
|
|
606
|
+
)
|
|
607
|
+
raise DatafaceError.from_code(
|
|
608
|
+
DF_EXECUTE_SOURCE_NOT_FOUND_EMPTY, source=source
|
|
609
|
+
)
|
|
610
|
+
return entry
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
def _filter_to_columns(
    profile: dict[str, Any], column_spec: str
) -> dict[str, Any] | None:
    """Keep only columns matching ``column_spec``; drop the table if empty.

    Returns a shallow copy of ``profile`` whose ``"columns"`` holds just
    the matched columns, or ``None`` when nothing matches. The input
    mapping itself is not modified.
    """
    columns = profile.get("columns") or {}
    selected = _expand_targets(column_spec, list(columns))
    if selected:
        return {**profile, "columns": {name: columns[name] for name in selected}}
    return None
|
|
624
|
+
|
|
625
|
+
|
|
626
|
+
# Keys a `DbtSchemaSource` only adds when manifest data merged into the
|
|
627
|
+
# response. Used to answer "did manifest *actually* contribute to this call"
|
|
628
|
+
# vs "is manifest *available*" (the latter is `has_manifest`). The
|
|
629
|
+
# distinction matters for `_meta.sources_consulted` honesty: a request for
|
|
630
|
+
# a table the warehouse has but the manifest doesn't reports
|
|
631
|
+
# `["dbt_adapter"]`, not `["dbt_adapter", "dbt_manifest"]`.
|
|
632
|
+
_MANIFEST_TABLE_KEYS = frozenset(
|
|
633
|
+
("description", "tags", "owner", "referenced_by", "linked_via")
|
|
634
|
+
)
|
|
635
|
+
# When the only manifest contribution on a profile is a forward FK injected
|
|
636
|
+
# onto a column's `relationships` list, table-level keys won't trip the
|
|
637
|
+
# manifest-contribution check. `_profile_has_manifest_contribution` covers
|
|
638
|
+
# this via `_MANIFEST_COLUMN_KEYS` containing `relationships`.
|
|
639
|
+
_MANIFEST_COLUMN_KEYS = frozenset(
|
|
640
|
+
("description", "declared_type", "tags", "granularity", "tests", "relationships")
|
|
641
|
+
)
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
# M2M cutoff: len(columns) <= 4 AND (cols_with_explicit_FK / len(columns)) >= 0.75
|
|
645
|
+
# AND >=2 distinct FK columns. The 4-column ceiling caps "obvious" join tables
|
|
646
|
+
# (`order_items(order_id, product_id)` qualifies; a wide fact table does not).
|
|
647
|
+
# 75% guarantees enough of the row is FK to be a connector and not a fact
|
|
648
|
+
# table that happens to carry two FKs. Both bounds are explicit per the
|
|
649
|
+
# initiative's no-magic rule — easier to defend than a percentile or fitted
|
|
650
|
+
# threshold.
|
|
651
|
+
_M2M_MAX_COLUMNS = 4
|
|
652
|
+
_M2M_MIN_FK_RATIO = 0.75
|
|
653
|
+
|
|
654
|
+
|
|
655
|
+
def _attach_cross_table(
|
|
656
|
+
profile: dict[str, Any], table: str, all_rels: list[dict[str, str]]
|
|
657
|
+
) -> bool:
|
|
658
|
+
"""Attach manifest-derived FK views to ``profile``.
|
|
659
|
+
|
|
660
|
+
Three views, all derivations of explicit forward-FK declarations in the
|
|
661
|
+
dbt manifest — D3 forbids any naming-heuristic invention:
|
|
662
|
+
|
|
663
|
+
* column-level ``relationships`` — declared forward FKs on this
|
|
664
|
+
table's columns. ``DbtSchemaSource.profile_table`` already adds
|
|
665
|
+
these on the cold path; the cache-hit short-circuit doesn't (the
|
|
666
|
+
cache predates ``relationships:`` tests), so this helper merges
|
|
667
|
+
them on regardless of where the profile came from.
|
|
668
|
+
* table-level ``referenced_by`` — reverse-FK roll-up.
|
|
669
|
+
* table-level ``linked_via`` — M2M two-hop reachability.
|
|
670
|
+
|
|
671
|
+
Returns ``True`` when at least one field was attached (the
|
|
672
|
+
``dbt_manifest`` provenance bookkeeping rides on this).
|
|
673
|
+
"""
|
|
674
|
+
if not all_rels:
|
|
675
|
+
return False
|
|
676
|
+
contributed = False
|
|
677
|
+
if _attach_forward_relationships(profile, table, all_rels):
|
|
678
|
+
contributed = True
|
|
679
|
+
referenced_by = _compute_referenced_by(table, all_rels)
|
|
680
|
+
if referenced_by:
|
|
681
|
+
profile["referenced_by"] = referenced_by
|
|
682
|
+
contributed = True
|
|
683
|
+
linked_via = _compute_linked_via(profile, table, all_rels)
|
|
684
|
+
if linked_via:
|
|
685
|
+
profile["linked_via"] = linked_via
|
|
686
|
+
contributed = True
|
|
687
|
+
return contributed
|
|
688
|
+
|
|
689
|
+
|
|
690
|
+
def _attach_forward_relationships(
|
|
691
|
+
profile: dict[str, Any], table: str, all_rels: list[dict[str, str]]
|
|
692
|
+
) -> bool:
|
|
693
|
+
"""Merge declared forward FKs onto ``profile['columns'][col]['relationships']``.
|
|
694
|
+
|
|
695
|
+
Idempotent: if the cold dbt path already populated ``relationships``
|
|
696
|
+
on a column, we don't double-record an entry that's already present.
|
|
697
|
+
Skip columns that aren't on the profile (manifest declares an FK on
|
|
698
|
+
a column the warehouse profile doesn't carry — renamed / dropped).
|
|
699
|
+
"""
|
|
700
|
+
cols = profile.get("columns") or {}
|
|
701
|
+
if not cols:
|
|
702
|
+
return False
|
|
703
|
+
contributed = False
|
|
704
|
+
for rel in all_rels:
|
|
705
|
+
if rel["from_table"] != table:
|
|
706
|
+
continue
|
|
707
|
+
col = cols.get(rel["from_column"])
|
|
708
|
+
if col is None:
|
|
709
|
+
continue
|
|
710
|
+
existing = col.get("relationships")
|
|
711
|
+
entry = {"to_table": rel["to_table"], "to_column": rel["to_column"]}
|
|
712
|
+
if existing is None:
|
|
713
|
+
col["relationships"] = [entry]
|
|
714
|
+
contributed = True
|
|
715
|
+
elif entry not in existing:
|
|
716
|
+
existing.append(entry)
|
|
717
|
+
contributed = True
|
|
718
|
+
return contributed
|
|
719
|
+
|
|
720
|
+
|
|
721
|
+
def _compute_referenced_by(
|
|
722
|
+
table: str, all_rels: list[dict[str, str]]
|
|
723
|
+
) -> list[dict[str, str]]:
|
|
724
|
+
"""Reverse-FK roll-up: every declared FK that targets ``table``."""
|
|
725
|
+
out = [
|
|
726
|
+
{"from_table": rel["from_table"], "from_column": rel["from_column"]}
|
|
727
|
+
for rel in all_rels
|
|
728
|
+
if rel["to_table"] == table
|
|
729
|
+
]
|
|
730
|
+
out.sort(key=lambda x: (x["from_table"], x["from_column"]))
|
|
731
|
+
return out
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
def _compute_linked_via(
|
|
735
|
+
profile: dict[str, Any], table: str, all_rels: list[dict[str, str]]
|
|
736
|
+
) -> list[dict[str, str]]:
|
|
737
|
+
"""M2M two-hop: ordered FK column pairs on ``table`` when it looks like
|
|
738
|
+
a join table by the cutoff documented above."""
|
|
739
|
+
cols = profile.get("columns") or {}
|
|
740
|
+
if len(cols) == 0 or len(cols) > _M2M_MAX_COLUMNS:
|
|
741
|
+
return []
|
|
742
|
+
|
|
743
|
+
fk_targets: dict[str, str] = {} # from_column -> first declared to_table
|
|
744
|
+
for rel in all_rels:
|
|
745
|
+
if rel["from_table"] != table:
|
|
746
|
+
continue
|
|
747
|
+
col = rel["from_column"]
|
|
748
|
+
if col not in cols or col in fk_targets:
|
|
749
|
+
continue
|
|
750
|
+
fk_targets[col] = rel["to_table"]
|
|
751
|
+
|
|
752
|
+
if len(fk_targets) < 2:
|
|
753
|
+
return []
|
|
754
|
+
if len(fk_targets) / len(cols) < _M2M_MIN_FK_RATIO:
|
|
755
|
+
return []
|
|
756
|
+
|
|
757
|
+
fk_cols = sorted(fk_targets)
|
|
758
|
+
out: list[dict[str, str]] = []
|
|
759
|
+
for i, a in enumerate(fk_cols):
|
|
760
|
+
for b in fk_cols[i + 1 :]:
|
|
761
|
+
out.append(
|
|
762
|
+
{
|
|
763
|
+
"through_column_a": a,
|
|
764
|
+
"hop_table_a": fk_targets[a],
|
|
765
|
+
"through_column_b": b,
|
|
766
|
+
"hop_table_b": fk_targets[b],
|
|
767
|
+
}
|
|
768
|
+
)
|
|
769
|
+
return out
|
|
770
|
+
|
|
771
|
+
|
|
772
|
+
def _summary_has_manifest_contribution(summary: dict[str, Any]) -> bool:
    """Return ``True`` when ``summary`` carries any manifest-only table key."""
    for key in _MANIFEST_TABLE_KEYS:
        if key in summary:
            return True
    return False
|
|
774
|
+
|
|
775
|
+
|
|
776
|
+
def _profile_has_manifest_contribution(profile: dict[str, Any]) -> bool:
    """Return ``True`` when manifest data shows up anywhere on ``profile``.

    Checked in order: a manifest-only table-level key, a non-empty
    ``upstream`` / ``downstream`` lineage list, then a manifest-only key
    on any column entry.
    """
    for key in _MANIFEST_TABLE_KEYS:
        if key in profile:
            return True

    # Non-empty lineage lists mean the manifest contributed edge data.
    has_lineage = profile.get("upstream") or profile.get("downstream")
    if has_lineage:
        return True

    columns = profile.get("columns")
    if not isinstance(columns, dict):
        return False
    for column in columns.values():
        if not isinstance(column, dict):
            continue
        if any(key in column for key in _MANIFEST_COLUMN_KEYS):
            return True
    return False
|
|
788
|
+
|
|
789
|
+
|
|
790
|
+
_LAYER_ORDER: tuple[SourceLayer, ...] = ("super_schema", "dbt_adapter", "dbt_manifest")
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
def _ordered(used: set[SourceLayer]) -> list[SourceLayer]:
|
|
794
|
+
return [layer for layer in _LAYER_ORDER if layer in used]
|
|
795
|
+
|
|
796
|
+
|
|
797
|
+
def _build_envelope(
    sources_dict: dict[str, dict[str, Any]],
    sources_consulted: list[SourceLayer],
    cache_built_at: datetime | None = None,
) -> dict[str, Any]:
    """Wrap ``sources_dict`` in the response envelope.

    The result is ``{"sources": ..., "_meta": ...}``. ``_meta`` always
    carries ``retrieved_at`` (ISO-formatted ``_now_utc()``) and
    ``sources_consulted``; ``cache_built_at`` is added, ISO-formatted,
    only when a value is supplied.
    """
    cache_stamp: dict[str, Any] = {}
    if cache_built_at is not None:
        cache_stamp["cache_built_at"] = cache_built_at.isoformat()
    envelope_meta: dict[str, Any] = {
        "retrieved_at": _now_utc().isoformat(),
        "sources_consulted": sources_consulted,
        **cache_stamp,
    }
    return {"sources": sources_dict, "_meta": envelope_meta}
|