dataface 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- d3_format/__init__.py +14 -0
- d3_format/errors.py +19 -0
- d3_format/format.py +551 -0
- d3_format/spec.py +159 -0
- dataface/DATAFACE_SYNTAX.md +1135 -0
- dataface/__init__.py +93 -0
- dataface/_docs_site.py +20 -0
- dataface/_install_hint.py +26 -0
- dataface/agent_api/__init__.py +79 -0
- dataface/agent_api/_init_templates/__init__.py +0 -0
- dataface/agent_api/_init_templates/agents_dft_snippet.md +26 -0
- dataface/agent_api/_init_templates/dataface.yml +15 -0
- dataface/agent_api/_init_templates/faces-dataface.yml +144 -0
- dataface/agent_api/_init_templates/index.md +24 -0
- dataface/agent_api/_paths.py +118 -0
- dataface/agent_api/_project_agents_md.py +43 -0
- dataface/agent_api/_session_store.py +486 -0
- dataface/agent_api/_state.py +28 -0
- dataface/agent_api/chat.py +221 -0
- dataface/agent_api/dashboards.py +257 -0
- dataface/agent_api/describe.py +366 -0
- dataface/agent_api/describe_query.py +120 -0
- dataface/agent_api/docs/__init__.py +25 -0
- dataface/agent_api/docs/_loader.py +292 -0
- dataface/agent_api/docs/yaml-reference.md +2757 -0
- dataface/agent_api/file_refs.py +118 -0
- dataface/agent_api/init.py +126 -0
- dataface/agent_api/inspect.py +128 -0
- dataface/agent_api/mcp_install.py +170 -0
- dataface/agent_api/query.py +274 -0
- dataface/agent_api/schema.py +658 -0
- dataface/agent_api/schema_search.py +284 -0
- dataface/agent_api/search.py +270 -0
- dataface/agent_api/skill_install.py +141 -0
- dataface/agent_api/skill_render.py +90 -0
- dataface/agent_api/skills.py +293 -0
- dataface/agent_api/surface_aliases.yaml +128 -0
- dataface/agent_api/validate.py +175 -0
- dataface/agent_api/validate_query.py +84 -0
- dataface/ai/__init__.py +39 -0
- dataface/ai/agent.py +139 -0
- dataface/ai/context.py +45 -0
- dataface/ai/events.py +62 -0
- dataface/ai/external_mcp.py +610 -0
- dataface/ai/generate_sql.py +96 -0
- dataface/ai/llm.py +403 -0
- dataface/ai/mcp/__init__.py +51 -0
- dataface/ai/mcp/server.py +289 -0
- dataface/ai/memories.py +85 -0
- dataface/ai/prompts.py +177 -0
- dataface/ai/schema_context.py +138 -0
- dataface/ai/skills/before-after-comparison/SKILL.md +102 -0
- dataface/ai/skills/before-after-comparison/examples/before-after-comparison.yml +24 -0
- dataface/ai/skills/dashboard-build/SKILL.md +212 -0
- dataface/ai/skills/dashboard-build/examples/_smoke.yml +15 -0
- dataface/ai/skills/dashboard-design/SKILL.md +182 -0
- dataface/ai/skills/dashboard-review/SKILL.md +113 -0
- dataface/ai/skills/dashboard-structural-review/SKILL.md +173 -0
- dataface/ai/skills/dashboard-visual-review/SKILL.md +139 -0
- dataface/ai/skills/dataface-mcp-setup/SKILL.md +177 -0
- dataface/ai/skills/dataface-troubleshooting/SKILL.md +225 -0
- dataface/ai/skills/drill-down-link/SKILL.md +112 -0
- dataface/ai/skills/drill-down-link/examples/drill-down-link.yml +27 -0
- dataface/ai/skills/faceted-small-multiples/SKILL.md +116 -0
- dataface/ai/skills/faceted-small-multiples/examples/faceted-small-multiples.yml +33 -0
- dataface/ai/skills/filter-bar-with-variables/SKILL.md +105 -0
- dataface/ai/skills/filter-bar-with-variables/examples/filter-bar-with-variables.yml +49 -0
- dataface/ai/skills/kpi-row/SKILL.md +101 -0
- dataface/ai/skills/kpi-row/examples/kpi-row.yml +55 -0
- dataface/ai/skills/report-design/SKILL.md +184 -0
- dataface/ai/skills/single-metric-bignum/SKILL.md +90 -0
- dataface/ai/skills/single-metric-bignum/examples/single-metric-bignum.yml +27 -0
- dataface/ai/skills/table-heavy-ops-dashboard/SKILL.md +114 -0
- dataface/ai/skills/table-heavy-ops-dashboard/examples/table-heavy-ops-dashboard.yml +48 -0
- dataface/ai/skills/time-series-trend/SKILL.md +93 -0
- dataface/ai/skills/time-series-trend/examples/time-series-trend.yml +26 -0
- dataface/ai/skills/top-n-with-detail/SKILL.md +98 -0
- dataface/ai/skills/top-n-with-detail/examples/top-n-with-detail.yml +45 -0
- dataface/ai/skills/two-by-two-grid-overview/SKILL.md +78 -0
- dataface/ai/skills/two-by-two-grid-overview/examples/two-by-two-grid-overview.yml +64 -0
- dataface/ai/tool_schemas.py +132 -0
- dataface/ai/tools/__init__.py +312 -0
- dataface/ai/yaml_utils.py +57 -0
- dataface/cli/__init__.py +3 -0
- dataface/cli/_console.py +48 -0
- dataface/cli/_error_format.py +83 -0
- dataface/cli/_extras.py +190 -0
- dataface/cli/_json_output.py +8 -0
- dataface/cli/_parsing.py +17 -0
- dataface/cli/_version_info.py +56 -0
- dataface/cli/commands/__init__.py +3 -0
- dataface/cli/commands/_agent_input.py +205 -0
- dataface/cli/commands/_agent_server.py +115 -0
- dataface/cli/commands/chat.py +645 -0
- dataface/cli/commands/describe.py +107 -0
- dataface/cli/commands/docs.py +131 -0
- dataface/cli/commands/extension.py +179 -0
- dataface/cli/commands/init.py +240 -0
- dataface/cli/commands/inspect.py +94 -0
- dataface/cli/commands/mcp_init.py +167 -0
- dataface/cli/commands/query.py +386 -0
- dataface/cli/commands/render.py +291 -0
- dataface/cli/commands/schema.py +411 -0
- dataface/cli/commands/search.py +49 -0
- dataface/cli/commands/serve.py +114 -0
- dataface/cli/commands/skills.py +133 -0
- dataface/cli/commands/skills_init.py +161 -0
- dataface/cli/commands/validate.py +63 -0
- dataface/cli/main.py +1501 -0
- dataface/core/__init__.py +75 -0
- dataface/core/compile/__init__.py +244 -0
- dataface/core/compile/_jinja_helpers.py +78 -0
- dataface/core/compile/channel.py +222 -0
- dataface/core/compile/chart_focus.py +101 -0
- dataface/core/compile/chart_resolved.py +169 -0
- dataface/core/compile/chart_type_detection.py +489 -0
- dataface/core/compile/chart_update.py +261 -0
- dataface/core/compile/colors.py +64 -0
- dataface/core/compile/compiler.py +904 -0
- dataface/core/compile/config.py +823 -0
- dataface/core/compile/custom_chart_types.py +208 -0
- dataface/core/compile/data_table_attachment.py +1287 -0
- dataface/core/compile/detect.py +110 -0
- dataface/core/compile/errors.py +302 -0
- dataface/core/compile/filter_injection.py +319 -0
- dataface/core/compile/introspection.py +527 -0
- dataface/core/compile/jinja.py +511 -0
- dataface/core/compile/labels_env.py +52 -0
- dataface/core/compile/markdown.py +154 -0
- dataface/core/compile/meta.py +388 -0
- dataface/core/compile/models/__init__.py +0 -0
- dataface/core/compile/models/chart/__init__.py +0 -0
- dataface/core/compile/models/chart/authored.py +2137 -0
- dataface/core/compile/models/chart/compiled.py +398 -0
- dataface/core/compile/models/config.py +347 -0
- dataface/core/compile/models/face/__init__.py +0 -0
- dataface/core/compile/models/face/authored.py +659 -0
- dataface/core/compile/models/face/compiled.py +522 -0
- dataface/core/compile/models/factories.py +201 -0
- dataface/core/compile/models/markers.py +40 -0
- dataface/core/compile/models/palette.py +36 -0
- dataface/core/compile/models/primitives.py +415 -0
- dataface/core/compile/models/query/__init__.py +0 -0
- dataface/core/compile/models/query/authored.py +246 -0
- dataface/core/compile/models/query/compiled.py +710 -0
- dataface/core/compile/models/refs.py +137 -0
- dataface/core/compile/models/source.py +611 -0
- dataface/core/compile/models/style/__init__.py +0 -0
- dataface/core/compile/models/style/authored.py +481 -0
- dataface/core/compile/models/style/compiled.py +3399 -0
- dataface/core/compile/models/style/merged.py +1682 -0
- dataface/core/compile/models/theme.py +362 -0
- dataface/core/compile/models/variable/__init__.py +0 -0
- dataface/core/compile/models/variable/authored.py +254 -0
- dataface/core/compile/models/vega_lite/__init__.py +0 -0
- dataface/core/compile/models/vega_lite/config.py +510 -0
- dataface/core/compile/models/vega_lite/contracts.py +171 -0
- dataface/core/compile/normalize_charts.py +494 -0
- dataface/core/compile/normalize_layout.py +1000 -0
- dataface/core/compile/normalize_queries.py +297 -0
- dataface/core/compile/normalize_variables.py +489 -0
- dataface/core/compile/normalizer.py +543 -0
- dataface/core/compile/palette.py +1100 -0
- dataface/core/compile/parameterized.py +658 -0
- dataface/core/compile/parser.py +228 -0
- dataface/core/compile/schema.py +20 -0
- dataface/core/compile/schema_renderers/__init__.py +0 -0
- dataface/core/compile/schema_renderers/json_schema.py +163 -0
- dataface/core/compile/schema_renderers/prompt.py +152 -0
- dataface/core/compile/schema_renderers/vscode_schema.py +301 -0
- dataface/core/compile/sizing.py +2126 -0
- dataface/core/compile/sources.py +518 -0
- dataface/core/compile/sql_authoring_lint.py +56 -0
- dataface/core/compile/style_cascade.py +471 -0
- dataface/core/compile/typography.py +299 -0
- dataface/core/compile/validator.py +301 -0
- dataface/core/compile/variables.py +53 -0
- dataface/core/compile/vega_config.py +98 -0
- dataface/core/compile/vega_lite/__init__.py +6 -0
- dataface/core/compile/vega_lite/validation.py +95 -0
- dataface/core/compile/yaml_error_formatter.py +838 -0
- dataface/core/connections.py +38 -0
- dataface/core/dashboard.py +358 -0
- dataface/core/defaults/default_config.yml +101 -0
- dataface/core/defaults/palettes/categorical/category-10-dark.yml +32 -0
- dataface/core/defaults/palettes/categorical/category-10-light.yml +43 -0
- dataface/core/defaults/palettes/categorical/category-10.yml +31 -0
- dataface/core/defaults/palettes/categorical/category-6-tonal-blue.yml +22 -0
- dataface/core/defaults/palettes/categorical/category-6-tonal-brown.yml +29 -0
- dataface/core/defaults/palettes/categorical/category-6-tonal-green.yml +20 -0
- dataface/core/defaults/palettes/categorical/category-6-tonal-orange.yml +21 -0
- dataface/core/defaults/palettes/categorical/category-6-tonal-purple.yml +20 -0
- dataface/core/defaults/palettes/categorical/editorial-10-dark.yml +32 -0
- dataface/core/defaults/palettes/categorical/editorial-10.yml +40 -0
- dataface/core/defaults/palettes/categorical/hero-6.yml +17 -0
- dataface/core/defaults/palettes/categorical/single-blue.yml +11 -0
- dataface/core/defaults/palettes/categorical/tableau.yml +20 -0
- dataface/core/defaults/palettes/data/xkcd_colors.json +3803 -0
- dataface/core/defaults/palettes/diverging/blue-red.yml +25 -0
- dataface/core/defaults/palettes/diverging/coolwarm.yml +24 -0
- dataface/core/defaults/palettes/diverging/crimson-green.yml +23 -0
- dataface/core/defaults/palettes/diverging/orange-teal.yml +23 -0
- dataface/core/defaults/palettes/diverging/sunset.yml +24 -0
- dataface/core/defaults/palettes/scaffold/dft-creams.yml +38 -0
- dataface/core/defaults/palettes/scaffold/dft-grays.yml +53 -0
- dataface/core/defaults/palettes/sequential/amber.yml +22 -0
- dataface/core/defaults/palettes/sequential/blue.yml +22 -0
- dataface/core/defaults/palettes/sequential/brown.yml +22 -0
- dataface/core/defaults/palettes/sequential/gray.yml +22 -0
- dataface/core/defaults/palettes/sequential/green.yml +22 -0
- dataface/core/defaults/palettes/sequential/purple.yml +22 -0
- dataface/core/defaults/palettes/sequential/rust.yml +22 -0
- dataface/core/defaults/palettes/sequential/teal.yml +22 -0
- dataface/core/defaults/palettes/tone/negative.yml +32 -0
- dataface/core/defaults/palettes/tone/positive.yml +22 -0
- dataface/core/defaults/palettes/tone/warning.yml +22 -0
- dataface/core/defaults/themes/_base.yaml +786 -0
- dataface/core/defaults/themes/bi.yaml +16 -0
- dataface/core/defaults/themes/carbong100.yaml +41 -0
- dataface/core/defaults/themes/cream.yaml +122 -0
- dataface/core/defaults/themes/dark.yaml +40 -0
- dataface/core/defaults/themes/diagnostics-title-angle-extreme.yaml +9 -0
- dataface/core/defaults/themes/diagnostics-title-baseline-extreme.yaml +9 -0
- dataface/core/defaults/themes/diagnostics-title-baseline.yaml +24 -0
- dataface/core/defaults/themes/diagnostics-title-center.yaml +8 -0
- dataface/core/defaults/themes/diagnostics-title-color-extreme.yaml +24 -0
- dataface/core/defaults/themes/diagnostics-title-font-extreme.yaml +25 -0
- dataface/core/defaults/themes/diagnostics-title-left.yaml +8 -0
- dataface/core/defaults/themes/diagnostics-title-offset-extreme.yaml +9 -0
- dataface/core/defaults/themes/diagnostics-title-size-extreme.yaml +24 -0
- dataface/core/defaults/themes/diagnostics-title-weight-extreme.yaml +24 -0
- dataface/core/defaults/themes/editorial.yaml +147 -0
- dataface/core/defaults/themes/light.yaml +30 -0
- dataface/core/defaults/themes/looker.yaml +17 -0
- dataface/core/defaults/themes/stark.yaml +134 -0
- dataface/core/errors/__init__.py +67 -0
- dataface/core/errors/codes_compile.py +56 -0
- dataface/core/errors/codes_execute.py +177 -0
- dataface/core/errors/codes_render.py +106 -0
- dataface/core/errors/codes_unknown.py +15 -0
- dataface/core/errors/hints.py +74 -0
- dataface/core/errors/registry.py +42 -0
- dataface/core/errors/structured.py +92 -0
- dataface/core/execute/__init__.py +91 -0
- dataface/core/execute/adapters/__init__.py +49 -0
- dataface/core/execute/adapters/adapter_registry.py +400 -0
- dataface/core/execute/adapters/base.py +245 -0
- dataface/core/execute/adapters/csv_adapter.py +239 -0
- dataface/core/execute/adapters/dbt_adapter.py +283 -0
- dataface/core/execute/adapters/dbt_adapter_factory.py +212 -0
- dataface/core/execute/adapters/dbt_macro_loader.py +95 -0
- dataface/core/execute/adapters/dbt_utils.py +150 -0
- dataface/core/execute/adapters/http_adapter.py +224 -0
- dataface/core/execute/adapters/metricflow_adapter.py +94 -0
- dataface/core/execute/adapters/schema_resolver_adapter.py +144 -0
- dataface/core/execute/adapters/sql_adapter.py +710 -0
- dataface/core/execute/adapters/values_adapter.py +58 -0
- dataface/core/execute/batch.py +744 -0
- dataface/core/execute/cache_backend.py +135 -0
- dataface/core/execute/cache_keys.py +66 -0
- dataface/core/execute/dbt_jinja.py +21 -0
- dataface/core/execute/dialects/__init__.py +121 -0
- dataface/core/execute/dialects/athena.py +75 -0
- dataface/core/execute/dialects/base.py +302 -0
- dataface/core/execute/dialects/bigquery.py +38 -0
- dataface/core/execute/dialects/databricks.py +68 -0
- dataface/core/execute/dialects/duckdb.py +35 -0
- dataface/core/execute/dialects/mysql.py +68 -0
- dataface/core/execute/dialects/postgres.py +39 -0
- dataface/core/execute/dialects/redshift.py +12 -0
- dataface/core/execute/dialects/snowflake.py +51 -0
- dataface/core/execute/dialects/sqlserver.py +92 -0
- dataface/core/execute/duckdb_cache.py +712 -0
- dataface/core/execute/duckdb_config.py +26 -0
- dataface/core/execute/errors.py +213 -0
- dataface/core/execute/executor.py +1249 -0
- dataface/core/execute/parallel.py +162 -0
- dataface/core/execute/setup_sql.py +58 -0
- dataface/core/execute/source_registry.py +72 -0
- dataface/core/execute/source_resolver.py +255 -0
- dataface/core/execute/sql_guard.py +387 -0
- dataface/core/execute/sql_literals.py +199 -0
- dataface/core/fonts.py +52 -0
- dataface/core/inspect/__init__.py +32 -0
- dataface/core/inspect/cache_factory.py +98 -0
- dataface/core/inspect/db_types.py +162 -0
- dataface/core/inspect/dbt_schema.py +96 -0
- dataface/core/inspect/defaults.yml +37 -0
- dataface/core/inspect/fanout_risk.py +109 -0
- dataface/core/inspect/manifest_utils.py +77 -0
- dataface/core/inspect/partials/categorical.yml +40 -0
- dataface/core/inspect/partials/date.yml +40 -0
- dataface/core/inspect/partials/numeric.yml +55 -0
- dataface/core/inspect/partition_types.py +38 -0
- dataface/core/inspect/query_validator.py +975 -0
- dataface/core/inspect/renderer.py +354 -0
- dataface/core/inspect/resolver.py +808 -0
- dataface/core/inspect/search.py +461 -0
- dataface/core/inspect/sources/__init__.py +32 -0
- dataface/core/inspect/sources/dbt.py +738 -0
- dataface/core/inspect/sources/duckdb_utils.py +66 -0
- dataface/core/inspect/templates/__init__.py +1 -0
- dataface/core/inspect/templates/categorical_column.yml +196 -0
- dataface/core/inspect/templates/charts.yml +109 -0
- dataface/core/inspect/templates/date_column.yml +248 -0
- dataface/core/inspect/templates/model.yml +138 -0
- dataface/core/inspect/templates/numeric_column.yml +261 -0
- dataface/core/inspect/templates/quality.yml +80 -0
- dataface/core/inspect/templates/string_column.yml +263 -0
- dataface/core/project_roots.py +165 -0
- dataface/core/render/__init__.py +87 -0
- dataface/core/render/board_links.py +176 -0
- dataface/core/render/chart/__init__.py +27 -0
- dataface/core/render/chart/arc_attached_table.py +251 -0
- dataface/core/render/chart/artifacts.py +16 -0
- dataface/core/render/chart/callout.py +225 -0
- dataface/core/render/chart/decisions.py +358 -0
- dataface/core/render/chart/geo.py +700 -0
- dataface/core/render/chart/kpi.py +916 -0
- dataface/core/render/chart/labels.py +76 -0
- dataface/core/render/chart/pipeline.py +818 -0
- dataface/core/render/chart/presentation.py +36 -0
- dataface/core/render/chart/profile.py +3438 -0
- dataface/core/render/chart/render_single.py +347 -0
- dataface/core/render/chart/renderers.py +193 -0
- dataface/core/render/chart/rendering.py +565 -0
- dataface/core/render/chart/serialization.py +90 -0
- dataface/core/render/chart/spark.py +496 -0
- dataface/core/render/chart/spark_bar.py +370 -0
- dataface/core/render/chart/spec_builders.py +154 -0
- dataface/core/render/chart/standard_renderer.py +2645 -0
- dataface/core/render/chart/table.py +2957 -0
- dataface/core/render/chart/table_support.py +1452 -0
- dataface/core/render/chart/tick_values.py +66 -0
- dataface/core/render/chart/time_unit_detect.py +809 -0
- dataface/core/render/chart/title_overflow.py +157 -0
- dataface/core/render/chart/type_inference.py +122 -0
- dataface/core/render/chart/validation.py +99 -0
- dataface/core/render/chart/vega_lite.py +125 -0
- dataface/core/render/chart/vega_lite_types.py +268 -0
- dataface/core/render/chart/vl_field_maps.py +346 -0
- dataface/core/render/chart_interactivity.py +24 -0
- dataface/core/render/control_registry.py +287 -0
- dataface/core/render/converters/__init__.py +24 -0
- dataface/core/render/converters/chart.py +276 -0
- dataface/core/render/converters/html.py +98 -0
- dataface/core/render/converters/pdf.py +40 -0
- dataface/core/render/converters/png.py +41 -0
- dataface/core/render/errors.py +144 -0
- dataface/core/render/face_api.py +160 -0
- dataface/core/render/faces.py +1194 -0
- dataface/core/render/font_measurement.py +48 -0
- dataface/core/render/font_support.py +197 -0
- dataface/core/render/fonts/DFTSansTabular-Regular.ttf +0 -0
- dataface/core/render/fonts/DFTSansTabular-Regular.woff2 +0 -0
- dataface/core/render/fonts/DFTSerifOldstyleProportional-Regular.ttf +0 -0
- dataface/core/render/fonts/DFTSerifOldstyleTabular-Regular.ttf +0 -0
- dataface/core/render/fonts/InterVariable.ttf +0 -0
- dataface/core/render/fonts/InterVariable.woff2 +0 -0
- dataface/core/render/fonts/NOTO_COLOR_EMOJI_LICENSE.txt +93 -0
- dataface/core/render/fonts/NOTO_EMOJI_LICENSE.txt +93 -0
- dataface/core/render/fonts/NotoColorEmoji-Regular.ttf +0 -0
- dataface/core/render/fonts/NotoColorEmoji-Regular.woff2 +0 -0
- dataface/core/render/fonts/NotoEmoji-Regular.ttf +0 -0
- dataface/core/render/fonts/NotoEmoji-Regular.woff2 +0 -0
- dataface/core/render/fonts/SOURCE_CODE_PRO_LICENSE.txt +93 -0
- dataface/core/render/fonts/SOURCE_SERIF_4_LICENSE.txt +98 -0
- dataface/core/render/fonts/SourceCodePro-Regular.ttf +0 -0
- dataface/core/render/fonts/SourceSerif4-Regular.ttf +0 -0
- dataface/core/render/fonts/_emoji_font_face.css +43 -0
- dataface/core/render/fonts/source-serif-4-variable-latin.woff2 +0 -0
- dataface/core/render/format_utils.py +329 -0
- dataface/core/render/geo_defaults.yml +28 -0
- dataface/core/render/json_format.py +146 -0
- dataface/core/render/layout_sizing.py +865 -0
- dataface/core/render/layouts.py +541 -0
- dataface/core/render/markdown_defaults.yml +16 -0
- dataface/core/render/missing_vars_prompt.py +79 -0
- dataface/core/render/placeholder.py +389 -0
- dataface/core/render/render_result.py +14 -0
- dataface/core/render/renderer.py +467 -0
- dataface/core/render/script_embedding.py +16 -0
- dataface/core/render/svg_utils.py +212 -0
- dataface/core/render/template_loader.py +69 -0
- dataface/core/render/templates/controls/_styles.css +606 -0
- dataface/core/render/templates/controls/checkbox.html +16 -0
- dataface/core/render/templates/controls/date.html +16 -0
- dataface/core/render/templates/controls/number.html +19 -0
- dataface/core/render/templates/controls/readonly.html +9 -0
- dataface/core/render/templates/controls/select.html +21 -0
- dataface/core/render/templates/controls/slider.html +22 -0
- dataface/core/render/templates/controls/text.html +16 -0
- dataface/core/render/templates/scripts/chart_interactivity.js +191 -0
- dataface/core/render/templates/scripts/variables.js +976 -0
- dataface/core/render/templates/svg/grid_pattern.svg +3 -0
- dataface/core/render/templates/svg/styles.css +51 -0
- dataface/core/render/terminal.py +311 -0
- dataface/core/render/terminal_charts.py +563 -0
- dataface/core/render/terminal_defaults.yml +2 -0
- dataface/core/render/terminal_layouts.py +299 -0
- dataface/core/render/terminal_text.py +31 -0
- dataface/core/render/text/__init__.py +1 -0
- dataface/core/render/text/case.py +113 -0
- dataface/core/render/text_format.py +129 -0
- dataface/core/render/utils.py +106 -0
- dataface/core/render/variable_controls.py +946 -0
- dataface/core/render/variable_input_refinement.py +140 -0
- dataface/core/render/warnings/__init__.py +15 -0
- dataface/core/render/warnings/bar_color_1_to_1_with_x.py +80 -0
- dataface/core/render/warnings/base.py +44 -0
- dataface/core/render/warnings/fanout_risk.py +15 -0
- dataface/core/render/warnings/from_query_diagnostic.py +56 -0
- dataface/core/render/warnings/missing_join_predicate.py +13 -0
- dataface/core/render/warnings/query_parse_error.py +14 -0
- dataface/core/render/warnings/query_returned_zero_rows.py +42 -0
- dataface/core/render/warnings/reaggregation.py +14 -0
- dataface/core/render/warnings/registry.py +45 -0
- dataface/core/render/warnings/suppression.py +46 -0
- dataface/core/render/warnings/temporal_single_point.py +63 -0
- dataface/core/render/warnings/unreferenced_chart.py +15 -0
- dataface/core/render/warnings/y_encoding_mostly_null.py +76 -0
- dataface/core/render/yaml_format.py +167 -0
- dataface/core/resolve_face.py +195 -0
- dataface/core/schema/__init__.py +0 -0
- dataface/core/schema/guidance.py +151 -0
- dataface/core/scoped_paths.py +59 -0
- dataface/core/serve/__init__.py +14 -0
- dataface/core/serve/bootstrap.py +39 -0
- dataface/core/serve/embedded.py +57 -0
- dataface/core/serve/port.py +129 -0
- dataface/core/serve/server.py +938 -0
- dataface/core/serve/templates/__init__.py +0 -0
- dataface/core/serve/templates/directory.yml +6 -0
- dataface/core/serve/templates/error.html.j2 +217 -0
- dataface/core/utils.py +121 -0
- dataface/core/validate.py +64 -0
- dataface/integrations/__init__.py +0 -0
- dataface/integrations/highlighting.py +351 -0
- dataface/integrations/markdown.py +537 -0
- dataface/py.typed +0 -0
- dataface-0.1.2.dist-info/METADATA +375 -0
- dataface-0.1.2.dist-info/RECORD +455 -0
- dataface-0.1.2.dist-info/WHEEL +4 -0
- dataface-0.1.2.dist-info/entry_points.txt +2 -0
- dataface-0.1.2.dist-info/licenses/LICENSE +202 -0
- mdsvg/__init__.py +168 -0
- mdsvg/fonts.py +656 -0
- mdsvg/images.py +299 -0
- mdsvg/parser.py +629 -0
- mdsvg/playground.py +284 -0
- mdsvg/py.typed +2 -0
- mdsvg/renderer.py +1623 -0
- mdsvg/style.py +355 -0
- mdsvg/types.py +200 -0
- mdsvg/utils.py +86 -0
|
@@ -0,0 +1,712 @@
|
|
|
1
|
+
"""DuckDB caching layer for persistent query result storage.
|
|
2
|
+
|
|
3
|
+
Stage: EXECUTE (Cache)
|
|
4
|
+
Purpose: Persist query results in DuckDB for fast dataface loads,
|
|
5
|
+
cross-database queries, and snapshot history.
|
|
6
|
+
|
|
7
|
+
Cache Key: (source_hash, query_hash, variables_hash)
|
|
8
|
+
- source_hash — stable hash of the data source identity
|
|
9
|
+
- query_hash — SHA-256 of the SQL/query definition
|
|
10
|
+
- variables_hash — SHA-256 of the variable values that affect the result
|
|
11
|
+
|
|
12
|
+
face_slug and query_name are METADATA stored in _face_query_index, not key
|
|
13
|
+
components. This lets two faces with identical SQL against the same source
|
|
14
|
+
share one cache entry (cross-face deduplication).
|
|
15
|
+
|
|
16
|
+
Storage:
|
|
17
|
+
- _query_results — canonical result store, keyed by (source_hash, query_hash, vars_hash)
|
|
18
|
+
- _face_query_index — maps (face_slug, query_name) → key triplet, for {{ results.X }}
|
|
19
|
+
- _query_failures — failure store, keyed by (source_hash, query_hash, vars_hash)
|
|
20
|
+
|
|
21
|
+
Views: one per cached key, named by combined hash, for {{ results.X }} SQL execution.
|
|
22
|
+
|
|
23
|
+
Dependencies:
|
|
24
|
+
- duckdb (optional at module load; required at DuckDBCache() construction)
|
|
25
|
+
- pyarrow (optional; falls back to row-by-row insert)
|
|
26
|
+
|
|
27
|
+
See also:
|
|
28
|
+
- dataface/core/execute/cache_backend.py — QueryResultCache Protocol
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
import contextlib
|
|
32
|
+
import hashlib
|
|
33
|
+
import json
|
|
34
|
+
import logging
|
|
35
|
+
import os
|
|
36
|
+
import re
|
|
37
|
+
import threading
|
|
38
|
+
import traceback as tb_mod
|
|
39
|
+
from datetime import datetime
|
|
40
|
+
from pathlib import Path
|
|
41
|
+
from typing import Any
|
|
42
|
+
|
|
43
|
+
try:
|
|
44
|
+
import duckdb
|
|
45
|
+
|
|
46
|
+
HAS_DUCKDB = True
|
|
47
|
+
except ImportError:
|
|
48
|
+
duckdb = None # type: ignore[assignment]
|
|
49
|
+
HAS_DUCKDB = False
|
|
50
|
+
|
|
51
|
+
try:
|
|
52
|
+
import pyarrow as pa
|
|
53
|
+
|
|
54
|
+
HAS_PYARROW = True
|
|
55
|
+
except ImportError:
|
|
56
|
+
pa = None
|
|
57
|
+
HAS_PYARROW = False
|
|
58
|
+
|
|
59
|
+
from dataface._install_hint import install_hint
|
|
60
|
+
from dataface.core.execute.cache_backend import CachedQueryFailure
|
|
61
|
+
from dataface.core.execute.cache_keys import source_identity
|
|
62
|
+
|
|
63
|
+
logger = logging.getLogger(__name__)
|
|
64
|
+
|
|
65
|
+
# Metadata columns added to all cached result tables.
|
|
66
|
+
# The key (source_hash, query_hash, variables_hash) is encoded in the table name;
|
|
67
|
+
# we do NOT store it redundantly as per-row columns. Only snapshot sequencing,
|
|
68
|
+
# timestamp, source hash, and the full variables JSON (for audit/debug) are stored per-row.
|
|
69
|
+
# Bookkeeping columns carried by cached result tables alongside the query's
# own columns (listed alphabetically; membership is what matters for a set).
_METADATA_COLS = {
    "_run_timestamp",
    "_snapshot_run_seq",
    "_snapshot_set_seq",
    "_source_hash",
    "_variables",
}
|
|
76
|
+
|
|
77
|
+
# Number of leading hex characters of the combined hash kept in result table names
|
|
78
|
+
_HASH_PREFIX = 12
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
82
|
+
# Hash helpers (public — imported by executor.py)
|
|
83
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def compute_query_hash(sql: str) -> str:
    """Return a short, stable fingerprint of a query's SQL text.

    Args:
        sql: The SQL/query definition to fingerprint.

    Returns:
        The first 16 hex characters of the SHA-256 digest of ``sql``.
    """
    digest = hashlib.sha256(sql.encode())
    return digest.hexdigest()[:16]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def compute_variables_hash(variables: dict[str, Any]) -> str:
    """Return a short, stable fingerprint of the variable values for a query.

    Only the variables relevant to the query should be passed in; the hash is
    independent of the order in which they were inserted.

    Args:
        variables: Mapping of variable name to value. An empty (or falsy)
            mapping yields the all-zero sentinel hash.

    Returns:
        16 hex characters: ``"0" * 16`` when there are no variables, otherwise
        the leading 16 characters of the SHA-256 digest of the canonical JSON
        serialization of the sorted ``(name, value)`` pairs.
    """
    if not variables:
        return "0" * 16
    # Top-level pairs are ordered by sorting the items; sort_keys=True does the
    # same for dict-valued variables so that equal nested mappings built in a
    # different key order hash identically (matching compute_source_hash).
    # Values that JSON cannot encode natively are stringified via default=str.
    serialized = json.dumps(
        sorted(variables.items()),
        default=str,
        sort_keys=True,
    )
    return hashlib.sha256(serialized.encode()).hexdigest()[:16]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def compute_source_hash(
    source: str | dict[str, Any] | None,
    face_sources: dict[str, dict[str, Any]] | None = None,
) -> str:
    """Fingerprint the identity of a data source.

    A string ``source`` that names an entry in ``face_sources`` is first
    replaced by that entry's config dict, so the same source name pointing at
    different connection settings yields different hashes. The (possibly
    expanded) source is then reduced by ``source_identity`` — which, per the
    module's notes, drops secret fields (see ``cache_keys.SECRET_FIELDS``) —
    and the result is hashed.

    Args:
        source: One of:
            - a connection string / source name such as "warehouse_prod"
            - an inline source config dict
            - None, meaning the default / in-process source
        face_sources: Optional map of source name to resolved config
            (``face.sources``). If it is missing, empty, or lacks the name,
            the name string itself is what gets hashed.

    Returns:
        The first 16 hex characters of a SHA-256 digest; identical inputs
        always give the same value.
    """
    resolved = source
    if isinstance(resolved, str) and face_sources and resolved in face_sources:
        # Expand the name into its config so identity, not label, is hashed.
        resolved = face_sources[resolved]

    identity = source_identity(resolved)

    if isinstance(identity, str):
        canonical = identity.strip()
    elif identity is None:
        # No explicit source: default adapter (in-memory DuckDB, CSV, etc.).
        canonical = "__default__"
    else:
        # Config dict: order the pairs (and nested keys) for a stable encoding.
        ordered_pairs = sorted(identity.items())
        canonical = json.dumps(ordered_pairs, default=str, sort_keys=True)

    hasher = hashlib.sha256(canonical.encode())
    return hasher.hexdigest()[:16]
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _result_table_name(source_hash: str, query_hash: str, variables_hash: str) -> str:
    """Derive the cache table name for one (source, query, variables) key.

    The three key parts are joined with ``:``, hashed with SHA-256, and the
    first ``_HASH_PREFIX`` hex characters are appended to the ``_r_`` prefix,
    which keeps the identifier short.
    """
    key_text = f"{source_hash}:{query_hash}:{variables_hash}"
    full_digest = hashlib.sha256(key_text.encode()).hexdigest()
    short_digest = full_digest[:_HASH_PREFIX]
    return f"_r_{short_digest}"
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _view_name_for_table(table_name: str) -> str:
    """Return the view name paired with a result table: the table name plus ``_v``."""
    view_suffix = "_v"
    return f"{table_name}{view_suffix}"
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
160
|
+
# DuckDBCache
|
|
161
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class DuckDBCache:
|
|
165
|
+
"""DuckDB-based persistent cache for query results.
|
|
166
|
+
|
|
167
|
+
Implements QueryResultCache. Keys all operations on
|
|
168
|
+
(source_hash, query_hash, variables_hash). face_slug and query_name are
|
|
169
|
+
metadata stored in _face_query_index.
|
|
170
|
+
|
|
171
|
+
Attributes:
|
|
172
|
+
conn: DuckDB connection
|
|
173
|
+
db_path: Path to DuckDB file (or None for :memory:)
|
|
174
|
+
failure_ttl_seconds: Failure TTL; 0 disables failure caching.
|
|
175
|
+
"""
|
|
176
|
+
|
|
177
|
+
def __init__(
    self,
    db_path: Path | None = None,
    failure_ttl_seconds: int = 900,
) -> None:
    """Open the DuckDB connection and prepare the cache schema.

    Args:
        db_path: Location of the DuckDB file. A falsy value (the default,
            None) opens an in-memory database instead.
        failure_ttl_seconds: Failure TTL; 0 disables failure caching.

    Raises:
        ImportError: If the ``duckdb`` package could not be imported.
    """
    if not HAS_DUCKDB:
        raise ImportError(
            "DuckDB is required to use DuckDBCache. "
            f"Install it with: {install_hint('duckdb')}"
        )

    self.db_path = db_path
    self.failure_ttl_seconds = failure_ttl_seconds
    # Re-entrant lock guarding access to the shared connection.
    self._lock = threading.RLock()

    connect_target = str(db_path) if db_path else ":memory:"
    self.conn = duckdb.connect(connect_target)

    self.conn.execute("SET enable_object_cache = true")
    # Legacy cleanup runs before the current schema is ensured.
    self._drop_legacy_tables()
    self._ensure_schema()
|
|
200
|
+
|
|
201
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
202
|
+
# QueryResultCache Protocol implementation
|
|
203
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
204
|
+
|
|
205
|
+
def get(
|
|
206
|
+
self, source_hash: str, query_hash: str, variables_hash: str
|
|
207
|
+
) -> list[dict[str, Any]] | None:
|
|
208
|
+
"""Return cached rows for this key, or None on miss."""
|
|
209
|
+
with self._lock:
|
|
210
|
+
tbl = _result_table_name(source_hash, query_hash, variables_hash)
|
|
211
|
+
if not self._table_exists(tbl):
|
|
212
|
+
return None
|
|
213
|
+
|
|
214
|
+
try:
|
|
215
|
+
quoted_tbl = _q(tbl)
|
|
216
|
+
rows = self.conn.execute(
|
|
217
|
+
f"""
|
|
218
|
+
SELECT * EXCLUDE ({', '.join(sorted(_METADATA_COLS))})
|
|
219
|
+
FROM {quoted_tbl}
|
|
220
|
+
WHERE _snapshot_set_seq = (SELECT MAX(_snapshot_set_seq) FROM {quoted_tbl})
|
|
221
|
+
""",
|
|
222
|
+
).fetchall()
|
|
223
|
+
if not rows:
|
|
224
|
+
return None
|
|
225
|
+
columns = [d[0] for d in self.conn.description]
|
|
226
|
+
return [dict(zip(columns, row, strict=False)) for row in rows]
|
|
227
|
+
except duckdb.CatalogException:
|
|
228
|
+
return None
|
|
229
|
+
|
|
230
|
+
def put(
|
|
231
|
+
self,
|
|
232
|
+
source_hash: str,
|
|
233
|
+
query_hash: str,
|
|
234
|
+
variables_hash: str,
|
|
235
|
+
data: list[dict[str, Any]],
|
|
236
|
+
*,
|
|
237
|
+
face_slug: str,
|
|
238
|
+
query_name: str,
|
|
239
|
+
variables: dict[str, Any],
|
|
240
|
+
is_full_refresh: bool = False,
|
|
241
|
+
primary_key: str | None = None,
|
|
242
|
+
) -> None:
|
|
243
|
+
"""Store rows under (source_hash, query_hash, variables_hash)."""
|
|
244
|
+
if not data:
|
|
245
|
+
return
|
|
246
|
+
|
|
247
|
+
with self._lock:
|
|
248
|
+
tbl = _result_table_name(source_hash, query_hash, variables_hash)
|
|
249
|
+
set_seq, run_seq = self._next_sequences(tbl, is_full_refresh)
|
|
250
|
+
self._ensure_result_table(tbl, data)
|
|
251
|
+
self._insert_rows(
|
|
252
|
+
tbl,
|
|
253
|
+
data,
|
|
254
|
+
set_seq,
|
|
255
|
+
run_seq,
|
|
256
|
+
source_hash,
|
|
257
|
+
variables,
|
|
258
|
+
)
|
|
259
|
+
self._update_view(tbl, primary_key)
|
|
260
|
+
self._upsert_face_query_index(
|
|
261
|
+
face_slug, query_name, source_hash, query_hash, variables_hash
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
logger.info(
|
|
265
|
+
"Cached %d rows to %s (set_seq=%d, run_seq=%d)",
|
|
266
|
+
len(data),
|
|
267
|
+
tbl,
|
|
268
|
+
set_seq,
|
|
269
|
+
run_seq,
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
def get_failure(
|
|
273
|
+
self, source_hash: str, query_hash: str, variables_hash: str
|
|
274
|
+
) -> CachedQueryFailure | None:
|
|
275
|
+
"""Return a cached failure if within TTL, else None."""
|
|
276
|
+
if self.failure_ttl_seconds == 0:
|
|
277
|
+
return None
|
|
278
|
+
with self._lock:
|
|
279
|
+
row = self.conn.execute(
|
|
280
|
+
"""
|
|
281
|
+
SELECT error_class, error_message, traceback, failed_at
|
|
282
|
+
FROM _query_failures
|
|
283
|
+
WHERE source_hash = ? AND query_hash = ? AND variables_hash = ?
|
|
284
|
+
""",
|
|
285
|
+
[source_hash, query_hash, variables_hash],
|
|
286
|
+
).fetchone()
|
|
287
|
+
if row is None:
|
|
288
|
+
return None
|
|
289
|
+
error_class, error_message, traceback, failed_at = row
|
|
290
|
+
elapsed = (datetime.now() - failed_at).total_seconds()
|
|
291
|
+
if elapsed > self.failure_ttl_seconds:
|
|
292
|
+
return None
|
|
293
|
+
return CachedQueryFailure(
|
|
294
|
+
error_class=error_class,
|
|
295
|
+
error_message=error_message,
|
|
296
|
+
traceback=traceback,
|
|
297
|
+
failed_at=failed_at,
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
def put_failure(
|
|
301
|
+
self,
|
|
302
|
+
source_hash: str,
|
|
303
|
+
query_hash: str,
|
|
304
|
+
variables_hash: str,
|
|
305
|
+
exception: Exception,
|
|
306
|
+
*,
|
|
307
|
+
face_slug: str,
|
|
308
|
+
query_name: str,
|
|
309
|
+
) -> None:
|
|
310
|
+
"""Store a query failure keyed by (source_hash, query_hash, variables_hash)."""
|
|
311
|
+
with self._lock:
|
|
312
|
+
self.conn.execute(
|
|
313
|
+
"""
|
|
314
|
+
INSERT OR REPLACE INTO _query_failures
|
|
315
|
+
(source_hash, query_hash, variables_hash,
|
|
316
|
+
face_slug, query_name,
|
|
317
|
+
error_class, error_message, traceback, failed_at)
|
|
318
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
319
|
+
""",
|
|
320
|
+
[
|
|
321
|
+
source_hash,
|
|
322
|
+
query_hash,
|
|
323
|
+
variables_hash,
|
|
324
|
+
face_slug,
|
|
325
|
+
query_name,
|
|
326
|
+
type(exception).__name__,
|
|
327
|
+
str(exception),
|
|
328
|
+
"".join(tb_mod.format_exception(exception)),
|
|
329
|
+
datetime.now(),
|
|
330
|
+
],
|
|
331
|
+
)
|
|
332
|
+
|
|
333
|
+
def clear(self, source_hash: str, query_hash: str, variables_hash: str) -> None:
|
|
334
|
+
"""Remove success and failure entries for this key."""
|
|
335
|
+
with self._lock:
|
|
336
|
+
self.conn.execute(
|
|
337
|
+
"DELETE FROM _query_failures WHERE source_hash = ? AND query_hash = ? AND variables_hash = ?",
|
|
338
|
+
[source_hash, query_hash, variables_hash],
|
|
339
|
+
)
|
|
340
|
+
tbl = _result_table_name(source_hash, query_hash, variables_hash)
|
|
341
|
+
if self._table_exists(tbl):
|
|
342
|
+
self.conn.execute(f"DROP TABLE {_q(tbl)}")
|
|
343
|
+
view = _view_name_for_table(tbl)
|
|
344
|
+
self.conn.execute(f"DROP VIEW IF EXISTS {_q(view)}")
|
|
345
|
+
|
|
346
|
+
def close(self) -> None:
|
|
347
|
+
with self._lock:
|
|
348
|
+
self.conn.close()
|
|
349
|
+
|
|
350
|
+
def supports_results_refs(self) -> bool:
|
|
351
|
+
return True
|
|
352
|
+
|
|
353
|
+
def rewrite_results_refs(self, face_slug: str, sql: str) -> str:
|
|
354
|
+
"""Rewrite {{ results.X }} to view names for this face."""
|
|
355
|
+
pattern = r"\{\{\s*results\.(\w+)\s*\}\}"
|
|
356
|
+
|
|
357
|
+
def replace(match: re.Match) -> str:
|
|
358
|
+
qname = match.group(1)
|
|
359
|
+
with self._lock:
|
|
360
|
+
view = self._view_for_face_query(face_slug, qname)
|
|
361
|
+
if view is None:
|
|
362
|
+
raise RuntimeError(
|
|
363
|
+
f"{{ results.{qname} }} referenced but not cached for face '{face_slug}'"
|
|
364
|
+
)
|
|
365
|
+
return view
|
|
366
|
+
|
|
367
|
+
return re.sub(pattern, replace, sql)
|
|
368
|
+
|
|
369
|
+
def execute_results_query(self, face_slug: str, sql: str) -> list[dict[str, Any]]:
|
|
370
|
+
"""Execute SQL against cached result views."""
|
|
371
|
+
with self._lock:
|
|
372
|
+
rows = self.conn.execute(sql).fetchall()
|
|
373
|
+
columns = [d[0] for d in self.conn.description]
|
|
374
|
+
return [dict(zip(columns, row, strict=False)) for row in rows]
|
|
375
|
+
|
|
376
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
377
|
+
# Private helpers
|
|
378
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
379
|
+
|
|
380
|
+
def _drop_legacy_tables(self) -> None:
|
|
381
|
+
"""Drop tables that match the old (face_slug_q_query_name) schema.
|
|
382
|
+
|
|
383
|
+
Legacy tables are dev artifacts; we drop them on open to avoid
|
|
384
|
+
shape mismatch. Cache re-warms naturally on next query.
|
|
385
|
+
"""
|
|
386
|
+
try:
|
|
387
|
+
tables = self.conn.execute(
|
|
388
|
+
"""
|
|
389
|
+
SELECT table_name FROM information_schema.tables
|
|
390
|
+
WHERE table_type = 'BASE TABLE'
|
|
391
|
+
AND table_name NOT LIKE '_r_%'
|
|
392
|
+
AND table_name NOT LIKE '_query_%'
|
|
393
|
+
AND table_name NOT LIKE '_face_%'
|
|
394
|
+
"""
|
|
395
|
+
).fetchall()
|
|
396
|
+
except Exception: # noqa: BLE001
|
|
397
|
+
return
|
|
398
|
+
|
|
399
|
+
dropped = 0
|
|
400
|
+
for (tbl,) in tables:
|
|
401
|
+
# Old pattern: <face_slug>_q_<query_name> and related
|
|
402
|
+
if "_q_" in tbl or tbl.endswith("_current") or "_archive_" in tbl:
|
|
403
|
+
with contextlib.suppress(Exception):
|
|
404
|
+
self.conn.execute(f"DROP TABLE IF EXISTS {_q(tbl)}")
|
|
405
|
+
dropped += 1
|
|
406
|
+
# Also drop old views
|
|
407
|
+
with contextlib.suppress(Exception):
|
|
408
|
+
views = self.conn.execute(
|
|
409
|
+
"""
|
|
410
|
+
SELECT table_name FROM information_schema.tables
|
|
411
|
+
WHERE table_type = 'VIEW' AND table_name NOT LIKE '_r_%'
|
|
412
|
+
"""
|
|
413
|
+
).fetchall()
|
|
414
|
+
for (v,) in views:
|
|
415
|
+
with contextlib.suppress(Exception):
|
|
416
|
+
self.conn.execute(f"DROP VIEW IF EXISTS {_q(v)}")
|
|
417
|
+
|
|
418
|
+
if dropped:
|
|
419
|
+
logger.info(
|
|
420
|
+
"legacy cache cleared on schema change (%d tables dropped)", dropped
|
|
421
|
+
)
|
|
422
|
+
|
|
423
|
+
def _ensure_schema(self) -> None:
|
|
424
|
+
"""Create the system tables if they don't exist."""
|
|
425
|
+
self.conn.execute(
|
|
426
|
+
"""
|
|
427
|
+
CREATE TABLE IF NOT EXISTS _query_failures (
|
|
428
|
+
source_hash TEXT NOT NULL,
|
|
429
|
+
query_hash TEXT NOT NULL,
|
|
430
|
+
variables_hash TEXT NOT NULL,
|
|
431
|
+
face_slug TEXT NOT NULL,
|
|
432
|
+
query_name TEXT NOT NULL,
|
|
433
|
+
error_class TEXT NOT NULL,
|
|
434
|
+
error_message TEXT NOT NULL,
|
|
435
|
+
traceback TEXT,
|
|
436
|
+
failed_at TIMESTAMP NOT NULL,
|
|
437
|
+
PRIMARY KEY (source_hash, query_hash, variables_hash)
|
|
438
|
+
)
|
|
439
|
+
"""
|
|
440
|
+
)
|
|
441
|
+
self.conn.execute(
|
|
442
|
+
"""
|
|
443
|
+
CREATE TABLE IF NOT EXISTS _face_query_index (
|
|
444
|
+
face_slug TEXT NOT NULL,
|
|
445
|
+
query_name TEXT NOT NULL,
|
|
446
|
+
source_hash TEXT NOT NULL,
|
|
447
|
+
query_hash TEXT NOT NULL,
|
|
448
|
+
variables_hash TEXT NOT NULL,
|
|
449
|
+
created_at TIMESTAMP NOT NULL,
|
|
450
|
+
PRIMARY KEY (face_slug, query_name, variables_hash)
|
|
451
|
+
)
|
|
452
|
+
"""
|
|
453
|
+
)
|
|
454
|
+
|
|
455
|
+
def _table_exists(self, name: str) -> bool:
|
|
456
|
+
try:
|
|
457
|
+
result = self.conn.execute(
|
|
458
|
+
"SELECT COUNT(*) FROM information_schema.tables WHERE table_name = ?",
|
|
459
|
+
[name],
|
|
460
|
+
).fetchone()
|
|
461
|
+
return bool(result and result[0] > 0)
|
|
462
|
+
except duckdb.CatalogException:
|
|
463
|
+
return False
|
|
464
|
+
|
|
465
|
+
def _next_sequences(
|
|
466
|
+
self, table_name: str, is_full_refresh: bool
|
|
467
|
+
) -> tuple[int, int]:
|
|
468
|
+
if not self._table_exists(table_name):
|
|
469
|
+
return (1, 1)
|
|
470
|
+
try:
|
|
471
|
+
row = self.conn.execute(
|
|
472
|
+
f"""
|
|
473
|
+
SELECT COALESCE(MAX(_snapshot_set_seq), 0),
|
|
474
|
+
COALESCE(MAX(_snapshot_run_seq), 0)
|
|
475
|
+
FROM {_q(table_name)}
|
|
476
|
+
"""
|
|
477
|
+
).fetchone()
|
|
478
|
+
if row is None:
|
|
479
|
+
return (1, 1)
|
|
480
|
+
set_seq, run_seq = (row[0] or 0), (row[1] or 0)
|
|
481
|
+
if is_full_refresh:
|
|
482
|
+
return (set_seq + 1, run_seq + 1)
|
|
483
|
+
return (set_seq or 1, run_seq + 1)
|
|
484
|
+
except duckdb.CatalogException:
|
|
485
|
+
return (1, 1)
|
|
486
|
+
|
|
487
|
+
def _ensure_result_table(self, table_name: str, data: list[dict[str, Any]]) -> None:
|
|
488
|
+
if self._table_exists(table_name):
|
|
489
|
+
return
|
|
490
|
+
sample = data[0]
|
|
491
|
+
cols = [
|
|
492
|
+
"_snapshot_set_seq INTEGER",
|
|
493
|
+
"_snapshot_run_seq INTEGER",
|
|
494
|
+
"_run_timestamp TIMESTAMP",
|
|
495
|
+
"_source_hash VARCHAR",
|
|
496
|
+
"_variables JSON",
|
|
497
|
+
]
|
|
498
|
+
for col_name, value in sample.items():
|
|
499
|
+
col_type = _infer_type(value)
|
|
500
|
+
cols.append(f'"{col_name}" {col_type}')
|
|
501
|
+
self.conn.execute(f"CREATE TABLE {_q(table_name)} ({', '.join(cols)})")
|
|
502
|
+
self.conn.execute(
|
|
503
|
+
f"""
|
|
504
|
+
CREATE INDEX {_q(f'idx_{table_name}')}
|
|
505
|
+
ON {_q(table_name)} (_snapshot_set_seq, _snapshot_run_seq)
|
|
506
|
+
"""
|
|
507
|
+
)
|
|
508
|
+
|
|
509
|
+
def _insert_rows(
|
|
510
|
+
self,
|
|
511
|
+
table_name: str,
|
|
512
|
+
data: list[dict[str, Any]],
|
|
513
|
+
set_seq: int,
|
|
514
|
+
run_seq: int,
|
|
515
|
+
source_hash: str,
|
|
516
|
+
variables: dict[str, Any],
|
|
517
|
+
) -> None:
|
|
518
|
+
n = len(data)
|
|
519
|
+
now = datetime.now()
|
|
520
|
+
|
|
521
|
+
if HAS_PYARROW:
|
|
522
|
+
metadata = {
|
|
523
|
+
"_snapshot_set_seq": pa.array([set_seq] * n, type=pa.int64()),
|
|
524
|
+
"_snapshot_run_seq": pa.array([run_seq] * n, type=pa.int64()),
|
|
525
|
+
"_run_timestamp": pa.array([now] * n, type=pa.timestamp("us")),
|
|
526
|
+
"_source_hash": pa.array([source_hash] * n),
|
|
527
|
+
"_variables": pa.array([json.dumps(variables)] * n),
|
|
528
|
+
}
|
|
529
|
+
data_cols = {
|
|
530
|
+
k: _cache_safe_arrow_array([row.get(k) for row in data])
|
|
531
|
+
for k in data[0]
|
|
532
|
+
}
|
|
533
|
+
arrow_table = pa.table({**metadata, **data_cols})
|
|
534
|
+
self.conn.register("_df_insert_buf", arrow_table)
|
|
535
|
+
self.conn.execute(
|
|
536
|
+
f'INSERT INTO {_q(table_name)} SELECT * FROM "_df_insert_buf"'
|
|
537
|
+
)
|
|
538
|
+
self.conn.unregister("_df_insert_buf")
|
|
539
|
+
else:
|
|
540
|
+
for row in data:
|
|
541
|
+
values = [
|
|
542
|
+
set_seq,
|
|
543
|
+
run_seq,
|
|
544
|
+
now,
|
|
545
|
+
source_hash,
|
|
546
|
+
json.dumps(variables),
|
|
547
|
+
]
|
|
548
|
+
values.extend(row.get(k) for k in data[0])
|
|
549
|
+
placeholders = ", ".join("?" * len(values))
|
|
550
|
+
self.conn.execute(
|
|
551
|
+
f"INSERT INTO {_q(table_name)} VALUES ({placeholders})", values
|
|
552
|
+
)
|
|
553
|
+
|
|
554
|
+
def _update_view(self, table_name: str, primary_key: str | None = None) -> None:
|
|
555
|
+
view = _view_name_for_table(table_name)
|
|
556
|
+
self.conn.execute(f"DROP VIEW IF EXISTS {_q(view)}")
|
|
557
|
+
|
|
558
|
+
all_cols = self._get_columns(table_name)
|
|
559
|
+
data_cols = sorted(all_cols - _METADATA_COLS)
|
|
560
|
+
if not data_cols:
|
|
561
|
+
return
|
|
562
|
+
|
|
563
|
+
exclude = ", ".join(sorted(_METADATA_COLS))
|
|
564
|
+
quoted_tbl = _q(table_name)
|
|
565
|
+
|
|
566
|
+
if primary_key and primary_key in data_cols:
|
|
567
|
+
quoted_pk = _q(primary_key)
|
|
568
|
+
sql = f"""
|
|
569
|
+
CREATE VIEW {_q(view)} AS
|
|
570
|
+
SELECT * EXCLUDE ({exclude})
|
|
571
|
+
FROM {quoted_tbl}
|
|
572
|
+
WHERE _snapshot_set_seq = (SELECT MAX(_snapshot_set_seq) FROM {quoted_tbl})
|
|
573
|
+
QUALIFY ROW_NUMBER() OVER (
|
|
574
|
+
PARTITION BY {quoted_pk} ORDER BY _snapshot_run_seq DESC
|
|
575
|
+
) = 1
|
|
576
|
+
"""
|
|
577
|
+
else:
|
|
578
|
+
sql = f"""
|
|
579
|
+
CREATE VIEW {_q(view)} AS
|
|
580
|
+
SELECT * EXCLUDE ({exclude})
|
|
581
|
+
FROM {quoted_tbl}
|
|
582
|
+
WHERE _snapshot_set_seq = (SELECT MAX(_snapshot_set_seq) FROM {quoted_tbl})
|
|
583
|
+
"""
|
|
584
|
+
self.conn.execute(sql)
|
|
585
|
+
|
|
586
|
+
def _get_columns(self, table_name: str) -> set[str]:
|
|
587
|
+
try:
|
|
588
|
+
rows = self.conn.execute(
|
|
589
|
+
"SELECT column_name FROM information_schema.columns WHERE table_name = ?",
|
|
590
|
+
[table_name],
|
|
591
|
+
).fetchall()
|
|
592
|
+
return {r[0] for r in rows}
|
|
593
|
+
except duckdb.CatalogException:
|
|
594
|
+
return set()
|
|
595
|
+
|
|
596
|
+
def _upsert_face_query_index(
|
|
597
|
+
self,
|
|
598
|
+
face_slug: str,
|
|
599
|
+
query_name: str,
|
|
600
|
+
source_hash: str,
|
|
601
|
+
query_hash: str,
|
|
602
|
+
variables_hash: str,
|
|
603
|
+
) -> None:
|
|
604
|
+
self.conn.execute(
|
|
605
|
+
"""
|
|
606
|
+
INSERT OR REPLACE INTO _face_query_index
|
|
607
|
+
(face_slug, query_name, source_hash, query_hash, variables_hash, created_at)
|
|
608
|
+
VALUES (?, ?, ?, ?, ?, ?)
|
|
609
|
+
""",
|
|
610
|
+
[
|
|
611
|
+
face_slug,
|
|
612
|
+
query_name,
|
|
613
|
+
source_hash,
|
|
614
|
+
query_hash,
|
|
615
|
+
variables_hash,
|
|
616
|
+
datetime.now(),
|
|
617
|
+
],
|
|
618
|
+
)
|
|
619
|
+
|
|
620
|
+
def _view_for_face_query(self, face_slug: str, query_name: str) -> str | None:
|
|
621
|
+
"""Resolve (face_slug, query_name) → view name via _face_query_index."""
|
|
622
|
+
row = self.conn.execute(
|
|
623
|
+
"""
|
|
624
|
+
SELECT source_hash, query_hash, variables_hash
|
|
625
|
+
FROM _face_query_index
|
|
626
|
+
WHERE face_slug = ? AND query_name = ?
|
|
627
|
+
ORDER BY created_at DESC LIMIT 1
|
|
628
|
+
""",
|
|
629
|
+
[face_slug, query_name],
|
|
630
|
+
).fetchone()
|
|
631
|
+
if row is None:
|
|
632
|
+
return None
|
|
633
|
+
source_hash, query_hash, variables_hash = row
|
|
634
|
+
tbl = _result_table_name(source_hash, query_hash, variables_hash)
|
|
635
|
+
return _view_name_for_table(tbl)
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
639
|
+
# Private module-level helpers
|
|
640
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
def _q(identifier: str) -> str:
|
|
644
|
+
"""Quote a SQL identifier (already assumed safe)."""
|
|
645
|
+
escaped = identifier.replace('"', '""')
|
|
646
|
+
return f'"{escaped}"'
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
# Maximum DECIMAL precision DuckDB supports (HUGEINT-backed). BigQuery NUMERIC
# can return values with higher precision (e.g. 47,38) which crash duckdb's
# arrow-table register with `Unsupported Internal Arrow Type for Decimal`.
# Decimal arrays whose precision exceeds this limit are cast to float64 by
# _cache_safe_arrow_array before they are registered.
_DUCKDB_MAX_DECIMAL_PRECISION = 38
|
|
653
|
+
|
|
654
|
+
|
|
655
|
+
def _cache_safe_arrow_array(values: list[Any]) -> "pa.Array":
    """Convert ``values`` to a pyarrow array that DuckDB can ingest.

    pyarrow picks the array type by inference. When that type is a decimal
    whose precision is above ``_DUCKDB_MAX_DECIMAL_PRECISION`` (this happens
    with BigQuery NUMERIC results such as NUMERIC(47, 38)), the array is cast
    to float64, because DuckDB's DECIMAL tops out at precision 38 and
    ``conn.register`` raises on wider decimals. All other arrays are returned
    exactly as inferred.
    """
    inferred = pa.array(values)
    inferred_type = inferred.type
    if not pa.types.is_decimal(inferred_type):
        return inferred
    if inferred_type.precision <= _DUCKDB_MAX_DECIMAL_PRECISION:
        return inferred
    # Too wide for DuckDB: trade precision for compatibility.
    return inferred.cast(pa.float64())
|
|
673
|
+
|
|
674
|
+
|
|
675
|
+
def _infer_type(value: Any) -> str:
|
|
676
|
+
if value is None:
|
|
677
|
+
return "VARCHAR"
|
|
678
|
+
if isinstance(value, bool):
|
|
679
|
+
return "BOOLEAN"
|
|
680
|
+
if isinstance(value, int):
|
|
681
|
+
return "BIGINT"
|
|
682
|
+
if isinstance(value, float):
|
|
683
|
+
return "DOUBLE"
|
|
684
|
+
if isinstance(value, datetime):
|
|
685
|
+
return "TIMESTAMP"
|
|
686
|
+
if isinstance(value, (list, dict)):
|
|
687
|
+
return "JSON"
|
|
688
|
+
return "VARCHAR"
|
|
689
|
+
|
|
690
|
+
|
|
691
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
692
|
+
# Entry-point helper
|
|
693
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
694
|
+
|
|
695
|
+
|
|
696
|
+
def open_cache_from_env() -> DuckDBCache | None:
    """Build a DuckDBCache from the ``DFT_CACHE_PATH`` environment variable.

    Returns:
        A cache opened on the configured path, or None when the variable is
        unset or empty.

    Raises:
        ValueError: If the variable is set but the path does not exist.
    """
    configured = os.getenv("DFT_CACHE_PATH")
    if configured is None or configured == "":
        return None

    cache_file = Path(configured)
    if cache_file.exists():
        return DuckDBCache(db_path=cache_file)

    raise ValueError(
        f"DFT_CACHE_PATH points to a non-existent file: {cache_file}. "
        "Create the cache file first (construct DuckDBCache(db_path=...) and populate it) "
        "or unset the variable."
    )
|