dagster-evidence 0.1.7__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,660 @@
1
+ """Project classes for Evidence projects.
2
+
3
+ This module defines the project types for Evidence projects, including
4
+ local file-based projects and Evidence Studio cloud projects.
5
+ """
6
+
7
+ import os
8
+ import shutil
9
+ from abc import abstractmethod
10
+ from collections.abc import Sequence
11
+ from dataclasses import dataclass
12
+ from pathlib import Path
13
+ from tempfile import TemporaryDirectory
14
+ from typing import TYPE_CHECKING, Annotated, Literal, Optional, Union
15
+
16
+ import dagster as dg
17
+ import yaml
18
+ from dagster._annotations import beta, public
19
+ from dagster._serdes import whitelist_for_serdes
20
+ from dagster.components import Resolver
21
+ from dagster.components.resolved.base import resolve_fields
22
+ from pydantic import BaseModel, Field
23
+
24
+ from .deployments import (
25
+ BaseEvidenceProjectDeployment,
26
+ CustomEvidenceProjectDeploymentArgs,
27
+ EvidenceProjectNetlifyDeploymentArgs,
28
+ GithubPagesEvidenceProjectDeploymentArgs,
29
+ resolve_evidence_project_deployment,
30
+ )
31
+ from .sources import (
32
+ EvidenceProjectTranslatorData,
33
+ EvidenceSourceTranslatorData,
34
+ ProjectDagsterMetadata,
35
+ SourceContent,
36
+ )
37
+
38
+ if TYPE_CHECKING:
39
+ from .translator import DagsterEvidenceTranslator
40
+
41
+
42
@beta
@public
@whitelist_for_serdes
@dataclass
class EvidenceProjectData:
    """Parsed data from an Evidence project.

    Instances are produced by ``BaseEvidenceProject.parse_evidence_project``
    and handed to ``load_evidence_project_assets`` / ``load_source_assets``.

    Attributes:
        project_name: The name of the Evidence project.
        sources_by_id: Dictionary mapping source folder names to their content.
    """

    # Name reported by the project's ``get_evidence_project_name()``.
    project_name: str
    # One entry per source; the key is the source folder name.
    sources_by_id: dict[str, SourceContent]
56
+
57
+
58
@beta
@public
class BaseEvidenceProject(dg.ConfigurableResource):
    """Base class for Evidence project configurations.

    This abstract class defines the interface for Evidence projects.
    Implementations include LocalEvidenceProject for local file-based
    projects and EvidenceStudioProject for cloud-hosted projects.

    Subclass this class to implement custom project types.
    """

    @public
    @abstractmethod
    def get_evidence_project_name(self) -> str:
        """Get the name of the Evidence project.

        Returns:
            The project name string.
        """
        raise NotImplementedError()

    @public
    @abstractmethod
    def load_evidence_project_assets(
        self,
        evidence_project_data: EvidenceProjectData,
        translator: "DagsterEvidenceTranslator",
    ) -> tuple[
        Sequence[dg.AssetsDefinition | dg.AssetSpec], Sequence[dg.SensorDefinition]
    ]:
        """Load and build Dagster assets for this Evidence project.

        Args:
            evidence_project_data: Parsed project data containing sources.
            translator: Translator instance for converting Evidence objects to AssetSpecs.

        Returns:
            A tuple containing:
                - A sequence of AssetSpecs and AssetsDefinitions
                - A sequence of SensorDefinitions for source change detection
        """
        raise NotImplementedError()

    @public
    def load_source_assets(
        self,
        evidence_project_data: EvidenceProjectData,
        translator: "DagsterEvidenceTranslator",
    ) -> tuple[list[dg.AssetsDefinition], list[dg.AssetKey], list[dg.SensorDefinition]]:
        """Load source assets using the translator.

        Args:
            evidence_project_data: Parsed project data containing sources.
            translator: Translator instance for converting Evidence objects to assets.

        Returns:
            A tuple containing:
                - List of AssetsDefinition for source queries (may be empty if hiding is enabled)
                - List of AssetKeys for project dependencies (source asset keys or table_deps keys)
                - List of SensorDefinition for source change detection

        Raises:
            TypeError: If the translator does not return an AssetsDefinition
                for a (non-hidden) source query.
        """
        source_assets: list[dg.AssetsDefinition] = []
        project_deps: list[dg.AssetKey] = []
        source_sensors: list[dg.SensorDefinition] = []

        for source_group, source_content in evidence_project_data.sources_by_id.items():
            # Use translator to get source class (validates source type is known)
            source_type = source_content.connection.type
            source_class = translator.get_source_class(source_type)
            # Instantiate source to use instance methods for per-source metadata overrides
            source = source_class(source_content)

            # Check if this source should hide its assets (uses instance method for overrides)
            should_hide = source.get_hide_source_asset()

            # Get the source path for resolving relative paths
            source_path = self.get_source_path(source_group)

            for query in source_content.queries:
                # The source class extracts data from a translator-data object
                # that does not yet carry extracted_data ...
                initial_data = EvidenceSourceTranslatorData(
                    source_content=source_content,
                    source_group=source_group,
                    query=query,
                    source_path=source_path,
                )
                extracted = source_class.extract_data_from_source(initial_data)
                # ... and the final object is rebuilt with the extracted data attached.
                data = EvidenceSourceTranslatorData(
                    source_content=source_content,
                    source_group=source_group,
                    query=query,
                    extracted_data=extracted,
                    source_path=source_path,
                )

                if should_hide:
                    # Don't create a source asset; the project depends directly
                    # on the tables this query references instead.
                    for ref in extracted.get("table_deps") or []:
                        if ref.get("table"):
                            project_deps.append(dg.AssetKey([ref["table"]]))
                    # No asset exists for a hidden source, so there is nothing
                    # a sensor could target; skip the sensor step entirely.
                    continue

                # Create the source asset and add its key to project_deps.
                # For source data, the translator is expected to return an AssetsDefinition.
                asset = translator.get_asset_spec(data)
                if not isinstance(asset, dg.AssetsDefinition):
                    # Explicit check instead of `assert`, which is stripped under `python -O`.
                    raise TypeError(
                        "Expected translator.get_asset_spec() to return an "
                        f"AssetsDefinition for source '{source_group}', "
                        f"got {type(asset).__name__}"
                    )
                source_assets.append(asset)
                project_deps.append(asset.key)

                # Sensor creation needs the asset key, so it only happens on
                # this (non-hidden) path where `asset` is bound.
                if source.get_source_sensor_enabled():
                    sensor = source_class.get_source_sensor(data, asset.key)
                    if sensor is not None:
                        source_sensors.append(sensor)

        return source_assets, project_deps, source_sensors

    @public
    @abstractmethod
    def parse_evidence_project_sources(self) -> dict[str, SourceContent]:
        """Parse the sources from the Evidence project.

        Returns:
            Dictionary mapping source folder names to their SourceContent.
        """
        raise NotImplementedError()

    @public
    def get_source_path(self, source_group: str) -> str | None:
        """Get the absolute path to a source directory.

        This method is used to resolve relative paths in source configurations
        (e.g., DuckDB database file paths).

        Args:
            source_group: The source folder name (e.g., "orders_db").

        Returns:
            The absolute path to the source directory, or None if not applicable.
            The base implementation always returns None.
        """
        return None

    @public
    def parse_evidence_project(self) -> EvidenceProjectData:
        """Parse the full Evidence project into structured data.

        Returns:
            EvidenceProjectData containing project name and sources.
        """
        sources = self.parse_evidence_project_sources()

        return EvidenceProjectData(
            project_name=self.get_evidence_project_name(), sources_by_id=sources
        )
218
+
219
+
220
@beta
@public
class LocalEvidenceProject(BaseEvidenceProject):
    """Local Evidence project backed by a file system directory.

    This project type reads sources from a local Evidence project directory,
    builds the project using npm, and deploys using the configured deployment.

    Attributes:
        project_path: Path to the Evidence project directory containing
            sources/ folder and package.json.
        project_deployment: Deployment configuration (GitHub Pages, Netlify, or custom).
        npm_executable: Path to npm executable (default: "npm").

    Example:

        Using in a Dagster component configuration:

        .. code-block:: yaml

            # defs.yaml
            type: dagster_evidence.EvidenceProjectComponentV2
            attributes:
              evidence_project:
                project_type: local
                project_path: ./evidence-dashboards/sales-dashboard
                project_deployment:
                  type: github_pages
                  github_repo: my-org/sales-dashboard
                  branch: gh-pages

        Project structure expected:

        .. code-block:: text

            my-evidence-project/
            ├── package.json
            ├── evidence.config.yaml
            └── sources/
                ├── orders_db/
                │   ├── connection.yaml
                │   ├── orders.sql
                │   └── customers.sql
                └── metrics_db/
                    ├── connection.yaml
                    └── daily_metrics.sql
    """

    project_path: str
    project_deployment: BaseEvidenceProjectDeployment
    npm_executable: str = "npm"

    def parse_evidence_project_sources(self) -> dict[str, SourceContent]:
        """Read sources folder from Evidence project and build source dictionary.

        Source folders and their ``*.sql`` files are visited in sorted order so
        the resulting ordering does not depend on filesystem enumeration order.

        Returns:
            Dictionary mapping folder names to their SourceContent.

        Raises:
            FileNotFoundError: If sources folder or connection.yaml files are missing.
            ValueError: If a connection.yaml does not contain a YAML mapping
                (for example, the file is empty).
        """
        sources_path = Path(self.project_path) / "sources"

        if not sources_path.exists():
            raise FileNotFoundError(f"Sources folder not found: {sources_path}")

        result: dict[str, SourceContent] = {}

        for folder in sorted(sources_path.iterdir()):
            if not folder.is_dir():
                continue

            # Read connection.yaml (required)
            connection_file = folder / "connection.yaml"
            if not connection_file.exists():
                raise FileNotFoundError(f"connection.yaml not found in {folder}")

            with open(connection_file, encoding="utf-8") as f:
                connection = yaml.safe_load(f)

            # safe_load yields None for an empty document and may yield a
            # list/scalar for malformed files; fail with a clear message
            # rather than an AttributeError on `.get` below.
            if not isinstance(connection, dict):
                raise ValueError(
                    f"connection.yaml in {folder} must contain a YAML mapping, "
                    f"got {type(connection).__name__}"
                )

            if connection.get("type") == "gsheets":
                # For gsheets, synthesize queries from sheets config (no SQL files)
                from .sources import GSheetsEvidenceProjectSource

                queries = GSheetsEvidenceProjectSource.build_queries_from_sheets_config(
                    connection
                )
            else:
                # One query per .sql file; the query name is the filename
                # without its extension.
                queries = [
                    {
                        "name": sql_file.stem,
                        "content": sql_file.read_text(encoding="utf-8"),
                    }
                    for sql_file in sorted(folder.glob("*.sql"))
                ]

            result[folder.name] = SourceContent.from_dict(
                {"connection": connection, "queries": queries}
            )

        return result

    def get_evidence_project_name(self) -> str:
        """Return the final path component of ``project_path`` as the project name."""
        return Path(self.project_path).name

    def get_source_path(self, source_group: str) -> str | None:
        """Get the path to a source directory.

        Args:
            source_group: The source folder name (e.g., "orders_db").

        Returns:
            ``<project_path>/sources/<source_group>`` as a string. It is
            absolute only if ``project_path`` itself is absolute.
        """
        return str(Path(self.project_path) / "sources" / source_group)

    def _parse_project_dagster_metadata(self) -> ProjectDagsterMetadata:
        """Parse Dagster metadata from evidence.config.yaml.

        Reads ``meta.dagster.group_name`` from the config file.

        Returns:
            ProjectDagsterMetadata instance with parsed values,
            or default metadata if config doesn't exist. Missing or null
            ``meta`` / ``dagster`` sections yield ``group_name=None``.
        """
        config_path = Path(self.project_path) / "evidence.config.yaml"
        if not config_path.exists():
            return ProjectDagsterMetadata()

        with open(config_path, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f) or {}

        # `or {}` (rather than a .get default) also covers keys that are
        # present but null, e.g. a bare `meta:` line in the YAML.
        meta = config.get("meta") or {}
        dagster_meta = meta.get("dagster") or {}
        return ProjectDagsterMetadata(
            group_name=dagster_meta.get("group_name"),
        )

    def load_evidence_project_assets(
        self,
        evidence_project_data: EvidenceProjectData,
        translator: "DagsterEvidenceTranslator",
    ) -> tuple[
        Sequence[dg.AssetsDefinition | dg.AssetSpec], Sequence[dg.SensorDefinition]
    ]:
        """Build the source assets plus the build-and-deploy project asset.

        Args:
            evidence_project_data: Parsed project data containing sources.
            translator: Translator used for both source and project specs.

        Returns:
            A tuple of (source assets followed by the project build asset,
            source sensors).

        Raises:
            TypeError: If the translator does not return an AssetSpec for the
                project-level data.
        """
        # Get source assets, project dependencies, and sensors via translator
        source_assets, source_deps, source_sensors = self.load_source_assets(
            evidence_project_data, translator
        )

        # Parse project-level Dagster metadata from evidence.config.yaml
        project_metadata = self._parse_project_dagster_metadata()

        # Get project asset spec via translator
        project_data = EvidenceProjectTranslatorData(
            project_name=self.get_evidence_project_name(),
            sources_by_id=evidence_project_data.sources_by_id,
            source_deps=source_deps,
            dagster_metadata=project_metadata,
        )
        project_spec = translator.get_asset_spec(project_data)
        # For project data, the translator is expected to return an AssetSpec.
        if not isinstance(project_spec, dg.AssetSpec):
            # Explicit check instead of `assert`, which is stripped under `python -O`.
            raise TypeError(
                "Expected translator.get_asset_spec() to return an AssetSpec "
                f"for the project, got {type(project_spec).__name__}"
            )

        # Trigger the project asset when any direct dependency is newly updated.
        automation_condition = dg.AutomationCondition.any_deps_match(
            dg.AutomationCondition.newly_updated()
        )

        # Create the executable asset using the translated spec.
        # source_deps may contain table_deps keys directly when source asset
        # hiding is enabled.
        @dg.asset(
            key=project_spec.key,
            kinds=project_spec.kinds or {"evidence"},
            deps=source_deps,
            automation_condition=automation_condition,
            group_name=project_spec.group_name,
        )
        def build_and_deploy_evidence_project(
            context: dg.AssetExecutionContext,
            pipes_subprocess_client: dg.PipesSubprocessClient,
        ):
            with TemporaryDirectory() as temp_root:
                # Work on a copy so the build never touches the original project.
                project_dir = os.path.join(temp_root, "project")
                shutil.copytree(
                    self.project_path,
                    project_dir,
                    ignore=shutil.ignore_patterns(
                        "logs", ".git", "*.tmp", "node_modules"
                    ),
                )
                # Get base path from deployment (e.g., GitHub Pages needs basePath config)
                build_output_dir = self.project_deployment.get_base_path(
                    self.project_path
                )

                # npm runs with cwd=project_dir and the deploy step reads from
                # this same joined path, so create the output directory there
                # rather than relative to the Dagster process's working
                # directory. os.path.join leaves an absolute base path as-is.
                build_path = os.path.join(project_dir, build_output_dir)
                os.makedirs(build_path, exist_ok=True)

                build_env = os.environ.copy()
                build_env["EVIDENCE_BUILD_DIR"] = build_output_dir
                build_env["EVIDENCE_PROJECT_TMP_DIR"] = project_dir
                build_env["EVIDENCE_PROJECT_PATH"] = self.project_path

                context.log.info(f"Building Evidence project to: {build_output_dir}")
                context.log.info(f"Evidence project path: {self.project_path}")

                # Install dependencies, refresh sources, then build.
                for npm_args in (["install"], ["run", "sources"], ["run", "build"]):
                    self._run_cmd(
                        context=context,
                        pipes_subprocess_client=pipes_subprocess_client,
                        project_path=project_dir,
                        cmd=[self.npm_executable, *npm_args],
                        env=build_env,
                    )

                # Deploy from the build output folder
                self.project_deployment.deploy_evidence_project(
                    evidence_project_build_path=build_path,
                    context=context,
                    pipes_subprocess_client=pipes_subprocess_client,
                    env=build_env,
                )
            return dg.MaterializeResult(metadata={"status": "success"})

        return list(source_assets) + [build_and_deploy_evidence_project], source_sensors

    def _run_cmd(
        self,
        context: dg.AssetExecutionContext,
        pipes_subprocess_client: dg.PipesSubprocessClient,
        project_path: str,
        cmd: Sequence[str],
        env: Optional[dict[str, str]] = None,
    ) -> None:
        """Log and run a command through the pipes subprocess client.

        Args:
            context: Asset execution context, used for logging and passed to the client.
            pipes_subprocess_client: Client used to run the command.
            project_path: Working directory for the command.
            cmd: Command and arguments.
            env: Environment for the subprocess; when None or empty, a copy of
                the current process environment is used.
        """
        context.log.info(f"{project_path}$ {' '.join(cmd)}")
        pipes_subprocess_client.run(
            command=cmd,
            cwd=project_path,
            context=context,
            env=env or dict(os.environ),
        )
477
+
478
+
479
@beta
@public
class LocalEvidenceProjectArgs(dg.Model, dg.Resolvable):
    """Arguments for configuring a local Evidence project.

    ``resolve_evidence_project`` resolves these fields and builds a
    ``LocalEvidenceProject`` from ``project_path`` and ``project_deployment``.

    Example:

        .. code-block:: yaml

            evidence_project:
              project_type: local
              project_path: ./my-evidence-project
              project_deployment:
                type: github_pages
                github_repo: owner/repo

    Attributes:
        project_type: Must be "local" to use this project type.
        project_path: Path to the Evidence project directory.
        project_deployment: Deployment configuration for the built project.
    """

    # Discriminator; "local" is also the value assumed by
    # resolve_evidence_project when project_type is omitted.
    project_type: Literal["local"] = Field(
        default="local",
        description="Project type identifier.",
    )
    # Required. resolve_evidence_project passes this through
    # context.resolve_source_relative_path before use.
    project_path: str = Field(
        ...,
        description="Path to the Evidence project directory.",
    )
    # Resolved by resolve_evidence_project_deployment from one of the three
    # deployment argument models listed in model_field_type.
    project_deployment: Annotated[
        BaseEvidenceProjectDeployment,
        Resolver(
            resolve_evidence_project_deployment,
            model_field_name="project_deployment",
            model_field_type=Union[
                GithubPagesEvidenceProjectDeploymentArgs.model(),
                EvidenceProjectNetlifyDeploymentArgs.model(),
                CustomEvidenceProjectDeploymentArgs.model(),
            ],
            description="Deployment configuration for the Evidence project.",
            examples=[],
        ),
    ]
523
+
524
+
525
@beta
@public
class EvidenceStudioProject(BaseEvidenceProject):
    """Evidence Studio cloud-hosted project.

    **Coming Soon** - This project type is planned but not yet implemented.
    Every method below raises ``NotImplementedError``.

    This project type will connect to Evidence Studio to fetch project
    configuration and sources from the cloud, enabling seamless integration
    between Dagster pipelines and Evidence Studio hosted dashboards.

    Planned Features:
        - Fetch source configurations from Evidence Studio API
        - Sync local data sources with cloud project
        - Trigger cloud builds from Dagster
        - Monitor deployment status

    Attributes:
        evidence_studio_url: URL of the Evidence Studio workspace.

    Example:

        .. code-block:: yaml

            evidence_project:
              project_type: evidence_studio
              evidence_studio_url: https://evidence.studio/my-workspace

    Note:
        If you need Evidence Studio integration, please open an issue on GitHub
        to help prioritize this feature.
    """

    evidence_studio_url: str

    def parse_evidence_project_sources(self) -> dict[str, SourceContent]:
        """Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "Evidence Studio projects are not yet implemented; "
            "use the 'local' project type instead."
        )

    def get_evidence_project_name(self) -> str:
        """Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "Evidence Studio projects are not yet implemented; "
            "use the 'local' project type instead."
        )

    def load_evidence_project_assets(
        self,
        evidence_project_data: EvidenceProjectData,
        translator: "DagsterEvidenceTranslator",
    ) -> tuple[
        Sequence[dg.AssetsDefinition | dg.AssetSpec], Sequence[dg.SensorDefinition]
    ]:
        """Not implemented yet.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "Evidence Studio projects are not yet implemented; "
            "use the 'local' project type instead."
        )
574
+
575
+
576
@beta
@public
class EvidenceStudioProjectArgs(dg.Model, dg.Resolvable):
    """Arguments for configuring an Evidence Studio project.

    **Coming Soon** - This project type is planned but not yet implemented.

    Example:

        .. code-block:: yaml

            evidence_project:
              project_type: evidence_studio
              evidence_studio_url: https://evidence.studio/my-workspace
              evidence_project_git_url: https://github.com/org/evidence-project.git

    Attributes:
        project_type: Must be "evidence_studio" to use this project type.
        evidence_studio_url: URL of the Evidence Studio workspace.
        evidence_project_git_url: Git URL for the underlying project repository.

    Note:
        Use the ``local`` project type with a git clone of your Evidence project
        as a workaround until Evidence Studio integration is implemented.
    """

    # Discriminator checked by resolve_evidence_project.
    project_type: Literal["evidence_studio"] = Field(
        default="evidence_studio",
        description="Project type identifier.",
    )
    # Required; forwarded to EvidenceStudioProject by resolve_evidence_project.
    evidence_studio_url: str = Field(
        ...,
        description="Evidence Studio URL.",
    )
    # Optional; defaults to the placeholder string "no_url".
    # NOTE(review): resolve_evidence_project does not forward this value to
    # EvidenceStudioProject, so it currently has no effect there.
    evidence_project_git_url: str = Field(
        default="no_url",
        description="Git URL for the Evidence project.",
    )
614
+
615
+
616
@public
def resolve_evidence_project(
    context: dg.ResolutionContext, model: BaseModel
) -> BaseEvidenceProject:
    """Turn a parsed project configuration into a concrete project object.

    Used by the component resolution system to convert YAML configuration
    into project instances. The ``project_type`` value (read from a dict key
    or a model attribute, defaulting to ``"local"``) selects which argument
    class the fields are resolved against.

    Args:
        context: The resolution context providing access to paths and config.
        model: The parsed configuration model (a plain dict is also accepted).

    Returns:
        A LocalEvidenceProject for ``"local"`` or an EvidenceStudioProject
        for ``"evidence_studio"``.

    Raises:
        NotImplementedError: If an unknown project_type is specified.
    """
    # Dicts and model objects expose the discriminator differently.
    if isinstance(model, dict):
        kind = model.get("project_type", "local")
    else:
        kind = getattr(model, "project_type", "local")

    if kind == "local":
        local_fields = resolve_fields(
            model=model, resolved_cls=LocalEvidenceProjectArgs, context=context
        )
        # The configured path is interpreted relative to the config source.
        located_path = context.resolve_source_relative_path(
            local_fields["project_path"]
        )
        return LocalEvidenceProject(
            project_path=str(located_path),
            project_deployment=local_fields["project_deployment"],
        )

    if kind == "evidence_studio":
        studio_fields = resolve_fields(
            model=model, resolved_cls=EvidenceStudioProjectArgs, context=context
        )
        studio_url = studio_fields["evidence_studio_url"]
        return EvidenceStudioProject(evidence_studio_url=studio_url)

    raise NotImplementedError(f"Unknown project type: {kind}")