dagster-pandas 0.19.5__tar.gz → 0.27.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dagster-pandas might be problematic. Click here for more details.
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/LICENSE +1 -1
- {dagster-pandas-0.19.5/dagster_pandas.egg-info → dagster_pandas-0.27.9}/PKG-INFO +21 -7
- dagster_pandas-0.27.9/README.md +4 -0
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/__init__.py +12 -12
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/constraints.py +63 -102
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/data_frame.py +16 -27
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/examples/__init__.py +9 -4
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/examples/pandas_hello_world/environments/pandas_hello_world_prod.yaml +1 -1
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/examples/pandas_hello_world/environments/pandas_hello_world_test.yaml +1 -1
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/examples/pandas_hello_world/environments/papermill_pandas_hello_world_prod.yaml +1 -1
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/examples/pandas_hello_world/environments/papermill_pandas_hello_world_test.yaml +1 -1
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/examples/pandas_hello_world/ops.py +1 -2
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/examples/pandas_hello_world.yaml +1 -1
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/validation.py +4 -5
- dagster_pandas-0.27.9/dagster_pandas/version.py +1 -0
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9/dagster_pandas.egg-info}/PKG-INFO +21 -7
- dagster_pandas-0.27.9/dagster_pandas.egg-info/requires.txt +2 -0
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/setup.py +11 -8
- dagster-pandas-0.19.5/README.md +0 -4
- dagster-pandas-0.19.5/dagster_pandas/version.py +0 -1
- dagster-pandas-0.19.5/dagster_pandas.egg-info/requires.txt +0 -2
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/MANIFEST.in +0 -0
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/examples/pandas_hello_world/__init__.py +0 -0
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/examples/pandas_hello_world/env.yaml +0 -0
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/examples/pandas_hello_world/solids.yaml +0 -0
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/examples/workspace.yaml +0 -0
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/py.typed +0 -0
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas.egg-info/SOURCES.txt +0 -0
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas.egg-info/dependency_links.txt +0 -0
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas.egg-info/top_level.txt +0 -0
- {dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/setup.cfg +0 -0
|
@@ -186,7 +186,7 @@
|
|
|
186
186
|
same "printed page" as the copyright notice for easier
|
|
187
187
|
identification within third-party archives.
|
|
188
188
|
|
|
189
|
-
Copyright
|
|
189
|
+
Copyright 2025 Dagster Labs, Inc.
|
|
190
190
|
|
|
191
191
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
192
192
|
you may not use this file except in compliance with the License.
|
|
@@ -1,22 +1,36 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: dagster-pandas
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.27.9
|
|
4
4
|
Summary: Utilities and examples for working with pandas and dagster, an opinionated framework for expressing data pipelines
|
|
5
5
|
Home-page: https://github.com/dagster-io/dagster
|
|
6
|
-
Author:
|
|
7
|
-
Author-email: hello@
|
|
6
|
+
Author: Dagster Labs
|
|
7
|
+
Author-email: hello@dagsterlabs.com
|
|
8
8
|
License: Apache-2.0
|
|
9
|
-
Classifier: Programming Language :: Python :: 3.7
|
|
10
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
11
9
|
Classifier: Programming Language :: Python :: 3.9
|
|
12
10
|
Classifier: Programming Language :: Python :: 3.10
|
|
13
11
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
14
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
15
|
Classifier: Operating System :: OS Independent
|
|
16
|
+
Requires-Python: >=3.9,<3.14
|
|
16
17
|
Description-Content-Type: text/markdown
|
|
17
18
|
License-File: LICENSE
|
|
19
|
+
Requires-Dist: dagster==1.11.9
|
|
20
|
+
Requires-Dist: pandas
|
|
21
|
+
Dynamic: author
|
|
22
|
+
Dynamic: author-email
|
|
23
|
+
Dynamic: classifier
|
|
24
|
+
Dynamic: description
|
|
25
|
+
Dynamic: description-content-type
|
|
26
|
+
Dynamic: home-page
|
|
27
|
+
Dynamic: license
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
Dynamic: requires-dist
|
|
30
|
+
Dynamic: requires-python
|
|
31
|
+
Dynamic: summary
|
|
18
32
|
|
|
19
33
|
# dagster-pandas
|
|
20
34
|
|
|
21
35
|
The docs for `dagster-pandas` can be found
|
|
22
|
-
[here](https://docs.dagster.io/
|
|
36
|
+
[here](https://docs.dagster.io/api/python-api/libraries/dagster-pandas).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
from
|
|
1
|
+
from dagster_shared.libraries import DagsterLibraryRegistry
|
|
2
2
|
|
|
3
|
-
from .constraints import (
|
|
3
|
+
from dagster_pandas.constraints import (
|
|
4
4
|
ColumnWithMetadataException,
|
|
5
5
|
ConstraintWithMetadata,
|
|
6
6
|
ConstraintWithMetadataException,
|
|
@@ -17,34 +17,34 @@ from .constraints import (
|
|
|
17
17
|
non_null_validation,
|
|
18
18
|
nonnull,
|
|
19
19
|
)
|
|
20
|
-
from .data_frame import (
|
|
20
|
+
from dagster_pandas.data_frame import (
|
|
21
21
|
DataFrame,
|
|
22
22
|
create_dagster_pandas_dataframe_type,
|
|
23
23
|
create_structured_dataframe_type,
|
|
24
24
|
)
|
|
25
|
-
from .validation import PandasColumn
|
|
26
|
-
from .version import __version__
|
|
25
|
+
from dagster_pandas.validation import PandasColumn
|
|
26
|
+
from dagster_pandas.version import __version__
|
|
27
27
|
|
|
28
28
|
DagsterLibraryRegistry.register("dagster-pandas", __version__)
|
|
29
29
|
|
|
30
30
|
__all__ = [
|
|
31
|
-
"DataFrame",
|
|
32
|
-
"create_dagster_pandas_dataframe_type",
|
|
33
|
-
"create_structured_dataframe_type",
|
|
34
|
-
"PandasColumn",
|
|
35
31
|
"ColumnWithMetadataException",
|
|
32
|
+
"ConstraintWithMetadata",
|
|
36
33
|
"ConstraintWithMetadataException",
|
|
34
|
+
"DataFrame",
|
|
37
35
|
"MultiAggregateConstraintWithMetadata",
|
|
38
36
|
"MultiColumnConstraintWithMetadata",
|
|
39
|
-
"ConstraintWithMetadata",
|
|
40
37
|
"MultiConstraintWithMetadata",
|
|
38
|
+
"PandasColumn",
|
|
41
39
|
"RowCountConstraint",
|
|
42
40
|
"StrictColumnsConstraint",
|
|
43
41
|
"StrictColumnsWithMetadata",
|
|
44
42
|
"all_unique_validator",
|
|
43
|
+
"categorical_column_validator_factory",
|
|
45
44
|
"column_range_validation_factory",
|
|
45
|
+
"create_dagster_pandas_dataframe_type",
|
|
46
|
+
"create_structured_dataframe_type",
|
|
46
47
|
"dtype_in_set_validation_factory",
|
|
47
|
-
"nonnull",
|
|
48
48
|
"non_null_validation",
|
|
49
|
-
"
|
|
49
|
+
"nonnull",
|
|
50
50
|
]
|
|
@@ -2,6 +2,7 @@ import sys
|
|
|
2
2
|
from collections import defaultdict
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
from functools import wraps
|
|
5
|
+
from typing import Final
|
|
5
6
|
|
|
6
7
|
import pandas as pd
|
|
7
8
|
from dagster import (
|
|
@@ -9,9 +10,8 @@ from dagster import (
|
|
|
9
10
|
TypeCheck,
|
|
10
11
|
_check as check,
|
|
11
12
|
)
|
|
12
|
-
from dagster.
|
|
13
|
+
from dagster._annotations import beta
|
|
13
14
|
from pandas import DataFrame
|
|
14
|
-
from typing_extensions import Final
|
|
15
15
|
|
|
16
16
|
CONSTRAINT_METADATA_KEY: Final = "constraint_metadata"
|
|
17
17
|
|
|
@@ -20,6 +20,7 @@ class ConstraintViolationException(Exception):
|
|
|
20
20
|
"""Indicates that a constraint has been violated."""
|
|
21
21
|
|
|
22
22
|
|
|
23
|
+
@beta
|
|
23
24
|
class ConstraintWithMetadataException(Exception):
|
|
24
25
|
"""This class defines the response generated when a pandas DF fails validation -- it can be used to generate either a
|
|
25
26
|
failed typecheck or an exception.
|
|
@@ -45,14 +46,8 @@ class ConstraintWithMetadataException(Exception):
|
|
|
45
46
|
self.expectation = check.opt_inst_param(expectation, "expectation", (dict, list, str, set))
|
|
46
47
|
self.offending = check.opt_inst_param(offending, "offending", (dict, list, str, set))
|
|
47
48
|
self.actual = check.opt_inst_param(actual, "actual", (dict, list, str, set))
|
|
48
|
-
super(
|
|
49
|
-
"Violated {} - {}, {} was/were expected, but we received {} which was/were {}"
|
|
50
|
-
constraint_name,
|
|
51
|
-
constraint_description,
|
|
52
|
-
expectation,
|
|
53
|
-
offending,
|
|
54
|
-
actual,
|
|
55
|
-
)
|
|
49
|
+
super().__init__(
|
|
50
|
+
f"Violated {constraint_name} - {constraint_description}, {expectation} was/were expected, but we received {offending} which was/were {actual}"
|
|
56
51
|
)
|
|
57
52
|
|
|
58
53
|
def normalize_metadata_json_value(self, val):
|
|
@@ -82,16 +77,12 @@ class DataFrameConstraintViolationException(ConstraintViolationException):
|
|
|
82
77
|
"""Indicates a dataframe level constraint has been violated."""
|
|
83
78
|
|
|
84
79
|
def __init__(self, constraint_name, constraint_description):
|
|
85
|
-
super(
|
|
86
|
-
"Violated {constraint_name} - {constraint_description}".format(
|
|
87
|
-
constraint_name=constraint_name, constraint_description=constraint_description
|
|
88
|
-
)
|
|
89
|
-
)
|
|
80
|
+
super().__init__(f"Violated {constraint_name} - {constraint_description}")
|
|
90
81
|
|
|
91
82
|
|
|
92
83
|
class DataFrameWithMetadataException(ConstraintWithMetadataException):
|
|
93
84
|
def __init__(self, constraint_name, constraint_description, expectation, actual):
|
|
94
|
-
super(
|
|
85
|
+
super().__init__(
|
|
95
86
|
constraint_name, constraint_description, expectation, "a malformed dataframe", actual
|
|
96
87
|
)
|
|
97
88
|
|
|
@@ -104,27 +95,21 @@ class ColumnConstraintViolationException(ConstraintViolationException):
|
|
|
104
95
|
self.constraint_description = constraint_description
|
|
105
96
|
self.column_name = column_name
|
|
106
97
|
self.offending_rows = offending_rows
|
|
107
|
-
super(
|
|
98
|
+
super().__init__(self.construct_message())
|
|
108
99
|
|
|
109
100
|
def construct_message(self):
|
|
110
|
-
base_message =
|
|
111
|
-
'Violated "{constraint_name}" for column "{column_name}" - {constraint_description}'
|
|
112
|
-
.format(
|
|
113
|
-
constraint_name=self.constraint_name,
|
|
114
|
-
constraint_description=self.constraint_description,
|
|
115
|
-
column_name=self.column_name,
|
|
116
|
-
)
|
|
117
|
-
)
|
|
101
|
+
base_message = f'Violated "{self.constraint_name}" for column "{self.column_name}" - {self.constraint_description}'
|
|
118
102
|
if self.offending_rows is not None:
|
|
119
|
-
base_message +=
|
|
120
|
-
self.offending_rows
|
|
103
|
+
base_message += (
|
|
104
|
+
f"The offending (index, row values) are the following: {self.offending_rows}"
|
|
121
105
|
)
|
|
122
106
|
return base_message
|
|
123
107
|
|
|
124
108
|
|
|
109
|
+
@beta
|
|
125
110
|
class ColumnWithMetadataException(ConstraintWithMetadataException):
|
|
126
111
|
def __init__(self, constraint_name, constraint_description, expectation, offending, actual):
|
|
127
|
-
super(
|
|
112
|
+
super().__init__(
|
|
128
113
|
"the column constraint " + constraint_name,
|
|
129
114
|
constraint_description,
|
|
130
115
|
expectation,
|
|
@@ -138,7 +123,7 @@ class Constraint:
|
|
|
138
123
|
|
|
139
124
|
Args:
|
|
140
125
|
error_description (Optional[str]): The plain string description that is output in the terminal if the constraint fails.
|
|
141
|
-
markdown_description (Optional[str]): A markdown supported description that is
|
|
126
|
+
markdown_description (Optional[str]): A markdown supported description that is shown in the Dagster UI if the constraint fails.
|
|
142
127
|
"""
|
|
143
128
|
|
|
144
129
|
def __init__(self, error_description=None, markdown_description=None):
|
|
@@ -147,6 +132,7 @@ class Constraint:
|
|
|
147
132
|
self.error_description = check.str_param(error_description, "error_description")
|
|
148
133
|
|
|
149
134
|
|
|
135
|
+
@beta
|
|
150
136
|
class ConstraintWithMetadata:
|
|
151
137
|
"""This class defines a base constraint over pandas DFs with organized metadata.
|
|
152
138
|
|
|
@@ -168,7 +154,6 @@ class ConstraintWithMetadata:
|
|
|
168
154
|
def __init__(
|
|
169
155
|
self, description, validation_fn, resulting_exception, raise_or_typecheck=True, name=None
|
|
170
156
|
):
|
|
171
|
-
experimental_class_warning(self.__class__.__name__)
|
|
172
157
|
if name is None:
|
|
173
158
|
self.name = self.__class__.__name__
|
|
174
159
|
else:
|
|
@@ -203,14 +188,13 @@ class ConstraintWithMetadata:
|
|
|
203
188
|
)
|
|
204
189
|
return DagsterType(
|
|
205
190
|
name=self.name,
|
|
206
|
-
description="A Pandas DataFrame with the following validation: {}"
|
|
207
|
-
|
|
208
|
-
),
|
|
209
|
-
type_check_fn=lambda x: self.validate(x, *args),
|
|
191
|
+
description=f"A Pandas DataFrame with the following validation: {self.description}",
|
|
192
|
+
type_check_fn=lambda x: self.validate(x, *args), # pyright: ignore[reportArgumentType]
|
|
210
193
|
**kwargs,
|
|
211
194
|
)
|
|
212
195
|
|
|
213
196
|
|
|
197
|
+
@beta
|
|
214
198
|
class MultiConstraintWithMetadata(ConstraintWithMetadata):
|
|
215
199
|
"""Use this class if you have multiple constraints to check over the entire dataframe.
|
|
216
200
|
|
|
@@ -247,7 +231,7 @@ class MultiConstraintWithMetadata(ConstraintWithMetadata):
|
|
|
247
231
|
metadict[key][validation_fn_arr[i].__name__] = dicta[key]
|
|
248
232
|
return (truthparam, metadict)
|
|
249
233
|
|
|
250
|
-
super(
|
|
234
|
+
super().__init__(
|
|
251
235
|
description,
|
|
252
236
|
validation_fn,
|
|
253
237
|
resulting_exception,
|
|
@@ -256,6 +240,7 @@ class MultiConstraintWithMetadata(ConstraintWithMetadata):
|
|
|
256
240
|
)
|
|
257
241
|
|
|
258
242
|
|
|
243
|
+
@beta
|
|
259
244
|
class StrictColumnsWithMetadata(ConstraintWithMetadata):
|
|
260
245
|
def __init__(self, column_list, enforce_ordering=False, raise_or_typecheck=True, name=None):
|
|
261
246
|
self.enforce_ordering = check.bool_param(enforce_ordering, "enforce_ordering")
|
|
@@ -283,7 +268,7 @@ class StrictColumnsWithMetadata(ConstraintWithMetadata):
|
|
|
283
268
|
basestr = f"ensuring that the right columns, {self.column_list} were present"
|
|
284
269
|
if enforce_ordering:
|
|
285
270
|
basestr += " in the right order"
|
|
286
|
-
super(
|
|
271
|
+
super().__init__(
|
|
287
272
|
basestr,
|
|
288
273
|
validation_fcn,
|
|
289
274
|
DataFrameWithMetadataException,
|
|
@@ -297,11 +282,11 @@ class DataFrameConstraint(Constraint):
|
|
|
297
282
|
|
|
298
283
|
Args:
|
|
299
284
|
error_description (Optional[str]): The plain string description that is output in the terminal if the constraint fails.
|
|
300
|
-
markdown_description (Optional[str]): A markdown supported description that is
|
|
285
|
+
markdown_description (Optional[str]): A markdown supported description that is shown in the Dagster UI if the constraint fails.
|
|
301
286
|
"""
|
|
302
287
|
|
|
303
288
|
def __init__(self, error_description=None, markdown_description=None):
|
|
304
|
-
super(
|
|
289
|
+
super().__init__(
|
|
305
290
|
error_description=error_description, markdown_description=markdown_description
|
|
306
291
|
)
|
|
307
292
|
|
|
@@ -309,6 +294,7 @@ class DataFrameConstraint(Constraint):
|
|
|
309
294
|
raise NotImplementedError()
|
|
310
295
|
|
|
311
296
|
|
|
297
|
+
@beta
|
|
312
298
|
class StrictColumnsConstraint(DataFrameConstraint):
|
|
313
299
|
"""A dataframe constraint that validates column existence and ordering.
|
|
314
300
|
|
|
@@ -326,9 +312,7 @@ class StrictColumnsConstraint(DataFrameConstraint):
|
|
|
326
312
|
description = f"No columns outside of {self.strict_column_list} allowed. "
|
|
327
313
|
if enforce_ordering:
|
|
328
314
|
description += "Columns must be in that order."
|
|
329
|
-
super(
|
|
330
|
-
error_description=description, markdown_description=description
|
|
331
|
-
)
|
|
315
|
+
super().__init__(error_description=description, markdown_description=description)
|
|
332
316
|
|
|
333
317
|
def validate(self, dataframe):
|
|
334
318
|
check.inst_param(dataframe, "dataframe", DataFrame)
|
|
@@ -338,22 +322,19 @@ class StrictColumnsConstraint(DataFrameConstraint):
|
|
|
338
322
|
raise DataFrameConstraintViolationException(
|
|
339
323
|
constraint_name=self.name,
|
|
340
324
|
constraint_description=(
|
|
341
|
-
"Expected the following ordering of columns {
|
|
342
|
-
" {
|
|
343
|
-
expected=self.strict_column_list, received=columns_received
|
|
344
|
-
)
|
|
325
|
+
f"Expected the following ordering of columns {self.strict_column_list}. Received:"
|
|
326
|
+
f" {columns_received}"
|
|
345
327
|
),
|
|
346
328
|
)
|
|
347
329
|
for column in columns_received:
|
|
348
330
|
if column not in self.strict_column_list:
|
|
349
331
|
raise DataFrameConstraintViolationException(
|
|
350
332
|
constraint_name=self.name,
|
|
351
|
-
constraint_description="Expected {}. Recevied {}."
|
|
352
|
-
self.strict_column_list, columns_received
|
|
353
|
-
),
|
|
333
|
+
constraint_description=f"Expected {self.strict_column_list}. Recevied {columns_received}.",
|
|
354
334
|
)
|
|
355
335
|
|
|
356
336
|
|
|
337
|
+
@beta
|
|
357
338
|
class RowCountConstraint(DataFrameConstraint):
|
|
358
339
|
"""A dataframe constraint that validates the expected count of rows.
|
|
359
340
|
|
|
@@ -367,12 +348,8 @@ class RowCountConstraint(DataFrameConstraint):
|
|
|
367
348
|
self.error_tolerance = abs(check.int_param(error_tolerance, "error_tolerance"))
|
|
368
349
|
if self.error_tolerance > self.num_allowed_rows:
|
|
369
350
|
raise ValueError("Tolerance can't be greater than the number of rows you expect.")
|
|
370
|
-
description = "Dataframe must have {} +- {} rows."
|
|
371
|
-
|
|
372
|
-
)
|
|
373
|
-
super(RowCountConstraint, self).__init__(
|
|
374
|
-
error_description=description, markdown_description=description
|
|
375
|
-
)
|
|
351
|
+
description = f"Dataframe must have {self.num_allowed_rows} +- {self.error_tolerance} rows."
|
|
352
|
+
super().__init__(error_description=description, markdown_description=description)
|
|
376
353
|
|
|
377
354
|
def validate(self, dataframe):
|
|
378
355
|
check.inst_param(dataframe, "dataframe", DataFrame)
|
|
@@ -385,11 +362,7 @@ class RowCountConstraint(DataFrameConstraint):
|
|
|
385
362
|
raise DataFrameConstraintViolationException(
|
|
386
363
|
constraint_name=self.name,
|
|
387
364
|
constraint_description=(
|
|
388
|
-
"Expected {
|
|
389
|
-
expected=self.num_allowed_rows,
|
|
390
|
-
tolerance=self.error_tolerance,
|
|
391
|
-
received=len(dataframe),
|
|
392
|
-
)
|
|
365
|
+
f"Expected {self.num_allowed_rows} +- {self.error_tolerance} rows. Got {len(dataframe)}"
|
|
393
366
|
),
|
|
394
367
|
)
|
|
395
368
|
|
|
@@ -503,6 +476,7 @@ class ColumnConstraintWithMetadata(ConstraintWithMetadata):
|
|
|
503
476
|
return exc.return_as_typecheck()
|
|
504
477
|
|
|
505
478
|
|
|
479
|
+
@beta
|
|
506
480
|
class MultiColumnConstraintWithMetadata(ColumnConstraintWithMetadata):
|
|
507
481
|
"""This class is useful for constructing more complicated relationships between columns
|
|
508
482
|
and expectations -- i.e. you want some validations on column A, others on column B, etc.
|
|
@@ -551,10 +525,10 @@ class MultiColumnConstraintWithMetadata(ColumnConstraintWithMetadata):
|
|
|
551
525
|
result = new_validator.validate(
|
|
552
526
|
DataFrame(data[column]), column, *args, **kwargs
|
|
553
527
|
)
|
|
554
|
-
result_val = result.success
|
|
528
|
+
result_val = result.success # pyright: ignore[reportOptionalMemberAccess]
|
|
555
529
|
if result_val:
|
|
556
530
|
continue
|
|
557
|
-
result_dict = result.metadata[CONSTRAINT_METADATA_KEY].data
|
|
531
|
+
result_dict = result.metadata[CONSTRAINT_METADATA_KEY].data # pyright: ignore[reportAttributeAccessIssue,reportOptionalMemberAccess]
|
|
558
532
|
truthparam = truthparam and result_val
|
|
559
533
|
for key in result_dict.keys():
|
|
560
534
|
if "constraint" not in key:
|
|
@@ -573,7 +547,7 @@ class MultiColumnConstraintWithMetadata(ColumnConstraintWithMetadata):
|
|
|
573
547
|
metadict[key][column][fn.__name__] = "a violation"
|
|
574
548
|
return truthparam, metadict
|
|
575
549
|
|
|
576
|
-
super(
|
|
550
|
+
super().__init__(
|
|
577
551
|
description,
|
|
578
552
|
validation_fn,
|
|
579
553
|
resulting_exception,
|
|
@@ -585,6 +559,7 @@ class MultiColumnConstraintWithMetadata(ColumnConstraintWithMetadata):
|
|
|
585
559
|
return ConstraintWithMetadata.validate(self, data, *args, **kwargs)
|
|
586
560
|
|
|
587
561
|
|
|
562
|
+
@beta
|
|
588
563
|
class MultiAggregateConstraintWithMetadata(MultiColumnConstraintWithMetadata):
|
|
589
564
|
"""This class is similar to multicolumn, but takes in functions that operate on the whole column at once
|
|
590
565
|
rather than ones that operate on each value --
|
|
@@ -612,16 +587,17 @@ class MultiAggregateConstraintWithMetadata(MultiColumnConstraintWithMetadata):
|
|
|
612
587
|
raise_or_typecheck=True,
|
|
613
588
|
name=None,
|
|
614
589
|
):
|
|
615
|
-
super(
|
|
590
|
+
super().__init__(
|
|
616
591
|
description,
|
|
617
592
|
fn_and_columns_dict,
|
|
618
593
|
resulting_exception,
|
|
619
594
|
raise_or_typecheck=raise_or_typecheck,
|
|
620
|
-
type_for_internal=ColumnAggregateConstraintWithMetadata,
|
|
595
|
+
type_for_internal=ColumnAggregateConstraintWithMetadata, # pyright: ignore[reportArgumentType]
|
|
621
596
|
name=name,
|
|
622
597
|
)
|
|
623
598
|
|
|
624
599
|
|
|
600
|
+
@beta
|
|
625
601
|
def non_null_validation(x):
|
|
626
602
|
"""Validates that a particular value in a column is not null.
|
|
627
603
|
|
|
@@ -635,6 +611,7 @@ def non_null_validation(x):
|
|
|
635
611
|
return not pd.isnull(x), {}
|
|
636
612
|
|
|
637
613
|
|
|
614
|
+
@beta
|
|
638
615
|
def all_unique_validator(column, ignore_missing_vals=False):
|
|
639
616
|
"""Validates that all values in an iterable are unique.
|
|
640
617
|
|
|
@@ -673,6 +650,7 @@ def all_unique_validator(column, ignore_missing_vals=False):
|
|
|
673
650
|
return not duplicated.any(), {"actual": column[duplicated]}
|
|
674
651
|
|
|
675
652
|
|
|
653
|
+
@beta
|
|
676
654
|
def nonnull(func):
|
|
677
655
|
"""Decorator for column validation functions to make them error on nulls.
|
|
678
656
|
|
|
@@ -691,11 +669,12 @@ def nonnull(func):
|
|
|
691
669
|
nval = non_null_validation(val)
|
|
692
670
|
return origval[0] and nval[0], {}
|
|
693
671
|
|
|
694
|
-
nvalidator.__doc__ += " and ensures no values are null"
|
|
672
|
+
nvalidator.__doc__ += " and ensures no values are null" # pyright: ignore[reportOperatorIssue]
|
|
695
673
|
|
|
696
674
|
return nvalidator
|
|
697
675
|
|
|
698
676
|
|
|
677
|
+
@beta
|
|
699
678
|
def column_range_validation_factory(minim=None, maxim=None, ignore_missing_vals=False):
|
|
700
679
|
"""Factory for validators testing if column values are within a range.
|
|
701
680
|
|
|
@@ -749,15 +728,14 @@ def column_range_validation_factory(minim=None, maxim=None, ignore_missing_vals=
|
|
|
749
728
|
return True, {}
|
|
750
729
|
return (isinstance(x, (type(minim), type(maxim)))) and (x <= maxim) and (x >= minim), {}
|
|
751
730
|
|
|
752
|
-
in_range_validation_fn.__doc__ = "checks whether values are between {} and {}"
|
|
753
|
-
minim, maxim
|
|
754
|
-
)
|
|
731
|
+
in_range_validation_fn.__doc__ = f"checks whether values are between {minim} and {maxim}"
|
|
755
732
|
if ignore_missing_vals:
|
|
756
733
|
in_range_validation_fn.__doc__ += ", ignoring nulls"
|
|
757
734
|
|
|
758
735
|
return in_range_validation_fn
|
|
759
736
|
|
|
760
737
|
|
|
738
|
+
@beta
|
|
761
739
|
def categorical_column_validator_factory(categories, ignore_missing_vals=False):
|
|
762
740
|
"""Factory for validators testing if all values are in some set.
|
|
763
741
|
|
|
@@ -812,6 +790,7 @@ def categorical_column_validator_factory(categories, ignore_missing_vals=False):
|
|
|
812
790
|
return categorical_validation_fn
|
|
813
791
|
|
|
814
792
|
|
|
793
|
+
@beta
|
|
815
794
|
def dtype_in_set_validation_factory(datatypes, ignore_missing_vals=False):
|
|
816
795
|
"""Factory for testing if the dtype of a val falls within some allowed set.
|
|
817
796
|
|
|
@@ -856,9 +835,7 @@ def dtype_in_set_validation_factory(datatypes, ignore_missing_vals=False):
|
|
|
856
835
|
return True, {}
|
|
857
836
|
return isinstance(x, datatypes), {}
|
|
858
837
|
|
|
859
|
-
dtype_in_set_validation_fn.__doc__ = "checks whether values are this type/types: {}"
|
|
860
|
-
datatypes
|
|
861
|
-
)
|
|
838
|
+
dtype_in_set_validation_fn.__doc__ = f"checks whether values are this type/types: {datatypes}"
|
|
862
839
|
if ignore_missing_vals:
|
|
863
840
|
dtype_in_set_validation_fn.__doc__ += ", ignoring nulls"
|
|
864
841
|
|
|
@@ -870,7 +847,7 @@ class ColumnRangeConstraintWithMetadata(ColumnConstraintWithMetadata):
|
|
|
870
847
|
self.name = self.__class__.__name__
|
|
871
848
|
|
|
872
849
|
description = f"Confirms values are between {minim} and {maxim}"
|
|
873
|
-
super(
|
|
850
|
+
super().__init__(
|
|
874
851
|
description=description,
|
|
875
852
|
validation_fn=column_range_validation_factory(minim=minim, maxim=maxim),
|
|
876
853
|
resulting_exception=ColumnWithMetadataException,
|
|
@@ -882,9 +859,7 @@ class ColumnRangeConstraintWithMetadata(ColumnConstraintWithMetadata):
|
|
|
882
859
|
if self.columns is None:
|
|
883
860
|
self.columns = list(data.columns)
|
|
884
861
|
self.columns.extend(args)
|
|
885
|
-
return super(
|
|
886
|
-
data, *self.columns, **kwargs
|
|
887
|
-
)
|
|
862
|
+
return super().validate(data, *self.columns, **kwargs)
|
|
888
863
|
|
|
889
864
|
|
|
890
865
|
class ColumnConstraint(Constraint):
|
|
@@ -892,11 +867,11 @@ class ColumnConstraint(Constraint):
|
|
|
892
867
|
|
|
893
868
|
Args:
|
|
894
869
|
error_description (Optional[str]): The plain string description that is output in the terminal if the constraint fails.
|
|
895
|
-
markdown_description (Optional[str]): A markdown supported description that is
|
|
870
|
+
markdown_description (Optional[str]): A markdown supported description that is shown in the Dagster UI if the constraint fails.
|
|
896
871
|
"""
|
|
897
872
|
|
|
898
873
|
def __init__(self, error_description=None, markdown_description=None):
|
|
899
|
-
super(
|
|
874
|
+
super().__init__(
|
|
900
875
|
error_description=error_description, markdown_description=markdown_description
|
|
901
876
|
)
|
|
902
877
|
|
|
@@ -919,9 +894,7 @@ class ColumnDTypeFnConstraint(ColumnConstraint):
|
|
|
919
894
|
def __init__(self, type_fn):
|
|
920
895
|
self.type_fn = check.callable_param(type_fn, "type_fn")
|
|
921
896
|
description = f'Dtype must satisfy "{self.type_fn.__name__}"'
|
|
922
|
-
super(
|
|
923
|
-
error_description=description, markdown_description=description
|
|
924
|
-
)
|
|
897
|
+
super().__init__(error_description=description, markdown_description=description)
|
|
925
898
|
|
|
926
899
|
def validate(self, dataframe, column_name):
|
|
927
900
|
column_dtype = dataframe[column_name].dtype
|
|
@@ -942,12 +915,8 @@ class ColumnDTypeInSetConstraint(ColumnConstraint):
|
|
|
942
915
|
|
|
943
916
|
def __init__(self, expected_dtype_set):
|
|
944
917
|
self.expected_dtype_set = check.set_param(expected_dtype_set, "expected_dtype_set")
|
|
945
|
-
description = "Column dtype must be in the following set {}."
|
|
946
|
-
|
|
947
|
-
)
|
|
948
|
-
super(ColumnDTypeInSetConstraint, self).__init__(
|
|
949
|
-
error_description=description, markdown_description=description
|
|
950
|
-
)
|
|
918
|
+
description = f"Column dtype must be in the following set {self.expected_dtype_set}."
|
|
919
|
+
super().__init__(error_description=description, markdown_description=description)
|
|
951
920
|
|
|
952
921
|
def validate(self, dataframe, column_name):
|
|
953
922
|
received_dtypes = dataframe[column_name].dtype
|
|
@@ -955,9 +924,7 @@ class ColumnDTypeInSetConstraint(ColumnConstraint):
|
|
|
955
924
|
raise ColumnConstraintViolationException(
|
|
956
925
|
constraint_name=self.name,
|
|
957
926
|
constraint_description=(
|
|
958
|
-
"{
|
|
959
|
-
base_error_message=self.error_description, received_dtypes=received_dtypes
|
|
960
|
-
)
|
|
927
|
+
f"{self.error_description}. DTypes received: {received_dtypes}"
|
|
961
928
|
),
|
|
962
929
|
column_name=column_name,
|
|
963
930
|
)
|
|
@@ -968,9 +935,7 @@ class NonNullableColumnConstraint(ColumnConstraint):
|
|
|
968
935
|
|
|
969
936
|
def __init__(self):
|
|
970
937
|
description = "No Null values allowed."
|
|
971
|
-
super(
|
|
972
|
-
error_description=description, markdown_description=description
|
|
973
|
-
)
|
|
938
|
+
super().__init__(error_description=description, markdown_description=description)
|
|
974
939
|
|
|
975
940
|
def validate(self, dataframe, column_name):
|
|
976
941
|
rows_with_null_columns = dataframe[dataframe[column_name].isna()]
|
|
@@ -993,9 +958,7 @@ class UniqueColumnConstraint(ColumnConstraint):
|
|
|
993
958
|
def __init__(self, ignore_missing_vals):
|
|
994
959
|
description = "Column must be unique."
|
|
995
960
|
self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
|
|
996
|
-
super(
|
|
997
|
-
error_description=description, markdown_description=description
|
|
998
|
-
)
|
|
961
|
+
super().__init__(error_description=description, markdown_description=description)
|
|
999
962
|
|
|
1000
963
|
def validate(self, dataframe, column_name):
|
|
1001
964
|
invalid = dataframe[column_name].duplicated()
|
|
@@ -1022,7 +985,7 @@ class CategoricalColumnConstraint(ColumnConstraint):
|
|
|
1022
985
|
def __init__(self, categories, ignore_missing_vals):
|
|
1023
986
|
self.categories = list(check.set_param(categories, "categories", of_type=str))
|
|
1024
987
|
self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
|
|
1025
|
-
super(
|
|
988
|
+
super().__init__(
|
|
1026
989
|
error_description=f"Expected Categories are {self.categories}",
|
|
1027
990
|
markdown_description=f"Category examples are {self.categories[:5]}...",
|
|
1028
991
|
)
|
|
@@ -1053,7 +1016,7 @@ class MinValueColumnConstraint(ColumnConstraint):
|
|
|
1053
1016
|
def __init__(self, min_value, ignore_missing_vals):
|
|
1054
1017
|
self.min_value = check.inst_param(min_value, "min_value", (int, float, datetime))
|
|
1055
1018
|
self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
|
|
1056
|
-
super(
|
|
1019
|
+
super().__init__(
|
|
1057
1020
|
markdown_description=f"values > {self.min_value}",
|
|
1058
1021
|
error_description=f"Column must have values > {self.min_value}",
|
|
1059
1022
|
)
|
|
@@ -1084,7 +1047,7 @@ class MaxValueColumnConstraint(ColumnConstraint):
|
|
|
1084
1047
|
def __init__(self, max_value, ignore_missing_vals):
|
|
1085
1048
|
self.max_value = check.inst_param(max_value, "max_value", (int, float, datetime))
|
|
1086
1049
|
self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
|
|
1087
|
-
super(
|
|
1050
|
+
super().__init__(
|
|
1088
1051
|
markdown_description=f"values < {self.max_value}",
|
|
1089
1052
|
error_description=f"Column must have values < {self.max_value}",
|
|
1090
1053
|
)
|
|
@@ -1118,11 +1081,9 @@ class InRangeColumnConstraint(ColumnConstraint):
|
|
|
1118
1081
|
self.min_value = check.inst_param(min_value, "min_value", (int, float, datetime))
|
|
1119
1082
|
self.max_value = check.inst_param(max_value, "max_value", (int, float, datetime))
|
|
1120
1083
|
self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
|
|
1121
|
-
super(
|
|
1084
|
+
super().__init__(
|
|
1122
1085
|
markdown_description=f"{self.min_value} < values < {self.max_value}",
|
|
1123
|
-
error_description="Column must have values between {} and {} inclusive."
|
|
1124
|
-
self.min_value, self.max_value
|
|
1125
|
-
),
|
|
1086
|
+
error_description=f"Column must have values between {self.min_value} and {self.max_value} inclusive.",
|
|
1126
1087
|
)
|
|
1127
1088
|
|
|
1128
1089
|
def validate(self, dataframe, column_name):
|
|
@@ -12,11 +12,10 @@ from dagster import (
|
|
|
12
12
|
_check as check,
|
|
13
13
|
dagster_type_loader,
|
|
14
14
|
)
|
|
15
|
-
from dagster._annotations import
|
|
15
|
+
from dagster._annotations import beta
|
|
16
16
|
from dagster._config import Selector
|
|
17
17
|
from dagster._core.definitions.metadata import normalize_metadata
|
|
18
18
|
from dagster._utils import dict_without_keys
|
|
19
|
-
from dagster._utils.backcompat import canonicalize_backcompat_args
|
|
20
19
|
|
|
21
20
|
from dagster_pandas.constraints import (
|
|
22
21
|
CONSTRAINT_METADATA_KEY,
|
|
@@ -43,7 +42,7 @@ CONSTRAINT_BLACKLIST = {ColumnDTypeFnConstraint, ColumnDTypeInSetConstraint}
|
|
|
43
42
|
)
|
|
44
43
|
)
|
|
45
44
|
def dataframe_loader(_context, config):
|
|
46
|
-
file_type, file_options =
|
|
45
|
+
file_type, file_options = next(iter(config.items()))
|
|
47
46
|
|
|
48
47
|
if file_type == "csv":
|
|
49
48
|
path = file_options["path"]
|
|
@@ -84,9 +83,7 @@ DataFrame = DagsterType(
|
|
|
84
83
|
|
|
85
84
|
def _construct_constraint_list(constraints):
|
|
86
85
|
def add_bullet(constraint_list, constraint_description):
|
|
87
|
-
return constraint_list + "+ {constraint_description}\n"
|
|
88
|
-
constraint_description=constraint_description
|
|
89
|
-
)
|
|
86
|
+
return constraint_list + f"+ {constraint_description}\n"
|
|
90
87
|
|
|
91
88
|
constraint_list = ""
|
|
92
89
|
for constraint in constraints:
|
|
@@ -100,13 +97,9 @@ def _build_column_header(column_name, constraints):
|
|
|
100
97
|
for constraint in constraints:
|
|
101
98
|
if isinstance(constraint, ColumnDTypeInSetConstraint):
|
|
102
99
|
dtypes_tuple = tuple(constraint.expected_dtype_set)
|
|
103
|
-
return header + ": `{
|
|
104
|
-
expected_dtypes=dtypes_tuple if len(dtypes_tuple) > 1 else dtypes_tuple[0]
|
|
105
|
-
)
|
|
100
|
+
return header + f": `{dtypes_tuple if len(dtypes_tuple) > 1 else dtypes_tuple[0]}`" # pyright: ignore[reportGeneralTypeIssues]
|
|
106
101
|
elif isinstance(constraint, ColumnDTypeFnConstraint):
|
|
107
|
-
return header + ": Validator `{
|
|
108
|
-
expected_dtype_fn=constraint.type_fn.__name__
|
|
109
|
-
)
|
|
102
|
+
return header + f": Validator `{constraint.type_fn.__name__}`"
|
|
110
103
|
return header
|
|
111
104
|
|
|
112
105
|
|
|
@@ -114,10 +107,7 @@ def create_dagster_pandas_dataframe_description(description, columns):
|
|
|
114
107
|
title = "\n".join([description, "### Columns", ""])
|
|
115
108
|
buildme = title
|
|
116
109
|
for column in columns:
|
|
117
|
-
buildme += "{}\n{}\n"
|
|
118
|
-
_build_column_header(column.name, column.constraints),
|
|
119
|
-
_construct_constraint_list(column.constraints),
|
|
120
|
-
)
|
|
110
|
+
buildme += f"{_build_column_header(column.name, column.constraints)}\n{_construct_constraint_list(column.constraints)}\n"
|
|
121
111
|
return buildme
|
|
122
112
|
|
|
123
113
|
|
|
@@ -132,7 +122,9 @@ def create_table_schema_metadata_from_dataframe(
|
|
|
132
122
|
Returns:
|
|
133
123
|
TableSchemaMetadataValue: returns an object with the TableSchema for the DataFrame.
|
|
134
124
|
"""
|
|
135
|
-
check.
|
|
125
|
+
check.inst_param(
|
|
126
|
+
pandas_df, "pandas_df", pd.DataFrame, "Input must be a pandas DataFrame object"
|
|
127
|
+
)
|
|
136
128
|
return MetadataValue.table_schema(
|
|
137
129
|
TableSchema(
|
|
138
130
|
columns=[
|
|
@@ -143,6 +135,7 @@ def create_table_schema_metadata_from_dataframe(
|
|
|
143
135
|
)
|
|
144
136
|
|
|
145
137
|
|
|
138
|
+
@beta
|
|
146
139
|
def create_dagster_pandas_dataframe_type(
|
|
147
140
|
name,
|
|
148
141
|
description=None,
|
|
@@ -150,7 +143,6 @@ def create_dagster_pandas_dataframe_type(
|
|
|
150
143
|
metadata_fn=None,
|
|
151
144
|
dataframe_constraints=None,
|
|
152
145
|
loader=None,
|
|
153
|
-
event_metadata_fn=None,
|
|
154
146
|
):
|
|
155
147
|
"""Constructs a custom pandas dataframe dagster type.
|
|
156
148
|
|
|
@@ -172,9 +164,6 @@ def create_dagster_pandas_dataframe_type(
|
|
|
172
164
|
# dataframes via configuration their own way if the default configs don't suffice. This is
|
|
173
165
|
# purely optional.
|
|
174
166
|
check.str_param(name, "name")
|
|
175
|
-
metadata_fn = canonicalize_backcompat_args(
|
|
176
|
-
metadata_fn, "metadata_fn", event_metadata_fn, "event_metadata_fn", "1.4.0"
|
|
177
|
-
)
|
|
178
167
|
metadata_fn = check.opt_callable_param(metadata_fn, "metadata_fn")
|
|
179
168
|
description = create_dagster_pandas_dataframe_description(
|
|
180
169
|
check.opt_str_param(description, "description", default=""),
|
|
@@ -185,8 +174,8 @@ def create_dagster_pandas_dataframe_type(
|
|
|
185
174
|
if not isinstance(value, pd.DataFrame):
|
|
186
175
|
return TypeCheck(
|
|
187
176
|
success=False,
|
|
188
|
-
description=
|
|
189
|
-
|
|
177
|
+
description=(
|
|
178
|
+
f"Must be a pandas.DataFrame. Got value of type. {type(value).__name__}"
|
|
190
179
|
),
|
|
191
180
|
)
|
|
192
181
|
|
|
@@ -201,7 +190,7 @@ def create_dagster_pandas_dataframe_type(
|
|
|
201
190
|
|
|
202
191
|
return TypeCheck(
|
|
203
192
|
success=True,
|
|
204
|
-
metadata=_execute_summary_stats(name, value, metadata_fn) if metadata_fn else None,
|
|
193
|
+
metadata=_execute_summary_stats(name, value, metadata_fn) if metadata_fn else None, # pyright: ignore[reportArgumentType]
|
|
205
194
|
)
|
|
206
195
|
|
|
207
196
|
return DagsterType(
|
|
@@ -213,7 +202,7 @@ def create_dagster_pandas_dataframe_type(
|
|
|
213
202
|
)
|
|
214
203
|
|
|
215
204
|
|
|
216
|
-
@
|
|
205
|
+
@beta
|
|
217
206
|
def create_structured_dataframe_type(
|
|
218
207
|
name,
|
|
219
208
|
description=None,
|
|
@@ -248,8 +237,8 @@ def create_structured_dataframe_type(
|
|
|
248
237
|
if not isinstance(value, pd.DataFrame):
|
|
249
238
|
return TypeCheck(
|
|
250
239
|
success=False,
|
|
251
|
-
description=
|
|
252
|
-
|
|
240
|
+
description=(
|
|
241
|
+
f"Must be a pandas.DataFrame. Got value of type. {type(value).__name__}"
|
|
253
242
|
),
|
|
254
243
|
)
|
|
255
244
|
individual_result_dict = {}
|
|
@@ -1,13 +1,18 @@
|
|
|
1
1
|
from dagster import (
|
|
2
|
+
FilesystemIOManager,
|
|
2
3
|
config_from_files,
|
|
3
4
|
file_relative_path,
|
|
4
|
-
fs_io_manager,
|
|
5
5
|
graph,
|
|
6
6
|
in_process_executor,
|
|
7
7
|
repository,
|
|
8
8
|
)
|
|
9
9
|
|
|
10
|
-
from .pandas_hello_world.ops import
|
|
10
|
+
from dagster_pandas.examples.pandas_hello_world.ops import (
|
|
11
|
+
always_fails_op,
|
|
12
|
+
papermill_pandas_hello_world,
|
|
13
|
+
sum_op,
|
|
14
|
+
sum_sq_op,
|
|
15
|
+
)
|
|
11
16
|
|
|
12
17
|
|
|
13
18
|
@graph
|
|
@@ -51,7 +56,7 @@ def papermill_pandas_hello_world_graph():
|
|
|
51
56
|
|
|
52
57
|
|
|
53
58
|
papermill_pandas_hello_world_test = papermill_pandas_hello_world_graph.to_job(
|
|
54
|
-
resource_defs={"io_manager":
|
|
59
|
+
resource_defs={"io_manager": FilesystemIOManager()},
|
|
55
60
|
config=config_from_files(
|
|
56
61
|
[
|
|
57
62
|
file_relative_path(
|
|
@@ -63,7 +68,7 @@ papermill_pandas_hello_world_test = papermill_pandas_hello_world_graph.to_job(
|
|
|
63
68
|
)
|
|
64
69
|
|
|
65
70
|
papermill_pandas_hello_world_prod = papermill_pandas_hello_world_graph.to_job(
|
|
66
|
-
resource_defs={"io_manager":
|
|
71
|
+
resource_defs={"io_manager": FilesystemIOManager()},
|
|
67
72
|
config=config_from_files(
|
|
68
73
|
[
|
|
69
74
|
file_relative_path(
|
|
@@ -2,6 +2,7 @@ from dagster import (
|
|
|
2
2
|
DagsterInvariantViolationError,
|
|
3
3
|
_check as check,
|
|
4
4
|
)
|
|
5
|
+
from dagster._annotations import beta
|
|
5
6
|
from pandas import DataFrame, Timestamp
|
|
6
7
|
from pandas.core.dtypes.common import (
|
|
7
8
|
is_bool_dtype,
|
|
@@ -42,6 +43,7 @@ def _construct_keyword_constraints(non_nullable, unique, ignore_missing_vals):
|
|
|
42
43
|
return constraints
|
|
43
44
|
|
|
44
45
|
|
|
46
|
+
@beta
|
|
45
47
|
class PandasColumn:
|
|
46
48
|
"""The main API for expressing column level schemas and constraints for your custom dataframe
|
|
47
49
|
types.
|
|
@@ -65,14 +67,11 @@ class PandasColumn:
|
|
|
65
67
|
# Ignore validation if column is missing from dataframe and is not required
|
|
66
68
|
if self.is_required:
|
|
67
69
|
raise ConstraintViolationException(
|
|
68
|
-
"Required column {
|
|
69
|
-
" {dataframe_columns}".format(
|
|
70
|
-
column_name=self.name, dataframe_columns=dataframe.columns
|
|
71
|
-
)
|
|
70
|
+
f"Required column {self.name} not in dataframe with columns {dataframe.columns}"
|
|
72
71
|
)
|
|
73
72
|
else:
|
|
74
73
|
for constraint in self.constraints:
|
|
75
|
-
constraint.validate(dataframe, self.name)
|
|
74
|
+
constraint.validate(dataframe, self.name) # pyright: ignore[reportAttributeAccessIssue]
|
|
76
75
|
|
|
77
76
|
@staticmethod
|
|
78
77
|
def exists(name, non_nullable=False, unique=False, ignore_missing_vals=False, is_required=None):
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.27.9"
|
|
@@ -1,22 +1,36 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: dagster-pandas
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.27.9
|
|
4
4
|
Summary: Utilities and examples for working with pandas and dagster, an opinionated framework for expressing data pipelines
|
|
5
5
|
Home-page: https://github.com/dagster-io/dagster
|
|
6
|
-
Author:
|
|
7
|
-
Author-email: hello@
|
|
6
|
+
Author: Dagster Labs
|
|
7
|
+
Author-email: hello@dagsterlabs.com
|
|
8
8
|
License: Apache-2.0
|
|
9
|
-
Classifier: Programming Language :: Python :: 3.7
|
|
10
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
11
9
|
Classifier: Programming Language :: Python :: 3.9
|
|
12
10
|
Classifier: Programming Language :: Python :: 3.10
|
|
13
11
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
14
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
15
|
Classifier: Operating System :: OS Independent
|
|
16
|
+
Requires-Python: >=3.9,<3.14
|
|
16
17
|
Description-Content-Type: text/markdown
|
|
17
18
|
License-File: LICENSE
|
|
19
|
+
Requires-Dist: dagster==1.11.9
|
|
20
|
+
Requires-Dist: pandas
|
|
21
|
+
Dynamic: author
|
|
22
|
+
Dynamic: author-email
|
|
23
|
+
Dynamic: classifier
|
|
24
|
+
Dynamic: description
|
|
25
|
+
Dynamic: description-content-type
|
|
26
|
+
Dynamic: home-page
|
|
27
|
+
Dynamic: license
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
Dynamic: requires-dist
|
|
30
|
+
Dynamic: requires-python
|
|
31
|
+
Dynamic: summary
|
|
18
32
|
|
|
19
33
|
# dagster-pandas
|
|
20
34
|
|
|
21
35
|
The docs for `dagster-pandas` can be found
|
|
22
|
-
[here](https://docs.dagster.io/
|
|
36
|
+
[here](https://docs.dagster.io/api/python-api/libraries/dagster-pandas).
|
|
@@ -1,18 +1,17 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from typing import Dict
|
|
4
3
|
|
|
5
4
|
from setuptools import find_packages, setup
|
|
6
5
|
|
|
7
6
|
|
|
8
7
|
def long_description() -> str:
|
|
9
8
|
here = os.path.abspath(os.path.dirname(__file__))
|
|
10
|
-
with open(os.path.join(here, "README.md"),
|
|
9
|
+
with open(os.path.join(here, "README.md"), encoding="utf8") as fh:
|
|
11
10
|
return fh.read()
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
def get_version() -> str:
|
|
15
|
-
version:
|
|
14
|
+
version: dict[str, str] = {}
|
|
16
15
|
with open(Path(__file__).parent / "dagster_pandas/version.py", encoding="utf8") as fp:
|
|
17
16
|
exec(fp.read(), version)
|
|
18
17
|
|
|
@@ -25,8 +24,8 @@ pin = "" if ver == "1!0+dev" else f"=={ver}"
|
|
|
25
24
|
setup(
|
|
26
25
|
name="dagster-pandas",
|
|
27
26
|
version=ver,
|
|
28
|
-
author="
|
|
29
|
-
author_email="hello@
|
|
27
|
+
author="Dagster Labs",
|
|
28
|
+
author_email="hello@dagsterlabs.com",
|
|
30
29
|
license="Apache-2.0",
|
|
31
30
|
description=(
|
|
32
31
|
"Utilities and examples for working with pandas and dagster, an opinionated "
|
|
@@ -36,15 +35,19 @@ setup(
|
|
|
36
35
|
long_description_content_type="text/markdown",
|
|
37
36
|
url="https://github.com/dagster-io/dagster",
|
|
38
37
|
classifiers=[
|
|
39
|
-
"Programming Language :: Python :: 3.7",
|
|
40
|
-
"Programming Language :: Python :: 3.8",
|
|
41
38
|
"Programming Language :: Python :: 3.9",
|
|
42
39
|
"Programming Language :: Python :: 3.10",
|
|
43
40
|
"Programming Language :: Python :: 3.11",
|
|
41
|
+
"Programming Language :: Python :: 3.12",
|
|
42
|
+
"Programming Language :: Python :: 3.13",
|
|
44
43
|
"License :: OSI Approved :: Apache Software License",
|
|
45
44
|
"Operating System :: OS Independent",
|
|
46
45
|
],
|
|
47
46
|
packages=find_packages(exclude=["dagster_pandas_tests*"]),
|
|
48
47
|
include_package_data=True,
|
|
49
|
-
|
|
48
|
+
python_requires=">=3.9,<3.14",
|
|
49
|
+
install_requires=[
|
|
50
|
+
"dagster==1.11.9",
|
|
51
|
+
"pandas",
|
|
52
|
+
],
|
|
50
53
|
)
|
dagster-pandas-0.19.5/README.md
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.19.5"
|
|
File without changes
|
|
File without changes
|
{dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas/examples/pandas_hello_world/env.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dagster-pandas-0.19.5 → dagster_pandas-0.27.9}/dagster_pandas.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|