dagster-pandas 0.25.8__tar.gz → 0.25.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dagster-pandas might be problematic. Click here for more details.
- {dagster-pandas-0.25.8/dagster_pandas.egg-info → dagster-pandas-0.25.10}/PKG-INFO +1 -1
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/constraints.py +24 -40
- dagster-pandas-0.25.10/dagster_pandas/version.py +1 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10/dagster_pandas.egg-info}/PKG-INFO +1 -1
- dagster-pandas-0.25.10/dagster_pandas.egg-info/requires.txt +2 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/setup.py +3 -4
- dagster-pandas-0.25.8/dagster_pandas/version.py +0 -1
- dagster-pandas-0.25.8/dagster_pandas.egg-info/requires.txt +0 -2
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/LICENSE +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/MANIFEST.in +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/README.md +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/__init__.py +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/data_frame.py +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/examples/__init__.py +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/examples/pandas_hello_world/__init__.py +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/examples/pandas_hello_world/env.yaml +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/examples/pandas_hello_world/environments/pandas_hello_world_prod.yaml +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/examples/pandas_hello_world/environments/pandas_hello_world_test.yaml +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/examples/pandas_hello_world/environments/papermill_pandas_hello_world_prod.yaml +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/examples/pandas_hello_world/environments/papermill_pandas_hello_world_test.yaml +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/examples/pandas_hello_world/ops.py +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/examples/pandas_hello_world/solids.yaml +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/examples/pandas_hello_world.yaml +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/examples/workspace.yaml +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/py.typed +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/validation.py +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas.egg-info/SOURCES.txt +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas.egg-info/dependency_links.txt +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas.egg-info/top_level.txt +0 -0
- {dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: dagster-pandas
|
|
3
|
-
Version: 0.25.8
|
|
3
|
+
Version: 0.25.10
|
|
4
4
|
Summary: Utilities and examples for working with pandas and dagster, an opinionated framework for expressing data pipelines
|
|
5
5
|
Home-page: https://github.com/dagster-io/dagster
|
|
6
6
|
Author: Dagster Labs
|
|
@@ -2,6 +2,7 @@ import sys
|
|
|
2
2
|
from collections import defaultdict
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
from functools import wraps
|
|
5
|
+
from typing import Final
|
|
5
6
|
|
|
6
7
|
import pandas as pd
|
|
7
8
|
from dagster import (
|
|
@@ -11,7 +12,6 @@ from dagster import (
|
|
|
11
12
|
)
|
|
12
13
|
from dagster._annotations import experimental
|
|
13
14
|
from pandas import DataFrame
|
|
14
|
-
from typing_extensions import Final
|
|
15
15
|
|
|
16
16
|
CONSTRAINT_METADATA_KEY: Final = "constraint_metadata"
|
|
17
17
|
|
|
@@ -45,7 +45,7 @@ class ConstraintWithMetadataException(Exception):
|
|
|
45
45
|
self.expectation = check.opt_inst_param(expectation, "expectation", (dict, list, str, set))
|
|
46
46
|
self.offending = check.opt_inst_param(offending, "offending", (dict, list, str, set))
|
|
47
47
|
self.actual = check.opt_inst_param(actual, "actual", (dict, list, str, set))
|
|
48
|
-
super(ConstraintWithMetadataException, self).__init__(
|
|
48
|
+
super().__init__(
|
|
49
49
|
f"Violated {constraint_name} - {constraint_description}, {expectation} was/were expected, but we received {offending} which was/were {actual}"
|
|
50
50
|
)
|
|
51
51
|
|
|
@@ -76,14 +76,12 @@ class DataFrameConstraintViolationException(ConstraintViolationException):
|
|
|
76
76
|
"""Indicates a dataframe level constraint has been violated."""
|
|
77
77
|
|
|
78
78
|
def __init__(self, constraint_name, constraint_description):
|
|
79
|
-
super(DataFrameConstraintViolationException, self).__init__(
|
|
80
|
-
f"Violated {constraint_name} - {constraint_description}"
|
|
81
|
-
)
|
|
79
|
+
super().__init__(f"Violated {constraint_name} - {constraint_description}")
|
|
82
80
|
|
|
83
81
|
|
|
84
82
|
class DataFrameWithMetadataException(ConstraintWithMetadataException):
|
|
85
83
|
def __init__(self, constraint_name, constraint_description, expectation, actual):
|
|
86
|
-
super(DataFrameWithMetadataException, self).__init__(
|
|
84
|
+
super().__init__(
|
|
87
85
|
constraint_name, constraint_description, expectation, "a malformed dataframe", actual
|
|
88
86
|
)
|
|
89
87
|
|
|
@@ -96,7 +94,7 @@ class ColumnConstraintViolationException(ConstraintViolationException):
|
|
|
96
94
|
self.constraint_description = constraint_description
|
|
97
95
|
self.column_name = column_name
|
|
98
96
|
self.offending_rows = offending_rows
|
|
99
|
-
super(ColumnConstraintViolationException, self).__init__(self.construct_message())
|
|
97
|
+
super().__init__(self.construct_message())
|
|
100
98
|
|
|
101
99
|
def construct_message(self):
|
|
102
100
|
base_message = f'Violated "{self.constraint_name}" for column "{self.column_name}" - {self.constraint_description}'
|
|
@@ -109,7 +107,7 @@ class ColumnConstraintViolationException(ConstraintViolationException):
|
|
|
109
107
|
|
|
110
108
|
class ColumnWithMetadataException(ConstraintWithMetadataException):
|
|
111
109
|
def __init__(self, constraint_name, constraint_description, expectation, offending, actual):
|
|
112
|
-
super(ColumnWithMetadataException, self).__init__(
|
|
110
|
+
super().__init__(
|
|
113
111
|
"the column constraint " + constraint_name,
|
|
114
112
|
constraint_description,
|
|
115
113
|
expectation,
|
|
@@ -230,7 +228,7 @@ class MultiConstraintWithMetadata(ConstraintWithMetadata):
|
|
|
230
228
|
metadict[key][validation_fn_arr[i].__name__] = dicta[key]
|
|
231
229
|
return (truthparam, metadict)
|
|
232
230
|
|
|
233
|
-
super(MultiConstraintWithMetadata, self).__init__(
|
|
231
|
+
super().__init__(
|
|
234
232
|
description,
|
|
235
233
|
validation_fn,
|
|
236
234
|
resulting_exception,
|
|
@@ -266,7 +264,7 @@ class StrictColumnsWithMetadata(ConstraintWithMetadata):
|
|
|
266
264
|
basestr = f"ensuring that the right columns, {self.column_list} were present"
|
|
267
265
|
if enforce_ordering:
|
|
268
266
|
basestr += " in the right order"
|
|
269
|
-
super(StrictColumnsWithMetadata, self).__init__(
|
|
267
|
+
super().__init__(
|
|
270
268
|
basestr,
|
|
271
269
|
validation_fcn,
|
|
272
270
|
DataFrameWithMetadataException,
|
|
@@ -284,7 +282,7 @@ class DataFrameConstraint(Constraint):
|
|
|
284
282
|
"""
|
|
285
283
|
|
|
286
284
|
def __init__(self, error_description=None, markdown_description=None):
|
|
287
|
-
super(DataFrameConstraint, self).__init__(
|
|
285
|
+
super().__init__(
|
|
288
286
|
error_description=error_description, markdown_description=markdown_description
|
|
289
287
|
)
|
|
290
288
|
|
|
@@ -309,9 +307,7 @@ class StrictColumnsConstraint(DataFrameConstraint):
|
|
|
309
307
|
description = f"No columns outside of {self.strict_column_list} allowed. "
|
|
310
308
|
if enforce_ordering:
|
|
311
309
|
description += "Columns must be in that order."
|
|
312
|
-
super(StrictColumnsConstraint, self).__init__(
|
|
313
|
-
error_description=description, markdown_description=description
|
|
314
|
-
)
|
|
310
|
+
super().__init__(error_description=description, markdown_description=description)
|
|
315
311
|
|
|
316
312
|
def validate(self, dataframe):
|
|
317
313
|
check.inst_param(dataframe, "dataframe", DataFrame)
|
|
@@ -347,9 +343,7 @@ class RowCountConstraint(DataFrameConstraint):
|
|
|
347
343
|
if self.error_tolerance > self.num_allowed_rows:
|
|
348
344
|
raise ValueError("Tolerance can't be greater than the number of rows you expect.")
|
|
349
345
|
description = f"Dataframe must have {self.num_allowed_rows} +- {self.error_tolerance} rows."
|
|
350
|
-
super(RowCountConstraint, self).__init__(
|
|
351
|
-
error_description=description, markdown_description=description
|
|
352
|
-
)
|
|
346
|
+
super().__init__(error_description=description, markdown_description=description)
|
|
353
347
|
|
|
354
348
|
def validate(self, dataframe):
|
|
355
349
|
check.inst_param(dataframe, "dataframe", DataFrame)
|
|
@@ -546,7 +540,7 @@ class MultiColumnConstraintWithMetadata(ColumnConstraintWithMetadata):
|
|
|
546
540
|
metadict[key][column][fn.__name__] = "a violation"
|
|
547
541
|
return truthparam, metadict
|
|
548
542
|
|
|
549
|
-
super(MultiColumnConstraintWithMetadata, self).__init__(
|
|
543
|
+
super().__init__(
|
|
550
544
|
description,
|
|
551
545
|
validation_fn,
|
|
552
546
|
resulting_exception,
|
|
@@ -585,7 +579,7 @@ class MultiAggregateConstraintWithMetadata(MultiColumnConstraintWithMetadata):
|
|
|
585
579
|
raise_or_typecheck=True,
|
|
586
580
|
name=None,
|
|
587
581
|
):
|
|
588
|
-
super(MultiAggregateConstraintWithMetadata, self).__init__(
|
|
582
|
+
super().__init__(
|
|
589
583
|
description,
|
|
590
584
|
fn_and_columns_dict,
|
|
591
585
|
resulting_exception,
|
|
@@ -839,7 +833,7 @@ class ColumnRangeConstraintWithMetadata(ColumnConstraintWithMetadata):
|
|
|
839
833
|
self.name = self.__class__.__name__
|
|
840
834
|
|
|
841
835
|
description = f"Confirms values are between {minim} and {maxim}"
|
|
842
|
-
super(ColumnRangeConstraintWithMetadata, self).__init__(
|
|
836
|
+
super().__init__(
|
|
843
837
|
description=description,
|
|
844
838
|
validation_fn=column_range_validation_factory(minim=minim, maxim=maxim),
|
|
845
839
|
resulting_exception=ColumnWithMetadataException,
|
|
@@ -851,9 +845,7 @@ class ColumnRangeConstraintWithMetadata(ColumnConstraintWithMetadata):
|
|
|
851
845
|
if self.columns is None:
|
|
852
846
|
self.columns = list(data.columns)
|
|
853
847
|
self.columns.extend(args)
|
|
854
|
-
return super(ColumnRangeConstraintWithMetadata, self).validate(
|
|
855
|
-
data, *self.columns, **kwargs
|
|
856
|
-
)
|
|
848
|
+
return super().validate(data, *self.columns, **kwargs)
|
|
857
849
|
|
|
858
850
|
|
|
859
851
|
class ColumnConstraint(Constraint):
|
|
@@ -865,7 +857,7 @@ class ColumnConstraint(Constraint):
|
|
|
865
857
|
"""
|
|
866
858
|
|
|
867
859
|
def __init__(self, error_description=None, markdown_description=None):
|
|
868
|
-
super(ColumnConstraint, self).__init__(
|
|
860
|
+
super().__init__(
|
|
869
861
|
error_description=error_description, markdown_description=markdown_description
|
|
870
862
|
)
|
|
871
863
|
|
|
@@ -888,9 +880,7 @@ class ColumnDTypeFnConstraint(ColumnConstraint):
|
|
|
888
880
|
def __init__(self, type_fn):
|
|
889
881
|
self.type_fn = check.callable_param(type_fn, "type_fn")
|
|
890
882
|
description = f'Dtype must satisfy "{self.type_fn.__name__}"'
|
|
891
|
-
super(ColumnDTypeFnConstraint, self).__init__(
|
|
892
|
-
error_description=description, markdown_description=description
|
|
893
|
-
)
|
|
883
|
+
super().__init__(error_description=description, markdown_description=description)
|
|
894
884
|
|
|
895
885
|
def validate(self, dataframe, column_name):
|
|
896
886
|
column_dtype = dataframe[column_name].dtype
|
|
@@ -912,9 +902,7 @@ class ColumnDTypeInSetConstraint(ColumnConstraint):
|
|
|
912
902
|
def __init__(self, expected_dtype_set):
|
|
913
903
|
self.expected_dtype_set = check.set_param(expected_dtype_set, "expected_dtype_set")
|
|
914
904
|
description = f"Column dtype must be in the following set {self.expected_dtype_set}."
|
|
915
|
-
super(ColumnDTypeInSetConstraint, self).__init__(
|
|
916
|
-
error_description=description, markdown_description=description
|
|
917
|
-
)
|
|
905
|
+
super().__init__(error_description=description, markdown_description=description)
|
|
918
906
|
|
|
919
907
|
def validate(self, dataframe, column_name):
|
|
920
908
|
received_dtypes = dataframe[column_name].dtype
|
|
@@ -933,9 +921,7 @@ class NonNullableColumnConstraint(ColumnConstraint):
|
|
|
933
921
|
|
|
934
922
|
def __init__(self):
|
|
935
923
|
description = "No Null values allowed."
|
|
936
|
-
super(NonNullableColumnConstraint, self).__init__(
|
|
937
|
-
error_description=description, markdown_description=description
|
|
938
|
-
)
|
|
924
|
+
super().__init__(error_description=description, markdown_description=description)
|
|
939
925
|
|
|
940
926
|
def validate(self, dataframe, column_name):
|
|
941
927
|
rows_with_null_columns = dataframe[dataframe[column_name].isna()]
|
|
@@ -958,9 +944,7 @@ class UniqueColumnConstraint(ColumnConstraint):
|
|
|
958
944
|
def __init__(self, ignore_missing_vals):
|
|
959
945
|
description = "Column must be unique."
|
|
960
946
|
self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
|
|
961
|
-
super(UniqueColumnConstraint, self).__init__(
|
|
962
|
-
error_description=description, markdown_description=description
|
|
963
|
-
)
|
|
947
|
+
super().__init__(error_description=description, markdown_description=description)
|
|
964
948
|
|
|
965
949
|
def validate(self, dataframe, column_name):
|
|
966
950
|
invalid = dataframe[column_name].duplicated()
|
|
@@ -987,7 +971,7 @@ class CategoricalColumnConstraint(ColumnConstraint):
|
|
|
987
971
|
def __init__(self, categories, ignore_missing_vals):
|
|
988
972
|
self.categories = list(check.set_param(categories, "categories", of_type=str))
|
|
989
973
|
self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
|
|
990
|
-
super(CategoricalColumnConstraint, self).__init__(
|
|
974
|
+
super().__init__(
|
|
991
975
|
error_description=f"Expected Categories are {self.categories}",
|
|
992
976
|
markdown_description=f"Category examples are {self.categories[:5]}...",
|
|
993
977
|
)
|
|
@@ -1018,7 +1002,7 @@ class MinValueColumnConstraint(ColumnConstraint):
|
|
|
1018
1002
|
def __init__(self, min_value, ignore_missing_vals):
|
|
1019
1003
|
self.min_value = check.inst_param(min_value, "min_value", (int, float, datetime))
|
|
1020
1004
|
self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
|
|
1021
|
-
super(MinValueColumnConstraint, self).__init__(
|
|
1005
|
+
super().__init__(
|
|
1022
1006
|
markdown_description=f"values > {self.min_value}",
|
|
1023
1007
|
error_description=f"Column must have values > {self.min_value}",
|
|
1024
1008
|
)
|
|
@@ -1049,7 +1033,7 @@ class MaxValueColumnConstraint(ColumnConstraint):
|
|
|
1049
1033
|
def __init__(self, max_value, ignore_missing_vals):
|
|
1050
1034
|
self.max_value = check.inst_param(max_value, "max_value", (int, float, datetime))
|
|
1051
1035
|
self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
|
|
1052
|
-
super(MaxValueColumnConstraint, self).__init__(
|
|
1036
|
+
super().__init__(
|
|
1053
1037
|
markdown_description=f"values < {self.max_value}",
|
|
1054
1038
|
error_description=f"Column must have values < {self.max_value}",
|
|
1055
1039
|
)
|
|
@@ -1083,7 +1067,7 @@ class InRangeColumnConstraint(ColumnConstraint):
|
|
|
1083
1067
|
self.min_value = check.inst_param(min_value, "min_value", (int, float, datetime))
|
|
1084
1068
|
self.max_value = check.inst_param(max_value, "max_value", (int, float, datetime))
|
|
1085
1069
|
self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
|
|
1086
|
-
super(InRangeColumnConstraint, self).__init__(
|
|
1070
|
+
super().__init__(
|
|
1087
1071
|
markdown_description=f"{self.min_value} < values < {self.max_value}",
|
|
1088
1072
|
error_description=f"Column must have values between {self.min_value} and {self.max_value} inclusive.",
|
|
1089
1073
|
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.25.10"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: dagster-pandas
|
|
3
|
-
Version: 0.25.8
|
|
3
|
+
Version: 0.25.10
|
|
4
4
|
Summary: Utilities and examples for working with pandas and dagster, an opinionated framework for expressing data pipelines
|
|
5
5
|
Home-page: https://github.com/dagster-io/dagster
|
|
6
6
|
Author: Dagster Labs
|
|
@@ -1,18 +1,17 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from pathlib import Path
|
|
3
|
-
from typing import Dict
|
|
4
3
|
|
|
5
4
|
from setuptools import find_packages, setup
|
|
6
5
|
|
|
7
6
|
|
|
8
7
|
def long_description() -> str:
|
|
9
8
|
here = os.path.abspath(os.path.dirname(__file__))
|
|
10
|
-
with open(os.path.join(here, "README.md"), "r", encoding="utf8") as fh:
|
|
9
|
+
with open(os.path.join(here, "README.md"), encoding="utf8") as fh:
|
|
11
10
|
return fh.read()
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
def get_version() -> str:
|
|
15
|
-
version: Dict[str, str] = {}
|
|
14
|
+
version: dict[str, str] = {}
|
|
16
15
|
with open(Path(__file__).parent / "dagster_pandas/version.py", encoding="utf8") as fp:
|
|
17
16
|
exec(fp.read(), version)
|
|
18
17
|
|
|
@@ -47,7 +46,7 @@ setup(
|
|
|
47
46
|
include_package_data=True,
|
|
48
47
|
python_requires=">=3.9,<3.13",
|
|
49
48
|
install_requires=[
|
|
50
|
-
"dagster==1.9.8",
|
|
49
|
+
"dagster==1.9.10",
|
|
51
50
|
"pandas",
|
|
52
51
|
],
|
|
53
52
|
)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.25.8"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/examples/pandas_hello_world/env.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/examples/pandas_hello_world/ops.py
RENAMED
|
File without changes
|
|
File without changes
|
{dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas/examples/pandas_hello_world.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dagster-pandas-0.25.8 → dagster-pandas-0.25.10}/dagster_pandas.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|