dagster-pandas 0.13.18rc0__py3-none-any.whl → 0.27.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,20 +2,27 @@ import sys
2
2
  from collections import defaultdict
3
3
  from datetime import datetime
4
4
  from functools import wraps
5
+ from typing import Final
5
6
 
6
7
  import pandas as pd
7
- from dagster import DagsterType, EventMetadataEntry, TypeCheck, check
8
- from dagster.utils.backcompat import experimental_class_warning
8
+ from dagster import (
9
+ DagsterType,
10
+ TypeCheck,
11
+ _check as check,
12
+ )
13
+ from dagster._annotations import beta
9
14
  from pandas import DataFrame
10
15
 
16
+ CONSTRAINT_METADATA_KEY: Final = "constraint_metadata"
17
+
11
18
 
12
19
  class ConstraintViolationException(Exception):
13
20
  """Indicates that a constraint has been violated."""
14
21
 
15
22
 
23
+ @beta
16
24
  class ConstraintWithMetadataException(Exception):
17
- """
18
- This class defines the response generated when a pandas DF fails validation -- it can be used to generate either a
25
+ """This class defines the response generated when a pandas DF fails validation -- it can be used to generate either a
19
26
  failed typecheck or an exception.
20
27
 
21
28
  Args:
@@ -39,31 +46,30 @@ class ConstraintWithMetadataException(Exception):
39
46
  self.expectation = check.opt_inst_param(expectation, "expectation", (dict, list, str, set))
40
47
  self.offending = check.opt_inst_param(offending, "offending", (dict, list, str, set))
41
48
  self.actual = check.opt_inst_param(actual, "actual", (dict, list, str, set))
42
- super(ConstraintWithMetadataException, self).__init__(
43
- "Violated {} - {}, {} was/were expected, but we received {} which was/were {}".format(
44
- constraint_name,
45
- constraint_description,
46
- expectation,
47
- offending,
48
- actual,
49
- )
49
+ super().__init__(
50
+ f"Violated {constraint_name} - {constraint_description}, {expectation} was/were expected, but we received {offending} which was/were {actual}"
50
51
  )
51
52
 
53
+ def normalize_metadata_json_value(self, val):
54
+ if isinstance(val, set):
55
+ return list(val)
56
+ else:
57
+ return val
58
+
52
59
  def convert_to_metadata(self):
53
- return EventMetadataEntry.json(
54
- {
60
+ return {
61
+ CONSTRAINT_METADATA_KEY: {
55
62
  "constraint_name": self.constraint_name,
56
63
  "constraint_description": self.constraint_description,
57
- "expected": self.expectation,
58
- "offending": self.offending,
59
- "actual": self.actual,
64
+ "expected": self.normalize_metadata_json_value(self.expectation),
65
+ "offending": self.normalize_metadata_json_value(self.offending),
66
+ "actual": self.normalize_metadata_json_value(self.actual),
60
67
  },
61
- "constraint-metadata",
62
- )
68
+ }
63
69
 
64
70
  def return_as_typecheck(self):
65
71
  return TypeCheck(
66
- success=False, description=self.args[0], metadata_entries=[self.convert_to_metadata()]
72
+ success=False, description=self.args[0], metadata=self.convert_to_metadata()
67
73
  )
68
74
 
69
75
 
@@ -71,16 +77,12 @@ class DataFrameConstraintViolationException(ConstraintViolationException):
71
77
  """Indicates a dataframe level constraint has been violated."""
72
78
 
73
79
  def __init__(self, constraint_name, constraint_description):
74
- super(DataFrameConstraintViolationException, self).__init__(
75
- "Violated {constraint_name} - {constraint_description}".format(
76
- constraint_name=constraint_name, constraint_description=constraint_description
77
- )
78
- )
80
+ super().__init__(f"Violated {constraint_name} - {constraint_description}")
79
81
 
80
82
 
81
83
  class DataFrameWithMetadataException(ConstraintWithMetadataException):
82
84
  def __init__(self, constraint_name, constraint_description, expectation, actual):
83
- super(DataFrameWithMetadataException, self).__init__(
85
+ super().__init__(
84
86
  constraint_name, constraint_description, expectation, "a malformed dataframe", actual
85
87
  )
86
88
 
@@ -93,24 +95,21 @@ class ColumnConstraintViolationException(ConstraintViolationException):
93
95
  self.constraint_description = constraint_description
94
96
  self.column_name = column_name
95
97
  self.offending_rows = offending_rows
96
- super(ColumnConstraintViolationException, self).__init__(self.construct_message())
98
+ super().__init__(self.construct_message())
97
99
 
98
100
  def construct_message(self):
99
- base_message = 'Violated "{constraint_name}" for column "{column_name}" - {constraint_description}'.format(
100
- constraint_name=self.constraint_name,
101
- constraint_description=self.constraint_description,
102
- column_name=self.column_name,
103
- )
101
+ base_message = f'Violated "{self.constraint_name}" for column "{self.column_name}" - {self.constraint_description}'
104
102
  if self.offending_rows is not None:
105
- base_message += "The offending (index, row values) are the following: {}".format(
106
- self.offending_rows
103
+ base_message += (
104
+ f"The offending (index, row values) are the following: {self.offending_rows}"
107
105
  )
108
106
  return base_message
109
107
 
110
108
 
109
+ @beta
111
110
  class ColumnWithMetadataException(ConstraintWithMetadataException):
112
111
  def __init__(self, constraint_name, constraint_description, expectation, offending, actual):
113
- super(ColumnWithMetadataException, self).__init__(
112
+ super().__init__(
114
113
  "the column constraint " + constraint_name,
115
114
  constraint_description,
116
115
  expectation,
@@ -120,12 +119,11 @@ class ColumnWithMetadataException(ConstraintWithMetadataException):
120
119
 
121
120
 
122
121
  class Constraint:
123
- """
124
- Base constraint object that all constraints inherit from.
122
+ """Base constraint object that all constraints inherit from.
125
123
 
126
124
  Args:
127
125
  error_description (Optional[str]): The plain string description that is output in the terminal if the constraint fails.
128
- markdown_description (Optional[str]): A markdown supported description that is emitted by dagit if the constraint fails.
126
+ markdown_description (Optional[str]): A markdown supported description that is shown in the Dagster UI if the constraint fails.
129
127
  """
130
128
 
131
129
  def __init__(self, error_description=None, markdown_description=None):
@@ -134,11 +132,11 @@ class Constraint:
134
132
  self.error_description = check.str_param(error_description, "error_description")
135
133
 
136
134
 
135
+ @beta
137
136
  class ConstraintWithMetadata:
138
- """
139
- This class defines a base constraint over pandas DFs with organized metadata
137
+ """This class defines a base constraint over pandas DFs with organized metadata.
140
138
 
141
- args:
139
+ Args:
142
140
  description (str): description of the constraint
143
141
  validation_fn (Callable[[DataFrame], Tuple[bool, dict[str, Union[dict,list, str, set]]]]:
144
142
  the validation function to run over inputted data
@@ -156,7 +154,6 @@ class ConstraintWithMetadata:
156
154
  def __init__(
157
155
  self, description, validation_fn, resulting_exception, raise_or_typecheck=True, name=None
158
156
  ):
159
- experimental_class_warning(self.__class__.__name__)
160
157
  if name is None:
161
158
  self.name = self.__class__.__name__
162
159
  else:
@@ -191,19 +188,17 @@ class ConstraintWithMetadata:
191
188
  )
192
189
  return DagsterType(
193
190
  name=self.name,
194
- description="A Pandas DataFrame with the following validation: {}".format(
195
- self.description
196
- ),
197
- type_check_fn=lambda x: self.validate(x, *args),
191
+ description=f"A Pandas DataFrame with the following validation: {self.description}",
192
+ type_check_fn=lambda x: self.validate(x, *args), # pyright: ignore[reportArgumentType]
198
193
  **kwargs,
199
194
  )
200
195
 
201
196
 
197
+ @beta
202
198
  class MultiConstraintWithMetadata(ConstraintWithMetadata):
203
- """
204
- Use this class if you have multiple constraints to check over the entire dataframe
199
+ """Use this class if you have multiple constraints to check over the entire dataframe.
205
200
 
206
- args:
201
+ Args:
207
202
  description (str): description of the constraint
208
203
  validation_fn_arr(List[Callable[[DataFrame], Tuple[bool, dict[str, Union[dict,list, str, set]]]]]):
209
204
  a list of the validation functions to run over inputted data
@@ -227,7 +222,6 @@ class MultiConstraintWithMetadata(ConstraintWithMetadata):
227
222
  validation_fn_arr = check.list_param(validation_fn_arr, "validation_fn_arr")
228
223
 
229
224
  def validation_fn(data, *args, **kwargs):
230
-
231
225
  results = [f(data, *args, **kwargs) for f in validation_fn_arr]
232
226
  truthparam = all(item[0] for item in results)
233
227
  metadict = defaultdict(dict)
@@ -237,7 +231,7 @@ class MultiConstraintWithMetadata(ConstraintWithMetadata):
237
231
  metadict[key][validation_fn_arr[i].__name__] = dicta[key]
238
232
  return (truthparam, metadict)
239
233
 
240
- super(MultiConstraintWithMetadata, self).__init__(
234
+ super().__init__(
241
235
  description,
242
236
  validation_fn,
243
237
  resulting_exception,
@@ -246,6 +240,7 @@ class MultiConstraintWithMetadata(ConstraintWithMetadata):
246
240
  )
247
241
 
248
242
 
243
+ @beta
249
244
  class StrictColumnsWithMetadata(ConstraintWithMetadata):
250
245
  def __init__(self, column_list, enforce_ordering=False, raise_or_typecheck=True, name=None):
251
246
  self.enforce_ordering = check.bool_param(enforce_ordering, "enforce_ordering")
@@ -270,10 +265,10 @@ class StrictColumnsWithMetadata(ConstraintWithMetadata):
270
265
  }
271
266
  return (False, resdict)
272
267
 
273
- basestr = "ensuring that the right columns, {} were present".format(self.column_list)
268
+ basestr = f"ensuring that the right columns, {self.column_list} were present"
274
269
  if enforce_ordering:
275
270
  basestr += " in the right order"
276
- super(StrictColumnsWithMetadata, self).__init__(
271
+ super().__init__(
277
272
  basestr,
278
273
  validation_fcn,
279
274
  DataFrameWithMetadataException,
@@ -283,16 +278,15 @@ class StrictColumnsWithMetadata(ConstraintWithMetadata):
283
278
 
284
279
 
285
280
  class DataFrameConstraint(Constraint):
286
- """
287
- Base constraint object that represent Dataframe shape constraints.
281
+ """Base constraint object that represent Dataframe shape constraints.
288
282
 
289
283
  Args:
290
284
  error_description (Optional[str]): The plain string description that is output in the terminal if the constraint fails.
291
- markdown_description (Optional[str]): A markdown supported description that is emitted by dagit if the constraint fails.
285
+ markdown_description (Optional[str]): A markdown supported description that is shown in the Dagster UI if the constraint fails.
292
286
  """
293
287
 
294
288
  def __init__(self, error_description=None, markdown_description=None):
295
- super(DataFrameConstraint, self).__init__(
289
+ super().__init__(
296
290
  error_description=error_description, markdown_description=markdown_description
297
291
  )
298
292
 
@@ -300,9 +294,9 @@ class DataFrameConstraint(Constraint):
300
294
  raise NotImplementedError()
301
295
 
302
296
 
297
+ @beta
303
298
  class StrictColumnsConstraint(DataFrameConstraint):
304
- """
305
- A dataframe constraint that validates column existence and ordering.
299
+ """A dataframe constraint that validates column existence and ordering.
306
300
 
307
301
  Args:
308
302
  strict_column_list (List[str]): The exact list of columns that your dataframe must have.
@@ -315,12 +309,10 @@ class StrictColumnsConstraint(DataFrameConstraint):
315
309
  self.strict_column_list = check.list_param(
316
310
  strict_column_list, "strict_column_list", of_type=str
317
311
  )
318
- description = "No columns outside of {cols} allowed. ".format(cols=self.strict_column_list)
312
+ description = f"No columns outside of {self.strict_column_list} allowed. "
319
313
  if enforce_ordering:
320
314
  description += "Columns must be in that order."
321
- super(StrictColumnsConstraint, self).__init__(
322
- error_description=description, markdown_description=description
323
- )
315
+ super().__init__(error_description=description, markdown_description=description)
324
316
 
325
317
  def validate(self, dataframe):
326
318
  check.inst_param(dataframe, "dataframe", DataFrame)
@@ -329,23 +321,22 @@ class StrictColumnsConstraint(DataFrameConstraint):
329
321
  if self.strict_column_list != columns_received:
330
322
  raise DataFrameConstraintViolationException(
331
323
  constraint_name=self.name,
332
- constraint_description="Expected the following ordering of columns {expected}. Received: {received}".format(
333
- expected=self.strict_column_list, received=columns_received
324
+ constraint_description=(
325
+ f"Expected the following ordering of columns {self.strict_column_list}. Received:"
326
+ f" {columns_received}"
334
327
  ),
335
328
  )
336
329
  for column in columns_received:
337
330
  if column not in self.strict_column_list:
338
331
  raise DataFrameConstraintViolationException(
339
332
  constraint_name=self.name,
340
- constraint_description="Expected {}. Recevied {}.".format(
341
- self.strict_column_list, columns_received
342
- ),
333
+ constraint_description=f"Expected {self.strict_column_list}. Recevied {columns_received}.",
343
334
  )
344
335
 
345
336
 
337
+ @beta
346
338
  class RowCountConstraint(DataFrameConstraint):
347
- """
348
- A dataframe constraint that validates the expected count of rows.
339
+ """A dataframe constraint that validates the expected count of rows.
349
340
 
350
341
  Args:
351
342
  num_allowed_rows (int): The number of allowed rows in your dataframe.
@@ -357,12 +348,8 @@ class RowCountConstraint(DataFrameConstraint):
357
348
  self.error_tolerance = abs(check.int_param(error_tolerance, "error_tolerance"))
358
349
  if self.error_tolerance > self.num_allowed_rows:
359
350
  raise ValueError("Tolerance can't be greater than the number of rows you expect.")
360
- description = "Dataframe must have {} +- {} rows.".format(
361
- self.num_allowed_rows, self.error_tolerance
362
- )
363
- super(RowCountConstraint, self).__init__(
364
- error_description=description, markdown_description=description
365
- )
351
+ description = f"Dataframe must have {self.num_allowed_rows} +- {self.error_tolerance} rows."
352
+ super().__init__(error_description=description, markdown_description=description)
366
353
 
367
354
  def validate(self, dataframe):
368
355
  check.inst_param(dataframe, "dataframe", DataFrame)
@@ -374,10 +361,8 @@ class RowCountConstraint(DataFrameConstraint):
374
361
  ):
375
362
  raise DataFrameConstraintViolationException(
376
363
  constraint_name=self.name,
377
- constraint_description="Expected {expected} +- {tolerance} rows. Got {received}".format(
378
- expected=self.num_allowed_rows,
379
- tolerance=self.error_tolerance,
380
- received=len(dataframe),
364
+ constraint_description=(
365
+ f"Expected {self.num_allowed_rows} +- {self.error_tolerance} rows. Got {len(dataframe)}"
381
366
  ),
382
367
  )
383
368
 
@@ -387,9 +372,9 @@ def apply_ignore_missing_data_to_mask(mask, column):
387
372
 
388
373
 
389
374
  class ColumnAggregateConstraintWithMetadata(ConstraintWithMetadata):
390
- """
391
- Similar to the base class, but now your validation functions should take in columns (pd.Series) not Dataframes.
392
- args:
375
+ """Similar to the base class, but now your validation functions should take in columns (pd.Series) not Dataframes.
376
+
377
+ Args:
393
378
  description (str): description of the constraint
394
379
  validation_fn (Callable[[pd.Series], Tuple[bool, dict[str, Union[dict,list, str, set]]]]:
395
380
  the validation function to run over inputted data
@@ -415,7 +400,7 @@ class ColumnAggregateConstraintWithMetadata(ConstraintWithMetadata):
415
400
  res = self.validation_fn(relevant_data[column])
416
401
  if not res[0]:
417
402
  offending_columns.add(column)
418
- if not res[1].get("actual") is None:
403
+ if res[1].get("actual") is not None:
419
404
  offending_values[column] = [x.item() for x in res[1].get("actual").to_numpy()]
420
405
  else:
421
406
  offending_values[column] = [x.item() for x in relevant_data[column].to_numpy()]
@@ -438,12 +423,13 @@ class ColumnAggregateConstraintWithMetadata(ConstraintWithMetadata):
438
423
 
439
424
 
440
425
  class ColumnConstraintWithMetadata(ConstraintWithMetadata):
441
- """
442
- This class is useful for constructing single constraints that
443
- you want to apply to multiple columns of your dataframe
426
+ """This class is useful for constructing single constraints that you want to apply to multiple
427
+ columns of your dataframe.
428
+
444
429
  The main difference from the base class in terms of construction is that now, your validation_fns should operate on
445
430
  individual values.
446
- args:
431
+
432
+ Args:
447
433
  description (str): description of the constraint
448
434
  validation_fn (Callable[[Any], Tuple[bool, dict[str, Union[dict,list, str, set]]]]:
449
435
  the validation function to run over inputted data
@@ -490,12 +476,12 @@ class ColumnConstraintWithMetadata(ConstraintWithMetadata):
490
476
  return exc.return_as_typecheck()
491
477
 
492
478
 
479
+ @beta
493
480
  class MultiColumnConstraintWithMetadata(ColumnConstraintWithMetadata):
494
- """
495
- This class is useful for constructing more complicated relationships between columns
481
+ """This class is useful for constructing more complicated relationships between columns
496
482
  and expectations -- i.e. you want some validations on column A, others on column B, etc.
497
- This lets you package up the metadata neatly,
498
- and also allows for cases like 'fail if any one of these constraints fails but still run all of them'
483
+ This lets you package up the metadata neatly, and also allows for cases like 'fail if any one of
484
+ these constraints fails but still run all of them'.
499
485
 
500
486
  Args:
501
487
  description (str): description of the overall set of validations
@@ -539,10 +525,10 @@ class MultiColumnConstraintWithMetadata(ColumnConstraintWithMetadata):
539
525
  result = new_validator.validate(
540
526
  DataFrame(data[column]), column, *args, **kwargs
541
527
  )
542
- result_val = result.success
528
+ result_val = result.success # pyright: ignore[reportOptionalMemberAccess]
543
529
  if result_val:
544
530
  continue
545
- result_dict = result.metadata_entries[0].entry_data.data
531
+ result_dict = result.metadata[CONSTRAINT_METADATA_KEY].data # pyright: ignore[reportAttributeAccessIssue,reportOptionalMemberAccess]
546
532
  truthparam = truthparam and result_val
547
533
  for key in result_dict.keys():
548
534
  if "constraint" not in key:
@@ -561,7 +547,7 @@ class MultiColumnConstraintWithMetadata(ColumnConstraintWithMetadata):
561
547
  metadict[key][column][fn.__name__] = "a violation"
562
548
  return truthparam, metadict
563
549
 
564
- super(MultiColumnConstraintWithMetadata, self).__init__(
550
+ super().__init__(
565
551
  description,
566
552
  validation_fn,
567
553
  resulting_exception,
@@ -573,9 +559,9 @@ class MultiColumnConstraintWithMetadata(ColumnConstraintWithMetadata):
573
559
  return ConstraintWithMetadata.validate(self, data, *args, **kwargs)
574
560
 
575
561
 
562
+ @beta
576
563
  class MultiAggregateConstraintWithMetadata(MultiColumnConstraintWithMetadata):
577
- """
578
- This class is similar to multicolumn, but takes in functions that operate on the whole column at once
564
+ """This class is similar to multicolumn, but takes in functions that operate on the whole column at once
579
565
  rather than ones that operate on each value --
580
566
  consider this similar to the difference between apply-map and apply aggregate.
581
567
 
@@ -601,19 +587,20 @@ class MultiAggregateConstraintWithMetadata(MultiColumnConstraintWithMetadata):
601
587
  raise_or_typecheck=True,
602
588
  name=None,
603
589
  ):
604
- super(MultiAggregateConstraintWithMetadata, self).__init__(
590
+ super().__init__(
605
591
  description,
606
592
  fn_and_columns_dict,
607
593
  resulting_exception,
608
594
  raise_or_typecheck=raise_or_typecheck,
609
- type_for_internal=ColumnAggregateConstraintWithMetadata,
595
+ type_for_internal=ColumnAggregateConstraintWithMetadata, # pyright: ignore[reportArgumentType]
610
596
  name=name,
611
597
  )
612
598
 
613
599
 
600
+ @beta
614
601
  def non_null_validation(x):
615
- """
616
- validates that a particular value in a column is not null
602
+ """Validates that a particular value in a column is not null.
603
+
617
604
  Usage:
618
605
  pass this as a column validator to
619
606
  :py:class:'~dagster_pandas.constraints.ColumnConstraintWithMetadata'
@@ -624,10 +611,11 @@ def non_null_validation(x):
624
611
  return not pd.isnull(x), {}
625
612
 
626
613
 
614
+ @beta
627
615
  def all_unique_validator(column, ignore_missing_vals=False):
628
- """
629
- validates that all values in an iterable are unique
630
- Returns duplicated values as metadata
616
+ """Validates that all values in an iterable are unique.
617
+
618
+ Returns duplicated values as metadata.
631
619
 
632
620
  Usage:
633
621
  As a validation function for a
@@ -662,16 +650,17 @@ def all_unique_validator(column, ignore_missing_vals=False):
662
650
  return not duplicated.any(), {"actual": column[duplicated]}
663
651
 
664
652
 
653
+ @beta
665
654
  def nonnull(func):
666
- """
667
- decorator for column validation functions to make them error on nulls
655
+ """Decorator for column validation functions to make them error on nulls.
656
+
668
657
  Usage:
669
658
  pass decorated functions as column validators to
670
659
  :py:class:'~dagster_pandas.constraints.ColumnConstraintWithMetadata'
671
660
  or :py:class:'~dagster_pandas.constraints.MultiColumnConstraintWithMetadata'
672
661
  Args:
673
662
  func (Callable[[Any], Tuple[bool, dict[str, Union[dict,list, str, set]]]]]):
674
- the column validator you want to error on nulls
663
+ the column validator you want to error on nulls.
675
664
  """
676
665
 
677
666
  @wraps(func)
@@ -680,18 +669,19 @@ def nonnull(func):
680
669
  nval = non_null_validation(val)
681
670
  return origval[0] and nval[0], {}
682
671
 
683
- nvalidator.__doc__ += " and ensures no values are null"
672
+ nvalidator.__doc__ += " and ensures no values are null" # pyright: ignore[reportOperatorIssue]
684
673
 
685
674
  return nvalidator
686
675
 
687
676
 
677
+ @beta
688
678
  def column_range_validation_factory(minim=None, maxim=None, ignore_missing_vals=False):
689
- """
690
- factory for validators testing if column values are within a range
679
+ """Factory for validators testing if column values are within a range.
680
+
691
681
  Args:
692
682
  minim(Optional[Comparable]): the low end of the range
693
683
  maxim(Optional[Comparable]): the high end of the range
694
- ignore_missing_vals(Optional[bool]): whether to ignore nulls
684
+ ignore_missing_vals(Optional[bool]): whether to ignore nulls.
695
685
 
696
686
  Returns: a validation function for this constraint
697
687
  Usage:
@@ -738,21 +728,20 @@ def column_range_validation_factory(minim=None, maxim=None, ignore_missing_vals=
738
728
  return True, {}
739
729
  return (isinstance(x, (type(minim), type(maxim)))) and (x <= maxim) and (x >= minim), {}
740
730
 
741
- in_range_validation_fn.__doc__ = "checks whether values are between {} and {}".format(
742
- minim, maxim
743
- )
731
+ in_range_validation_fn.__doc__ = f"checks whether values are between {minim} and {maxim}"
744
732
  if ignore_missing_vals:
745
733
  in_range_validation_fn.__doc__ += ", ignoring nulls"
746
734
 
747
735
  return in_range_validation_fn
748
736
 
749
737
 
738
+ @beta
750
739
  def categorical_column_validator_factory(categories, ignore_missing_vals=False):
751
- """
752
- factory for validators testing if all values are in some set
740
+ """Factory for validators testing if all values are in some set.
741
+
753
742
  Args:
754
743
  categories(Union[Sequence, set]): the set of allowed values
755
- ignore_missing_vals(Optional[bool]): whether to ignore nulls
744
+ ignore_missing_vals(Optional[bool]): whether to ignore nulls.
756
745
 
757
746
  Returns: a validation function for this constraint
758
747
 
@@ -785,7 +774,6 @@ def categorical_column_validator_factory(categories, ignore_missing_vals=False):
785
774
  metadata['actual'] == {'foo': {'categorical_validation_fn': [7]}}
786
775
 
787
776
  """
788
-
789
777
  categories = set(categories)
790
778
 
791
779
  def categorical_validation_fn(x):
@@ -794,7 +782,7 @@ def categorical_column_validator_factory(categories, ignore_missing_vals=False):
794
782
  return (x in categories), {}
795
783
 
796
784
  categorical_validation_fn.__doc__ = (
797
- "checks whether values are within this set of values: {}".format(categories)
785
+ f"checks whether values are within this set of values: {categories}"
798
786
  )
799
787
  if ignore_missing_vals:
800
788
  categorical_validation_fn.__doc__ += ", ignoring nulls"
@@ -802,9 +790,10 @@ def categorical_column_validator_factory(categories, ignore_missing_vals=False):
802
790
  return categorical_validation_fn
803
791
 
804
792
 
793
+ @beta
805
794
  def dtype_in_set_validation_factory(datatypes, ignore_missing_vals=False):
806
- """
807
- factory for testing if the dtype of a val falls within some allowed set
795
+ """Factory for testing if the dtype of a val falls within some allowed set.
796
+
808
797
  Args:
809
798
  datatypes(Union[set[type], type]): which datatype/datatypes are allowed
810
799
  ignore_missing_vals(Optional[bool]): whether to ignore nulls
@@ -846,9 +835,7 @@ def dtype_in_set_validation_factory(datatypes, ignore_missing_vals=False):
846
835
  return True, {}
847
836
  return isinstance(x, datatypes), {}
848
837
 
849
- dtype_in_set_validation_fn.__doc__ = "checks whether values are this type/types: {}".format(
850
- datatypes
851
- )
838
+ dtype_in_set_validation_fn.__doc__ = f"checks whether values are this type/types: {datatypes}"
852
839
  if ignore_missing_vals:
853
840
  dtype_in_set_validation_fn.__doc__ += ", ignoring nulls"
854
841
 
@@ -859,8 +846,8 @@ class ColumnRangeConstraintWithMetadata(ColumnConstraintWithMetadata):
859
846
  def __init__(self, minim=None, maxim=None, columns=None, raise_or_typecheck=True):
860
847
  self.name = self.__class__.__name__
861
848
 
862
- description = "Confirms values are between {} and {}".format(minim, maxim)
863
- super(ColumnRangeConstraintWithMetadata, self).__init__(
849
+ description = f"Confirms values are between {minim} and {maxim}"
850
+ super().__init__(
864
851
  description=description,
865
852
  validation_fn=column_range_validation_factory(minim=minim, maxim=maxim),
866
853
  resulting_exception=ColumnWithMetadataException,
@@ -872,22 +859,19 @@ class ColumnRangeConstraintWithMetadata(ColumnConstraintWithMetadata):
872
859
  if self.columns is None:
873
860
  self.columns = list(data.columns)
874
861
  self.columns.extend(args)
875
- return super(ColumnRangeConstraintWithMetadata, self).validate(
876
- data, *self.columns, **kwargs
877
- )
862
+ return super().validate(data, *self.columns, **kwargs)
878
863
 
879
864
 
880
865
  class ColumnConstraint(Constraint):
881
- """
882
- Base constraint object that represent dataframe column shape constraints.
866
+ """Base constraint object that represent dataframe column shape constraints.
883
867
 
884
868
  Args:
885
869
  error_description (Optional[str]): The plain string description that is output in the terminal if the constraint fails.
886
- markdown_description (Optional[str]): A markdown supported description that is emitted by dagit if the constraint fails.
870
+ markdown_description (Optional[str]): A markdown supported description that is shown in the Dagster UI if the constraint fails.
887
871
  """
888
872
 
889
873
  def __init__(self, error_description=None, markdown_description=None):
890
- super(ColumnConstraint, self).__init__(
874
+ super().__init__(
891
875
  error_description=error_description, markdown_description=markdown_description
892
876
  )
893
877
 
@@ -900,8 +884,7 @@ class ColumnConstraint(Constraint):
900
884
 
901
885
 
902
886
  class ColumnDTypeFnConstraint(ColumnConstraint):
903
- """
904
- A column constraint that applies a pandas dtype validation function to a columns dtype.
887
+ """A column constraint that applies a pandas dtype validation function to a columns dtype.
905
888
 
906
889
  Args:
907
890
  type_fn (Callable[[Set[str]], bool]): This is a function that takes the pandas columns dtypes and
@@ -911,9 +894,7 @@ class ColumnDTypeFnConstraint(ColumnConstraint):
911
894
  def __init__(self, type_fn):
912
895
  self.type_fn = check.callable_param(type_fn, "type_fn")
913
896
  description = f'Dtype must satisfy "{self.type_fn.__name__}"'
914
- super(ColumnDTypeFnConstraint, self).__init__(
915
- error_description=description, markdown_description=description
916
- )
897
+ super().__init__(error_description=description, markdown_description=description)
917
898
 
918
899
  def validate(self, dataframe, column_name):
919
900
  column_dtype = dataframe[column_name].dtype
@@ -926,8 +907,7 @@ class ColumnDTypeFnConstraint(ColumnConstraint):
926
907
 
927
908
 
928
909
  class ColumnDTypeInSetConstraint(ColumnConstraint):
929
- """
930
- A column constraint that validates the pandas column dtypes based on the expected set of dtypes.
910
+ """A column constraint that validates the pandas column dtypes based on the expected set of dtypes.
931
911
 
932
912
  Args:
933
913
  expected_dtype_set (Set[str]): The set of pandas dtypes that the pandas column dtypes must match.
@@ -935,35 +915,27 @@ class ColumnDTypeInSetConstraint(ColumnConstraint):
935
915
 
936
916
  def __init__(self, expected_dtype_set):
937
917
  self.expected_dtype_set = check.set_param(expected_dtype_set, "expected_dtype_set")
938
- description = "Column dtype must be in the following set {}.".format(
939
- self.expected_dtype_set
940
- )
941
- super(ColumnDTypeInSetConstraint, self).__init__(
942
- error_description=description, markdown_description=description
943
- )
918
+ description = f"Column dtype must be in the following set {self.expected_dtype_set}."
919
+ super().__init__(error_description=description, markdown_description=description)
944
920
 
945
921
  def validate(self, dataframe, column_name):
946
922
  received_dtypes = dataframe[column_name].dtype
947
923
  if str(received_dtypes) not in self.expected_dtype_set:
948
924
  raise ColumnConstraintViolationException(
949
925
  constraint_name=self.name,
950
- constraint_description="{base_error_message}. DTypes received: {received_dtypes}".format(
951
- base_error_message=self.error_description, received_dtypes=received_dtypes
926
+ constraint_description=(
927
+ f"{self.error_description}. DTypes received: {received_dtypes}"
952
928
  ),
953
929
  column_name=column_name,
954
930
  )
955
931
 
956
932
 
957
933
  class NonNullableColumnConstraint(ColumnConstraint):
958
- """
959
- A column constraint that ensures all values in a pandas column are not null.
960
- """
934
+ """A column constraint that ensures all values in a pandas column are not null."""
961
935
 
962
936
  def __init__(self):
963
937
  description = "No Null values allowed."
964
- super(NonNullableColumnConstraint, self).__init__(
965
- error_description=description, markdown_description=description
966
- )
938
+ super().__init__(error_description=description, markdown_description=description)
967
939
 
968
940
  def validate(self, dataframe, column_name):
969
941
  rows_with_null_columns = dataframe[dataframe[column_name].isna()]
@@ -977,8 +949,7 @@ class NonNullableColumnConstraint(ColumnConstraint):
977
949
 
978
950
 
979
951
  class UniqueColumnConstraint(ColumnConstraint):
980
- """
981
- A column constraint that ensures all values in a pandas column are unique.
952
+ """A column constraint that ensures all values in a pandas column are unique.
982
953
 
983
954
  Args:
984
955
  ignore_missing_vals (bool): If true, this constraint will enforce the constraint on non missing values.
@@ -987,9 +958,7 @@ class UniqueColumnConstraint(ColumnConstraint):
987
958
  def __init__(self, ignore_missing_vals):
988
959
  description = "Column must be unique."
989
960
  self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
990
- super(UniqueColumnConstraint, self).__init__(
991
- error_description=description, markdown_description=description
992
- )
961
+ super().__init__(error_description=description, markdown_description=description)
993
962
 
994
963
  def validate(self, dataframe, column_name):
995
964
  invalid = dataframe[column_name].duplicated()
@@ -1006,8 +975,7 @@ class UniqueColumnConstraint(ColumnConstraint):
1006
975
 
1007
976
 
1008
977
  class CategoricalColumnConstraint(ColumnConstraint):
1009
- """
1010
- A column constraint that ensures all values in a pandas column are a valid category.
978
+ """A column constraint that ensures all values in a pandas column are a valid category.
1011
979
 
1012
980
  Args:
1013
981
  categories (Set[str]): Set of categories that values in your pandas column must match.
@@ -1017,9 +985,9 @@ class CategoricalColumnConstraint(ColumnConstraint):
1017
985
  def __init__(self, categories, ignore_missing_vals):
1018
986
  self.categories = list(check.set_param(categories, "categories", of_type=str))
1019
987
  self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
1020
- super(CategoricalColumnConstraint, self).__init__(
1021
- error_description="Expected Categories are {}".format(self.categories),
1022
- markdown_description="Category examples are {}...".format(self.categories[:5]),
988
+ super().__init__(
989
+ error_description=f"Expected Categories are {self.categories}",
990
+ markdown_description=f"Category examples are {self.categories[:5]}...",
1023
991
  )
1024
992
 
1025
993
  def validate(self, dataframe, column_name):
@@ -1037,8 +1005,7 @@ class CategoricalColumnConstraint(ColumnConstraint):
1037
1005
 
1038
1006
 
1039
1007
  class MinValueColumnConstraint(ColumnConstraint):
1040
- """
1041
- A column constraint that ensures all values in a pandas column are greater than the provided
1008
+ """A column constraint that ensures all values in a pandas column are greater than the provided
1042
1009
  lower bound [inclusive].
1043
1010
 
1044
1011
  Args:
@@ -1049,9 +1016,9 @@ class MinValueColumnConstraint(ColumnConstraint):
1049
1016
  def __init__(self, min_value, ignore_missing_vals):
1050
1017
  self.min_value = check.inst_param(min_value, "min_value", (int, float, datetime))
1051
1018
  self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
1052
- super(MinValueColumnConstraint, self).__init__(
1053
- markdown_description="values > {}".format(self.min_value),
1054
- error_description="Column must have values > {}".format(self.min_value),
1019
+ super().__init__(
1020
+ markdown_description=f"values > {self.min_value}",
1021
+ error_description=f"Column must have values > {self.min_value}",
1055
1022
  )
1056
1023
 
1057
1024
  def validate(self, dataframe, column_name):
@@ -1069,8 +1036,7 @@ class MinValueColumnConstraint(ColumnConstraint):
1069
1036
 
1070
1037
 
1071
1038
  class MaxValueColumnConstraint(ColumnConstraint):
1072
- """
1073
- A column constraint that ensures all values in a pandas column are less than the provided
1039
+ """A column constraint that ensures all values in a pandas column are less than the provided
1074
1040
  upper bound [inclusive].
1075
1041
 
1076
1042
  Args:
@@ -1081,9 +1047,9 @@ class MaxValueColumnConstraint(ColumnConstraint):
1081
1047
  def __init__(self, max_value, ignore_missing_vals):
1082
1048
  self.max_value = check.inst_param(max_value, "max_value", (int, float, datetime))
1083
1049
  self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
1084
- super(MaxValueColumnConstraint, self).__init__(
1085
- markdown_description="values < {}".format(self.max_value),
1086
- error_description="Column must have values < {}".format(self.max_value),
1050
+ super().__init__(
1051
+ markdown_description=f"values < {self.max_value}",
1052
+ error_description=f"Column must have values < {self.max_value}",
1087
1053
  )
1088
1054
 
1089
1055
  def validate(self, dataframe, column_name):
@@ -1101,8 +1067,7 @@ class MaxValueColumnConstraint(ColumnConstraint):
1101
1067
 
1102
1068
 
1103
1069
  class InRangeColumnConstraint(ColumnConstraint):
1104
- """
1105
- A column constraint that ensures all values in a pandas column are between the lower and upper
1070
+ """A column constraint that ensures all values in a pandas column are between the lower and upper
1106
1071
  bound [inclusive].
1107
1072
 
1108
1073
  Args:
@@ -1116,11 +1081,9 @@ class InRangeColumnConstraint(ColumnConstraint):
1116
1081
  self.min_value = check.inst_param(min_value, "min_value", (int, float, datetime))
1117
1082
  self.max_value = check.inst_param(max_value, "max_value", (int, float, datetime))
1118
1083
  self.ignore_missing_vals = check.bool_param(ignore_missing_vals, "ignore_missing_vals")
1119
- super(InRangeColumnConstraint, self).__init__(
1120
- markdown_description="{} < values < {}".format(self.min_value, self.max_value),
1121
- error_description="Column must have values between {} and {} inclusive.".format(
1122
- self.min_value, self.max_value
1123
- ),
1084
+ super().__init__(
1085
+ markdown_description=f"{self.min_value} < values < {self.max_value}",
1086
+ error_description=f"Column must have values between {self.min_value} and {self.max_value} inclusive.",
1124
1087
  )
1125
1088
 
1126
1089
  def validate(self, dataframe, column_name):