dagster-pandas 0.13.12rc2__py3-none-any.whl → 0.27.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dagster_pandas/data_frame.py CHANGED
@@ -1,23 +1,24 @@
 import pandas as pd
 from dagster import (
-    AssetMaterialization,
     DagsterInvariantViolationError,
     DagsterType,
-    EventMetadataEntry,
     Field,
+    MetadataValue,
     StringSource,
+    TableColumn,
+    TableSchema,
+    TableSchemaMetadataValue,
     TypeCheck,
-    check,
+    _check as check,
     dagster_type_loader,
-    dagster_type_materializer,
 )
-from dagster.check import CheckError
-from dagster.config.field_utils import Selector
-from dagster.core.definitions.event_metadata import parse_metadata
-from dagster.core.errors import DagsterInvalidEventMetadata
-from dagster.utils import dict_without_keys
-from dagster.utils.backcompat import experimental
+from dagster._annotations import beta
+from dagster._config import Selector
+from dagster._core.definitions.metadata import normalize_metadata
+from dagster._utils import dict_without_keys
+
 from dagster_pandas.constraints import (
+    CONSTRAINT_METADATA_KEY,
     ColumnDTypeFnConstraint,
     ColumnDTypeInSetConstraint,
     ConstraintViolationException,
@@ -27,38 +28,6 @@ from dagster_pandas.validation import PandasColumn, validate_constraints
 CONSTRAINT_BLACKLIST = {ColumnDTypeFnConstraint, ColumnDTypeInSetConstraint}


-@dagster_type_materializer(
-    Selector(
-        {
-            "csv": {
-                "path": StringSource,
-                "sep": Field(StringSource, is_required=False, default_value=","),
-            },
-            "parquet": {"path": StringSource},
-            "table": {"path": StringSource},
-            "pickle": {"path": StringSource},
-        },
-    )
-)
-def dataframe_materializer(_context, config, pandas_df):
-    check.inst_param(pandas_df, "pandas_df", pd.DataFrame)
-    file_type, file_options = list(config.items())[0]
-
-    if file_type == "csv":
-        path = file_options["path"]
-        pandas_df.to_csv(path, index=False, **dict_without_keys(file_options, "path"))
-    elif file_type == "parquet":
-        pandas_df.to_parquet(file_options["path"])
-    elif file_type == "table":
-        pandas_df.to_csv(file_options["path"], sep="\t", index=False)
-    elif file_type == "pickle":
-        pandas_df.to_pickle(file_options["path"])
-    else:
-        check.failed("Unsupported file_type {file_type}".format(file_type=file_type))
-
-    return AssetMaterialization.file(file_options["path"])
-
-
 @dagster_type_loader(
     Selector(
         {
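`dataframe_materializer` is removed with no direct replacement in this package; in current Dagster, persisting outputs is the job of an IO manager. Below is a minimal sketch of that pattern, assuming a CSV layout keyed by asset key; the class name, path convention, and `base_dir` default are illustrative, not part of dagster-pandas.

```python
import os

import pandas as pd
from dagster import ConfigurableIOManager, InputContext, OutputContext


class PandasCSVIOManager(ConfigurableIOManager):
    """Hypothetical CSV-backed IO manager standing in for the removed materializer."""

    base_dir: str = "data"  # illustrative default

    def _path(self, context) -> str:
        # Derive a file path from the asset key (illustrative naming convention).
        return os.path.join(self.base_dir, *context.asset_key.path) + ".csv"

    def handle_output(self, context: OutputContext, obj: pd.DataFrame) -> None:
        os.makedirs(os.path.dirname(self._path(context)), exist_ok=True)
        obj.to_csv(self._path(context), index=False)

    def load_input(self, context: InputContext) -> pd.DataFrame:
        return pd.read_csv(self._path(context))
```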
@@ -73,7 +42,7 @@ def dataframe_materializer(_context, config, pandas_df):
     )
 )
 def dataframe_loader(_context, config):
-    file_type, file_options = list(config.items())[0]
+    file_type, file_options = next(iter(config.items()))

     if file_type == "csv":
         path = file_options["path"]
@@ -85,9 +54,7 @@ def dataframe_loader(_context, config):
     elif file_type == "pickle":
         return pd.read_pickle(file_options["path"])
     else:
-        raise DagsterInvariantViolationError(
-            "Unsupported file_type {file_type}".format(file_type=file_type)
-        )
+        raise DagsterInvariantViolationError(f"Unsupported file_type {file_type}")


 def df_type_check(_, value):
@@ -95,11 +62,11 @@ def df_type_check(_, value):
         return TypeCheck(success=False)
     return TypeCheck(
         success=True,
-        metadata_entries=[
-            EventMetadataEntry.text(str(len(value)), "row_count", "Number of rows in DataFrame"),
+        metadata={
+            "row_count": str(len(value)),
             # string cast columns since they may be things like datetime
-            EventMetadataEntry.json({"columns": list(map(str, value.columns))}, "metadata"),
-        ],
+            "metadata": {"columns": list(map(str, value.columns))},
+        },
     )


@@ -109,16 +76,14 @@ DataFrame = DagsterType(
     tabular data structure with labeled axes (rows and columns).
     See http://pandas.pydata.org/""",
     loader=dataframe_loader,
-    materializer=dataframe_materializer,
     type_check_fn=df_type_check,
+    typing_type=pd.DataFrame,
 )


 def _construct_constraint_list(constraints):
     def add_bullet(constraint_list, constraint_description):
-        return constraint_list + "+ {constraint_description}\n".format(
-            constraint_description=constraint_description
-        )
+        return constraint_list + f"+ {constraint_description}\n"

     constraint_list = ""
     for constraint in constraints:
@@ -128,17 +93,13 @@ def _construct_constraint_list(constraints):


 def _build_column_header(column_name, constraints):
-    header = "**{column_name}**".format(column_name=column_name)
+    header = f"**{column_name}**"
     for constraint in constraints:
         if isinstance(constraint, ColumnDTypeInSetConstraint):
             dtypes_tuple = tuple(constraint.expected_dtype_set)
-            return header + ": `{expected_dtypes}`".format(
-                expected_dtypes=dtypes_tuple if len(dtypes_tuple) > 1 else dtypes_tuple[0]
-            )
+            return header + f": `{dtypes_tuple if len(dtypes_tuple) > 1 else dtypes_tuple[0]}`"  # pyright: ignore[reportGeneralTypeIssues]
         elif isinstance(constraint, ColumnDTypeFnConstraint):
-            return header + ": Validator `{expected_dtype_fn}`".format(
-                expected_dtype_fn=constraint.type_fn.__name__
-            )
+            return header + f": Validator `{constraint.type_fn.__name__}`"
     return header


@@ -146,47 +107,64 @@ def create_dagster_pandas_dataframe_description(description, columns):
     title = "\n".join([description, "### Columns", ""])
     buildme = title
     for column in columns:
-        buildme += "{}\n{}\n".format(
-            _build_column_header(column.name, column.constraints),
-            _construct_constraint_list(column.constraints),
-        )
+        buildme += f"{_build_column_header(column.name, column.constraints)}\n{_construct_constraint_list(column.constraints)}\n"
     return buildme


+def create_table_schema_metadata_from_dataframe(
+    pandas_df: pd.DataFrame,
+) -> TableSchemaMetadataValue:
+    """This function takes a pandas DataFrame and returns its metadata as a Dagster TableSchema.
+
+    Args:
+        pandas_df (pandas.DataFrame): A pandas DataFrame for which to create metadata.
+
+    Returns:
+        TableSchemaMetadataValue: returns an object with the TableSchema for the DataFrame.
+    """
+    check.inst_param(
+        pandas_df, "pandas_df", pd.DataFrame, "Input must be a pandas DataFrame object"
+    )
+    return MetadataValue.table_schema(
+        TableSchema(
+            columns=[
+                TableColumn(name=str(name), type=str(dtype))
+                for name, dtype in pandas_df.dtypes.items()
+            ]
+        )
+    )
+
+
+@beta
 def create_dagster_pandas_dataframe_type(
     name,
     description=None,
     columns=None,
-    event_metadata_fn=None,
+    metadata_fn=None,
     dataframe_constraints=None,
     loader=None,
-    materializer=None,
 ):
-    """
-    Constructs a custom pandas dataframe dagster type.
+    """Constructs a custom pandas dataframe dagster type.

     Args:
         name (str): Name of the dagster pandas type.
         description (Optional[str]): A markdown-formatted string, displayed in tooling.
         columns (Optional[List[PandasColumn]]): A list of :py:class:`~dagster.PandasColumn` objects
             which express dataframe column schemas and constraints.
-        event_metadata_fn (Optional[Callable[[], Union[Dict[str, Union[str, float, int, Dict, EventMetadata]], List[EventMetadataEntry]]]]):
+        metadata_fn (Optional[Callable[[], Union[Dict[str, Union[str, float, int, Dict, MetadataValue]])
             A callable which takes your dataframe and returns a dict with string label keys and
-            EventMetadata values. Can optionally return a List[EventMetadataEntry].
+            MetadataValue values.
         dataframe_constraints (Optional[List[DataFrameConstraint]]): A list of objects that inherit from
             :py:class:`~dagster.DataFrameConstraint`. This allows you to express dataframe-level constraints.
         loader (Optional[DagsterTypeLoader]): An instance of a class that
             inherits from :py:class:`~dagster.DagsterTypeLoader`. If None, we will default
             to using `dataframe_loader`.
-        materializer (Optional[DagsterTypeMaterializer]): An instance of a class
-            that inherits from :py:class:`~dagster.DagsterTypeMaterializer`. If None, we will
-            default to using `dataframe_materializer`.
     """
-    # We allow for the plugging in of dagster_type_loaders/materializers so that
-    # Users can load and materialize their custom dataframes via configuration their own way if the default
-    # configs don't suffice. This is purely optional.
+    # We allow for the plugging in of a dagster_type_loader so that users can load their custom
+    # dataframes via configuration their own way if the default configs don't suffice. This is
+    # purely optional.
     check.str_param(name, "name")
-    event_metadata_fn = check.opt_callable_param(event_metadata_fn, "event_metadata_fn")
+    metadata_fn = check.opt_callable_param(metadata_fn, "metadata_fn")
     description = create_dagster_pandas_dataframe_description(
         check.opt_str_param(description, "description", default=""),
         check.opt_list_param(columns, "columns", of_type=PandasColumn),
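A hedged usage sketch of the renamed `metadata_fn` hook combined with the new `create_table_schema_metadata_from_dataframe` helper; the type name, column, and bound below are illustrative:

```python
from dagster_pandas import PandasColumn, create_dagster_pandas_dataframe_type
from dagster_pandas.data_frame import create_table_schema_metadata_from_dataframe

# Hypothetical custom dataframe type: metadata_fn returns a plain dict of
# label -> value, including the table schema computed by the new helper.
TripDataFrame = create_dagster_pandas_dataframe_type(
    name="TripDataFrame",
    columns=[PandasColumn.integer_column("bike_id", min_value=0)],
    metadata_fn=lambda df: {
        "row_count": len(df),
        "schema": create_table_schema_metadata_from_dataframe(df),
    },
)
```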
@@ -196,35 +174,35 @@ def create_dagster_pandas_dataframe_type(
         if not isinstance(value, pd.DataFrame):
             return TypeCheck(
                 success=False,
-                description="Must be a pandas.DataFrame. Got value of type. {type_name}".format(
-                    type_name=type(value).__name__
+                description=(
+                    f"Must be a pandas.DataFrame. Got value of type. {type(value).__name__}"
                 ),
             )

         try:
             validate_constraints(
-                value, pandas_columns=columns, dataframe_constraints=dataframe_constraints
+                value,
+                pandas_columns=columns,
+                dataframe_constraints=dataframe_constraints,
             )
         except ConstraintViolationException as e:
             return TypeCheck(success=False, description=str(e))

         return TypeCheck(
             success=True,
-            metadata_entries=_execute_summary_stats(name, value, event_metadata_fn)
-            if event_metadata_fn
-            else None,
+            metadata=_execute_summary_stats(name, value, metadata_fn) if metadata_fn else None,  # pyright: ignore[reportArgumentType]
         )

     return DagsterType(
         name=name,
         type_check_fn=_dagster_type_check,
         loader=loader if loader else dataframe_loader,
-        materializer=materializer if materializer else dataframe_materializer,
         description=description,
+        typing_type=pd.DataFrame,
     )


-@experimental
+@beta
 def create_structured_dataframe_type(
     name,
     description=None,
@@ -232,11 +210,8 @@ def create_structured_dataframe_type(
     columns_aggregate_validator=None,
     dataframe_validator=None,
     loader=None,
-    materializer=None,
 ):
-    """
-
-    Args:
+    """Args:
         name (str): the name of the new type
         description (Optional[str]): the description of the new type
         columns_validator (Optional[Union[ColumnConstraintWithMetadata, MultiColumnConstraintWithMetadata]]):
@@ -252,9 +227,6 @@ def create_structured_dataframe_type(
         loader (Optional[DagsterTypeLoader]): An instance of a class that
             inherits from :py:class:`~dagster.DagsterTypeLoader`. If None, we will default
             to using `dataframe_loader`.
-        materializer (Optional[DagsterTypeMaterializer]): An instance of a class
-            that inherits from :py:class:`~dagster.DagsterTypeMaterializer`. If None, we will
-            default to using `dataframe_materializer`.

     Returns:
         a DagsterType with the corresponding name and packaged validation.
@@ -265,8 +237,8 @@ def create_structured_dataframe_type(
         if not isinstance(value, pd.DataFrame):
             return TypeCheck(
                 success=False,
-                description="Must be a pandas.DataFrame. Got value of type. {type_name}".format(
-                    type_name=type(value).__name__
+                description=(
+                    f"Must be a pandas.DataFrame. Got value of type. {type(value).__name__}"
                 ),
             )
         individual_result_dict = {}
@@ -282,7 +254,7 @@ def create_structured_dataframe_type(
             )

         typechecks_succeeded = True
-        metadata = []
+        metadata = {}
         overall_description = "Failed Constraints: {}"
         constraint_clauses = []
         for key, result in individual_result_dict.items():
@@ -290,19 +262,14 @@ def create_structured_dataframe_type(
             if result_val:
                 continue
             typechecks_succeeded = typechecks_succeeded and result_val
-            result_dict = result.metadata_entries[0].entry_data.data
-            metadata.append(
-                EventMetadataEntry.json(
-                    result_dict,
-                    "{}-constraint-metadata".format(key),
-                )
-            )
-            constraint_clauses.append("{} failing constraints, {}".format(key, result.description))
+            result_dict = result.metadata[CONSTRAINT_METADATA_KEY].data
+            metadata[f"{key}-constraint-metadata"] = MetadataValue.json(result_dict)
+            constraint_clauses.append(f"{key} failing constraints, {result.description}")
         # returns aggregates, then column, then dataframe
         return TypeCheck(
             success=typechecks_succeeded,
             description=overall_description.format(constraint_clauses),
-            metadata_entries=sorted(metadata, key=lambda x: x.label),
+            metadata=metadata,
         )

     description = check.opt_str_param(description, "description", default="")
@@ -310,34 +277,20 @@ def create_structured_dataframe_type(
         name=name,
         type_check_fn=_dagster_type_check,
         loader=loader if loader else dataframe_loader,
-        materializer=materializer if loader else dataframe_materializer,
         description=description,
     )


-def _execute_summary_stats(type_name, value, event_metadata_fn):
-    if not event_metadata_fn:
+def _execute_summary_stats(type_name, value, metadata_fn):
+    if not metadata_fn:
         return []

-    metadata_or_metadata_entries = event_metadata_fn(value)
-
-    invalid_message = (
-        "The return value of the user-defined summary_statistics function for pandas "
-        f"data frame type {type_name} returned {value}. This function must return "
-        "Union[Dict[str, Union[str, float, int, Dict, EventMetadata]], List[EventMetadataEntry]]"
-    )
-
-    metadata = None
-    metadata_entries = None
-
-    if isinstance(metadata_or_metadata_entries, list):
-        metadata_entries = metadata_or_metadata_entries
-    elif isinstance(metadata_or_metadata_entries, dict):
-        metadata = metadata_or_metadata_entries
-    else:
-        raise DagsterInvariantViolationError(invalid_message)
-
+    user_metadata = metadata_fn(value)
     try:
-        return parse_metadata(metadata, metadata_entries)
-    except (DagsterInvalidEventMetadata, CheckError):
-        raise DagsterInvariantViolationError(invalid_message)
+        return normalize_metadata(user_metadata)
+    except:
+        raise DagsterInvariantViolationError(
+            "The return value of the user-defined summary_statistics function for pandas "
+            f"data frame type {type_name} returned {value}. This function must return "
+            "Dict[str, RawMetadataValue]."
+        )
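For reference, a sketch of a return shape that `normalize_metadata` should accept under the new contract (the old `List[EventMetadataEntry]` form now raises); the labels and values below are illustrative:

```python
from dagster import MetadataValue


def summary_stats(df):
    # Plain ints/strings are valid RawMetadataValue entries; explicit
    # MetadataValue wrappers pass through unchanged.
    return {
        "n_rows": len(df),
        "columns": MetadataValue.json({"columns": list(map(str, df.columns))}),
    }
```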
dagster_pandas/examples/__init__.py CHANGED
@@ -1,18 +1,18 @@
 from dagster import (
-    In,
-    InputDefinition,
-    Out,
-    OutputDefinition,
+    FilesystemIOManager,
     config_from_files,
     file_relative_path,
-    fs_io_manager,
     graph,
     in_process_executor,
     repository,
 )

-from ..data_frame import DataFrame
-from .pandas_hello_world.ops import always_fails_op, papermill_pandas_hello_world, sum_op, sum_sq_op
+from dagster_pandas.examples.pandas_hello_world.ops import (
+    always_fails_op,
+    papermill_pandas_hello_world,
+    sum_op,
+    sum_sq_op,
+)


 @graph
@@ -56,7 +56,7 @@ def papermill_pandas_hello_world_graph():


 papermill_pandas_hello_world_test = papermill_pandas_hello_world_graph.to_job(
-    resource_defs={"io_manager": fs_io_manager},
+    resource_defs={"io_manager": FilesystemIOManager()},
     config=config_from_files(
         [
             file_relative_path(
@@ -68,7 +68,7 @@ papermill_pandas_hello_world_test = papermill_pandas_hello_world_graph.to_job(
 )

 papermill_pandas_hello_world_prod = papermill_pandas_hello_world_graph.to_job(
-    resource_defs={"io_manager": fs_io_manager},
+    resource_defs={"io_manager": FilesystemIOManager()},
     config=config_from_files(
         [
             file_relative_path(
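A self-contained sketch of the `FilesystemIOManager` swap shown above; the op, graph, and `base_dir` are illustrative:

```python
from dagster import FilesystemIOManager, graph, op


@op
def emit_number() -> int:
    return 1


@graph
def demo_graph():
    emit_number()


# FilesystemIOManager is the Pythonic resource replacing the fs_io_manager
# definition; base_dir is optional and shown only to make the location explicit.
demo_job = demo_graph.to_job(
    resource_defs={"io_manager": FilesystemIOManager(base_dir="/tmp/dagster_storage")}
)
```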
@@ -3,4 +3,4 @@ ops:
     inputs:
       num:
         csv:
-          path: 'data/num_prod.csv'
+          path: "data/num_prod.csv"
@@ -3,4 +3,4 @@ ops:
     inputs:
       num:
         csv:
-          path: 'data/num.csv'
+          path: "data/num.csv"
@@ -3,4 +3,4 @@ ops:
     inputs:
       df:
         csv:
-          path: 'data/num_prod.csv'
+          path: "data/num_prod.csv"
@@ -3,4 +3,4 @@ ops:
     inputs:
       df:
         csv:
-          path: 'data/num.csv'
+          path: "data/num.csv"
dagster_pandas/examples/pandas_hello_world/ops.py CHANGED
@@ -1,8 +1,8 @@
-import dagster_pandas as dagster_pd
 import dagstermill
-from dagster import In, InputDefinition, Out, OutputDefinition, file_relative_path, op
+from dagster import In, Out, file_relative_path, op

-from ...data_frame import DataFrame
+import dagster_pandas as dagster_pd
+from dagster_pandas.data_frame import DataFrame


 @op(
@@ -34,12 +34,12 @@ def always_fails_op(**_kwargs):


 def nb_test_path(name):
-    return file_relative_path(__file__, "../notebooks/{name}.ipynb".format(name=name))
+    return file_relative_path(__file__, f"../notebooks/{name}.ipynb")


-papermill_pandas_hello_world = dagstermill.define_dagstermill_solid(
+papermill_pandas_hello_world = dagstermill.factory.define_dagstermill_op(
     name="papermill_pandas_hello_world",
     notebook_path=nb_test_path("papermill_pandas_hello_world"),
-    input_defs=[InputDefinition(name="df", dagster_type=DataFrame)],
-    output_defs=[OutputDefinition(DataFrame)],
+    ins={"df": In(DataFrame)},
+    outs={"result": Out(DataFrame)},
 )
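The same named-mapping style replaces `input_defs`/`output_defs` on plain ops as well (note `@op` takes `out`, while `define_dagstermill_op` takes `outs`); a hypothetical sketch, where the `num1`/`num2` columns follow the example data:

```python
from dagster import In, Out, op
from dagster_pandas import DataFrame


@op(ins={"df": In(DataFrame)}, out={"result": Out(DataFrame)})
def sum_df(df):
    # Single named output, so the value can be returned directly.
    df["sum"] = df["num1"] + df["num2"]
    return df
```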
@@ -3,4 +3,4 @@ ops:
     inputs:
       num:
         csv:
-          path: 'data/num.csv'
+          path: "data/num.csv"
dagster_pandas/py.typed ADDED
@@ -0,0 +1 @@
+partial
dagster_pandas/validation.py CHANGED
@@ -1,4 +1,17 @@
-from dagster import DagsterInvariantViolationError, check
+from dagster import (
+    DagsterInvariantViolationError,
+    _check as check,
+)
+from dagster._annotations import beta
+from pandas import DataFrame, Timestamp
+from pandas.core.dtypes.common import (
+    is_bool_dtype,
+    is_float_dtype,
+    is_integer_dtype,
+    is_numeric_dtype,
+    is_string_dtype,
+)
+
 from dagster_pandas.constraints import (
     CategoricalColumnConstraint,
     ColumnDTypeFnConstraint,
@@ -10,14 +23,6 @@ from dagster_pandas.constraints import (
     NonNullableColumnConstraint,
     UniqueColumnConstraint,
 )
-from pandas import DataFrame, Timestamp
-from pandas.core.dtypes.common import (
-    is_bool_dtype,
-    is_float_dtype,
-    is_integer_dtype,
-    is_numeric_dtype,
-    is_string_dtype,
-)

 PANDAS_NUMERIC_TYPES = {"int64", "float"}

@@ -38,9 +43,9 @@ def _construct_keyword_constraints(non_nullable, unique, ignore_missing_vals):
     return constraints


+@beta
 class PandasColumn:
-    """
-    The main API for expressing column level schemas and constraints for your custom dataframe
+    """The main API for expressing column level schemas and constraints for your custom dataframe
     types.

     Args:
@@ -62,18 +67,15 @@ class PandasColumn:
         # Ignore validation if column is missing from dataframe and is not required
         if self.is_required:
             raise ConstraintViolationException(
-                "Required column {column_name} not in dataframe with columns {dataframe_columns}".format(
-                    column_name=self.name, dataframe_columns=dataframe.columns
-                )
+                f"Required column {self.name} not in dataframe with columns {dataframe.columns}"
             )
         else:
             for constraint in self.constraints:
-                constraint.validate(dataframe, self.name)
+                constraint.validate(dataframe, self.name)  # pyright: ignore[reportAttributeAccessIssue]

     @staticmethod
     def exists(name, non_nullable=False, unique=False, ignore_missing_vals=False, is_required=None):
-        """
-        Simple constructor for PandasColumns that expresses existence constraints.
+        """Simple constructor for PandasColumns that expresses existence constraints.

         Args:
             name (str): Name of the column. This must match up with the column name in the dataframe you
@@ -98,8 +100,7 @@ class PandasColumn:
     def boolean_column(
         name, non_nullable=False, unique=False, ignore_missing_vals=False, is_required=None
     ):
-        """
-        Simple constructor for PandasColumns that expresses boolean constraints on boolean dtypes.
+        """Simple constructor for PandasColumns that expresses boolean constraints on boolean dtypes.

         Args:
             name (str): Name of the column. This must match up with the column name in the dataframe you
@@ -131,8 +132,7 @@ class PandasColumn:
         ignore_missing_vals=False,
         is_required=None,
     ):
-        """
-        Simple constructor for PandasColumns that expresses numeric constraints numeric dtypes.
+        """Simple constructor for PandasColumns that expresses numeric constraints numeric dtypes.

         Args:
             name (str): Name of the column. This must match up with the column name in the dataframe you
@@ -173,8 +173,7 @@ class PandasColumn:
         ignore_missing_vals=False,
         is_required=None,
     ):
-        """
-        Simple constructor for PandasColumns that expresses numeric constraints on integer dtypes.
+        """Simple constructor for PandasColumns that expresses numeric constraints on integer dtypes.

         Args:
             name (str): Name of the column. This must match up with the column name in the dataframe you
@@ -215,8 +214,7 @@ class PandasColumn:
         ignore_missing_vals=False,
         is_required=None,
     ):
-        """
-        Simple constructor for PandasColumns that expresses numeric constraints on float dtypes.
+        """Simple constructor for PandasColumns that expresses numeric constraints on float dtypes.

         Args:
             name (str): Name of the column. This must match up with the column name in the dataframe you
@@ -258,8 +256,7 @@ class PandasColumn:
         is_required=None,
         tz=None,
     ):
-        """
-        Simple constructor for PandasColumns that expresses datetime constraints on 'datetime64[ns]' dtypes.
+        """Simple constructor for PandasColumns that expresses datetime constraints on 'datetime64[ns]' dtypes.

         Args:
             name (str): Name of the column. This must match up with the column name in the dataframe you
@@ -311,8 +308,7 @@ class PandasColumn:
     def string_column(
         name, non_nullable=False, unique=False, ignore_missing_vals=False, is_required=None
     ):
-        """
-        Simple constructor for PandasColumns that expresses constraints on string dtypes.
+        """Simple constructor for PandasColumns that expresses constraints on string dtypes.

         Args:
             name (str): Name of the column. This must match up with the column name in the dataframe you
@@ -338,14 +334,13 @@ class PandasColumn:
     def categorical_column(
         name,
         categories,
-        of_types="object",
+        of_types=frozenset({"category", "object"}),
         non_nullable=False,
         unique=False,
         ignore_missing_vals=False,
         is_required=None,
     ):
-        """
-        Simple constructor for PandasColumns that expresses categorical constraints on specified dtypes.
+        """Simple constructor for PandasColumns that expresses categorical constraints on specified dtypes.

         Args:
             name (str): Name of the column. This must match up with the column name in the dataframe you
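A hedged usage sketch of `categorical_column` under the widened default, which accepts both plain `object` columns and pandas `Categorical` columns; the type name and categories are illustrative:

```python
from dagster_pandas import PandasColumn, create_dagster_pandas_dataframe_type

ColorsDataFrame = create_dagster_pandas_dataframe_type(
    name="ColorsDataFrame",
    columns=[
        # Values must fall in the given set; dtype may be "object" or "category".
        PandasColumn.categorical_column("color", categories={"red", "green", "blue"})
    ],
)
```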
dagster_pandas/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.13.12rc2"
+__version__ = "0.27.15"
dagster_pandas-0.27.15.dist-info/METADATA ADDED
@@ -0,0 +1,36 @@
+Metadata-Version: 2.4
+Name: dagster-pandas
+Version: 0.27.15
+Summary: Utilities and examples for working with pandas and dagster, an opinionated framework for expressing data pipelines
+Home-page: https://github.com/dagster-io/dagster
+Author: Dagster Labs
+Author-email: hello@dagsterlabs.com
+License: Apache-2.0
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.9,<3.14
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: dagster==1.11.15
+Requires-Dist: pandas
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+
+# dagster-pandas
+
+The docs for `dagster-pandas` can be found
+[here](https://docs.dagster.io/api/python-api/libraries/dagster-pandas).