dagster-census 0.17.12__py3-none-any.whl → 0.28.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,15 @@
1
- from dagster._core.utils import check_dagster_package_version
1
+ from dagster_shared.libraries import DagsterLibraryRegistry
2
2
 
3
- from .ops import census_trigger_sync_op
4
- from .resources import CensusResource, census_resource
5
- from .types import CensusOutput
6
- from .version import __version__
3
+ from dagster_census.components.census_component import CensusComponent as CensusComponent
4
+ from dagster_census.ops import census_trigger_sync_op
5
+ from dagster_census.resources import CensusResource
6
+ from dagster_census.types import CensusOutput
7
+ from dagster_census.version import __version__
7
8
 
8
- check_dagster_package_version("dagster-census", __version__)
9
+ DagsterLibraryRegistry.register("dagster-census", __version__)
9
10
 
10
11
  __all__ = [
11
- "CensusResource",
12
12
  "CensusOutput",
13
- "census_resource",
13
+ "CensusResource",
14
14
  "census_trigger_sync_op",
15
15
  ]
File without changes
@@ -0,0 +1,219 @@
1
+ from collections.abc import Callable, Iterable, Sequence
2
+ from dataclasses import dataclass, field
3
+ from pathlib import Path
4
+ from typing import Annotated, Optional, Union
5
+
6
+ import dagster as dg
7
+ import pydantic
8
+ from dagster._annotations import public
9
+ from dagster._core.definitions.metadata import TableMetadataSet
10
+ from dagster._utils.names import clean_name
11
+ from dagster.components.component.state_backed_component import StateBackedComponent
12
+ from dagster.components.resolved.base import resolve_fields
13
+ from dagster.components.utils.defs_state import (
14
+ DefsStateConfig,
15
+ DefsStateConfigArgs,
16
+ ResolvedDefsStateConfig,
17
+ )
18
+ from dagster_shared import check
19
+ from dagster_shared.serdes.serdes import deserialize_value
20
+
21
+ from dagster_census.components.census_scaffolder import CensusComponentScaffolder
22
+ from dagster_census.resources import CensusResource
23
+ from dagster_census.translator import (
24
+ CensusMetadataSet,
25
+ CensusSync,
26
+ CensusWorkspaceData,
27
+ generate_table_schema,
28
+ )
29
+
30
+
31
# Selector model for choosing Census syncs by name. Documented with comments
# (not a docstring) so the schema generated from this model is left untouched.
class CensusSyncSelectorByName(dg.Resolvable, dg.Model):
    # Names of the syncs that should be included.
    by_name: Annotated[
        Sequence[str],
        pydantic.Field(..., description="A list of sync names to include in the collection."),
    ]
36
+
37
+
38
# Selector model for choosing Census syncs by ID. Documented with comments
# (not a docstring) so the schema generated from this model is left untouched.
class CensusSyncSelectorById(dg.Resolvable, dg.Model):
    # IDs of the syncs that should be included.
    # Strings are allowed for use-cases like '{{ env.ENV_VAR }}'.
    by_id: Annotated[
        Sequence[Union[int, str]],
        pydantic.Field(..., description="A list of sync IDs to include in the collection."),
    ]
43
+
44
+
45
def resolve_sync_selector(
    context: dg.ResolutionContext, model
) -> Optional[Callable[[CensusSync], bool]]:
    """Turn the ``sync_selector`` attribute into a predicate over Census syncs.

    Args:
        context: Resolution context used to render templated values.
        model: Either a string (a template expected to resolve to a callable) or an
            instance of the ``CensusSyncSelectorByName`` / ``CensusSyncSelectorById``
            models.

    Returns:
        A callable that takes a ``CensusSync`` and returns whether it is selected.

    Any other ``model`` type is rejected through ``check.failed``.
    """
    if isinstance(model, str):
        # A bare string is a template that must resolve to a user-supplied callable.
        resolved = context.resolve_value(model)
        return check.callable_param(resolved, "unknown")  # pyright: ignore[reportArgumentType]

    if isinstance(model, CensusSyncSelectorByName.model()):
        resolved = resolve_fields(model, CensusSyncSelectorByName.model(), context)
        # Build the lookup set once instead of scanning the sequence for every sync.
        selected_names = frozenset(resolved["by_name"])
        return lambda sync: sync.name in selected_names

    if isinstance(model, CensusSyncSelectorById.model()):
        resolved = resolve_fields(model, CensusSyncSelectorById.model(), context)
        # The schema accepts both ints and strings (e.g. values coming from env vars),
        # so compare on the string form; otherwise "123" would never match sync id 123.
        selected_ids = frozenset(str(sync_id) for sync_id in resolved["by_id"])
        return lambda sync: str(sync.id) in selected_ids

    check.failed(f"Unknown sync target type: {type(model)}")
60
+
61
+
62
@dataclass
class CensusResourceArgs(dg.Resolvable, dg.Model):
    """The fields are analogous to the fields at `CensusResource`."""

    # The API key is the only field declared without a default.
    api_key: Annotated[str, pydantic.Field(...)]

    # The tuning knobs below are declared with a ``None`` default.
    request_max_retries: Annotated[int, pydantic.Field(None)]
    request_retry_delay: Annotated[float, pydantic.Field(None)]
    request_timeout: Annotated[int, pydantic.Field(None)]
70
+
71
+
72
def resolve_census_workspace(
    context: dg.ResolutionContext, model: CensusResourceArgs
) -> CensusResource:
    """Build a ``CensusResource`` from the component's ``workspace`` attributes.

    Args:
        context: Resolution context used to resolve the model's fields.
        model: The ``workspace`` attributes to resolve.

    Returns:
        A ``CensusResource`` constructed from the resolved field values.
    """
    resource_kwargs = resolve_fields(model, CensusResourceArgs, context)
    return CensusResource(**resource_kwargs)
76
+
77
+
78
@public
@dataclass
@dg.scaffold_with(CensusComponentScaffolder)
class CensusComponent(StateBackedComponent, dg.Resolvable):
    """Loads Census syncs from a Census workspace as Dagster assets.
    Materializing these assets will trigger the Census sync, enabling
    you to schedule Census syncs using Dagster.

    Example:

        .. code-block:: yaml

            # defs.yaml

            type: dagster_census.CensusComponent
            attributes:
              workspace:
                api_key: "{{ env.CENSUS_API_KEY }}"
              sync_selector:
                by_name:
                  - my_first_sync
                  - my_second_sync
    """

    # Resource used both to fetch workspace state and to trigger syncs.
    workspace: Annotated[
        CensusResource,
        dg.Resolver(resolve_census_workspace, model_field_type=CensusResourceArgs),
    ]

    # Optional predicate; when unset every sync in the stored state is loaded.
    sync_selector: Annotated[
        Optional[Callable[[CensusSync], bool]],
        dg.Resolver(
            resolve_sync_selector,
            model_field_type=Union[
                str, CensusSyncSelectorByName.model(), CensusSyncSelectorById.model()
            ],
            description="Function used to select Census syncs to pull into Dagster.",
        ),
    ] = None

    defs_state: ResolvedDefsStateConfig = field(
        default_factory=DefsStateConfigArgs.local_filesystem
    )

    @property
    def defs_state_config(self) -> DefsStateConfig:
        """State configuration for this component; the class name is the default key."""
        default_key = self.__class__.__name__
        return DefsStateConfig.from_args(self.defs_state, default_key=default_key)

    def write_state_to_path(self, state_path: Path) -> None:
        """Fetch the workspace data from Census and write it, serialized, to ``state_path``."""
        state = self.workspace.fetch_census_workspace_data()
        state_path.write_text(dg.serialize_value(state))

    @public
    def get_asset_spec(self, sync: CensusSync) -> dg.AssetSpec:
        """Build the asset spec that represents a single Census sync.

        Args:
            sync: The Census sync to represent.

        Returns:
            An ``AssetSpec`` keyed by the cleaned sync name, carrying table metadata
            (column schema and table name) and Census metadata (sync, source and
            destination identifiers).
        """
        metadata = {
            **TableMetadataSet(
                column_schema=generate_table_schema(sync.mappings),
                table_name=sync.name,
            ),
            **CensusMetadataSet(
                sync_id=sync.id,
                sync_name=sync.name,
                source_id=sync.source_id,
                destination_id=sync.destination_id,
            ),
        }

        return dg.AssetSpec(
            key=dg.AssetKey(clean_name(sync.name)),
            metadata=metadata,
            description=f"Asset generated from Census sync {sync.id}",
            kinds={"census"},
        )

    @public
    def execute(
        self, context: dg.AssetExecutionContext, census: CensusResource
    ) -> Iterable[Union[dg.AssetMaterialization, dg.MaterializeResult]]:
        """Executes a Census sync for the selected sync.

        This method can be overridden in a subclass to customize the sync execution behavior,
        such as adding custom logging or handling sync results differently.

        Args:
            context: The asset execution context provided by Dagster
            census: The CensusResource used to trigger and monitor syncs

        Yields:
            An AssetMaterialization event for the asset backed by the Census sync

        Example:
            Override this method to add custom logging during sync execution. This
            method is a generator, so delegate with ``yield from``; the sync only
            runs while the generator is being consumed:

            .. code-block:: python

                from dagster_census import CensusComponent
                import dagster as dg

                class CustomCensusComponent(CensusComponent):
                    def execute(self, context, census):
                        context.log.info(f"Starting Census sync for {context.asset_key}")
                        yield from super().execute(context, census)
                        context.log.info("Census sync completed successfully")
        """
        # Select the first asset-spec (since there is only one)
        spec = next(iter(context.assets_def.specs))

        census_metadataset = CensusMetadataSet.extract(spec.metadata)
        census.trigger_sync_and_poll(sync_id=census_metadataset.sync_id)
        # Report against the key of the spec being executed rather than rebuilding it
        # from the sync name, so keys customized via get_asset_spec still line up.
        yield dg.AssetMaterialization(asset_key=spec.key)

    def _load_asset_specs(self, state: CensusWorkspaceData) -> Sequence[dg.AssetSpec]:
        """Return asset specs for every sync in ``state`` accepted by the selector."""
        sync_selector_fn = self.sync_selector or (lambda sync: True)
        return [self.get_asset_spec(sync) for sync in state.syncs if sync_selector_fn(sync)]

    def _get_census_assets_def(self, sync_name: str, spec: dg.AssetSpec) -> dg.AssetsDefinition:
        """Wrap one spec in a multi-asset whose body delegates to ``execute``."""

        @dg.multi_asset(
            name=f"census_{clean_name(sync_name)}",
            can_subset=True,
            specs=[spec],
        )
        def _asset(context: dg.AssetExecutionContext):
            yield from self.execute(context, self.workspace)

        return _asset

    def build_defs_from_state(
        self, context: dg.ComponentLoadContext, state_path: Optional[Path]
    ) -> dg.Definitions:
        """Build definitions from previously written state.

        Args:
            context: The component load context.
            state_path: Path to the serialized workspace data, or ``None``.

        Returns:
            Empty ``Definitions`` when ``state_path`` is ``None``; otherwise one
            assets definition per selected sync.
        """
        if state_path is None:
            return dg.Definitions()

        state = deserialize_value(state_path.read_text(), CensusWorkspaceData)

        assets = [
            self._get_census_assets_def(CensusMetadataSet.extract(spec.metadata).sync_name, spec)
            for spec in self._load_asset_specs(state)
        ]

        return dg.Definitions(assets=assets)
@@ -0,0 +1,26 @@
1
+ from typing import Optional
2
+
3
+ from dagster.components.component.component_scaffolder import Scaffolder
4
+ from dagster.components.component_scaffolding import scaffold_component
5
+ from dagster.components.scaffold.scaffold import ScaffoldRequest
6
+ from pydantic import BaseModel
7
+
8
+
9
# Parameters accepted when scaffolding a Census component. Documented with comments
# (not a docstring) so the schema generated from this model is left untouched.
class CensusScaffolderParams(BaseModel):
    # Optional Census API key.
    api_key: Optional[str] = None
11
+
12
+
13
class CensusComponentScaffolder(Scaffolder[CensusScaffolderParams]):
    """Scaffolder that writes a starter configuration for the Census component."""

    @classmethod
    def get_scaffold_params(cls) -> type[CensusScaffolderParams]:
        """Return the parameter model used by this scaffolder."""
        return CensusScaffolderParams

    def scaffold(self, request: ScaffoldRequest[CensusScaffolderParams]) -> None:
        """Scaffold the component with a workspace that reads its API key from the environment."""
        # The generated attributes always reference the CENSUS_API_KEY env var.
        workspace_attributes = {
            "api_key": "{{ env.CENSUS_API_KEY }}",
        }
        scaffold_component(request, {"workspace": workspace_attributes})
dagster_census/ops.py CHANGED
@@ -1,8 +1,9 @@
1
1
  from dagster import Array, Bool, Field, In, Noneable, Nothing, Out, Output, op
2
+ from dagster._core.storage.tags import COMPUTE_KIND_TAG
2
3
 
3
- from .resources import DEFAULT_POLL_INTERVAL
4
- from .types import CensusOutput
5
- from .utils import generate_materialization
4
+ from dagster_census.resources import DEFAULT_POLL_INTERVAL
5
+ from dagster_census.types import CensusOutput
6
+ from dagster_census.utils import generate_materialization
6
7
 
7
8
 
8
9
  @op(
@@ -19,7 +20,7 @@ from .utils import generate_materialization
19
20
  "sync_id": Field(
20
21
  int,
21
22
  is_required=True,
22
- description="id of the parent sync.",
23
+ description="Id of the parent sync.",
23
24
  ),
24
25
  "force_full_sync": Field(
25
26
  config=Bool,
@@ -32,13 +33,13 @@ from .utils import generate_materialization
32
33
  "poll_interval": Field(
33
34
  float,
34
35
  default_value=DEFAULT_POLL_INTERVAL,
35
- description="The time (in seconds) that will be waited between successive polls.",
36
+ description="The time (in seconds) to wait between successive polls.",
36
37
  ),
37
38
  "poll_timeout": Field(
38
39
  Noneable(float),
39
40
  default_value=None,
40
41
  description=(
41
- "The maximum time that will waited before this operation is timed out. By "
42
+ "The maximum time to wait before this operation is timed out. By "
42
43
  "default, this will never time out."
43
44
  ),
44
45
  ),
@@ -59,19 +60,20 @@ from .utils import generate_materialization
59
60
  ),
60
61
  ),
61
62
  },
62
- tags={"kind": "census"},
63
+ tags={COMPUTE_KIND_TAG: "census"},
63
64
  )
64
65
  def census_trigger_sync_op(context):
65
- """
66
- Executes a Census sync for a given ``sync_id``, and polls until that sync
67
- completes, raising an error if it is unsuccessful. It outputs a CensusOutput which contains
68
- the details of the Census sync after the sync successfully completes, as well as details
69
- about which tables the sync updates.
66
+ """Executes a Census sync for a given ``sync_id`` and polls until that sync completes, raising
67
+ an error if it is unsuccessful.
68
+
69
+ It outputs a :py:class:`~dagster_census.CensusOutput` which contains the details of the Census
70
+ sync after it successfully completes.
70
71
 
71
72
  It requires the use of the :py:class:`~dagster_census.CensusResource`, which allows it to
72
- communicate with the census API.
73
+ communicate with the Census API.
74
+
75
+ **Examples:**
73
76
 
74
- Examples:
75
77
  .. code-block:: python
76
78
 
77
79
  from dagster import job
File without changes
@@ -2,55 +2,91 @@ import datetime
2
2
  import json
3
3
  import logging
4
4
  import time
5
- from typing import Any, Mapping, Optional
5
+ from collections.abc import Mapping
6
+ from typing import Any, Optional
6
7
 
8
+ import pydantic
7
9
  import requests
8
- from dagster import Failure, Field, StringSource, __version__, get_dagster_logger, resource
10
+ from dagster import ConfigurableResource, Failure, __version__, get_dagster_logger
11
+ from dagster_shared.utils.cached_method import cached_method
9
12
  from requests.auth import HTTPBasicAuth
10
13
  from requests.exceptions import RequestException
11
14
 
12
- from .types import CensusOutput
15
+ from dagster_census.translator import CensusSync, CensusWorkspaceData
16
+ from dagster_census.types import CensusOutput
13
17
 
14
18
  CENSUS_API_BASE = "app.getcensus.com/api"
15
19
  CENSUS_VERSION = "v1"
16
20
 
17
21
  DEFAULT_POLL_INTERVAL = 10
18
22
 
23
+ SYNC_RUN_STATUSES = {"completed", "failed", "queued", "skipped", "working"}
19
24
 
20
- class CensusResource:
21
- """
22
- This class exposes methods on top of the Census REST API.
23
- """
24
25
 
25
- def __init__(
26
- self,
27
- api_key: str,
28
- request_max_retries: int = 3,
29
- request_retry_delay: float = 0.25,
30
- log: logging.Logger = get_dagster_logger(),
31
- ):
32
- self.api_key = api_key
26
+ class CensusResource(ConfigurableResource):
27
+ """This resource allows users to programmatically interface with the Census REST API to launch
28
+ syncs and monitor their progress. This currently implements only a subset of the functionality
29
+ exposed by the API.
30
+
31
+ **Examples:**
33
32
 
34
- self._request_max_retries = request_max_retries
35
- self._request_retry_delay = request_retry_delay
33
+ .. code-block:: python
36
34
 
37
- self._log = log
35
+ import dagster as dg
36
+ from dagster_census import CensusResource
38
37
 
39
- @property
40
- def _api_key(self):
41
- if self.api_key.startswith("secret-token:"):
42
- return self.api_key
43
- return "secret-token:" + self.api_key
38
+ census_resource = CensusResource(
39
+ api_key=dg.EnvVar("CENSUS_API_KEY")
40
+ )
41
+
42
+ @dg.asset
43
+ def census_sync_asset(census: CensusResource):
44
+ census.trigger_sync_and_poll(sync_id=123456)
45
+
46
+ defs = dg.Definitions(
47
+ assets=[census_sync_asset],
48
+ resources={"census": census_resource}
49
+ )
50
+ """
51
+
52
+ api_key: str = pydantic.Field(..., description="The Census API key")
53
+ request_max_retries: int = pydantic.Field(
54
+ default=3,
55
+ description=(
56
+ "The maximum number of times requests to the Census API should be retried "
57
+ "before failing."
58
+ ),
59
+ )
60
+ request_retry_delay: float = pydantic.Field(
61
+ default=0.25,
62
+ description="Time (in seconds) to wait between each request retry.",
63
+ )
64
+ request_timeout: int = pydantic.Field(
65
+ default=15,
66
+ description="Time (in seconds) after which the requests to Census are declared timed out.",
67
+ )
44
68
 
45
69
  @property
46
70
  def api_base_url(self) -> str:
47
71
  return f"https://{CENSUS_API_BASE}/{CENSUS_VERSION}"
48
72
 
73
+ @classmethod
74
+ def _is_dagster_maintained(cls) -> bool:
75
+ return True
76
+
77
+ @property
78
+ @cached_method
79
+ def _log(self) -> logging.Logger:
80
+ return get_dagster_logger()
81
+
49
82
  def make_request(
50
- self, method: str, endpoint: str, data: Optional[str] = None
83
+ self,
84
+ method: str,
85
+ endpoint: str,
86
+ data: Optional[str] = None,
87
+ page_number: Optional[int] = None,
51
88
  ) -> Mapping[str, Any]:
52
- """
53
- Creates and sends a request to the desired Census API endpoint.
89
+ """Creates and sends a request to the desired Census API endpoint.
54
90
 
55
91
  Args:
56
92
  method (str): The http method to use for this request (e.g. "POST", "GET", "PATCH").
@@ -66,6 +102,11 @@ class CensusResource:
66
102
  "Content-Type": "application/json;version=2",
67
103
  }
68
104
 
105
+ if page_number is not None:
106
+ params = {"page": page_number}
107
+ else:
108
+ params = {}
109
+
69
110
  num_retries = 0
70
111
  while True:
71
112
  try:
@@ -73,23 +114,23 @@ class CensusResource:
73
114
  method=method,
74
115
  url=url,
75
116
  headers=headers,
76
- auth=HTTPBasicAuth("bearer", self._api_key),
117
+ auth=HTTPBasicAuth("bearer", self.api_key),
77
118
  data=data,
119
+ params=params,
78
120
  )
79
121
  response.raise_for_status()
80
122
  return response.json()
81
123
  except RequestException as e:
82
124
  self._log.error("Request to Census API failed: %s", e)
83
- if num_retries == self._request_max_retries:
125
+ if num_retries == self.request_max_retries:
84
126
  break
85
127
  num_retries += 1
86
- time.sleep(self._request_retry_delay)
128
+ time.sleep(self.request_retry_delay)
87
129
 
88
- raise Failure(f"Max retries ({self._request_max_retries}) exceeded with url: {url}.")
130
+ raise Failure(f"Max retries ({self.request_max_retries}) exceeded with url: {url}.")
89
131
 
90
132
  def get_sync(self, sync_id: int) -> Mapping[str, Any]:
91
- """
92
- Gets details about a given sync from the Census API.
133
+ """Gets details about a given sync from the Census API.
93
134
 
94
135
  Args:
95
136
  sync_id (int): The Census Sync ID.
@@ -100,8 +141,7 @@ class CensusResource:
100
141
  return self.make_request(method="GET", endpoint=f"syncs/{sync_id}")
101
142
 
102
143
  def get_source(self, source_id: int) -> Mapping[str, Any]:
103
- """
104
- Gets details about a given source from the Census API.
144
+ """Gets details about a given source from the Census API.
105
145
 
106
146
  Args:
107
147
  source_id (int): The Census Source ID.
@@ -112,8 +152,7 @@ class CensusResource:
112
152
  return self.make_request(method="GET", endpoint=f"sources/{source_id}")
113
153
 
114
154
  def get_destination(self, destination_id: int) -> Mapping[str, Any]:
115
- """
116
- Gets details about a given destination from the Census API.
155
+ """Gets details about a given destination from the Census API.
117
156
 
118
157
  Args:
119
158
  destination_id (int): The Census Destination ID.
@@ -124,8 +163,7 @@ class CensusResource:
124
163
  return self.make_request(method="GET", endpoint=f"destinations/{destination_id}")
125
164
 
126
165
  def get_sync_run(self, sync_run_id: int) -> Mapping[str, Any]:
127
- """
128
- Gets details about a specific sync run from the Census API.
166
+ """Gets details about a specific sync run from the Census API.
129
167
 
130
168
  Args:
131
169
  sync_run_id (int): The Census Sync Run ID.
@@ -141,8 +179,7 @@ class CensusResource:
141
179
  poll_interval: float = DEFAULT_POLL_INTERVAL,
142
180
  poll_timeout: Optional[float] = None,
143
181
  ) -> Mapping[str, Any]:
144
- """
145
- Given a Census sync run, poll until the run is complete.
182
+ """Given a Census sync run, poll until the run is complete.
146
183
 
147
184
  Args:
148
185
  sync_id (int): The Census Sync Run ID.
@@ -165,9 +202,18 @@ class CensusResource:
165
202
  " more."
166
203
  )
167
204
 
205
+ sync_status = response_dict["data"]["status"]
168
206
  sync_id = response_dict["data"]["sync_id"]
169
207
 
170
- if response_dict["status"] == "working":
208
+ if sync_status not in SYNC_RUN_STATUSES:
209
+ raise ValueError(
210
+ f"Unexpected response status '{sync_status}'; "
211
+ f"must be one of {','.join(sorted(SYNC_RUN_STATUSES))}. "
212
+ "See Management API docs for more information: "
213
+ "https://docs.getcensus.com/basics/developers/api/sync-runs"
214
+ )
215
+
216
+ if sync_status in {"queued", "working"}:
171
217
  self._log.debug(
172
218
  f"Sync {sync_id} still running after {datetime.datetime.now() - poll_start}."
173
219
  )
@@ -191,8 +237,7 @@ class CensusResource:
191
237
  return response_dict
192
238
 
193
239
  def trigger_sync(self, sync_id: int, force_full_sync: bool = False) -> Mapping[str, Any]:
194
- """
195
- Trigger an asynchronous run for a specific sync.
240
+ """Trigger an asynchronous run for a specific sync.
196
241
 
197
242
  Args:
198
243
  sync_id (int): The Census Sync Run ID.
@@ -213,8 +258,7 @@ class CensusResource:
213
258
  poll_interval: float = DEFAULT_POLL_INTERVAL,
214
259
  poll_timeout: Optional[float] = None,
215
260
  ) -> CensusOutput:
216
- """
217
- Trigger a run for a specific sync and poll until it has completed.
261
+ """Trigger a run for a specific sync and poll until it has completed.
218
262
 
219
263
  Args:
220
264
  sync_id (int): The Census Sync Run ID.
@@ -247,57 +291,32 @@ class CensusResource:
247
291
  destination=destination_details,
248
292
  )
249
293
 
294
+ @cached_method
295
+ def fetch_census_workspace_data(self) -> CensusWorkspaceData:
296
+ """Retrieves all Census syncs from the workspace and returns it as a CensusWorkspaceData object.
250
297
 
251
- @resource(
252
- config_schema={
253
- "api_key": Field(
254
- StringSource,
255
- is_required=True,
256
- description="Census API Key.",
257
- ),
258
- "request_max_retries": Field(
259
- int,
260
- default_value=3,
261
- description=(
262
- "The maximum number of times requests to the Census API should be retried "
263
- "before failing."
264
- ),
265
- ),
266
- "request_retry_delay": Field(
267
- float,
268
- default_value=0.25,
269
- description="Time (in seconds) to wait between each request retry.",
270
- ),
271
- },
272
- description="This resource helps manage Census connectors",
273
- )
274
- def census_resource(context) -> CensusResource:
275
- """
276
- This resource allows users to programatically interface with the Census REST API to launch
277
- syncs and monitor their progress. This currently implements only a subset of the functionality
278
- exposed by the API.
279
-
280
- **Examples:**
281
-
282
- .. code-block:: python
283
-
284
- from dagster import job
285
- from dagster_census import census_resource
286
-
287
- my_census_resource = census_resource.configured(
288
- {
289
- "api_key": {"env": "CENSUS_API_KEY"},
290
- }
298
+ Returns:
299
+ CensusWorkspaceData: A snapshot of the Census workspace's syncs.
300
+ """
301
+ all_syncs = []
302
+ page = self.make_request(method="GET", endpoint="syncs")
303
+
304
+ last_page_number = page["pagination"]["last_page"]
305
+ for sync in page["data"]:
306
+ all_syncs.append(self._census_sync_struct_from_json(sync))
307
+
308
+ for i in range(2, last_page_number + 1):
309
+ page = self.make_request(method="GET", endpoint="syncs", page_number=i)
310
+ for sync in page["data"]:
311
+ all_syncs.append(self._census_sync_struct_from_json(sync))
312
+
313
+ return CensusWorkspaceData(syncs=all_syncs)
314
+
315
+ def _census_sync_struct_from_json(self, sync: dict[str, Any]) -> CensusSync:
316
+ return CensusSync(
317
+ id=sync["id"],
318
+ name=sync["label"] or sync["resource_identifier"],
319
+ source_id=sync["source_attributes"]["connection_id"],
320
+ destination_id=sync["destination_attributes"]["connection_id"],
321
+ mappings=sync["mappings"],
291
322
  )
292
-
293
- @job(resource_defs={"census":my_census_resource})
294
- def my_census_job():
295
- ...
296
-
297
- """
298
- return CensusResource(
299
- api_key=context.resource_config["api_key"],
300
- request_max_retries=context.resource_config["request_max_retries"],
301
- request_retry_delay=context.resource_config["request_retry_delay"],
302
- log=context.log,
303
- )
@@ -0,0 +1,57 @@
1
+ from collections.abc import Mapping
2
+ from typing import Any
3
+
4
+ import dagster as dg
5
+ from dagster._core.definitions.metadata.metadata_set import NamespacedMetadataSet
6
+ from dagster._record import record
7
+ from dagster_shared.serdes import whitelist_for_serdes
8
+
9
+
10
@whitelist_for_serdes
@record
class CensusSync:
    """A single Census sync, built from the data the API returns."""

    # Identifier of the sync.
    id: int
    # Name of the sync.
    name: str
    # Identifier of the sync's source.
    source_id: int
    # Identifier of the sync's destination.
    destination_id: int
    # Field mappings of the sync; each entry is a dict of mapping properties.
    mappings: list[dict[str, Any]]
20
+
21
+
22
@whitelist_for_serdes
@record
class CensusWorkspaceData:
    """Record holding all content of a Census workspace.
    It is handed to the translator as context so dependencies between content can be resolved.
    """

    # Every sync captured from the workspace.
    syncs: list[CensusSync]

    @property
    def syncs_by_id(self) -> Mapping[int, CensusSync]:
        """Index the workspace's syncs by their ID."""
        by_id = {}
        for census_sync in self.syncs:
            by_id[census_sync.id] = census_sync
        return by_id
35
+
36
+
37
def generate_table_schema(sync_mappings_props: list[dict[str, Any]]) -> dg.TableSchema:
    """Build a table schema from a sync's field mappings.

    Args:
        sync_mappings_props: Mapping entries; each must contain a ``"to"`` key and may
            contain a ``"field_type"`` key.

    Returns:
        A ``TableSchema`` with one column per mapping, ordered by column name. A mapping
        without ``"field_type"`` gets the type ``"unknown"``.
    """
    table_columns = []
    for props in sync_mappings_props:
        column_type = props.get("field_type", "unknown")
        table_columns.append(dg.TableColumn(name=props["to"], type=column_type))

    # list.sort is stable, just like sorted(), so equal names keep their input order.
    table_columns.sort(key=lambda column: column.name)
    return dg.TableSchema(columns=table_columns)
47
+
48
+
49
# Census-specific metadata entries attached to asset specs, stored under the
# "dagster-census" namespace.
class CensusMetadataSet(NamespacedMetadataSet):
    # Identifier and name of the sync behind the asset.
    sync_id: int
    sync_name: str
    # Identifiers of the sync's source and destination.
    source_id: int
    destination_id: int

    @classmethod
    def namespace(cls) -> str:
        # Namespace under which the fields above are stored.
        return "dagster-census"
dagster_census/types.py CHANGED
@@ -1,4 +1,5 @@
1
- from typing import Any, Mapping, NamedTuple
1
+ from collections.abc import Mapping
2
+ from typing import Any, NamedTuple
2
3
 
3
4
 
4
5
  class CensusOutput(
@@ -11,14 +12,13 @@ class CensusOutput(
11
12
  ],
12
13
  )
13
14
  ):
14
- """
15
- Contains recorded information about the state of a Census sync after a sync completes.
15
+ """Contains recorded information about the state of a Census sync after a sync completes.
16
16
 
17
- Attributes:
17
+ Args:
18
18
  sync_run (Dict[str, Any]):
19
- The details of the specific sync run
19
+ The details of the specific sync run.
20
20
  source (Dict[str, Any]):
21
- Information about the source for the Census sync
21
+ Information about the source for the Census sync.
22
22
  destination (Dict[str, Any]):
23
- Information about the destination for the Census sync
23
+ Information about the destination for the Census sync.
24
24
  """
dagster_census/utils.py CHANGED
@@ -1,8 +1,8 @@
1
- from typing import Sequence
1
+ from collections.abc import Sequence
2
2
 
3
3
  from dagster import AssetMaterialization
4
4
 
5
- from .types import CensusOutput
5
+ from dagster_census.types import CensusOutput
6
6
 
7
7
 
8
8
  def generate_materialization(
dagster_census/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.17.12"
1
+ __version__ = "0.28.10"
@@ -0,0 +1,27 @@
1
+ Metadata-Version: 2.4
2
+ Name: dagster-census
3
+ Version: 0.28.10
4
+ Summary: Package for integrating Census with Dagster.
5
+ Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-census
6
+ Author: Dagster Labs
7
+ Author-email: hello@dagsterlabs.com
8
+ License: Apache-2.0
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Classifier: Programming Language :: Python :: 3.14
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Operating System :: OS Independent
16
+ Requires-Python: >=3.10,<3.15
17
+ License-File: LICENSE
18
+ Requires-Dist: dagster==1.12.10
19
+ Dynamic: author
20
+ Dynamic: author-email
21
+ Dynamic: classifier
22
+ Dynamic: home-page
23
+ Dynamic: license
24
+ Dynamic: license-file
25
+ Dynamic: requires-dist
26
+ Dynamic: requires-python
27
+ Dynamic: summary
@@ -0,0 +1,16 @@
1
+ dagster_census/__init__.py,sha256=eIDWjz13trVGLVMDjAdq0LeUkbEiWex1ho2NX8cDFOY,501
2
+ dagster_census/ops.py,sha256=4N53-w0u3PG5kn-q1tW-7Ac-i5SCDFq9ku--g7gDig4,3659
3
+ dagster_census/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ dagster_census/resources.py,sha256=R-McSQLQTOImDVAQ3Tr-ZbvbUMQnbKpW7hkvtuwtS7I,11661
5
+ dagster_census/translator.py,sha256=caSshQ3ZfSsl_ASNlN6p24t-15WDRN29Z1xBg7Utbp4,1510
6
+ dagster_census/types.py,sha256=on1CIKWZMsSuw5h-HkzQPVctMQzQu2L9HgkX5gc9X54,698
7
+ dagster_census/utils.py,sha256=fFsKjn6xlLEel1-nvAwfAaemjSTg2gYxJHY8sK2e1no,1348
8
+ dagster_census/version.py,sha256=110BPTEyn13PDWnHpj0D7VMBpjnNxJ6-MM3ZHvQwyQ0,24
9
+ dagster_census/components/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ dagster_census/components/census_component.py,sha256=vgaVfT0pFVoC8ddhJJQEGJwjfZGsjLiTlXDOeNYW4q0,7970
11
+ dagster_census/components/census_scaffolder.py,sha256=n0YT0Rov6yJsPHAVpbtUrkxxF59z0WLCnRhYEpYtF4A,818
12
+ dagster_census-0.28.10.dist-info/licenses/LICENSE,sha256=4lsMW-RCvfVD4_F57wrmpe3vX1xwUk_OAKKmV_XT7Z0,11348
13
+ dagster_census-0.28.10.dist-info/METADATA,sha256=c2aYCvD4kTlMACHTjkTvgz9klKUJ0mIcPAQ80JGpAho,922
14
+ dagster_census-0.28.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
+ dagster_census-0.28.10.dist-info/top_level.txt,sha256=Lz6T6gBn89ilLGOLr8YIrBTRcWPCjeOtdNk8sdqmUnA,15
16
+ dagster_census-0.28.10.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.33.6)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -186,7 +186,7 @@
186
186
  same "printed page" as the copyright notice for easier
187
187
  identification within third-party archives.
188
188
 
189
- Copyright 2023 Elementl, Inc.
189
+ Copyright 2025 Dagster Labs, Inc.
190
190
 
191
191
  Licensed under the Apache License, Version 2.0 (the "License");
192
192
  you may not use this file except in compliance with the License.
@@ -1,20 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: dagster-census
3
- Version: 0.17.12
4
- Summary: Package for integrating Census with Dagster.
5
- Home-page: https://github.com/dagster-io/dagster/tree/master/python_modules/libraries/dagster-census
6
- Author: Elementl
7
- Author-email: hello@elementl.com
8
- License: Apache-2.0
9
- Platform: UNKNOWN
10
- Classifier: Programming Language :: Python :: 3.7
11
- Classifier: Programming Language :: Python :: 3.8
12
- Classifier: Programming Language :: Python :: 3.9
13
- Classifier: Programming Language :: Python :: 3.10
14
- Classifier: License :: OSI Approved :: Apache Software License
15
- Classifier: Operating System :: OS Independent
16
- License-File: LICENSE
17
- Requires-Dist: dagster (==1.1.12)
18
-
19
- UNKNOWN
20
-
@@ -1,11 +0,0 @@
1
- dagster_census/__init__.py,sha256=prAuCLLdlJo4r9VPpjBVx9kJAGsIDpqVU4r-mp9NkIg,395
2
- dagster_census/ops.py,sha256=1HxqQss4tTODoeOvlPcM2hTtrVccM4CpUoqXyuL61Gs,3606
3
- dagster_census/resources.py,sha256=ZokGPXNbccX4lgTUD_n5pXZj68wUquT--jeqFbQZW8A,10233
4
- dagster_census/types.py,sha256=-L2quR_B8Bp4l2wfZZz4RSyyF1XtGjpzzoegp19Y0rg,679
5
- dagster_census/utils.py,sha256=uWqrBW1USFl64TkNbaKh8xqwHRcbss1Pud4A3bmL0ZE,1325
6
- dagster_census/version.py,sha256=mzFSFzEnZmb_PgAaLMraT-PRhpIy19h0Zj4Dwro7BIc,24
7
- dagster_census-0.17.12.dist-info/LICENSE,sha256=-gtoVIAZYUHYmNHISZg982FI4Oh19mV1nxgTVW8eCB8,11344
8
- dagster_census-0.17.12.dist-info/METADATA,sha256=C0h2NVefKExkyGx1zCYR79AFgDYs6lO9b2TrhcEU_NQ,680
9
- dagster_census-0.17.12.dist-info/WHEEL,sha256=p46_5Uhzqz6AzeSosiOnxK-zmFja1i22CrQCjmYe8ec,92
10
- dagster_census-0.17.12.dist-info/top_level.txt,sha256=Lz6T6gBn89ilLGOLr8YIrBTRcWPCjeOtdNk8sdqmUnA,15
11
- dagster_census-0.17.12.dist-info/RECORD,,