digitalhub 0.13.0b3__py3-none-any.whl → 0.13.0b4__py3-none-any.whl

This diff compares the contents of two publicly available package versions, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of digitalhub might be problematic. Click here for more details.

@@ -26,23 +26,36 @@ def parse_identifier(
26
26
  entity_id: str | None = None,
27
27
  ) -> tuple[str, str, str | None, str | None, str | None]:
28
28
  """
29
- Parse entity identifier.
29
+ Parse and validate entity identifier into its components.
30
+
31
+ Processes an entity identifier that can be either a full entity key
32
+ (store://) or a simple entity name. When using a simple name,
33
+ additional parameters must be provided for proper identification.
30
34
 
31
35
  Parameters
32
36
  ----------
33
37
  identifier : str
34
- Entity key (store://...) or entity name.
35
- project : str
36
- Project name.
37
- entity_type : str
38
- Entity type.
39
- entity_id : str
40
- Entity ID.
38
+ The entity identifier to parse. Can be either a full entity key
39
+ (store://project/entity_type/kind/name:id) or a simple entity name.
40
+ project : str, optional
41
+ The project name. Required when identifier is not a full key.
42
+ entity_type : str, optional
43
+ The entity type. Required when identifier is not a full key.
44
+ entity_kind : str, optional
45
+ The entity kind specification.
46
+ entity_id : str, optional
47
+ The entity version identifier.
41
48
 
42
49
  Returns
43
50
  -------
44
51
  tuple[str, str, str | None, str | None, str | None]
45
- Project name, entity type, entity kind, entity name, entity ID.
52
+ A tuple containing (project_name, entity_type, entity_kind,
53
+ entity_name, entity_id) parsed from the identifier.
54
+
55
+ Raises
56
+ ------
57
+ ValueError
58
+ If identifier is not a full key and project or entity_type is None.
46
59
  """
47
60
  if not identifier.startswith("store://"):
48
61
  if project is None or entity_type is None:
@@ -56,19 +69,29 @@ def get_context_from_identifier(
56
69
  project: str | None = None,
57
70
  ) -> Context:
58
71
  """
59
- Get context from project.
72
+ Retrieve context instance from entity identifier or project name.
73
+
74
+ Extracts project information from the identifier and returns the
75
+ corresponding context. If the identifier is not a full key, the
76
+ project parameter must be provided explicitly.
60
77
 
61
78
  Parameters
62
79
  ----------
63
80
  identifier : str
64
- Entity key (store://...) or entity name.
65
- project : str
66
- Project name.
81
+ The entity identifier to extract context from. Can be either
82
+ a full entity key (store://...) or a simple entity name.
83
+ project : str, optional
84
+ The project name. Required when identifier is not a full key.
67
85
 
68
86
  Returns
69
87
  -------
70
88
  Context
71
- Context.
89
+ The context instance associated with the identified project.
90
+
91
+ Raises
92
+ ------
93
+ EntityError
94
+ If identifier is not a full key and project parameter is None.
72
95
  """
73
96
  if not identifier.startswith("store://"):
74
97
  if project is None:
@@ -83,19 +106,26 @@ def get_context_from_project(
83
106
  project: str,
84
107
  ) -> Context:
85
108
  """
86
- Check if the given project is in the context.
87
- Otherwise try to get the project from remote.
88
- Finally return the client.
109
+ Retrieve context for a project, fetching from remote if necessary.
110
+
111
+ Attempts to get the project context from the local cache first.
112
+ If the project is not found locally, tries to fetch it from the
113
+ remote backend and create the context.
89
114
 
90
115
  Parameters
91
116
  ----------
92
117
  project : str
93
- Project name.
118
+ The name of the project to get context for.
94
119
 
95
120
  Returns
96
121
  -------
97
122
  Context
98
- Context.
123
+ The context instance for the specified project.
124
+
125
+ Raises
126
+ ------
127
+ ContextError
128
+ If the project cannot be found locally or remotely.
99
129
  """
100
130
  try:
101
131
  return get_context(project)
@@ -105,19 +135,28 @@ def get_context_from_project(
105
135
 
106
136
  def get_context_from_remote(
107
137
  project: str,
108
- ) -> Client:
138
+ ) -> Context:
109
139
  """
110
- Get context from remote.
140
+ Fetch project context from remote backend and create local context.
141
+
142
+ Retrieves project information from the remote backend, builds the
143
+ project entity locally, and returns the corresponding context.
144
+ Used when a project is not available in the local context cache.
111
145
 
112
146
  Parameters
113
147
  ----------
114
148
  project : str
115
- Project name.
149
+ The name of the project to fetch from remote.
116
150
 
117
151
  Returns
118
152
  -------
119
- Client
120
- Client.
153
+ Context
154
+ The context instance created from the remote project data.
155
+
156
+ Raises
157
+ ------
158
+ ContextError
159
+ If the project is not found on the remote backend.
121
160
  """
122
161
  try:
123
162
  client = get_client()
@@ -135,23 +174,28 @@ def _read_base_entity(
135
174
  **kwargs,
136
175
  ) -> dict:
137
176
  """
138
- Read object from backend.
177
+ Read entity data from the backend API.
178
+
179
+ Internal utility function that performs a base-level entity read
180
+ operation through the client API. Builds the appropriate API
181
+ endpoint and retrieves the entity data as a dictionary.
139
182
 
140
183
  Parameters
141
184
  ----------
142
185
  client : Client
143
- Client instance.
186
+ The client instance to use for the API request.
144
187
  entity_type : str
145
- Entity type.
188
+ The type of entity to read (e.g., 'project', 'function').
146
189
  entity_name : str
147
- Entity name.
190
+ The name identifier of the entity to retrieve.
148
191
  **kwargs : dict
149
- Parameters to pass to the API call.
192
+ Additional parameters to pass to the API call, such as
193
+ version specifications or query filters.
150
194
 
151
195
  Returns
152
196
  -------
153
197
  dict
154
- Object instance.
198
+ Dictionary containing the entity data retrieved from the backend.
155
199
  """
156
200
  api = client.build_api(
157
201
  ApiCategories.BASE.value,
@@ -15,16 +15,23 @@ def eval_source(
15
15
  source: str | list[str] | None = None,
16
16
  ) -> Any:
17
17
  """
18
- Evaluate if source is local.
18
+ Evaluate whether the source is local or remote.
19
+
20
+ Determines if the provided source(s) reference local files or
21
+ remote resources. This evaluation affects how the artifact
22
+ will be processed and stored.
19
23
 
20
24
  Parameters
21
25
  ----------
22
- source : str | list[str]
23
- Source(s).
26
+ source : str, list[str], or None, optional
27
+ The source specification(s) to evaluate. Can be a single
28
+ source string, a list of source strings, or None.
24
29
 
25
30
  Returns
26
31
  -------
27
- None
32
+ Any
33
+ The result of the local source evaluation, indicating
34
+ whether the source is local or remote.
28
35
  """
29
36
  return eval_local_source(source)
30
37
 
@@ -37,25 +44,33 @@ def process_kwargs(
37
44
  **kwargs,
38
45
  ) -> dict:
39
46
  """
40
- Process spec kwargs.
47
+ Process and enhance specification parameters for artifact creation.
48
+
49
+ Processes the keyword arguments for artifact specification, handling
50
+ path generation and UUID assignment. If no path is provided, generates
51
+ a unique path based on project, entity type, name, and source.
41
52
 
42
53
  Parameters
43
54
  ----------
44
55
  project : str
45
- Project name.
56
+ The name of the project.
46
57
  name : str
47
- Object name.
48
- source : str
49
- Source(s).
50
- path : str
51
- Destination path of the entity. If not provided, it's generated.
58
+ The name of the artifact entity.
59
+ source : str or list[str]
60
+ The source specification(s) for the artifact content.
61
+ Can be a single source or multiple sources.
62
+ path : str, optional
63
+ The destination path for the artifact entity.
64
+ If None, a path will be automatically generated.
52
65
  **kwargs : dict
53
- Spec parameters.
66
+ Additional specification parameters to be processed
67
+ and passed to the artifact creation.
54
68
 
55
69
  Returns
56
70
  -------
57
71
  dict
58
- Kwargs updated.
72
+ The updated kwargs dictionary with processed path
73
+ and UUID information included.
59
74
  """
60
75
  if path is None:
61
76
  uuid = build_uuid()
@@ -33,16 +33,39 @@ def eval_source(
33
33
  project: str | None = None,
34
34
  ) -> Any:
35
35
  """
36
- Evaluate if source is local.
36
+ Evaluate and process data source for dataitem creation.
37
+
38
+ Determines the appropriate source handling based on whether a source
39
+ path or data object is provided. For table dataitems with data objects,
40
+ writes the data to a Parquet file and returns the file path.
37
41
 
38
42
  Parameters
39
43
  ----------
40
- source : SourcesOrListOfSources
41
- Source(s).
44
+ source : SourcesOrListOfSources, optional
45
+ The source specification(s) for the dataitem. Can be file paths,
46
+ URLs, or other source identifiers.
47
+ data : Any, optional
48
+ The data object to process (e.g., DataFrame). Alternative to source.
49
+ Exactly one of source or data must be provided.
50
+ kind : str, optional
51
+ The kind of dataitem being created (e.g., 'table').
52
+ name : str, optional
53
+ The name of the dataitem, used for generating file paths.
54
+ project : str, optional
55
+ The project name, used for context and path generation.
42
56
 
43
57
  Returns
44
58
  -------
45
- None
59
+ Any
60
+ The processed source. Returns the original source if provided,
61
+ or the path to a generated file if data was processed.
62
+
63
+ Raises
64
+ ------
65
+ ValueError
66
+ If both source and data are provided or both are None.
67
+ NotImplementedError
68
+ If the specified kind is not supported for data processing.
46
69
  """
47
70
  if (source is None) == (data is None):
48
71
  raise ValueError("You must provide source or data.")
@@ -69,24 +92,32 @@ def eval_data(
69
92
  engine: str | None = None,
70
93
  ) -> Any:
71
94
  """
72
- Evaluate data is loaded.
95
+ Evaluate and load data from source or return provided data.
96
+
97
+ For table dataitems, loads data from the source using the appropriate
98
+ store and reader. For other kinds, returns the data as-is.
73
99
 
74
100
  Parameters
75
101
  ----------
76
- project : str
77
- Project name.
78
- source : str
79
- Source(s).
80
- data : Any
81
- Dataframe to log. Alternative to source.
82
- file_format : str
83
- Extension of the file.
84
- engine : str
85
- Engine to use.
102
+ kind : str
103
+ The kind of dataitem (e.g., 'table') that determines
104
+ how data should be processed.
105
+ source : SourcesOrListOfSources
106
+ The source specification(s) to load data from.
107
+ data : Any, optional
108
+ Pre-loaded data object. If provided, may be returned directly
109
+ depending on the dataitem kind.
110
+ file_format : str, optional
111
+ The file format specification for reading the source
112
+ (e.g., 'parquet', 'csv').
113
+ engine : str, optional
114
+ The engine to use for reading the data (e.g., 'pandas', 'polars').
86
115
 
87
116
  Returns
88
117
  -------
89
- None
118
+ Any
119
+ The loaded data object for table dataitems, or the original
120
+ data parameter for other kinds.
90
121
  """
91
122
  if kind == EntityKinds.DATAITEM_TABLE.value:
92
123
  if data is None:
@@ -108,29 +139,37 @@ def process_kwargs(
108
139
  **kwargs,
109
140
  ) -> dict:
110
141
  """
111
- Process spec kwargs.
142
+ Process and enhance specification parameters for dataitem creation.
143
+
144
+ Processes the keyword arguments for dataitem specification, handling
145
+ schema extraction for table dataitems and path generation. Extracts
146
+ schema information from data objects when available.
112
147
 
113
148
  Parameters
114
149
  ----------
115
150
  project : str
116
- Project name.
151
+ The name of the project.
117
152
  name : str
118
- Object name.
153
+ The name of the dataitem entity.
119
154
  kind : str
120
- Kind the object.
155
+ The kind of dataitem being created (e.g., 'table').
121
156
  source : SourcesOrListOfSources
122
- Source(s).
123
- data : Any
124
- Dataframe to log. Alternative to source.
125
- path : str
126
- Destination path of the entity. If not provided, it's generated.
157
+ The source specification(s) for the dataitem content.
158
+ data : Any, optional
159
+ The data object for schema extraction and processing.
160
+ Used as an alternative to source for table dataitems.
161
+ path : str, optional
162
+ The destination path for the dataitem entity.
163
+ If None, a path will be automatically generated.
127
164
  **kwargs : dict
128
- Spec parameters.
165
+ Additional specification parameters to be processed
166
+ and passed to the dataitem creation.
129
167
 
130
168
  Returns
131
169
  -------
132
170
  dict
133
- Kwargs updated.
171
+ The updated kwargs dictionary with processed path,
172
+ UUID, and schema information included.
134
173
  """
135
174
  if data is not None:
136
175
  if kind == EntityKinds.DATAITEM_TABLE.value:
@@ -147,12 +186,17 @@ def process_kwargs(
147
186
 
148
187
  def clean_tmp_path(pth: SourcesOrListOfSources) -> None:
149
188
  """
150
- Clean temporary path.
189
+ Clean up temporary files and directories.
190
+
191
+ Removes temporary files or directories created during dataitem
192
+ processing. Handles both single paths and lists of paths,
193
+ ignoring any errors that occur during cleanup.
151
194
 
152
195
  Parameters
153
196
  ----------
154
197
  pth : SourcesOrListOfSources
155
- Path to clean.
198
+ The path or list of paths to clean up. Can be file paths
199
+ or directory paths that need to be removed.
156
200
 
157
201
  Returns
158
202
  -------
@@ -167,19 +211,25 @@ def clean_tmp_path(pth: SourcesOrListOfSources) -> None:
167
211
 
168
212
  def post_process(obj: Dataitem, data: Any) -> Dataitem:
169
213
  """
170
- Post process object.
214
+ Post-process dataitem object with additional metadata and previews.
215
+
216
+ Enhances the dataitem object with additional information extracted
217
+ from the data. For table dataitems, generates and stores a data
218
+ preview in the object's status.
171
219
 
172
220
  Parameters
173
221
  ----------
174
222
  obj : Dataitem
175
- The object.
223
+ The dataitem object to post-process and enhance.
176
224
  data : Any
177
- The data.
225
+ The data object used to generate previews and extract
226
+ additional metadata information.
178
227
 
179
228
  Returns
180
229
  -------
181
230
  Dataitem
182
- The object.
231
+ The enhanced dataitem object with updated status information
232
+ and saved changes.
183
233
  """
184
234
  if obj.kind == EntityKinds.DATAITEM_TABLE.value:
185
235
  reader = get_reader_by_object(data)
@@ -15,16 +15,23 @@ def eval_source(
15
15
  source: str | list[str] | None = None,
16
16
  ) -> Any:
17
17
  """
18
- Evaluate if source is local.
18
+ Evaluate whether the source is local or remote.
19
+
20
+ Determines if the provided source(s) reference local files or
21
+ remote resources. This evaluation affects how the model
22
+ will be processed and stored.
19
23
 
20
24
  Parameters
21
25
  ----------
22
- source : str | list[str]
23
- Source(s).
26
+ source : str, list[str], or None, optional
27
+ The source specification(s) to evaluate. Can be a single
28
+ source string, a list of source strings, or None.
24
29
 
25
30
  Returns
26
31
  -------
27
- None
32
+ Any
33
+ The result of the local source evaluation, indicating
34
+ whether the source is local or remote.
28
35
  """
29
36
  return eval_local_source(source)
30
37
 
@@ -37,25 +44,33 @@ def process_kwargs(
37
44
  **kwargs,
38
45
  ) -> dict:
39
46
  """
40
- Process spec kwargs.
47
+ Process and enhance specification parameters for model creation.
48
+
49
+ Processes the keyword arguments for model specification, handling
50
+ path generation and UUID assignment. If no path is provided, generates
51
+ a unique path based on project, entity type, name, and source.
41
52
 
42
53
  Parameters
43
54
  ----------
44
55
  project : str
45
- Project name.
56
+ The name of the project containing the model.
46
57
  name : str
47
- Object name.
48
- source : str
49
- Source(s).
50
- path : str
51
- Destination path of the entity. If not provided, it's generated.
58
+ The name of the model entity.
59
+ source : str or list[str]
60
+ The source specification(s) for the model content.
61
+ Can be a single source or multiple sources.
62
+ path : str, optional
63
+ The destination path for the model entity.
64
+ If None, a path will be automatically generated.
52
65
  **kwargs : dict
53
- Spec parameters.
66
+ Additional specification parameters to be processed
67
+ and passed to the model creation.
54
68
 
55
69
  Returns
56
70
  -------
57
71
  dict
58
- Kwargs updated.
72
+ The updated kwargs dictionary with processed path
73
+ and UUID information included.
59
74
  """
60
75
  if path is None:
61
76
  uuid = build_uuid()
@@ -36,6 +36,7 @@ class CredsEnvVar(Enum):
36
36
  """
37
37
  Supported credential environment variables.
38
38
  """
39
+
39
40
  # S3
40
41
  S3_ENDPOINT_URL = "AWS_ENDPOINT_URL"
41
42
  S3_ACCESS_KEY_ID = "AWS_ACCESS_KEY_ID"
@@ -37,7 +37,7 @@ def get_default_store(project: str) -> str:
37
37
  var = StoreEnv.DEFAULT_FILES_STORE.value
38
38
 
39
39
  context = get_context(project)
40
- store = context.config.get(var.lower())
40
+ store = context.config.get(var.lower().replace("dhcore_", ""))
41
41
  if store is not None:
42
42
  return store
43
43
 
@@ -21,6 +21,20 @@ if typing.TYPE_CHECKING:
21
21
 
22
22
 
23
23
  class StoreInfo:
24
+ """
25
+ Container for store class and configurator information.
26
+
27
+ Holds store class references and their associated configurators
28
+ for registration and instantiation in the store builder system.
29
+
30
+ Attributes
31
+ ----------
32
+ _store : Store
33
+ The store class to be instantiated.
34
+ _configurator : Configurator or None
35
+ The configurator class for store configuration, if required.
36
+ """
37
+
24
38
  def __init__(self, store: Store, configurator: Configurator | None = None) -> None:
25
39
  self._store = store
26
40
  self._configurator = configurator
@@ -28,7 +42,19 @@ class StoreInfo:
28
42
 
29
43
  class StoreBuilder:
30
44
  """
31
- Store builder class.
45
+ Store factory and registry for managing data store instances.
46
+
47
+ Provides registration, instantiation, and caching of data store
48
+ instances based on URI schemes. Supports various store types
49
+ including S3, SQL, local, and remote stores with their respective
50
+ configurators.
51
+
52
+ Attributes
53
+ ----------
54
+ _builders : dict[str, StoreInfo]
55
+ Registry of store types mapped to their StoreInfo instances.
56
+ _instances : dict[str, Store]
57
+ Cache of instantiated store instances by store type.
32
58
  """
33
59
 
34
60
  def __init__(self) -> None:
@@ -41,6 +67,31 @@ class StoreBuilder:
41
67
  store: Store,
42
68
  configurator: Configurator | None = None,
43
69
  ) -> None:
70
+ """
71
+ Register a store type with its class and optional configurator.
72
+
73
+ Adds a new store type to the builder registry, associating it
74
+ with a store class and optional configurator for later instantiation.
75
+
76
+ Parameters
77
+ ----------
78
+ store_type : str
79
+ The unique identifier for the store type (e.g., 's3', 'sql').
80
+ store : Store
81
+ The store class to register for this type.
82
+ configurator : Configurator, optional
83
+ The configurator class for store configuration.
84
+ If None, the store will be instantiated without configuration.
85
+
86
+ Returns
87
+ -------
88
+ None
89
+
90
+ Raises
91
+ ------
92
+ StoreError
93
+ If the store type is already registered in the builder.
94
+ """
44
95
  if store_type not in self._builders:
45
96
  self._builders[store_type] = StoreInfo(store, configurator)
46
97
  else:
@@ -48,17 +99,28 @@ class StoreBuilder:
48
99
 
49
100
  def get(self, uri: str) -> Store:
50
101
  """
51
- Get a store instance by URI, building it if necessary.
102
+ Get or create a store instance based on URI scheme.
103
+
104
+ Determines the appropriate store type from the URI scheme,
105
+ instantiates the store if not already cached, and returns
106
+ the store instance. Store instances are cached for reuse.
52
107
 
53
108
  Parameters
54
109
  ----------
55
110
  uri : str
56
- URI to parse.
111
+ The URI to parse for determining the store type.
112
+ The scheme (e.g., 's3://', 'sql://') determines which
113
+ store type to instantiate.
57
114
 
58
115
  Returns
59
116
  -------
60
117
  Store
61
- The store instance.
118
+ The store instance appropriate for handling the given URI.
119
+
120
+ Raises
121
+ ------
122
+ KeyError
123
+ If no store is registered for the URI scheme.
62
124
  """
63
125
  store_type = map_uri_scheme(uri)
64
126