ChessAnalysisPipeline 0.0.17.dev3__py3-none-any.whl
This diff shows the content of a publicly available package version released to one of the supported registries. It is provided for informational purposes only and reflects the package as it appears in its public registry.
- CHAP/TaskManager.py +216 -0
- CHAP/__init__.py +27 -0
- CHAP/common/__init__.py +57 -0
- CHAP/common/models/__init__.py +8 -0
- CHAP/common/models/common.py +124 -0
- CHAP/common/models/integration.py +659 -0
- CHAP/common/models/map.py +1291 -0
- CHAP/common/processor.py +2869 -0
- CHAP/common/reader.py +658 -0
- CHAP/common/utils.py +110 -0
- CHAP/common/writer.py +730 -0
- CHAP/edd/__init__.py +23 -0
- CHAP/edd/models.py +876 -0
- CHAP/edd/processor.py +3069 -0
- CHAP/edd/reader.py +1023 -0
- CHAP/edd/select_material_params_gui.py +348 -0
- CHAP/edd/utils.py +1572 -0
- CHAP/edd/writer.py +26 -0
- CHAP/foxden/__init__.py +19 -0
- CHAP/foxden/models.py +71 -0
- CHAP/foxden/processor.py +124 -0
- CHAP/foxden/reader.py +224 -0
- CHAP/foxden/utils.py +80 -0
- CHAP/foxden/writer.py +168 -0
- CHAP/giwaxs/__init__.py +11 -0
- CHAP/giwaxs/models.py +491 -0
- CHAP/giwaxs/processor.py +776 -0
- CHAP/giwaxs/reader.py +8 -0
- CHAP/giwaxs/writer.py +8 -0
- CHAP/inference/__init__.py +7 -0
- CHAP/inference/processor.py +69 -0
- CHAP/inference/reader.py +8 -0
- CHAP/inference/writer.py +8 -0
- CHAP/models.py +227 -0
- CHAP/pipeline.py +479 -0
- CHAP/processor.py +125 -0
- CHAP/reader.py +124 -0
- CHAP/runner.py +277 -0
- CHAP/saxswaxs/__init__.py +7 -0
- CHAP/saxswaxs/processor.py +8 -0
- CHAP/saxswaxs/reader.py +8 -0
- CHAP/saxswaxs/writer.py +8 -0
- CHAP/server.py +125 -0
- CHAP/sin2psi/__init__.py +7 -0
- CHAP/sin2psi/processor.py +8 -0
- CHAP/sin2psi/reader.py +8 -0
- CHAP/sin2psi/writer.py +8 -0
- CHAP/tomo/__init__.py +15 -0
- CHAP/tomo/models.py +210 -0
- CHAP/tomo/processor.py +3862 -0
- CHAP/tomo/reader.py +9 -0
- CHAP/tomo/writer.py +59 -0
- CHAP/utils/__init__.py +6 -0
- CHAP/utils/converters.py +188 -0
- CHAP/utils/fit.py +2947 -0
- CHAP/utils/general.py +2655 -0
- CHAP/utils/material.py +274 -0
- CHAP/utils/models.py +595 -0
- CHAP/utils/parfile.py +224 -0
- CHAP/writer.py +122 -0
- MLaaS/__init__.py +0 -0
- MLaaS/ktrain.py +205 -0
- MLaaS/mnist_img.py +83 -0
- MLaaS/tfaas_client.py +371 -0
- chessanalysispipeline-0.0.17.dev3.dist-info/LICENSE +60 -0
- chessanalysispipeline-0.0.17.dev3.dist-info/METADATA +29 -0
- chessanalysispipeline-0.0.17.dev3.dist-info/RECORD +70 -0
- chessanalysispipeline-0.0.17.dev3.dist-info/WHEEL +5 -0
- chessanalysispipeline-0.0.17.dev3.dist-info/entry_points.txt +2 -0
- chessanalysispipeline-0.0.17.dev3.dist-info/top_level.txt +2 -0
CHAP/pipeline.py
ADDED
@@ -0,0 +1,479 @@

#-*- coding: utf-8 -*-
"""
File       : pipeline.py
Author     : Valentin Kuznetsov <vkuznet AT gmail dot com>
Description:
"""

# System modules
import logging
from time import time
from types import MethodType
from typing import (
    Literal,
    Optional,
)

# Third party modules
from pydantic import (
    ConfigDict,
    Field,
    FilePath,
    PrivateAttr,
    conlist,
    constr,
    model_validator,
)
from pydantic._internal._model_construction import ModelMetaclass

# Local modules
from CHAP.models import (
    CHAPBaseModel,
    RunConfig,
)


class PipelineData(dict):
    """Wrapper for all results of PipelineItem.execute."""
    def __init__(self, name=None, data=None, schema=None):
        super().__init__()
        self.__setitem__('name', name)
        self.__setitem__('data', data)
        self.__setitem__('schema', schema)


class PipelineItem(RunConfig):
    """Class representing a single item in a `Pipeline` object."""
    logger: Optional[logging.Logger] = None
    name: Optional[constr(strip_whitespace=True, min_length=1)] = None
    schema_: Optional[constr(strip_whitespace=True, min_length=1)] = \
        Field(None, alias='schema')

    _method: MethodType = PrivateAttr(default=None)
    _method_type: Literal[
        'read', 'process', 'write'] = PrivateAttr(default=None)
    _args: dict = PrivateAttr(default={})
    _allowed_args: conlist(item_type=str) = PrivateAttr(default=[])
    _status: Literal[
        'read', 'write_pending', 'written'] = PrivateAttr(default=None)

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @model_validator(mode='after')
    def validate_pipelineitem_after(self):
        """Validate the `PipelineItem` configuration.

        :return: The validated configuration.
        :rtype: PipelineItem
        """
        # System modules
        from inspect import (
            # Parameter,
            signature,
        )

        if self.name is None:
            self.__name__ = self.__class__.__name__
        else:
            self.__name__ = self.name
        if self.logger is None:
            self.logger = logging.getLogger(self.__name__)
            self.logger.propagate = False
            log_handler = logging.StreamHandler()
            log_handler.setFormatter(logging.Formatter(
                '{asctime}: {name:20}: {levelname}: {message}',
                datefmt='%Y-%m-%d %H:%M:%S', style='{'))
            self.logger.addHandler(log_handler)
            self.logger.setLevel(self.log_level)

        if hasattr(self, 'read'):
            self._method_type = 'read'
        elif hasattr(self, 'process'):
            self._method_type = 'process'
        elif hasattr(self, 'write'):
            self._method_type = 'write'
        else:
            return self
        self._method = getattr(self, self._method_type)
        sig = signature(self._method)
        self._allowed_args = [k for k, v in sig.parameters.items()
                              if v.kind == v.POSITIONAL_OR_KEYWORD]
        return self

    @property
    def method(self):
        return self._method

    @property
    def method_type(self):
        return self._method_type

    @property
    def run_config(self):
        return RunConfig(**self.model_dump()).model_dump()

    @property
    def status(self):
        return self._status

    @status.setter
    def status(self, status):
        self._status = status

    def get_args(self):
        return self._args

    def set_args(self, **args):
        for k, v in args.items():
            if k in self._allowed_args:
                self._args[k] = v

    def has_filename(self):
        return hasattr(self, 'filename') and self.filename is not None

    def get_schema(self):
        return self.schema_

    @staticmethod
    def get_default_nxentry(nxobject):
        """Given a `nexusformat.nexus.NXroot` or
        `nexusformat.nexus.NXentry` object, return the default or
        first `nexusformat.nexus.NXentry` match.

        :param nxobject: Input data.
        :type nxobject: nexusformat.nexus.NXroot,
            nexusformat.nexus.NXentry
        :raises ValueError: If unable to retrieve a
            `nexusformat.nexus.NXentry` object.
        :return: The input data if a `nexusformat.nexus.NXentry`
            object or the default or first `nexusformat.nexus.NXentry`
            object if a `nexusformat.nexus.NXroot` object.
        :rtype: nexusformat.nexus.NXentry
        """
        # Third party modules
        from nexusformat.nexus import (
            NXentry,
            NXroot,
        )

        if isinstance(nxobject, NXroot):
            if 'default' in nxobject.attrs:
                nxentry = nxobject[nxobject.default]
            else:
                nxentries = [
                    v for v in nxobject.values() if isinstance(v, NXentry)]
                if not nxentries:
                    raise ValueError('Unable to retrieve a NXentry object')
                if len(nxentries) != 1:
                    print('WARNING: Found multiple NXentries, returning the '
                          'first')
                nxentry = nxentries[0]
        elif isinstance(nxobject, NXentry):
            nxentry = nxobject
        else:
            raise ValueError(f'Invalid parameter nxobject ({nxobject})')
        return nxentry
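Aside (editorial, not part of the file): a minimal sketch of the fallback behavior of `get_default_nxentry`, assuming the package and its `nexusformat` dependency are installed; the entry name is illustrative:

    from nexusformat.nexus import NXentry, NXroot
    from CHAP.pipeline import PipelineItem

    root = NXroot()
    root['entry'] = NXentry()  # no 'default' attribute set on the root
    # With no default attribute, the single NXentry child is returned
    assert PipelineItem.get_default_nxentry(root) is root['entry']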
    @staticmethod
    def unwrap_pipelinedata(data):
        """Given a list of PipelineData objects, return a list of
        their `data` values.

        :param data: Input data to read, write, or process that needs
            to be unwrapped from PipelineData before use.
        :type data: list[PipelineData]
        :return: The `'data'` values of the items in the input data.
        :rtype: list[object]
        """
        unwrapped_data = []
        if isinstance(data, list):
            for d in data:
                if isinstance(d, PipelineData):
                    unwrapped_data.append(d['data'])
                else:
                    unwrapped_data.append(d)
        else:
            unwrapped_data = [data]
        return unwrapped_data

    def get_config(
            self, data=None, config=None, schema=None, remove=True):
        """Look through `data` for the last item whose value for the
        `'schema'` key matches `schema`. Convert the value for that
        item's `'data'` key into the configuration's Pydantic model
        identified by `schema` and return it. If no item is found and
        `config` is specified, validate it against the configuration's
        Pydantic model identified by `schema` and return it.

        :param data: Input data from a previous `PipelineItem`.
        :type data: list[PipelineData], optional
        :param config: Initialization parameters for an instance of
            the Pydantic model identified by `schema`; required if
            `data` is unspecified, invalid, or does not contain an
            item that matches the schema; supersedes any equal
            parameters contained in `data`.
        :type config: dict, optional
        :param schema: Name of the `PipelineItem` class to match in
            `data` & return, defaults to the internal PipelineItem
            `schema` attribute.
        :type schema: str, optional
        :param remove: If there is a matching entry in `data`, remove
            it from the list, defaults to `True`.
        :type remove: bool, optional
        :raises ValueError: If there's no match for `schema` in `data`.
        :return: The last matching validated configuration model.
        :rtype: PipelineItem
        """
        self.logger.debug(f'Getting {schema} configuration')
        t0 = time()

        if schema is None:
            schema = self.schema_
        matching_config = False
        if data is not None:
            try:
                for i, d in reversed(list(enumerate(data))):
                    if d.get('schema') == schema:
                        matching_config = d.get('data')
                        if remove:
                            data.pop(i)
                        break
            except Exception:
                pass

        if matching_config:
            if config is not None:
                # Local modules
                from CHAP.utils.general import dictionary_update

                # Update matching_config with config if both exist
                matching_config = dictionary_update(matching_config, config)
        else:
            if isinstance(config, dict):
                matching_config = config
            else:
                raise ValueError(
                    f'Unable to find a configuration for schema `{schema}`')
        if self._method_type == 'read' and 'inputdir' not in matching_config:
            matching_config['inputdir'] = self.inputdir
        if self._method_type == 'write' and 'outputdir' not in matching_config:
            matching_config['outputdir'] = self.outputdir

        mod_name, cls_name = schema.rsplit('.', 1)
        module = __import__(f'CHAP.{mod_name}', fromlist=cls_name)
        model_config = getattr(module, cls_name)(**matching_config)

        self.logger.debug(
            f'Got {schema} configuration in {time()-t0:.3f} seconds')

        return model_config
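Aside (editorial, not part of the file): `get_config` resolves a schema string of the form `'<module>.<ClassName>'` into a model class under the `CHAP` package before instantiating it with the matched keyword arguments. The resolution step in isolation, with an illustrative schema name:

    schema = 'common.models.map.MapConfig'
    mod_name, cls_name = schema.rsplit('.', 1)
    # mod_name == 'common.models.map', cls_name == 'MapConfig'
    module = __import__(f'CHAP.{mod_name}', fromlist=cls_name)
    model_cls = getattr(module, cls_name)

Note that a reader's `inputdir` and a writer's `outputdir` are injected into the matched configuration before instantiation whenever it lacks them.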
    @staticmethod
    def get_data(data, name=None, schema=None, remove=True):
        """Look through `data` for the last item whose `'data'` value
        is a nexusformat.nexus.NXobject object or matches a given name
        or schema. Pick the last item for which the `'name'` key
        matches `name` if set or the `'schema'` key matches `schema`
        if set; otherwise pick the last match for a
        nexusformat.nexus.NXobject object. Return the data object.

        :param data: Input data from a previous `PipelineItem`.
        :type data: list[PipelineData]
        :param name: Name of the data item to match in `data` & return.
        :type name: str, optional
        :param schema: Name of the `PipelineItem` class to match in
            `data` & return.
        :type schema: Union[str, list[str]], optional
        :param remove: If there is a matching entry in `data`, remove
            it from the list, defaults to `True`.
        :type remove: bool, optional
        :raises ValueError: If there's no match for `name` or `schema`
            in `data`, or if there is no object of type
            nexusformat.nexus.NXobject.
        :return: The last matching data item.
        :rtype: obj
        """
        # Third party modules
        from nexusformat.nexus import NXobject

        result = None
        if name is None and schema is None:
            for i, d in reversed(list(enumerate(data))):
                if isinstance(d.get('data'), NXobject):
                    result = d.get('data')
                    if remove:
                        data.pop(i)
                    break
            else:
                raise ValueError('No NXobject data item found')
        elif name is not None:
            for i, d in reversed(list(enumerate(data))):
                if d.get('name') == name:
                    result = d.get('data')
                    if remove:
                        data.pop(i)
                    break
            else:
                raise ValueError(f'No match for data item named "{name}"')
        elif schema is not None:
            if isinstance(schema, str):
                schema = [schema]
            for i, d in reversed(list(enumerate(data))):
                if d.get('schema') in schema:
                    result = d.get('data')
                    if remove:
                        data.pop(i)
                    break
            else:
                raise ValueError(
                    f'No match for data item with schema "{schema}"')

        return result

    def execute(self, data):
        """Run the appropriate method of the object and return the
        result.

        :param data: Input data.
        :type data: list[PipelineData]
        :return: The wrapped result of running read, process, or write.
        :rtype: Union[PipelineData, tuple[PipelineData]]
        """
        if 'data' in self._allowed_args:
            self._args['data'] = data
        t0 = time()
        self.logger.debug(f'Executing "{self._method_type}" with schema '
                          f'"{self.schema_}" and {self._args}')
        self.logger.info(f'Executing "{self._method_type}"')
        data = self._method(**self._args)
        self.logger.info(
            f'Finished "{self._method}" in {time()-t0:.0f} seconds\n')
        return data
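Aside (editorial, not part of the file): a minimal sketch of the contract between `PipelineData`, `unwrap_pipelinedata`, and `get_data`. Both helpers are static, so no `PipelineItem` instance is needed; the names and values are illustrative, and `get_data` requires the `nexusformat` dependency for its lazy import:

    from CHAP.pipeline import PipelineData, PipelineItem

    wrapped = [
        PipelineData(name='reader', data='raw\n', schema=None),
        'bare value',  # non-PipelineData entries pass through unchanged
    ]
    assert PipelineItem.unwrap_pipelinedata(wrapped) == ['raw\n', 'bare value']

    # get_data matches by name (or schema) and removes the match by default
    data = [PipelineData(name='fit', data=42, schema=None)]
    assert PipelineItem.get_data(data, name='fit') == 42
    assert data == []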
class Pipeline(CHAPBaseModel):
    """Class representing a full `Pipeline` object."""
    args: conlist(item_type=dict, min_length=1)
    logger: Optional[logging.Logger] = None
    mmcs: conlist(item_type=ModelMetaclass, min_length=1)

    _data: conlist(item_type=PipelineData) = PrivateAttr(default=[])
    _items: conlist(item_type=PipelineItem) = PrivateAttr(default=[])
    #_output_filenames: conlist(item_type=FilePath) = PrivateAttr(default=[])
    _filename_mapping: dict = PrivateAttr(default={})

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @model_validator(mode='after')
    def validate_pipeline_after(self):
        """Validate the `Pipeline` configuration and initialize and
        validate the private attributes.

        :return: The validated configuration.
        :rtype: Pipeline
        """
        t0 = time()
        self.__name__ = self.__class__.__name__
        if self.logger is None:
            self.logger = logging.getLogger(self.__name__)
            self.logger.propagate = False

        output_filenames = []
        for mmc, args in zip(self.mmcs, self.args):
            item = mmc(data=self._data, modelmetaclass=mmc, **args)
            if item.has_filename():
                if item.method_type == 'read':
                    if item._mapping_filename in self._filename_mapping:
                        item.filename = self._filename_mapping[
                            item._mapping_filename]['path']
                        item.status = self._filename_mapping[
                            item._mapping_filename]['status']
                    else:
                        #if item.filename in self._output_filenames:
                        if item.filename in output_filenames:
                            self._filename_mapping[item._mapping_filename] = {
                                'path': item.filename,
                                'status': 'write_pending'}
                            item.status = 'write_pending'
                        else:
                            self._filename_mapping[item._mapping_filename] = {
                                'path': item.filename, 'status': None}
                elif item.method_type == 'write':
                    if (not item.force_overwrite
                            and item.filename in output_filenames):
                            #and self.filename in self._output_filenames):
                        raise ValueError(
                            'Writing to an existing file without overwrite '
                            f'permission. Remove {item.filename} or set '
                            '"force_overwrite" in the pipeline configuration '
                            f'for {item.name}')
            item.set_args(**args)
            if (item.method_type == 'read'
                    and item.status not in ('read', 'write_pending')):
                if item.get_schema() is not None:
                    self.logger.debug(
                        f'Validating "{item.method_type}" with schema '
                        f'"{item.get_schema()}" and {item.get_args()}')
                self.logger.info(f'Validating "{item.method_type}"')
                data = item.method(**item.get_args())
                self._data.append(PipelineData(
                    name=item.name, data=data, schema=item.get_schema()))
                if item.has_filename():
                    self._filename_mapping[
                        item._mapping_filename]['status'] = 'read'
                else:
                    item.status = 'read'
            if item.method_type == 'write' and item.has_filename():
                for k, v in self._filename_mapping.items():
                    if v['path'] == item.filename:
                        self._filename_mapping[k]['status'] = \
                            'write_pending'
                #if item.filename not in self._output_filenames:
                #    self._output_filenames.append(item.filename)
                if item.filename not in output_filenames:
                    output_filenames.append(item.filename)
            self._items.append(item)
        self.logger.info(f'Validated pipeline in {time()-t0:.3f} seconds')

        return self

    def execute(self):
        """Executes the pipeline."""
        t0 = time()
        self.logger.info('Executing "execute"\n')

        for mmc, item, args in zip(self.mmcs, self._items, self.args):
            if hasattr(item, 'execute'):
                current_item = mmc(data=self._data, modelmetaclass=mmc, **args)
                self.logger.info(f'Calling "execute" on {item}\n')
                read_status = None
                if item.method_type == 'read' and item.has_filename():
                    read_status = self._filename_mapping[
                        item._mapping_filename]['status']
                    current_item.status = read_status
                    current_item.filename = item.filename
                current_item.set_args(**item.get_args())
                if not (item.method_type == 'read' and read_status == 'read'):
                    data = current_item.execute(data=self._data)
                    if current_item.method_type == 'read':
                        self._data.append(PipelineData(
                            name=current_item.name, data=data,
                            schema=current_item.get_schema()))
                    elif current_item.method_type == 'process':
                        if isinstance(data, tuple):
                            self._data.extend(
                                [d if isinstance(d, PipelineData)
                                 else PipelineData(
                                     name=current_item.name, data=d,
                                     schema=current_item.get_schema())
                                 for d in data])
                        else:
                            self._data.append(PipelineData(
                                name=current_item.name, data=data,
                                schema=current_item.get_schema()))
                    elif item.method_type == 'write' and item.has_filename():
                        for k, v in self._filename_mapping.items():
                            if v['path'] == item.filename:
                                self._filename_mapping[k]['status'] = \
                                    'written'
        self.logger.info(f'Executed "execute" in {time()-t0:.3f} seconds')
        return self._data
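Aside (editorial, not part of the file): `Pipeline.execute` enforces a simple data-flow contract: each item receives the accumulated list of results, and whatever it returns is appended wrapped in `PipelineData` for the next item. A pure-Python sketch of that contract, with illustrative names and the demo string from `CHAP/processor.py` below:

    from CHAP.pipeline import PipelineData

    data = []
    # A 'read' step appends its wrapped result...
    data.append(PipelineData(name='reader', data='read part\n', schema=None))
    # ...and a 'process' step consumes the list and appends its own
    result = data[-1]['data'] + 'process part\n'
    data.append(PipelineData(name='processor', data=result, schema=None))
    assert data[-1]['data'] == 'read part\nprocess part\n'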
CHAP/processor.py
ADDED
@@ -0,0 +1,125 @@

#!/usr/bin/env python
#-*- coding: utf-8 -*-
"""
File       : processor.py
Author     : Valentin Kuznetsov <vkuznet AT gmail dot com>
Description: Processor module

Define a generic `Processor` object.
"""

# System modules
import argparse
import logging
from sys import modules
from typing import Optional

# Third party modules
from pydantic import model_validator

# Local modules
from CHAP.pipeline import PipelineItem


class Processor(PipelineItem):
    """Generic data processor.

    The job of any `Processor` in a `Pipeline` is to receive data
    returned by the previous `PipelineItem`, process it in some way,
    and return the result for the next `PipelineItem` to use as input.
    """
    @model_validator(mode='before')
    @classmethod
    def validate_processor_before(cls, data):
        # System modules
        from copy import deepcopy

        # Local modules
        from CHAP.utils.general import (
            dictionary_update,
            is_str_or_str_series,
        )

        if isinstance(data, dict):
            if 'data' in data and 'modelmetaclass' in data:
                mmc = data['modelmetaclass']
                pipeline_fields = mmc.model_fields.get('pipeline_fields')
                if pipeline_fields is not None:
                    for k, v in pipeline_fields.default.items():
                        if is_str_or_str_series(v, log=False):
                            schema = v
                            merge_key_paths = None
                        else:
                            schema = v.get('schema')
                            merge_key_paths = v.get('merge_key_paths')
                        try:
                            value = deepcopy(mmc.get_data(
                                data['data'], schema=schema, remove=False))
                        except Exception:
                            pass
                        else:
                            if k in data:
                                data[k] = dictionary_update(
                                    value, data[k],
                                    merge_key_paths=merge_key_paths,
                                    sort=True)
                            else:
                                data[k] = value
        return data

    def process(self, data):
        """Extract the contents of the input data, add a string to it,
        and return the amended value.

        :param data: Input data.
        :return: Processed data.
        """
        # If needed, extract data from a returned value of Reader.read
        if isinstance(data, list):
            if all(isinstance(d, dict) for d in data):
                data = data[0]['data']
        if data is None:
            return []
        # The process operation is a simple string concatenation
        data += 'process part\n'
        # Return data back to pipeline
        return data
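Aside (editorial, not part of the file): the default `process` above is a deliberately trivial demo. Calling it directly mirrors what `main()` below does, assuming `RunConfig`'s defaults suffice to instantiate a bare `Processor` (as `main()` itself relies on):

    from CHAP.processor import Processor

    processor = Processor()
    print(processor.process('read part\n'))
    # read part
    # process part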
class OptionParser():
    """User-based option parser."""
    def __init__(self):
        self.parser = argparse.ArgumentParser(prog='PROG')
        self.parser.add_argument(
            '--data', action='store',
            dest='data', default='', help='Input data')
        self.parser.add_argument(
            '--processor', action='store',
            dest='processor', default='Processor', help='Processor class name')
        self.parser.add_argument(
            '--log-level', choices=logging._nameToLevel.keys(),
            dest='log_level', default='INFO', help='logging level')


def main(opt_parser=OptionParser):
    """Main function."""
    optmgr = opt_parser()
    opts = optmgr.parser.parse_args()
    cls_name = opts.processor
    try:
        processor_cls = getattr(modules[__name__], cls_name)
    except AttributeError:
        print(f'Unsupported processor {cls_name}')
        raise

    processor = processor_cls()
    processor.logger.setLevel(getattr(logging, opts.log_level))
    log_handler = logging.StreamHandler()
    log_handler.setFormatter(logging.Formatter(
        '{name:20}: {message}', style='{'))
    processor.logger.addHandler(log_handler)
    processor.process(opts.data)


if __name__ == '__main__':
    main()
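Aside (editorial): the module is also directly runnable; with the flags defined in `OptionParser` above, an invocation looks like `python CHAP/processor.py --data 'read part' --processor Processor --log-level DEBUG`.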