ChessAnalysisPipeline 0.0.17.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. CHAP/TaskManager.py +216 -0
  2. CHAP/__init__.py +27 -0
  3. CHAP/common/__init__.py +57 -0
  4. CHAP/common/models/__init__.py +8 -0
  5. CHAP/common/models/common.py +124 -0
  6. CHAP/common/models/integration.py +659 -0
  7. CHAP/common/models/map.py +1291 -0
  8. CHAP/common/processor.py +2869 -0
  9. CHAP/common/reader.py +658 -0
  10. CHAP/common/utils.py +110 -0
  11. CHAP/common/writer.py +730 -0
  12. CHAP/edd/__init__.py +23 -0
  13. CHAP/edd/models.py +876 -0
  14. CHAP/edd/processor.py +3069 -0
  15. CHAP/edd/reader.py +1023 -0
  16. CHAP/edd/select_material_params_gui.py +348 -0
  17. CHAP/edd/utils.py +1572 -0
  18. CHAP/edd/writer.py +26 -0
  19. CHAP/foxden/__init__.py +19 -0
  20. CHAP/foxden/models.py +71 -0
  21. CHAP/foxden/processor.py +124 -0
  22. CHAP/foxden/reader.py +224 -0
  23. CHAP/foxden/utils.py +80 -0
  24. CHAP/foxden/writer.py +168 -0
  25. CHAP/giwaxs/__init__.py +11 -0
  26. CHAP/giwaxs/models.py +491 -0
  27. CHAP/giwaxs/processor.py +776 -0
  28. CHAP/giwaxs/reader.py +8 -0
  29. CHAP/giwaxs/writer.py +8 -0
  30. CHAP/inference/__init__.py +7 -0
  31. CHAP/inference/processor.py +69 -0
  32. CHAP/inference/reader.py +8 -0
  33. CHAP/inference/writer.py +8 -0
  34. CHAP/models.py +227 -0
  35. CHAP/pipeline.py +479 -0
  36. CHAP/processor.py +125 -0
  37. CHAP/reader.py +124 -0
  38. CHAP/runner.py +277 -0
  39. CHAP/saxswaxs/__init__.py +7 -0
  40. CHAP/saxswaxs/processor.py +8 -0
  41. CHAP/saxswaxs/reader.py +8 -0
  42. CHAP/saxswaxs/writer.py +8 -0
  43. CHAP/server.py +125 -0
  44. CHAP/sin2psi/__init__.py +7 -0
  45. CHAP/sin2psi/processor.py +8 -0
  46. CHAP/sin2psi/reader.py +8 -0
  47. CHAP/sin2psi/writer.py +8 -0
  48. CHAP/tomo/__init__.py +15 -0
  49. CHAP/tomo/models.py +210 -0
  50. CHAP/tomo/processor.py +3862 -0
  51. CHAP/tomo/reader.py +9 -0
  52. CHAP/tomo/writer.py +59 -0
  53. CHAP/utils/__init__.py +6 -0
  54. CHAP/utils/converters.py +188 -0
  55. CHAP/utils/fit.py +2947 -0
  56. CHAP/utils/general.py +2655 -0
  57. CHAP/utils/material.py +274 -0
  58. CHAP/utils/models.py +595 -0
  59. CHAP/utils/parfile.py +224 -0
  60. CHAP/writer.py +122 -0
  61. MLaaS/__init__.py +0 -0
  62. MLaaS/ktrain.py +205 -0
  63. MLaaS/mnist_img.py +83 -0
  64. MLaaS/tfaas_client.py +371 -0
  65. chessanalysispipeline-0.0.17.dev3.dist-info/LICENSE +60 -0
  66. chessanalysispipeline-0.0.17.dev3.dist-info/METADATA +29 -0
  67. chessanalysispipeline-0.0.17.dev3.dist-info/RECORD +70 -0
  68. chessanalysispipeline-0.0.17.dev3.dist-info/WHEEL +5 -0
  69. chessanalysispipeline-0.0.17.dev3.dist-info/entry_points.txt +2 -0
  70. chessanalysispipeline-0.0.17.dev3.dist-info/top_level.txt +2 -0
CHAP/pipeline.py ADDED
#-*- coding: utf-8 -*-
"""
File       : pipeline.py
Author     : Valentin Kuznetsov <vkuznet AT gmail dot com>
Description:
"""

# System modules
import logging
from time import time
from types import MethodType
from typing import (
    Literal,
    Optional,
)

# Third party modules
from pydantic import (
    ConfigDict,
    Field,
    PrivateAttr,
    conlist,
    constr,
    model_validator,
)
from pydantic._internal._model_construction import ModelMetaclass

# Local modules
from CHAP.models import (
    CHAPBaseModel,
    RunConfig,
)


class PipelineData(dict):
    """Wrapper for all results of PipelineItem.execute."""
    def __init__(self, name=None, data=None, schema=None):
        super().__init__()
        self.__setitem__('name', name)
        self.__setitem__('data', data)
        self.__setitem__('schema', schema)


class PipelineItem(RunConfig):
    """Class representing a single item in a `Pipeline` object."""
    logger: Optional[logging.Logger] = None
    name: Optional[constr(strip_whitespace=True, min_length=1)] = None
    schema_: Optional[constr(strip_whitespace=True, min_length=1)] = \
        Field(None, alias='schema')

    _method: MethodType = PrivateAttr(default=None)
    _method_type: Literal[
        'read', 'process', 'write'] = PrivateAttr(default=None)
    _args: dict = PrivateAttr(default={})
    _allowed_args: conlist(item_type=str) = PrivateAttr(default=[])
    _status: Literal[
        'read', 'write_pending', 'written'] = PrivateAttr(default=None)

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @model_validator(mode='after')
    def validate_pipelineitem_after(self):
        """Validate the `PipelineItem` configuration.

        :return: The validated configuration.
        :rtype: PipelineItem
        """
        # System modules
        from inspect import signature

        if self.name is None:
            self.__name__ = self.__class__.__name__
        else:
            self.__name__ = self.name
        if self.logger is None:
            self.logger = logging.getLogger(self.__name__)
            self.logger.propagate = False
            log_handler = logging.StreamHandler()
            log_handler.setFormatter(logging.Formatter(
                '{asctime}: {name:20}: {levelname}: {message}',
                datefmt='%Y-%m-%d %H:%M:%S', style='{'))
            self.logger.addHandler(log_handler)
            self.logger.setLevel(self.log_level)

        if hasattr(self, 'read'):
            self._method_type = 'read'
        elif hasattr(self, 'process'):
            self._method_type = 'process'
        elif hasattr(self, 'write'):
            self._method_type = 'write'
        else:
            return self
        self._method = getattr(self, self._method_type)
        sig = signature(self._method)
        self._allowed_args = [k for k, v in sig.parameters.items()
                              if v.kind == v.POSITIONAL_OR_KEYWORD]
        return self

    @property
    def method(self):
        return self._method

    @property
    def method_type(self):
        return self._method_type

    @property
    def run_config(self):
        return RunConfig(**self.model_dump()).model_dump()

    @property
    def status(self):
        return self._status

    @status.setter
    def status(self, status):
        self._status = status

    def get_args(self):
        return self._args

    def set_args(self, **args):
        for k, v in args.items():
            if k in self._allowed_args:
                self._args[k] = v

    def has_filename(self):
        return hasattr(self, 'filename') and self.filename is not None

    def get_schema(self):
        return self.schema_

    @staticmethod
    def get_default_nxentry(nxobject):
        """Given a `nexusformat.nexus.NXroot` or
        `nexusformat.nexus.NXentry` object, return the default or
        first `nexusformat.nexus.NXentry` match.

        :param nxobject: Input data.
        :type nxobject: nexusformat.nexus.NXroot,
            nexusformat.nexus.NXentry
        :raises ValueError: If unable to retrieve a
            `nexusformat.nexus.NXentry` object.
        :return: The input data if it is a `nexusformat.nexus.NXentry`
            object, or the default or first `nexusformat.nexus.NXentry`
            object if it is a `nexusformat.nexus.NXroot` object.
        :rtype: nexusformat.nexus.NXentry
        """
        # Third party modules
        from nexusformat.nexus import (
            NXentry,
            NXroot,
        )

        if isinstance(nxobject, NXroot):
            if 'default' in nxobject.attrs:
                nxentry = nxobject[nxobject.default]
            else:
                nxentries = [
                    v for v in nxobject.values() if isinstance(v, NXentry)]
                if not nxentries:
                    raise ValueError('Unable to retrieve a NXentry object')
                if len(nxentries) != 1:
                    print('WARNING: Found multiple NXentries, returning '
                          'the first')
                nxentry = nxentries[0]
        elif isinstance(nxobject, NXentry):
            nxentry = nxobject
        else:
            raise ValueError(f'Invalid parameter nxobject ({nxobject})')
        return nxentry

    @staticmethod
    def unwrap_pipelinedata(data):
        """Given a list of PipelineData objects, return a list of
        their `data` values.

        :param data: Input data to read, write, or process that needs
            to be unwrapped from PipelineData before use.
        :type data: list[PipelineData]
        :return: The `'data'` values of the items in the input data.
        :rtype: list[object]
        """
        unwrapped_data = []
        if isinstance(data, list):
            for d in data:
                if isinstance(d, PipelineData):
                    unwrapped_data.append(d['data'])
                else:
                    unwrapped_data.append(d)
        else:
            unwrapped_data = [data]
        return unwrapped_data

    def get_config(
            self, data=None, config=None, schema=None, remove=True):
        """Look through `data` for the last item whose value for the
        `'schema'` key matches `schema`. Convert the value for that
        item's `'data'` key into the configuration's Pydantic model
        identified by `schema` and return it. If no item is found and
        `config` is specified, validate it against the configuration's
        Pydantic model identified by `schema` and return it.

        :param data: Input data from a previous `PipelineItem`.
        :type data: list[PipelineData], optional
        :param config: Initialization parameters for an instance of
            the Pydantic model identified by `schema`; required if
            `data` is unspecified, invalid, or does not contain an
            item that matches the schema. Supersedes any equal
            parameters contained in `data`.
        :type config: dict, optional
        :param schema: Name of the `PipelineItem` class to match in
            `data` & return, defaults to the internal PipelineItem
            `schema` attribute.
        :type schema: str, optional
        :param remove: If there is a matching entry in `data`, remove
            it from the list, defaults to `True`.
        :type remove: bool, optional
        :raises ValueError: If there's no match for `schema` in `data`.
        :return: The last matching validated configuration model.
        :rtype: PipelineItem
        """
        self.logger.debug(f'Getting {schema} configuration')
        t0 = time()

        if schema is None:
            schema = self.schema_
        matching_config = False
        if data is not None:
            try:
                for i, d in reversed(list(enumerate(data))):
                    if d.get('schema') == schema:
                        matching_config = d.get('data')
                        if remove:
                            data.pop(i)
                        break
            except Exception:
                pass

        if matching_config:
            if config is not None:
                # Local modules
                from CHAP.utils.general import dictionary_update

                # Update matching_config with config if both exist
                matching_config = dictionary_update(matching_config, config)
        else:
            if isinstance(config, dict):
                matching_config = config
            else:
                raise ValueError(
                    f'Unable to find a configuration for schema `{schema}`')
        if self._method_type == 'read' and 'inputdir' not in matching_config:
            matching_config['inputdir'] = self.inputdir
        if self._method_type == 'write' and 'outputdir' not in matching_config:
            matching_config['outputdir'] = self.outputdir

        mod_name, cls_name = schema.rsplit('.', 1)
        module = __import__(f'CHAP.{mod_name}', fromlist=[cls_name])
        model_config = getattr(module, cls_name)(**matching_config)

        self.logger.debug(
            f'Got {schema} configuration in {time()-t0:.3f} seconds')

        return model_config

    @staticmethod
    def get_data(data, name=None, schema=None, remove=True):
        """Look through `data` for the last item whose `'data'` value
        is a nexusformat.nexus.NXobject object or matches a given name
        or schema. Pick the last item whose `'name'` key matches
        `name` if set, or whose `'schema'` key matches `schema` if
        set; otherwise pick the last match for a
        nexusformat.nexus.NXobject object. Return the data object.

        :param data: Input data from a previous `PipelineItem`.
        :type data: list[PipelineData]
        :param name: Name of the data item to match in `data` & return.
        :type name: str, optional
        :param schema: Name of the `PipelineItem` class to match in
            `data` & return.
        :type schema: Union[str, list[str]], optional
        :param remove: If there is a matching entry in `data`, remove
            it from the list, defaults to `True`.
        :type remove: bool, optional
        :raises ValueError: If there's no match for `name` or `schema`
            in `data`, or if there is no object of type
            nexusformat.nexus.NXobject.
        :return: The last matching data item.
        :rtype: obj
        """
        # Third party modules
        from nexusformat.nexus import NXobject

        result = None
        if name is None and schema is None:
            for i, d in reversed(list(enumerate(data))):
                if isinstance(d.get('data'), NXobject):
                    result = d.get('data')
                    if remove:
                        data.pop(i)
                    break
            else:
                raise ValueError('No NXobject data item found')
        elif name is not None:
            for i, d in reversed(list(enumerate(data))):
                if d.get('name') == name:
                    result = d.get('data')
                    if remove:
                        data.pop(i)
                    break
            else:
                raise ValueError(f'No match for data item named "{name}"')
        elif schema is not None:
            if isinstance(schema, str):
                schema = [schema]
            for i, d in reversed(list(enumerate(data))):
                if d.get('schema') in schema:
                    result = d.get('data')
                    if remove:
                        data.pop(i)
                    break
            else:
                raise ValueError(
                    f'No match for data item with schema "{schema}"')

        return result

    def execute(self, data):
        """Run the appropriate method of the object and return the
        result.

        :param data: Input data.
        :type data: list[PipelineData]
        :return: The wrapped result of running read, process, or write.
        :rtype: Union[PipelineData, tuple[PipelineData]]
        """
        if 'data' in self._allowed_args:
            self._args['data'] = data
        t0 = time()
        self.logger.debug(f'Executing "{self._method_type}" with schema '
                          f'"{self.schema_}" and {self._args}')
        self.logger.info(f'Executing "{self._method_type}"')
        data = self._method(**self._args)
        self.logger.info(
            f'Finished "{self._method_type}" in {time()-t0:.0f} seconds\n')
        return data


class Pipeline(CHAPBaseModel):
    """Class representing a full `Pipeline` object."""
    args: conlist(item_type=dict, min_length=1)
    logger: Optional[logging.Logger] = None
    mmcs: conlist(item_type=ModelMetaclass, min_length=1)

    _data: conlist(item_type=PipelineData) = PrivateAttr(default=[])
    _items: conlist(item_type=PipelineItem) = PrivateAttr(default=[])
    _filename_mapping: dict = PrivateAttr(default={})

    model_config = ConfigDict(arbitrary_types_allowed=True)

    @model_validator(mode='after')
    def validate_pipeline_after(self):
        """Validate the `Pipeline` configuration and initialize and
        validate the private attributes.

        :return: The validated configuration.
        :rtype: Pipeline
        """
        t0 = time()
        self.__name__ = self.__class__.__name__
        if self.logger is None:
            self.logger = logging.getLogger(self.__name__)
            self.logger.propagate = False

        output_filenames = []
        for mmc, args in zip(self.mmcs, self.args):
            item = mmc(data=self._data, modelmetaclass=mmc, **args)
            if item.has_filename():
                if item.method_type == 'read':
                    if item._mapping_filename in self._filename_mapping:
                        item.filename = self._filename_mapping[
                            item._mapping_filename]['path']
                        item.status = self._filename_mapping[
                            item._mapping_filename]['status']
                    else:
                        if item.filename in output_filenames:
                            self._filename_mapping[item._mapping_filename] = {
                                'path': item.filename,
                                'status': 'write_pending'}
                            item.status = 'write_pending'
                        else:
                            self._filename_mapping[item._mapping_filename] = {
                                'path': item.filename, 'status': None}
                elif item.method_type == 'write':
                    if (not item.force_overwrite
                            and item.filename in output_filenames):
                        raise ValueError(
                            'Writing to an existing file without overwrite '
                            f'permission. Remove {item.filename} or set '
                            '"force_overwrite" in the pipeline configuration '
                            f'for {item.name}')
            item.set_args(**args)
            if (item.method_type == 'read'
                    and item.status not in ('read', 'write_pending')):
                if item.get_schema() is not None:
                    self.logger.debug(
                        f'Validating "{item.method_type}" with schema '
                        f'"{item.get_schema()}" and {item.get_args()}')
                    self.logger.info(f'Validating "{item.method_type}"')
                    data = item.method(**item.get_args())
                    self._data.append(PipelineData(
                        name=item.name, data=data, schema=item.get_schema()))
                    if item.has_filename():
                        self._filename_mapping[
                            item._mapping_filename]['status'] = 'read'
                    else:
                        item.status = 'read'
            if item.method_type == 'write' and item.has_filename():
                for k, v in self._filename_mapping.items():
                    if v['path'] == item.filename:
                        self._filename_mapping[k]['status'] = \
                            'write_pending'
                if item.filename not in output_filenames:
                    output_filenames.append(item.filename)
            self._items.append(item)
        self.logger.info(f'Validated pipeline in {time()-t0:.3f} seconds')

        return self

    def execute(self):
        """Execute the pipeline."""
        t0 = time()
        self.logger.info('Executing "execute"\n')

        for mmc, item, args in zip(self.mmcs, self._items, self.args):
            if hasattr(item, 'execute'):
                current_item = mmc(
                    data=self._data, modelmetaclass=mmc, **args)
                self.logger.info(f'Calling "execute" on {item}\n')
                read_status = None
                if item.method_type == 'read' and item.has_filename():
                    read_status = self._filename_mapping[
                        item._mapping_filename]['status']
                    current_item.status = read_status
                    current_item.filename = item.filename
                current_item.set_args(**item.get_args())
                if not (item.method_type == 'read'
                        and read_status == 'read'):
                    data = current_item.execute(data=self._data)
                    if current_item.method_type == 'read':
                        self._data.append(PipelineData(
                            name=current_item.name, data=data,
                            schema=current_item.get_schema()))
                    elif current_item.method_type == 'process':
                        if isinstance(data, tuple):
                            self._data.extend(
                                [d if isinstance(d, PipelineData)
                                 else PipelineData(
                                     name=current_item.name, data=d,
                                     schema=current_item.get_schema())
                                 for d in data])
                        else:
                            self._data.append(PipelineData(
                                name=current_item.name, data=data,
                                schema=current_item.get_schema()))
                    elif item.method_type == 'write' and item.has_filename():
                        for k, v in self._filename_mapping.items():
                            if v['path'] == item.filename:
                                self._filename_mapping[k]['status'] = \
                                    'written'
        self.logger.info(f'Executed "execute" in {time()-t0:.3f} seconds')
        return self._data
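The reverse scan in `get_data` (and `get_config`) is the core lookup used throughout the pipeline: the newest matching result wins and is consumed by default. Below is a minimal, self-contained sketch of that behavior, using a plain dict in place of `PipelineData`; the `last_match` helper is an editor's illustration, not part of the package.

    # Editor's sketch: mimics the reverse scan in PipelineItem.get_data.
    def last_match(data, name=None, remove=True):
        # Walk the accumulated results newest-to-oldest, as get_data
        # does with reversed(list(enumerate(data))).
        for i, d in reversed(list(enumerate(data))):
            if d.get('name') == name:
                result = d.get('data')
                if remove:
                    data.pop(i)  # consume the matched item
                return result
        raise ValueError(f'No match for data item named "{name}"')

    results = [
        {'name': 'reader1', 'data': 'old', 'schema': None},
        {'name': 'reader1', 'data': 'new', 'schema': None},
    ]
    print(last_match(results, name='reader1'))  # 'new': last match wins
    print(len(results))                         # 1: the match was removed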
CHAP/processor.py ADDED
#!/usr/bin/env python
#-*- coding: utf-8 -*-
"""
File       : processor.py
Author     : Valentin Kuznetsov <vkuznet AT gmail dot com>
Description: Processor module

Define a generic `Processor` object.
"""

# System modules
import argparse
import logging
from sys import modules

# Third party modules
from pydantic import model_validator

# Local modules
from CHAP.pipeline import PipelineItem


class Processor(PipelineItem):
    """Generic data processor.

    The job of any `Processor` in a `Pipeline` is to receive data
    returned by the previous `PipelineItem`, process it in some way,
    and return the result for the next `PipelineItem` to use as input.
    """
    @model_validator(mode='before')
    @classmethod
    def validate_processor_before(cls, data):
        """Merge matching data items from previous pipeline items into
        the processor's configuration before validation."""
        # System modules
        from copy import deepcopy

        # Local modules
        from CHAP.utils.general import (
            dictionary_update,
            is_str_or_str_series,
        )

        if isinstance(data, dict):
            if 'data' in data and 'modelmetaclass' in data:
                mmc = data['modelmetaclass']
                pipeline_fields = mmc.model_fields.get('pipeline_fields')
                if pipeline_fields is not None:
                    for k, v in pipeline_fields.default.items():
                        if is_str_or_str_series(v, log=False):
                            schema = v
                            merge_key_paths = None
                        else:
                            schema = v.get('schema')
                            merge_key_paths = v.get('merge_key_paths')
                        try:
                            value = deepcopy(mmc.get_data(
                                data['data'], schema=schema, remove=False))
                        except Exception:
                            pass
                        else:
                            if k in data:
                                data[k] = dictionary_update(
                                    value, data[k],
                                    merge_key_paths=merge_key_paths,
                                    sort=True)
                            else:
                                data[k] = value
        return data

    def process(self, data):
        """Extract the contents of the input data, add a string to it,
        and return the amended value.

        :param data: Input data.
        :return: Processed data.
        """
        # If needed, extract data from a returned value of Reader.read
        if isinstance(data, list):
            if all(isinstance(d, dict) for d in data):
                data = data[0]['data']
        if data is None:
            return []
        # The process operation is a simple string concatenation
        data += 'process part\n'
        # Return data back to pipeline
        return data


class OptionParser():
    """User-based option parser."""
    def __init__(self):
        self.parser = argparse.ArgumentParser(prog='PROG')
        self.parser.add_argument(
            '--data', action='store',
            dest='data', default='', help='Input data')
        self.parser.add_argument(
            '--processor', action='store',
            dest='processor', default='Processor',
            help='Processor class name')
        self.parser.add_argument(
            '--log-level', choices=logging._nameToLevel.keys(),
            dest='log_level', default='INFO', help='logging level')


def main(opt_parser=OptionParser):
    """Main function."""
    optmgr = opt_parser()
    opts = optmgr.parser.parse_args()
    cls_name = opts.processor
    try:
        processor_cls = getattr(modules[__name__], cls_name)
    except AttributeError:
        print(f'Unsupported processor {cls_name}')
        raise

    processor = processor_cls()
    processor.logger.setLevel(getattr(logging, opts.log_level))
    log_handler = logging.StreamHandler()
    log_handler.setFormatter(logging.Formatter(
        '{name:20}: {message}', style='{'))
    processor.logger.addHandler(log_handler)
    processor.process(opts.data)


if __name__ == '__main__':
    main()
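Because `PipelineItem`'s validator dispatches on whichever of `read`, `process`, or `write` a subclass defines, a custom processor only needs a `process` method. A minimal sketch, assuming the ChessAnalysisPipeline package is installed; `UppercaseProcessor` and its behavior are hypothetical, not part of this release.

    # Editor's sketch: hypothetical Processor subclass.
    from CHAP.processor import Processor

    class UppercaseProcessor(Processor):
        """Hypothetical processor: upper-case all string results."""
        def process(self, data):
            # unwrap_pipelinedata (inherited from PipelineItem) extracts
            # the raw 'data' values from the PipelineData wrappers.
            values = self.unwrap_pipelinedata(data)
            return [v.upper() if isinstance(v, str) else v for v in values]

When run as a script, the module also exposes a small CLI: `main()` looks up the class named by `--processor` in this module, attaches a stream handler at `--log-level`, and calls its `process` method on the `--data` string.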