pdfbl.sequential 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,556 @@
1
+ import json
2
+ import re
3
+ import threading
4
+ import time
5
+ import warnings
6
+ from pathlib import Path
7
+ from queue import Queue
8
+ from types import SimpleNamespace
9
+ from typing import Literal
10
+
11
+ from bg_mpl_stylesheets.styles import all_styles
12
+ from matplotlib import pyplot as plt
13
+ from prompt_toolkit import PromptSession
14
+ from prompt_toolkit.patch_stdout import patch_stdout
15
+
16
+ from pdfbl.sequential.pdfadapter import PDFAdapter
17
+
18
# Select the "bg-style" sheet from bg_mpl_stylesheets when this module is
# imported, so figures created afterwards by the runner pick it up.
+ plt.style.use(all_styles["bg-style"])
19
+
20
+
21
class SequentialCMIRunner:
    """Refine an ordered series of PDF profile files one after another.

    Files found in the input directory are ordered by the integer captured
    by ``filename_order_pattern``. Each file is refined through a
    ``PDFAdapter``; the variable values obtained for one file are passed as
    the initial values for the next one. Selected quantities are pushed
    onto queues and drawn by ``_update_plot``.
    """

    def __init__(self):
        """Create a runner with empty file bookkeeping and a new adapter."""
        # Every profile file seen so far, in refinement order.
        self.input_files_known = []
        # Files whose refinement has finished.
        self.input_files_completed = []
        # Files waiting to be refined by the next cycle.
        self.input_files_running = []
        self.adapter = PDFAdapter()
        # Plot bookkeeping; filled by ``_initialize_plots``.
        self.visualization_data = {}

    def _validate_inputs(self):
        """Check ``self.inputs`` before any refinement is attempted.

        Raises
        ------
        FileNotFoundError
            If the input directory, the output directory or the structure
            file does not exist.
        NotADirectoryError
            If the input or output path exists but is not a directory.
        ValueError
            If a file name does not match ``filename_order_pattern``, if a
            refinable or plotted variable is not a recipe parameter, or if
            a plotted result entry is not one of the allowed entries.
        """
        for path_name in ("input_data_dir", "output_result_dir"):
            path = Path(self.inputs[path_name])
            if not path.exists():
                raise FileNotFoundError(
                    f"Path '{self.inputs[path_name]}' for "
                    f"'{path_name}' does not exist. Please check the "
                    "provided path."
                )
            if not path.is_dir():
                raise NotADirectoryError(
                    f"Path '{self.inputs[path_name]}' for "
                    f"'{path_name}' is not a directory. Please check the "
                    "provided path."
                )
        if not Path(self.inputs["structure_path"]).exists():
            raise FileNotFoundError(
                f"Structure file '{self.inputs['structure_path']}' does not "
                "exist. Please check the provided path."
            )
        profile_files = list(Path(self.inputs["input_data_dir"]).glob("*"))
        if profile_files:
            for profile_file in profile_files:
                matches = re.findall(
                    self.inputs["filename_order_pattern"], profile_file.name
                )
                if not matches:
                    raise ValueError(
                        f"Input file '{profile_file}' does not match the "
                        "filename order pattern. Please check the pattern "
                        "or the input files."
                    )
            # Build a throw-away recipe from the last listed profile only to
            # learn which parameter names exist.
            tmp_adapter = PDFAdapter()
            tmp_adapter.initialize_profile(str(profile_files[-1]))
            tmp_adapter.initialize_structures([self.inputs["structure_path"]])
            tmp_adapter.initialize_contribution()
            tmp_adapter.initialize_recipe()
            allowed_variable_names = list(
                tmp_adapter.recipe._parameters.keys()
            )
            # ``refinable_variable_names`` may be None (use the fallback
            # chosen in ``_run_one_cycle``), so guard the iteration.
            for var_name in self.inputs["refinable_variable_names"] or []:
                if var_name not in allowed_variable_names:
                    raise ValueError(
                        f"Refinable variable '{var_name}' not found in the "
                        "recipe. Please choose from the existing variables: "
                        f"{allowed_variable_names}"
                    )
            for var_name in self.inputs.get("plot_variable_names", []):
                if var_name not in allowed_variable_names:
                    raise ValueError(
                        f"Variable '{var_name}' is not found in the recipe. "
                        "Please choose from the existing variables: "
                        f"{allowed_variable_names}"
                    )
        else:
            # Without a profile no recipe can be built, so the variable
            # names cannot be checked yet.
            warnings.warn(
                "No input profile files found in the input data directory. "
                "Skipping variable name validation."
            )
        allowed_result_entry_names = [
            "residual",
            "contributions",
            "restraints",
            "chi2",
            "reduced_chi2",
        ]
        for entry_name in self.inputs.get("plot_result_names", []):
            if entry_name not in allowed_result_entry_names:
                raise ValueError(
                    f"Result entry '{entry_name}' is not a valid entry to "
                    "plot. Please choose from the following entries: "
                    f"{allowed_result_entry_names}"
                )
        for entry_name in self.inputs.get(
            "plot_intermediate_result_names", []
        ):
            if entry_name not in allowed_result_entry_names:
                raise ValueError(
                    f"Intermediate result '{entry_name}' is not a valid "
                    "entry to plot. Please choose from the following "
                    "entries: "
                    f"{allowed_result_entry_names}"
                )

    def load_inputs(
        self,
        input_data_dir,
        structure_path,
        output_result_dir="results",
        filename_order_pattern=r"(\d+)K\.gr",
        whether_plot_y=False,
        whether_plot_ycalc=False,
        plot_variable_names=None,
        plot_result_names=None,
        plot_intermediate_result_names=None,
        refinable_variable_names=None,
        initial_variable_values=None,
        xmin=None,
        xmax=None,
        dx=None,
        qmin=None,
        qmax=None,
        show_plot=True,
    ):
        r"""Load and validate input configuration for sequential PDF
        refinement.

        This method stores the input data location, structure information,
        refinement parameters and plotting configuration on the runner,
        validates them and creates the requested figures.

        Parameters
        ----------
        input_data_dir : str
            The path to the directory containing input PDF profile files.
        structure_path : str
            The path to the structure file (e.g., CIF format) used for
            refinement.
        output_result_dir : str
            The path to the directory for storing refinement results.
            The directory must already exist. Default is "results".
        filename_order_pattern : str
            The regular expression pattern to extract ordering information
            from filenames. The first match is converted with ``int`` to
            order the files.
            Default is r"(\d+)K\.gr" to extract temperature values from
            filenames.
        whether_plot_y : bool
            Whether to plot the experimental PDF data (y). Default is False.
        whether_plot_ycalc : bool
            Whether to plot the calculated PDF data (ycalc). Default is False.
        plot_variable_names : list of str
            The list of variable names to plot during refinement.
            Default is None.
        plot_result_names : list of str
            The list of fit result entries to plot.
            Allowed values: "residual", "contributions", "restraints", "chi2",
            "reduced_chi2". Default is None.
        plot_intermediate_result_names : list of str
            The list of intermediate result entries to plot during refinement.
            Allowed values: "residual", "contributions", "restraints", "chi2",
            "reduced_chi2". Default is None.
        refinable_variable_names : list of str
            The list of variable names to refine.
            Must exist in the recipe.
            Default is None, in which case the variables named in
            ``initial_variable_values`` are refined.
        initial_variable_values : dict
            The dictionary mapping variable names to their initial values.
            Default is None.
        xmin : float
            The minimum x-value for the PDF profile.
            Default is the value parsed from the input file.
        xmax : float
            The maximum x-value for the PDF profile.
            Default is the value parsed from the input file.
        dx : float
            The step size for the PDF profile.
            Default is the value parsed from the input file.
        qmin : float
            The minimum q-value for the PDF profile.
            Default is the value parsed from the input file.
        qmax : float
            The maximum q-value for the PDF profile.
            Default is the value parsed from the input file.
        show_plot : bool
            Whether to display plots during refinement. Default is True.

        Raises
        ------
        FileNotFoundError
            If the input data directory, output result directory, or structure
            file does not exist.
        NotADirectoryError
            If input_data_dir or output_result_dir is not a directory.
        ValueError
            If a file name does not match the order pattern, if a refinable
            or plotted variable name is not found in the recipe, or if a
            plot result name is not valid.

        Examples
        --------
        >>> runner = SequentialCMIRunner()
        >>> runner.load_inputs(
        ...     input_data_dir="./data",
        ...     structure_path="./structure.cif",
        ...     output_result_dir="./results",
        ...     refinable_variable_names=["a", "all"],
        ...     plot_variable_names=["a"],
        ...     plot_result_names=["chi2"],
        ...     plot_intermediate_result_names=["residual"],
        ... )
        """
        self.inputs = {
            "input_data_dir": input_data_dir,
            "structure_path": structure_path,
            "output_result_dir": output_result_dir,
            "filename_order_pattern": filename_order_pattern,
            "xmin": xmin,
            "xmax": xmax,
            "dx": dx,
            "qmin": qmin,
            "qmax": qmax,
            # Keep None as None: ``_run_one_cycle`` uses it to tell "not
            # given" apart from an explicitly empty list.
            "refinable_variable_names": refinable_variable_names,
            "initial_variable_values": initial_variable_values or {},
            "whether_plot_y": whether_plot_y,
            "whether_plot_ycalc": whether_plot_ycalc,
            "plot_variable_names": plot_variable_names or [],
            "plot_result_names": plot_result_names or [],
            "plot_intermediate_result_names": plot_intermediate_result_names
            or [],
        }
        self.show_plot = show_plot
        self._validate_inputs()
        self._initialize_plots()

    def _initialize_plots(self):
        """Create the figures and the queues that feed them.

        One figure holds the ``y`` and/or ``ycalc`` curves when requested.
        Every plotted variable, result entry and intermediate result gets
        its own figure, an empty history buffer and a queue of new values.
        """
        whether_plot_y = self.inputs["whether_plot_y"]
        whether_plot_ycalc = self.inputs["whether_plot_ycalc"]
        plot_intermediate_result_names = self.inputs[
            "plot_intermediate_result_names"
        ]
        if whether_plot_y and whether_plot_ycalc:
            fig, _ = plt.subplots(2, 1)
            labels = ["ycalc", "y"]
        elif whether_plot_ycalc:
            fig, _ = plt.subplots()
            labels = ["ycalc"]
        elif whether_plot_y:
            fig, _ = plt.subplots()
            labels = ["y"]
        else:
            fig = None
            labels = []
        if fig:
            palette = plt.rcParams["axes.prop_cycle"].by_key()["color"]
            for i, (ax, curve_label) in enumerate(zip(fig.axes, labels)):
                (line,) = ax.plot([], [], label=curve_label, color=palette[i])
                self.visualization_data[curve_label] = {
                    "line": line,
                    "xdata": Queue(),
                    "ydata": Queue(),
                }
            fig.legend()
        plot_tasks = {
            "variables": self.inputs["plot_variable_names"],
            "results": self.inputs["plot_result_names"],
            "intermediate_results": plot_intermediate_result_names,
        }
        for category, var_names in plot_tasks.items():
            if var_names is None:
                continue
            self.visualization_data[category] = {}
            for var_name in var_names:
                fig, ax = plt.subplots()
                (line,) = ax.plot([], [], label=var_name, marker="o")
                self.visualization_data[category][var_name] = {
                    "line": line,
                    "buffer": [],
                    "ydata": Queue(),
                }
                fig.suptitle(f"{category.capitalize()}: {var_name}")
        if plot_intermediate_result_names is not None:
            for var_name in plot_intermediate_result_names:
                # The adapter is handed the queue that ``_update_plot``
                # later reads for this entry.
                self.adapter.monitor_intermediate_results(
                    var_name,
                    step=10,
                    queue=self.visualization_data["intermediate_results"][
                        var_name
                    ]["ydata"],
                )

    def _update_plot(self):
        """Move every queued value into its line and rescale the axes.

        All values waiting at the time of the call are consumed, so one
        call after a batch run shows every refined file rather than only
        the first one.
        """
        for key, plot_pack in self.visualization_data.items():
            if key in ("ycalc", "y"):
                x_queue = plot_pack["xdata"]
                y_queue = plot_pack["ydata"]
                latest = None
                # ``qsize`` snapshot keeps the loop bounded even if the
                # fitting thread keeps producing. x is queued before y, so
                # the blocking ``get`` on y waits for the paired curve.
                for _ in range(x_queue.qsize()):
                    latest = (x_queue.get(), y_queue.get())
                if latest is None:
                    continue
                line = plot_pack["line"]
                line.set_xdata(latest[0])
                line.set_ydata(latest[1])
                line.axes.relim()
                line.axes.autoscale_view()
            elif key in ("variables", "results", "intermediate_results"):
                for data_pack in plot_pack.values():
                    pending = data_pack["ydata"].qsize()
                    if pending == 0:
                        continue
                    buffer = data_pack["buffer"]
                    for _ in range(pending):
                        buffer.append(data_pack["ydata"].get())
                    line = data_pack["line"]
                    # x axis is simply the 1-based index of each value.
                    line.set_xdata(list(range(1, len(buffer) + 1)))
                    line.set_ydata(buffer)
                    line.axes.relim()
                    line.axes.autoscale_view()

    def _collect_plot_buffers(self):
        """Return ``{category: {name: buffer}}`` for all buffered plots.

        The ``y``/``ycalc`` entries hold a line and queues directly instead
        of per-variable dictionaries, so they are skipped.
        """
        collected = {}
        for category_name, data_pack in self.visualization_data.items():
            for var_name, var_pack in data_pack.items():
                if isinstance(var_pack, dict) and "buffer" in var_pack:
                    collected.setdefault(category_name, {})[var_name] = (
                        var_pack["buffer"]
                    )
        return collected

    def _check_for_new_data(self):
        """Rescan the input directory and queue newly found files.

        Raises
        ------
        RuntimeError
            If the already known files are no longer the leading part of
            the sorted directory listing, i.e. a file appeared that sorts
            before or between the known ones.
        """
        pattern = self.inputs["filename_order_pattern"]
        sorted_files = sorted(
            Path(self.inputs["input_data_dir"]).glob("*"),
            key=lambda file: int(re.findall(pattern, file.name)[0]),
        )
        known_count = len(self.input_files_known)
        if self.input_files_known != sorted_files[:known_count]:
            raise RuntimeError(
                "Wrong order to run sequential toolset is detected. "
                "This is likely due to files appearing in the input directory "
                "in the wrong order. Please restart the sequential toolset."
            )
        if self.input_files_known == sorted_files:
            return
        self.input_files_known = sorted_files
        self.input_files_running = [
            f
            for f in self.input_files_known
            if f not in self.input_files_completed
        ]
        print(f"{[str(f) for f in self.input_files_running]} detected.")

    def set_start_input_file(
        self, input_filename, input_filename_to_result_filename
    ):
        """Set the starting input file for sequential refinement and
        continue the interrupted sequential refinement from that point.

        Parameters
        ----------
        input_filename : str
            The name of the input file to start from. This file must be in the
            input data directory.
        input_filename_to_result_filename : function
            The function that takes an input filename and returns the
            corresponding result filename. This is used to locate the last
            result file for loading variable values. It is not called when
            ``input_filename`` is the first file of the series.

        Raises
        ------
        ValueError
            If ``input_filename`` is not among the known input files.
        FileNotFoundError
            If the result file of the preceding input file does not exist.
        """
        self._check_for_new_data()
        input_file_path = Path(self.inputs["input_data_dir"]) / input_filename
        if input_file_path not in self.input_files_known:
            raise ValueError(
                f"Input file {input_filename} not found in known input files."
            )
        start_index = self.input_files_known.index(input_file_path)
        self.input_files_completed = self.input_files_known[:start_index]
        self.input_files_running = self.input_files_known[start_index:]
        if not self.input_files_completed:
            # Starting at the first file: there is no earlier result to
            # resume from, so drop any stale values and let the next cycle
            # fall back to the configured initial values.
            vars(self).pop("last_result_variables_values", None)
            print(
                f"Starting from input file: {self.input_files_running[0].name}"
            )
            return
        last_result_file = Path(
            self.inputs["output_result_dir"]
        ) / input_filename_to_result_filename(
            self.input_files_completed[-1].name
        )
        if not last_result_file.exists():
            raise FileNotFoundError(
                f"Result file {last_result_file} not found. "
                "Cannot load last result variable values. "
                "Please check the provided function or use "
                "an earlier input file."
            )
        with open(last_result_file, "r") as result_stream:
            stored_variables = json.load(result_stream)["variables"]
        self.last_result_variables_values = {
            name: pack["value"] for name, pack in stored_variables.items()
        }
        print(f"Starting from input file: {self.input_files_running[0].name}")

    def _run_one_cycle(self, stop_event=SimpleNamespace(is_set=lambda: False)):
        """Refine every file that is currently waiting.

        Parameters
        ----------
        stop_event : object with an ``is_set()`` method
            Checked before each file; when set, the remaining files are
            not refined. The default never reports being set.
        """
        self._check_for_new_data()
        if not self.input_files_running:
            return None
        output_result_dir = self.inputs["output_result_dir"]
        structure_path = self.inputs["structure_path"]
        initial_variable_values = self.inputs["initial_variable_values"]
        refinable_variable_names = self.inputs["refinable_variable_names"]
        if refinable_variable_names is None:
            refinable_variable_names = list(initial_variable_values.keys())
        for input_file in self.input_files_running:
            if stop_event.is_set():
                break
            print(f"Processing {input_file.name}...")
            self.adapter.initialize_profile(
                str(input_file),
                xmin=self.inputs["xmin"],
                xmax=self.inputs["xmax"],
                dx=self.inputs["dx"],
                qmin=self.inputs["qmin"],
                qmax=self.inputs["qmax"],
            )
            self.adapter.initialize_structures([structure_path])
            self.adapter.initialize_contribution()
            self.adapter.initialize_recipe()
            # The first file starts from the user-supplied values; later
            # files start from the previous file's refined values.
            if not hasattr(self, "last_result_variables_values"):
                self.last_result_variables_values = initial_variable_values
            self.adapter.set_initial_variable_values(
                self.last_result_variables_values
            )
            self.adapter.refine_variables(refinable_variable_names)
            results = self.adapter.save_results(
                filename=str(
                    Path(output_result_dir) / f"{input_file.stem}_result.json"
                ),
                mode="dict",
            )
            self.last_result_variables_values = {
                name: pack["value"]
                for name, pack in results["variables"].items()
            }
            self.input_files_completed.append(input_file)
            profile = None
            if "ycalc" in self.visualization_data:
                profile = self.adapter.recipe.pdfcontribution.profile
                self.visualization_data["ycalc"]["xdata"].put(profile.x)
                self.visualization_data["ycalc"]["ydata"].put(profile.ycalc)
            if "y" in self.visualization_data:
                profile = self.adapter.recipe.pdfcontribution.profile
                self.visualization_data["y"]["xdata"].put(profile.x)
                self.visualization_data["y"]["ydata"].put(profile.y)
            variable_packs = self.visualization_data.get("variables", {})
            for var_name, var_pack in variable_packs.items():
                var_pack["ydata"].put(
                    self.adapter.recipe._parameters[var_name].value
                )
            result_packs = self.visualization_data.get("results", {})
            if result_packs:
                # One lookup per file is enough for all plotted entries.
                fitresults_dict = self.adapter.save_results(mode="dict")
                for entry_name, entry_pack in result_packs.items():
                    entry_pack["ydata"].put(
                        fitresults_dict.get(entry_name, None)
                    )
        print("Completed!")
        self.input_files_running = []

    def run(self, mode: Literal["batch", "stream"]):
        """Run the sequential refinement process in either batch or
        streaming mode.

        Parameters
        ----------
        mode : str
            The mode to run the sequential refinement. Must be either "batch"
            or "stream". In "batch" mode, the toolset will run through all
            available input files once and then stop. In "stream" mode, the
            runner will continuously monitor the input data directory for new
            files and process them as they appear, until the user decides
            to stop the process by typing STOP. On stop, the buffered plot
            values are written to "visualization_data.json" in the current
            working directory.

        Raises
        ------
        ValueError
            If ``mode`` is neither "batch" nor "stream".
        """
        if mode == "batch":
            self._run_one_cycle()
            self._update_plot()
        elif mode == "stream":
            stop_event = threading.Event()
            session = PromptSession()
            if (self.visualization_data is not None) and self.show_plot:
                plt.ion()
                plt.pause(0.01)

            def stream_loop():
                while not stop_event.is_set():
                    self._run_one_cycle(stop_event)
                    stop_event.wait(1)  # Check for new data every 1s

            def input_loop():
                with patch_stdout():
                    print("=== COMMANDS ===")
                    print("Type STOP to exit")
                    print("================")
                    while not stop_event.is_set():
                        cmd = session.prompt("> ")
                        if cmd.strip() == "STOP":
                            stop_event.set()
                            print(
                                "Stopping the streaming sequential toolset..."
                            )
                        else:
                            print(
                                "Unrecognized input. "
                                "Please type 'STOP' to end."
                            )
                with open("visualization_data.json", "w") as f:
                    json.dump(self._collect_plot_buffers(), f, indent=2)

            input_thread = threading.Thread(target=input_loop)
            input_thread.start()
            fit_thread = threading.Thread(target=stream_loop)
            fit_thread.start()
            # Figures are refreshed from the thread that called ``run``;
            # the worker threads only fill the queues.
            while not stop_event.is_set():
                self._update_plot()
                plt.pause(0.01)
                time.sleep(1)
            fit_thread.join()
            input_thread.join()
        else:
            raise ValueError(f"Unknown mode: {mode}")
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env python
2
+ ##############################################################################
3
+ #
4
+ # (c) 2025 Simon Billinge.
5
+ # All rights reserved.
6
+ #
7
+ # File coded by: members of the Billinge Group and PDF beamline at NSLS-II.
8
+ #
9
+ # See GitHub contributions for a more detailed list of contributors.
10
+ # https://github.com/pdf-bl/pdfbl.sequential/graphs/contributors # noqa: E501
11
+ #
12
+ # See LICENSE.rst for license information.
13
+ #
14
+ ##############################################################################
15
+ """Definition of __version__."""
16
+
17
+ # We do not use the other three variables, but can be added back if needed.
18
+ # __all__ = ["__date__", "__git_commit__", "__timestamp__", "__version__"]
19
+
20
+ # obtain version information
21
+ from importlib.metadata import PackageNotFoundError, version
22
+
23
# Fall back to a placeholder when the distribution metadata is missing,
# e.g. when the sources are imported without the package being installed.
__version__ = "unknown"
try:
    __version__ = version("pdfbl.sequential")
except PackageNotFoundError:
    pass