landau 1.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
+ on:
2
+ release:
3
+ types: [published]
4
+ jobs:
5
+ pypi-publish:
6
+ name: Upload release to PyPI
7
+ runs-on: ubuntu-latest
8
+ environment:
9
+ name: pypi
10
+ url: https://pypi.org/p/landau
11
+ permissions:
12
+ id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
13
+ steps:
14
+ # retrieve your distributions here
15
+ - uses: actions/checkout@v4
16
+ - uses: actions/setup-python@v5
17
+ with:
18
+ python-version: '3.11'
19
+ - name: Build
20
+ shell: bash
21
+ run: |
22
+ pip install build
23
+ python -m build --sdist
24
+ - name: pypi-publish
25
+ uses: pypa/gh-action-pypi-publish@v1.12.4
@@ -0,0 +1,3 @@
1
+ # 1.0.0
2
+
3
+ Initial version.
landau-1.3.2/LICENSE ADDED
@@ -0,0 +1,29 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2021, Max-Planck-Institut für Eisenforschung GmbH - Computational Materials Design (CM) Department
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ * Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ * Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ * Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
landau-1.3.2/PKG-INFO ADDED
@@ -0,0 +1,55 @@
1
+ Metadata-Version: 2.4
2
+ Name: landau
3
+ Version: 1.3.2
4
+ Author-email: Marvin Poul <pmrv@posteo.de>
5
+ Project-URL: Repository, https://github.com/eisenforschung/landau
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: Operating System :: OS Independent
8
+ Requires-Python: <3.14,>=3.10
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: matplotlib<4,>=3
12
+ Requires-Dist: numpy<3,>1
13
+ Requires-Dist: pandas<3,>=2.2
14
+ Requires-Dist: scipy<2,>=1.11.0
15
+ Requires-Dist: shapely<=2.0.7,>1
16
+ Requires-Dist: seaborn<=0.13.2,>=0.12
17
+ Requires-Dist: scikit-learn<2,>=1.2
18
+ Provides-Extra: constraints
19
+ Requires-Dist: polyfit; extra == "constraints"
20
+ Dynamic: license-file
21
+
22
+ [![DOI](https://zenodo.org/badge/931240296.svg)](https://doi.org/10.5281/zenodo.15513439)
23
+
24
+ # landau.py
25
+
26
+ A library to calculate thermodynamic equilibria and plot phase diagrams in the
27
+ (semi-)grand ensemble.
28
+
29
+
30
+ ## Building packages
31
+
32
+ ```bash
33
+ pip install build
34
+ python -m build --sdist
35
+ ```
36
+
37
+ ## Citation
38
+
39
+ This code is part of a [publication](https://doi.org/10.21203/rs.3.rs-4732459/v1); please cite it accordingly if you use this package in your work.
40
+
41
+ ```
42
+ @article{poul2024automated,
43
+ title={Automated Generation of Structure Datasets for Machine Learning Potentials and Alloys},
44
+ author={Poul, Marvin and Huber, Liam and Neugebauer, J{\"o}rg},
45
+ year={2024}
46
+ }
47
+ ```
48
+
49
+ # Examples
50
+
51
+ ## Phase Diagram of Two Ideal Solutions
52
+
53
+ ![image](https://github.com/user-attachments/assets/02730176-be36-4f72-bf95-b607d2b5fa3d)
54
+
55
+ [See the Ideal Solution notebook.](notebooks/IdealSolution.ipynb)
landau-1.3.2/README.md ADDED
@@ -0,0 +1,34 @@
1
+ [![DOI](https://zenodo.org/badge/931240296.svg)](https://doi.org/10.5281/zenodo.15513439)
2
+
3
+ # landau.py
4
+
5
+ A library to calculate thermodynamic equilibria and plot phase diagrams in the
6
+ (semi-)grand ensemble.
7
+
8
+
9
+ ## Building packages
10
+
11
+ ```bash
12
+ pip install build
13
+ python -m build --sdist
14
+ ```
15
+
16
+ ## Citation
17
+
18
+ This code is part of a [publication](https://doi.org/10.21203/rs.3.rs-4732459/v1); please cite it accordingly if you use this package in your work.
19
+
20
+ ```
21
+ @article{poul2024automated,
22
+ title={Automated Generation of Structure Datasets for Machine Learning Potentials and Alloys},
23
+ author={Poul, Marvin and Huber, Liam and Neugebauer, J{\"o}rg},
24
+ year={2024}
25
+ }
26
+ ```
27
+
28
+ # Examples
29
+
30
+ ## Phase Diagram of Two Ideal Solutions
31
+
32
+ ![image](https://github.com/user-attachments/assets/02730176-be36-4f72-bf95-b607d2b5fa3d)
33
+
34
+ [See the Ideal Solution notebook.](notebooks/IdealSolution.ipynb)
@@ -0,0 +1,15 @@
1
+ from .phases import (
2
+ LinePhase,
3
+ TemperatureDepandantLinePhase,
4
+ TemperatureDependentLinePhase,
5
+ IdealSolution,
6
+ RegularSolution,
7
+ InterpolatingPhase,
8
+ )
9
+
10
+ from .plot import plot_phase_diagram
11
+
12
+ try:
13
+ from ._version import __version__
14
+ except ImportError:
15
+ __version__ = "dev"
@@ -0,0 +1,21 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
5
+
6
+ TYPE_CHECKING = False
7
+ if TYPE_CHECKING:
8
+ from typing import Tuple
9
+ from typing import Union
10
+
11
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
12
+ else:
13
+ VERSION_TUPLE = object
14
+
15
+ version: str
16
+ __version__: str
17
+ __version_tuple__: VERSION_TUPLE
18
+ version_tuple: VERSION_TUPLE
19
+
20
+ __version__ = version = '1.3.2'
21
+ __version_tuple__ = version_tuple = (1, 3, 2)
@@ -0,0 +1,452 @@
1
+ """
2
+ Calculates phase diagrams from sets of Phases.
3
+ """
4
+
5
+ from functools import partial
6
+ import numbers
7
+ import warnings
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+ import scipy.optimize as so
12
+ from scipy.spatial import Delaunay
13
+ from typing import Iterable
14
+
15
+ from scipy.constants import Boltzmann, eV
16
+
17
+ kB = Boltzmann / eV
18
+
19
+ from .phases import Phase, AbstractLinePhase
20
+
21
+
22
def find_one_point(phase1, phase2, potential, var_range):
    """
    Locate the exact transition between two phases.

    The transition is the value of the intensive variable at which ``potential`` evaluates to the same number for
    both phases, i.e. the root of their potential difference.

    Args:
        phase1, phase2 (:class:`landau.phase.Phase`):
            the two phases
        potential (callable):
            function that given a phase and an intensive state variable returns a thermodynamic potential
        var_range (tuple of float):
            interval that is handed to the root finder as its search bracket

    Returns:
        the value of the intensive variable at the root found inside `var_range`
    """

    def potential_difference(x):
        return potential(phase1, x) - potential(phase2, x)

    solution = so.root_scalar(
        potential_difference,
        bracket=var_range,
        x0=np.mean(var_range),
        xtol=1e-6,
    )
    return solution.root
37
+
38
+
39
def find_mu_one_point(phase1, phase2, mu_range, T):
    """
    Find the chemical potential at which two phases coexist at fixed temperature.

    Args:
        phase1, phase2: the two phases; their semigrand potentials are equated
        mu_range: interval of chemical potentials in which to search
        T: temperature at which the potentials are evaluated

    Returns:
        list of two dicts, one per phase (phase1 first), with the keys "mu", "phi", "c" and "phase"
    """

    def potential_at_T(phase, mu):
        return phase.semigrand_potential(T, mu)

    mu_eq = find_one_point(phase1, phase2, potential_at_T, mu_range)
    return [
        {
            "mu": mu_eq,
            "phi": phase.semigrand_potential(T, mu_eq),
            "c": phase.concentration(T, mu_eq),
            "phase": phase.name,
        }
        for phase in (phase1, phase2)
    ]
48
+
49
+
50
def find_T_one_point(phase1, phase2, T_range, mu):
    """
    Find the temperature at which two phases coexist at fixed chemical potential.

    Args:
        phase1, phase2: the two phases; their semigrand potentials are equated
        T_range: interval of temperatures in which to search
        mu: chemical potential at which the potentials are evaluated

    Returns:
        list of two dicts, one per phase (phase1 first), with the keys "T", "phi", "c" and "phase"
    """

    def potential_at_mu(phase, T):
        return phase.semigrand_potential(T, mu)

    T_eq = find_one_point(phase1, phase2, potential_at_mu, T_range)
    return [
        {
            "T": T_eq,
            "phi": phase.semigrand_potential(T_eq, mu),
            "c": phase.concentration(T_eq, mu),
            "phase": phase.name,
        }
        for phase in (phase1, phase2)
    ]
56
+
57
+
58
def find_all_points(stable_df, phases, by="mu"):
    """
    Refine every estimated phase boundary along one axis of the sampling grid.

    The rows are sorted along `by`; wherever two consecutive rows belong to different phases the exact transition
    between these two phases is searched in the interval spanned by the two rows.

    Args:
        stable_df (dataframe): stable points with at least the columns "phase", "mu" and "T"; the value of the other
            axis is taken from the first row of each pair, so presumably all rows share it (verify against caller)
        phases (dict): maps phase names to phases
        by (str): axis along which to search, either "mu" or "T"

    Returns:
        dataframe of refined boundary points sorted by `by`; empty if no boundary could be added
    """
    assert by in ["T", "mu"], "Wrong by value"
    ordered = stable_df.sort_values(by).reset_index(drop=True)
    # phase of the following row; the last row is compared to itself and so never flags a boundary
    following_phase = ordered.phase.shift(-1).ffill()
    boundary_guesses = ordered.index[ordered.phase != following_phase]

    boundaries = []
    for g in boundary_guesses:
        r1 = ordered.loc[g]
        r2 = ordered.loc[g + 1]
        p1 = phases[r1.phase]
        p2 = phases[r2.phase]
        if by == "mu":
            refinements = find_mu_one_point(p1, p2, sorted([r1.mu, r2.mu]), r1["T"])
        elif by == "T":
            refinements = find_T_one_point(p1, p2, sorted([r1["T"], r2["T"]]), r1["mu"])

        # the find*point functions only equate the potentials of the two neighbouring phases; any third phase could
        # still be lower in potential there, in which case the refined points are not truly stable and are dropped
        # FIXME: technically this only needs to be checked once per boundary, since the refinements share T/mu
        competitors = [phase for phase in phases.values() if phase.name not in (r1.phase, r2.phase)]
        undercut = any(
            phase.semigrand_potential(point.get("T", r1["T"]), point.get("mu", r1["mu"])) < point["phi"]
            for phase in competitors
            for point in refinements
        )
        if not undercut:
            boundaries.extend(refinements)

    df = pd.DataFrame(boundaries)
    if len(df) > 0:
        df = df.sort_values(by)
    return df
117
+
118
+
119
def find_triangle(phases, cand):
    """
    Refine the phase boundary that crosses a triangle of sampling points.

    Assumes two of the three points in cand are of the same phase.  The boundary is searched on the straight line
    between the per-phase mean positions in (T, mu).

    Args:
        phases (Phases): all phases
        cand (dataframe): subset of phase diagram dataframe of the three points of the triangle

    Returns:
        list of two dicts (one per phase in cand) with keys "T", "mu", "phi", "c" and "phase"; empty list if the
        search failed, ended at negative temperature or another phase is lower in potential at the refined point
    """
    # one of p1, p2 will be the peak of the triangle, the other one the center of the base
    p1, p2 = cand.groupby("phase")[["T", "mu"]].mean().to_numpy()
    direction = p2 - p1

    def potential_on_line(phase, t):
        T, mu = p1 + direction * t
        return phase.semigrand_potential(T, mu)

    phase1, phase2 = [phases[p] for p in cand.phase.unique()]
    try:
        t = find_one_point(phase1, phase2, potential_on_line, (0, 1))
    except ValueError:
        warnings.warn(f"Failed to refine triangle between {p1} and {p2} of phases {cand.phase.unique()}!", stacklevel=2)
        return []

    T, mu = p1 + direction * t
    if T < 0:
        return []

    phi = phase1.semigrand_potential(T, mu)
    # check that no other phase is lower than the refined boundary
    pair_names = (phase1.name, phase2.name)
    for other in phases.values():
        if other.name in pair_names:
            continue
        if other.semigrand_potential(T, mu) < phi:
            # TODO: could try and refine here on the boundary (p1, more_stable_phase) and (p2, more_stable_phase)
            return []

    points = []
    for name in cand.phase.unique():
        points.append(
            {
                "T": T,
                "mu": mu,
                "phi": phases[name].semigrand_potential(T, mu),
                "c": phases[name].concentration(T, mu),
                "phase": name,
            }
        )
    return points
152
+
153
+
154
def refine_phase_diagram(df, phases, min_c=0, max_c=1):
    """Add additional points to a coarse phase diagram by searching for exact transitions.

    Args:
        df (dataframe): coarse phase diagram; the code below reads the columns "stable", "phase", "mu" and "T"
        phases (dict): maps phase names to phases
        min_c, max_c: concentrations assigned to the artificial border points that are added at mu = -inf and
            mu = +inf, respectively, when both mu and T are sampled

    Returns:
        dataframe that concatenates the stable input points, the unstable input points and all points added here;
        carries the additional columns "border" and "refined" (the latter is only set on some of the parts)
    """
    # split the input: unstable points are passed through untouched, only the stable ones are analysed
    udf = df.query("not stable").reset_index(drop=True)
    udf["border"] = False
    df = df.query("stable").reset_index(drop=True)
    df["border"] = False
    df["refined"] = "no"
    # data holds a reference to df, so the in-place edits to df below are part of the final result
    data = [df, udf]
    multiple_mus = len(df["mu"].unique()) > 1
    multiple_ts = len(df["T"].unique()) > 1
    if multiple_mus and multiple_ts:
        # declare edges of the sampling window as borders so to not confuse get_transitions, debatably hacky
        df.loc[df["T"] == df["T"].min(), "border"] = True
        df.loc[df["T"] == df["T"].max(), "border"] = True
        # left and right edges as well, set here to +-inf to make sure the
        # cluster algo below separates top and left and right edges, even
        # hackier
        left = df.loc[df["mu"] == df["mu"].min()][["phase", "T"]]
        left["mu"] = -np.inf
        left["c"] = min_c
        left["border"] = True
        left["stable"] = True
        data.append(left)
        right = df.loc[df["mu"] == df["mu"].max()][["phase", "T"]]
        right["mu"] = +np.inf
        right["c"] = max_c
        right["border"] = True
        right["stable"] = True
        data.append(right)
        # Main idea:
        # - tessellate input points
        # - count the number of unique phases in each triangle
        # - if > 1 there must be at least one phase transition in the triangle
        # - find_triangle assumes (erroneously) that it can be found on the vector connecting the peak of the triangle
        # to the center of the base
        # - if = 3 there's probably a triple point in there (but doesn't need to be actually, should check that)
        dela = Delaunay(df[["mu", "T"]])
        # phase names at the three corners of every triangle
        coex = df.phase.to_numpy()[dela.simplices]
        phase_counts = np.array([len(set(x)) for x in coex])
        line_candidates = [df.iloc[i] for i in dela.simplices[phase_counts == 2]]
        trip_candidates = [df.iloc[i] for i in dela.simplices[phase_counts == 3]]
        # you'd think this to be faster, but somehow un/pickling the phases is very slow, likely because it has to do it
        # for each triangle and involves fitting the phases all over
        # with ProcessPoolExecutor(4) as pool:
        # ddf = pool.map(partial(find_triangle, phases), line_candidates)
        ddf = map(partial(find_triangle, phases), line_candidates)
        # flatten the per-triangle lists of point dicts into one table
        ddf = pd.DataFrame(sum(ddf, []))
        ddf["stable"] = True
        ddf["border"] = True
        ddf["refined"] = "delaunay"
        data.append(ddf)

        def refine_triples(tr):
            """Search the point where the three phases of triangle `tr` have equal potential.

            Starts from the mean (T, mu) of the triangle; returns one dict per phase or an empty list if the
            search ends at negative temperature.
            """
            T, mu = tr[["T", "mu"]].mean()
            p1, p2, p3 = (phases[p] for p in tr.phase.unique())

            def triplemin(x):
                # sum of absolute pairwise potential differences, zero where all three potentials agree
                T, mu = x
                phi1 = p1.semigrand_potential(T, mu)
                phi2 = p2.semigrand_potential(T, mu)
                phi3 = p3.semigrand_potential(T, mu)
                return abs(phi1 - phi2) + abs(phi2 - phi3) + abs(phi3 - phi1)

            T, mu = so.fmin(triplemin, (T, mu), disp=False)
            if T < 0:
                return []
            return [
                {
                    "T": T,
                    "mu": mu,
                    "phi": phases[p].semigrand_potential(T, mu),
                    "c": phases[p].concentration(T, mu),
                    "phase": p,
                }
                for p in tr.phase.unique()
            ]

        tdf = []
        for tri in trip_candidates:
            tdf.extend(refine_triples(tri))
        tdf = pd.DataFrame(tdf)
        tdf["stable"] = True
        tdf["border"] = True
        tdf["refined"] = "delaunay-triple"
        data.append(tdf)
    else:
        # only one axis is sampled: refine along that axis separately for every value of the other one
        if multiple_mus:
            mdf = (
                df.groupby("T", group_keys=True)
                .apply(find_all_points, phases=phases, by="mu", include_groups=True)
                .reset_index()
            )
            mdf["stable"] = True
            mdf["border"] = True
            mdf["refined"] = "mu"
            # "level_1" is the inner index level that reset_index turned into a column
            data.append(mdf.drop("level_1", axis="columns"))
        if multiple_ts:
            Tdf = (
                df.groupby("mu", group_keys=True)
                .apply(find_all_points, phases=phases, by="T", include_groups=True)
                .reset_index()
            )
            Tdf["stable"] = True
            Tdf["border"] = True
            Tdf["refined"] = "T"
            data.append(Tdf.drop("level_1", axis="columns"))
    return pd.concat(data, ignore_index=True)
261
+
262
+
263
def guess_mu_range(phases: Iterable[Phase], T: float, samples: int, tolerance: float = 1e-2):
    """Guess chemical potential window from the ideal solution.

    Searches numerically for chemical potentials which stabilize
    concentrations close to 0 and 1 and then use the concentrations
    encountered along the way to numerically invert the c(mu) mapping.
    Using an even c grid with mu(c) then yields a decent sampling of mu
    space so that the final phase diagram is described everywhere equally.

    Args:
        phases: phases to consider; any iterable, including one-shot iterators
        T: temperature at which to estimate mu(c)
        samples: how many mu samples to return
        tolerance: tolerance handed to the minimizer; also the margin by which the returned concentration window
            is shrunk from the extreme concentrations found.  Reduced automatically by factors of ten (down to
            1e-7) if the search yields a degenerate window.

    Returns:
        tuple ``(mus, c0, c1)``: array of `samples` chemical potentials that likely cover the whole concentration
        space and the lower and upper concentration of the even grid they were interpolated on

    Raises:
        ValueError: if the minimal and maximal chemical potential found coincide even at the tightest tolerance
    """
    # TODO: this can be used immediately also for the actual phase diagram
    # calculation: keep track of which phase is the most likely
    import scipy.interpolate as si

    # c(mu) below iterates over the phases on every call, so a one-shot iterator must be materialized first
    phases = list(phases)
    beta = 1 / (kB * T)

    # semigrand canonical "average" concentration
    # use this to avoid discontinuities and be phase agnostic
    def c(mu):
        phis = np.array([p.semigrand_potential(T, mu) for p in phases])
        conc = np.array([p.concentration(T, mu) for p in phases])
        # shift by the lowest potential so that the largest Boltzmann weight is exactly one
        phis -= phis.min(axis=0)
        prob = np.exp(-beta * phis)
        prob /= prob.sum(axis=0)
        return (prob * conc).sum(axis=0)

    # chemical potentials that minimize and maximize the average concentration, both started from mu=0
    resi = so.minimize(lambda x: +c(x[0]), x0=[0], tol=tolerance, method="BFGS")
    resa = so.minimize(lambda x: -c(x[0]), x0=[0], tol=tolerance, method="BFGS")
    mu0 = resi.x[0]
    mu1 = resa.x[0]
    if mu0 == mu1:
        if tolerance > 1e-7:
            return guess_mu_range(phases, T, samples, tolerance / 10)
        raise ValueError(
            "chemical potential range degenerate! Check that not all phases have the same fixed concentration!"
        )
    mm = np.linspace(mu0, mu1, samples)
    cc = c(mm)
    c0 = cc.min() + tolerance
    c1 = cc.max() - tolerance
    # invert c(mu) numerically and sample it on an even concentration grid
    return si.interp1d(cc, mm)(np.linspace(c0, c1, samples)), c0, c1
314
+
315
+
316
def calc_phase_diagram(
    phases: Iterable[Phase],
    Ts: Iterable[float] | float,
    mu: Iterable[float] | float | int,
    refine: bool = True,
    keep_unstable: bool = False,
):
    """
    Calculate phase diagram at given sampling points.

    Args:
        phases (iterable of Phases): phases to consider; indexed internally by their `name`
        Ts (iterable of floats or float): sampling points in temperature; a single value is wrapped in a list
        mu (iterable of floats, float or int): sampling points in chemical potential; if a non-zero int
            guess that many sampling points with guess_mu_range at max(Ts)
        refine (bool): add additional sampling points at exact phase transitions
        keep_unstable (bool): if False only keep entries of stable phases, otherwise keep entries of all phases at
            all sampling points

    Returns:
        dataframe of phase points; besides the sampled quantities it carries the columns "stable", "f" and
        "f_excess" and, if `refine` is set, whatever refine_phase_diagram adds

    Raises:
        ValueError: if the chemical potential range cannot be guessed
    """
    if not isinstance(Ts, Iterable):
        Ts = [Ts]
    phases = {p.name: p for p in phases}
    if isinstance(mu, numbers.Integral) and mu != 0:
        # we would often pass mu=0 to calculate a fixed mu, temperature only diagram and it'd be a bit annoying to pass
        # mu=0.0 all the time, so we special case as above
        try:
            mu, min_c, max_c = guess_mu_range(phases.values(), max(Ts), int(mu))
        except ValueError:
            # give a more specific message when only line phases were passed
            if all(isinstance(p, AbstractLinePhase) for p in phases.values()):
                raise ValueError(
                    "Cannot guess chemical potential range of line phases with all the same concentration!"
                ) from None
            raise
    elif refine:
        min_c, max_c = None, None
    # NOTE(review): min_c/max_c set above are overwritten below whenever refine is True and are unused otherwise

    def get(s, T):
        # one record per (phase, T); mu, phi and c may be arrays and are exploded into rows below
        phi = s.semigrand_potential(T, mu)
        return {"T": T, "phase": s.name, "phi": phi, "mu": mu, "c": s.concentration(T, mu)}

    pdf = pd.DataFrame([get(s, T) for s in phases.values() for T in Ts])
    pdf = pdf.explode(["mu", "phi", "c"]).infer_objects().reset_index(drop=True)
    # at every (T, mu) sampling point the phase of lowest potential is the stable one
    pdf["stable"] = False
    pdf.loc[pdf.groupby(["T", "mu"], group_keys=False).phi.idxmin(), "stable"] = True
    if refine:
        min_c = pdf.c.min()
        max_c = pdf.c.max()
        pdf = refine_phase_diagram(pdf, phases, min_c=min_c, max_c=max_c)
    pdf["f"] = pdf.phi + pdf.mu * pdf.c

    def sub(dd):
        # subtract from f the straight line between its values at the lowest and highest concentration;
        # rows with infinite mu are excluded
        dd = dd.query("-inf<mu<inf")
        c0 = dd.c.min()
        c1 = dd.c.max()
        f0 = dd.query("c==@c0").f.min()
        f1 = dd.query("c==@c1").f.min()
        return dd.f - (f0 * (1 - dd.c) + f1 * dd.c)

    fex = pdf.groupby("T", group_keys=False).apply(sub, include_groups=False)
    if len(Ts) > 1:
        pdf["f_excess"] = fex
    else:
        # thank you pandas, this saved me -10min of my life.
        # NOTE(review): with a single T the apply result apparently comes back transposed — confirm against pandas
        pdf["f_excess"] = fex.T
    if not keep_unstable:
        pdf = pdf.query("stable")
    return pdf
385
+
386
+
387
+ from sklearn.cluster import AgglomerativeClustering
388
+
389
+
390
def reduce(dd):
    """
    Collapse a group of points into a single record ordered by concentration.

    Args:
        dd (dataframe): points with the columns "c" and "phase"

    Returns:
        series with "transition" (phase names joined by "-"), "c" (list of concentrations) and "phase" (list of
        phase names), all in order of increasing concentration
    """
    ordered = dd.sort_values("c")
    phase_names = ordered.phase.tolist()
    summary = {
        "transition": "-".join(phase_names),
        "c": ordered.c.tolist(),
        "phase": phase_names,
    }
    return pd.Series(summary)
399
+
400
+
401
def cluster(dd, eps=0.01, use_mu=True):
    """
    Assign the points of one transition to spatially connected segments.

    Args:
        dd (dataframe): points with the columns "T", "c" and "mu"
        eps: not used by the current implementation
        use_mu (bool): cluster in (T, c, mu) instead of (T, c); points at mu = +-inf are then put into segments
            of their own

    Returns:
        series of segment ids with the same index as `dd`
    """
    temperature = dd["T"]
    t_low, t_high = temperature.min(), temperature.max()
    # Guard against isothermal segments
    if t_low != t_high:
        temperature = (temperature - t_low) / (t_high - t_low)

    ids = pd.Series(np.zeros_like(dd.index), index=dd.index)
    # picking eps is a pain and HDBSCAN not available in my env yet
    model = AgglomerativeClustering(
        n_clusters=None,
        # FIXME: hand optimized value; smaller values tend to partition the
        # same transition too often
        distance_threshold=0.5,
        linkage="single",
    )

    if not use_mu:
        ids.loc[:] = model.fit_predict(np.transpose([temperature, dd.c]))
        return ids

    # on the left and right side of the phase diagram refining adds points with
    # mu +- inf, which chokes the cluster methods, but we know they should be
    # their own segments, so special case them below
    finite = np.isfinite(dd.mu)
    if finite.sum() >= 2:
        features = np.transpose([temperature.loc[finite], dd.c.loc[finite], dd.mu.loc[finite]])
        ids.loc[finite] = model.fit_predict(features)
    highest = ids.max()
    ids.loc[dd.mu == +np.inf] = highest + 1
    ids.loc[dd.mu == -np.inf] = highest + 2
    return ids
429
+
430
+
431
def get_transitions(df):
    """
    Identify "continuous" two-phase transition lines in mu/T space, i.e. transitions between the same two phases
    and along which mu/T are continuous.

    Useful for plotting, but potentially also to augment the existing refining routines and acquire additional
    free energies from calphy/etc. to improve the diagram.

    Args:
        df (dataframe): phase diagram with a boolean "border" column and the columns "mu", "T", "c" and "phase"

    Returns:
        dataframe of the border points with the added columns "transition", "transition_unit" and
        "border_segment"
    """
    border_points = df.query("border")

    # go from a table of mu/c/T points that are on the phase boundaries to a table where the two points that are at
    # the same mu/T are grouped together; this yields the 'transition' column and also handles border points at
    # mu=+-inf, where there is only one point per group
    grouped = border_points.groupby(["mu", "T"])[["c", "phase"]].apply(reduce)
    # immediately explode again to go back to our familiar representation, but now with the 'transition' column
    tdf = grouped.reset_index()
    tdf = tdf.explode(["c", "phase"])
    tdf = tdf.infer_objects().reset_index(drop=True)

    # cluster points that are assigned as one transition, because the same transition can appear multiple times in
    # a "disconnected" manner in a phase diagram, e.g. a solid solution in contact with the melt interrupted by a
    # higher melting intermetallic
    per_transition = tdf.groupby("transition", group_keys=False)
    tdf["transition_unit"] = per_transition.apply(cluster, include_groups=False)

    def segment_label(row):
        return "_".join(map(str, row.tolist()))

    tdf["border_segment"] = tdf[["transition", "transition_unit"]].apply(segment_label, axis="columns")

    return tdf