pdfanalysis 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ """
2
+ PDFanalysis package for automated PDF structure analysis.
3
+
4
+ This package provides tools for:
5
+ - PDF extraction from experimental data
6
+ - Structure generation (nanoparticles)
7
+ - Structure customization
8
+ - PDF refinement
9
+ - Structure screening
10
+ - Report generation
11
+ """
12
+
13
+ from .pdf_extractor import PDFExtractor
14
+ from .structure_generator import StructureGenerator
15
+ from .structure_custom import StructureCustom
16
+ from .structure_report_generator import StructureReportGenerator
17
+ from .pdf_refinement import PDFRefinement
18
+ from .pdf_refinement_fast import PDFRefinementFast
19
+ from .structure_screener import StructureScreener
20
+ from .pdfanalysis import perform_automatic_pdf_analysis
21
+
22
# Names re-exported as the package's public API (``from pdfanalysis import *``).
__all__ = [
    'PDFExtractor',
    'StructureGenerator',
    'StructureCustom',
    'StructureReportGenerator',
    'PDFRefinement',
    'PDFRefinementFast',
    'StructureScreener',
    'perform_automatic_pdf_analysis',
]

# Must match the version of the published distribution (0.1.0); the previous
# value '1.0.0' disagreed with the released wheel.
__version__ = '0.1.0'
@@ -0,0 +1,106 @@
1
+ """
2
+ PDF Extractor module for extracting PDFs from experimental data.
3
+ """
4
+ import subprocess
5
+ import os
6
+ import numpy as np
7
+ from matplotlib import pyplot as plt
8
+
9
+
10
class PDFExtractor:
    """Extract PDFs from experimental data files by driving ``pdfgetx3``.

    The extractor writes a pdfgetx3 configuration file into an
    ``extracted_PDF`` directory located next to the first data file, runs
    ``pdfgetx3`` through ``conda run -n py36`` and plots the resulting G(r)
    curves.

    Parameters
    ----------
    datafilelist : list of str
        Paths of the data files from which PDFs should be extracted. The
        directory of the first entry is used as data and output location.
    composition
        Written (as ``str(composition)``) to the ``composition`` entry.
    qmin, qmax, qmaxinst : float
        Written to the config entries of the same name.
    wavelength : float
        Written to the ``wavelength`` entry.
    dataformat : str
        Written to the ``dataformat`` entry.
    rmin, rmax, rstep : float
        Written to the config entries of the same name.
    bgscale : float
        Written to the ``bgscale`` entry; only emitted when ``emptyfile`` is
        given.
    rpoly : float
        Written to the ``rpoly`` entry.
    emptyfile : str or None
        Optional background file. When given, its directory is appended to
        ``datapath`` and its base name becomes ``backgroundfile``.
    """

    def __init__(self,
                 datafilelist,
                 composition,
                 qmin,
                 qmax,
                 qmaxinst,
                 wavelength=0.7107,
                 dataformat='QA',
                 rmin=0,
                 rmax=50,
                 rstep=0.01,
                 bgscale=1,
                 rpoly=0.9,
                 emptyfile=None):
        self.datafilelist = datafilelist
        self.emptyfile = emptyfile
        self.composition = composition
        self.qmin = qmin
        self.qmax = qmax
        self.qmaxinst = qmaxinst
        self.wl = wavelength
        self.dataformat = dataformat
        self.rmin = rmin
        self.rmax = rmax
        self.rstep = rstep
        self.bgscale = bgscale
        self.rpoly = rpoly

    def writecfg(self):
        """Write ``pdfgetX3_GUI.cfg`` into the ``extracted_PDF`` directory.

        Sets ``self.datapath`` (directory of the first data file) and
        ``self.pdfpath`` (output directory, created if missing).

        Raises
        ------
        ValueError
            If ``datafilelist`` is empty.
        """
        if not self.datafilelist:
            raise ValueError('datafilelist must contain at least one data file')

        self.datapath = os.path.dirname(self.datafilelist[0])
        # A bare file name has an empty dirname; anchor it to the current
        # directory, otherwise the paths below would point at the
        # filesystem root ('/extracted_PDF', 'datapath = /').
        basedir = self.datapath or '.'
        self.pdfpath = basedir + '/extracted_PDF'
        os.makedirs(self.pdfpath, exist_ok=True)

        names = [os.path.basename(path) for path in self.datafilelist]
        lines = [
            '[DEFAULT] \n',
            'dataformat = %s \n' % self.dataformat,
            # Multi-valued entry: one file per line, continuation lines are
            # introduced by leading whitespace.
            'inputfile=' + ''.join(name + '\n' + '\t' for name in names[:-1]),
            '\t %s\n' % names[-1],
            'datapath = %s/\n' % basedir,
        ]
        if self.emptyfile is not None:
            lines += [
                # Continuation of ``datapath``: directory of the background file.
                '\t %s\n' % os.path.dirname(self.emptyfile),
                'bgscale=%f \n' % self.bgscale,
                'backgroundfile=%s\n' % os.path.basename(self.emptyfile),
            ]
        lines += [
            'composition= %s \n' % str(self.composition),
            'qmin=%f \n' % self.qmin,
            'qmax=%f \n' % self.qmax,
            'qmaxinst=%f \n' % self.qmaxinst,
            'wavelength=%f \n' % self.wl,
            'mode = xray \n',
            'rpoly=%f \n' % self.rpoly,
            'rmin=%f \n' % self.rmin,
            'rstep=%f \n' % self.rstep,
            'rmax=%f \n' % self.rmax,
            'output=%s/@b.@o \n' % self.pdfpath,
            'outputtype = sq,gr \n',
            'force = yes \n',
        ]

        # The context manager closes the file even if a write fails.
        with open(self.pdfpath + '/pdfgetX3_GUI.cfg', 'w') as cfg:
            cfg.writelines(lines)
        return

    def extractpdf(self):
        """Run pdfgetx3 on the data files and plot the extracted G(r) curves.

        Returns
        -------
        str
            Path of the directory containing the extracted files.

        Raises
        ------
        RuntimeError
            If the pdfgetx3 command exits with a non-zero status.
        """
        self.writecfg()
        cfgfile = self.pdfpath + '/pdfgetX3_GUI.cfg'
        # Quote the config path so directories containing spaces survive
        # shell word splitting.
        command = 'conda run -n py36 pdfgetx3 -c "%s"' % cfgfile

        # Use subprocess to execute the command
        completed = subprocess.run(command, shell=True)
        if completed.returncode != 0:
            # Without this check a failed run would go unnoticed and stale
            # .gr files from an earlier run could be plotted instead.
            raise RuntimeError(
                'pdfgetx3 failed with exit code %d' % completed.returncode)
        print(f'PDF file(s) extracted in {self.pdfpath}')

        # Plot pdf
        fig, ax = plt.subplots()
        for file in self.datafilelist:
            # Strip only the extension so names such as 'a.b.dat' map to
            # 'a.b.gr'. NOTE(review): assumes pdfgetx3's '@b' token removes
            # just the last extension — confirm against the pdfgetx3 docs.
            rootname = os.path.splitext(os.path.basename(file))[0]
            pdffile = self.pdfpath + f'/{rootname}.gr'
            # NOTE(review): assumes a 27-line header before the data block.
            r, g = np.loadtxt(pdffile, skiprows=27, unpack=True)
            ax.plot(r, g, label=rootname)
        ax.set_xlabel('r ($\\AA$)')
        ax.set_ylabel('G(r)')
        fig.legend()
        fig.tight_layout()

        return self.pdfpath
@@ -0,0 +1,268 @@
1
+ """
2
+ PDF Refinement module for full structure refinement using diffpy.srfit.
3
+ """
4
+ import os
5
+ import numpy as np
6
+ import matplotlib as mpl
7
+ import matplotlib.pyplot as plt
8
+ from pathlib import Path
9
+ from scipy.optimize import least_squares
10
+ from diffpy.srfit.fitbase import FitRecipe, FitContribution, Profile, FitResults
11
+ from diffpy.srfit.pdf import PDFParser, DebyePDFGenerator
12
+ from diffpy.structure import Structure
13
+
14
+
15
class PDFRefinement():
    """Refine a discrete (non-periodic) structure model against a PDF.

    The model PDF is computed with a ``DebyePDFGenerator`` and fitted with
    ``scipy.optimize.least_squares``. Four variables are defined, each under
    the tag of the same name: ``scale_factor``, ``zoomscale`` (common value of
    the lattice parameters a, b and c), ``Uiso`` (shared by all atoms) and
    ``delta2``.
    """

    def __init__(self,
                 pdffile:str,
                 strufile:str,
                 qdamp:float=0.014,
                 qbroad:float=0.04,
                 refinement_tags:dict={'scale_factor': True, 'zoomscale': True, 'delta2': True, 'Uiso': True},
                 save_tag:bool=False,
                 RUN_PARALLEL:bool=True,
                 rmin=0.01,
                 rbins:int=1,
                 screening_tag:bool=False):
        """
        pdffile: path to the PDF file (must have the .gr extension)
        strufile: path to the structure file (must have the .xyz extension)
        qdamp: initial qdamp value of the generator (default=0.014)
        qbroad: initial qbroad value of the generator (default=0.04)
        refinement_tags: maps a variable tag ('scale_factor', 'zoomscale',
            'delta2', 'Uiso') to True when that variable should be refined
        save_tag: save refinement data (fit, results, figure) next to the
            structure file (default=False)
        RUN_PARALLEL: evaluate the generator with a multiprocessing pool
            (default=True); call close() to release the pool
        rmin: lower bound of the calculation range (default=0.01)
        rbins: int, can be adjusted to increase rstep (default=1)
        screening_tag: when True, suppress fit and result output
            (default=False)

        Raises ValueError when a file extension is wrong, when qmin/qmax are
        missing from the PDF file, or when it holds fewer than two points.
        """
        # Check file formats. Fail here with a clear message rather than
        # later with an AttributeError on the unset attribute.
        pdf_extension = os.path.basename(pdffile).split('.')[-1]
        if pdf_extension != 'gr':
            raise ValueError('PDF file should be a .gr file, extracted with pdfgetx3')
        stru_extension = os.path.basename(strufile).split('.')[-1]
        if stru_extension != 'xyz':
            raise ValueError('Structure files must adopt the xyz standard format')
        self.pdffile = pdffile
        self.strufile = strufile

        # Initialize attributes
        self.path = os.path.dirname(self.strufile)
        self.qdamp = qdamp
        self.qbroad = qbroad
        # Copy so the shared default dictionary is never aliased by instances.
        self.refinement_tags = dict(refinement_tags)
        self.save_tag = save_tag
        self.RUN_PARALLEL = RUN_PARALLEL
        self.rbins = rbins
        self.screening_tag = screening_tag
        self._pool = None

        # Read metadata from pdffile
        self.qmin, self.qmax = self._read_q_limits(self.pdffile)

        # Load data from the PDF file.
        # NOTE(review): assumes a fixed 29-line header before the data block;
        # confirm against the files produced by the extraction step.
        r = np.atleast_1d(np.loadtxt(self.pdffile, usecols=(0), skiprows=29))
        if len(r) < 2:
            raise ValueError('PDF file must contain at least two data points')
        self.rmin = rmin
        self.rmax = np.max(r)
        self.rstep = ((self.rmax - self.rmin) / (len(r) - 1)) * self.rbins

        # Create fit recipe
        self.recipe = self.make_recipe()

    @staticmethod
    def _read_q_limits(pdffile):
        """Return ``(qmin, qmax)`` read from the ``key = value`` header lines.

        Keys are compared exactly, so a ``qmaxinst`` entry is not mistaken
        for ``qmax``. Raises ValueError when either key is absent.
        """
        limits = {}
        with open(pdffile, 'r') as f:
            for line in f:
                key, sep, value = line.partition('=')
                key = key.strip()
                if sep and key in ('qmin', 'qmax'):
                    limits[key] = float(value.strip())
        missing = [key for key in ('qmin', 'qmax') if key not in limits]
        if missing:
            raise ValueError(
                'Missing %s in the header of %s' % (', '.join(missing), pdffile))
        return limits['qmin'], limits['qmax']

    def make_recipe(self):
        """Build and return the FitRecipe for this data/structure pair."""
        PDF_RMIN = self.rmin
        PDF_RMAX = self.rmax
        PDF_RSTEP = self.rstep
        QBROAD_I = self.qbroad
        QDAMP_I = self.qdamp
        QMIN = self.qmin
        QMAX = self.qmax
        ZOOMSCALE_I = 1
        UISO_I = 0.005
        stru1 = Structure(filename=self.strufile)

        profile = Profile()
        parser = PDFParser()
        parser.parseFile(self.pdffile)
        profile.loadParsedData(parser)
        profile.setCalculationRange(xmin=PDF_RMIN, xmax=PDF_RMAX, dx=PDF_RSTEP)

        # Create a Debye PDF Generator object for the discrete structure model.
        generator_cluster1 = DebyePDFGenerator("G1")
        generator_cluster1.setStructure(stru1, periodic=False)

        # Create a Fit Contribution object.
        contribution = FitContribution("cluster")
        contribution.addProfileGenerator(generator_cluster1)

        # If you have a multi-core computer (you probably do), run your refinement in parallel!
        self._pool = None
        if self.RUN_PARALLEL:
            try:
                import psutil
                import multiprocessing
            except ImportError:
                # Fall back to serial evaluation instead of failing on the
                # names that could not be imported.
                print("\nYou don't appear to have the necessary packages for parallelization")
            else:
                syst_cores = multiprocessing.cpu_count()
                cpu_percent = psutil.cpu_percent()
                avail_cores = np.floor((100 - cpu_percent) / (100.0 / syst_cores))
                ncpu = int(np.max([1, avail_cores]))
                # Keep a reference so the workers can be released by close().
                self._pool = multiprocessing.Pool(processes=ncpu)
                generator_cluster1.parallel(ncpu=ncpu, mapfunc=self._pool.map)

        contribution.setProfile(profile, xname="r")

        # Set an equation, based on your PDF generators.
        contribution.setEquation("s1*G1")

        # Create the Fit Recipe object that holds all the details of the fit.
        recipe = FitRecipe()
        recipe.addContribution(contribution)

        # Initialize the instrument parameters, Q_damp and Q_broad, and
        # assign Q_max and Q_min.
        generator_cluster1.qdamp.value = QDAMP_I
        generator_cluster1.qbroad.value = QBROAD_I
        generator_cluster1.setQmax(QMAX)
        generator_cluster1.setQmin(QMIN)

        # Add, initialize, and tag the scale factor in the Fit Recipe object.
        recipe.addVar(contribution.s1, float(1), tag="scale_factor")

        # Define a phase and lattice from the Debye PDF Generator
        # object and assign an isotropic lattice expansion factor tagged
        # "zoomscale" to the structure.
        phase_cluster1 = generator_cluster1.phase
        lattice1 = phase_cluster1.getLattice()
        recipe.newVar("zoomscale", ZOOMSCALE_I, tag="zoomscale")
        recipe.constrain(lattice1.a, 'zoomscale')
        recipe.constrain(lattice1.b, 'zoomscale')
        recipe.constrain(lattice1.c, 'zoomscale')

        # Constrain the isotropic Atomic Displacement Parameter (ADP) of
        # every atom to a single shared variable.
        atoms1 = phase_cluster1.getScatterers()
        recipe.newVar("Uiso", UISO_I, tag="Uiso")
        for atom in atoms1:
            recipe.constrain(atom.Uiso, "Uiso")
        recipe.restrain("Uiso", lb=0, ub=1, scaled=True, sig=0.00001)
        recipe.addVar(generator_cluster1.delta2, name="delta2", value=float(4), tag="delta2")
        recipe.restrain("delta2", lb=0, ub=12, scaled=True, sig=0.00001)
        return recipe

    def close(self):
        """Shut down the worker pool created for parallel evaluation, if any.

        Call this once refinement is finished; the recipe's generator still
        maps through the pool, so ``refine()`` must not be called afterwards.
        """
        pool = getattr(self, '_pool', None)
        if pool is not None:
            pool.terminate()
            pool.join()
            self._pool = None

    def get_filename(self, file):
        """Return the base name of ``file`` up to (excluding) its first dot."""
        return os.path.basename(file).split('.')[0]

    def refine(self):
        """Refine the variables enabled in ``refinement_tags``; return Rw.

        Results are printed unless ``screening_tag`` is set, and written to
        disk when ``save_tag`` is set.

        Raises
        ------
        ValueError
            If no entry of ``refinement_tags`` is enabled.
        """
        recipe = self.recipe
        # Amount of information to write to the terminal during fitting.
        recipe.fithooks[0].verbose = 0 if self.screening_tag else 3

        # Only the variables whose tag is enabled are freed. Freeing "all"
        # as well would override the user's selection.
        tags = [tag for tag, enabled in self.refinement_tags.items() if enabled]
        if not tags:
            raise ValueError('refinement_tags must enable at least one variable')

        recipe.fix("all")
        for tag in tags:
            recipe.free(tag)

        least_squares(recipe.residual, recipe.values, x_scale="jac")

        profile = recipe.cluster.profile
        res = FitResults(recipe)
        if not self.screening_tag:
            res.printResults()

        # Save refinement results
        if self.save_tag:
            self.save_fitresults(profile, res)
        return res.rw

    def save_fitresults(self, profile, res):
        """Write the fit profile, the results file and a comparison plot.

        Output goes to the ``fit``, ``res`` and ``fig`` directories located
        next to the structure file; they are created when missing.
        """
        basename = self.get_filename(self.pdffile) + '_' + self.get_filename(self.strufile)

        PWD = Path(self.path)
        # Make some folders to store our output files.
        resdir = PWD / "res"
        fitdir = PWD / "fit"
        figdir = PWD / "fig"
        for folder in (resdir, fitdir, figdir):
            # exist_ok avoids a race between the existence check and creation.
            folder.mkdir(parents=True, exist_ok=True)

        # save exp and calc pdf
        profile.savetxt(fitdir / f"{basename}.fit")

        # Write the fit results to a file.
        header = "".join([
            f"{basename}.\n",
            f"data file:{self.pdffile}\n",
            f"structure file:{self.strufile}\n",
            "Fitting parameters \n",
            f"rmin={self.rmin:f}\n",
            f"rmax={self.rmax:f}\n",
            f"rstep={self.rstep:f}\n",
            f"QBROAD={self.qbroad:f}\n",
            f"QDAMP={self.qdamp:f}\n",
            f"QMIN={self.qmin:f}\n",
            f"QMAX={self.qmax:f}\n",
        ])
        res.saveResults(resdir / f"{basename}.res", header=header)

        # Make plot
        plt.close('all')
        r = self.recipe.cluster.profile.x
        g = self.recipe.cluster.profile.y
        gcalc = self.recipe.cluster.profile.ycalc
        # Baseline for the difference curve, placed below the data.
        diff = g - gcalc
        diffzero = (min(g) - np.abs(max(diff))) * np.ones_like(g)
        # Offset the residual (difference) array vertically onto the baseline.
        diff = g - gcalc + diffzero
        # Reset the plot style to matplotlib's defaults.
        mpl.rcParams.update(mpl.rcParamsDefault)
        fig, ax1 = plt.subplots(1, 1)
        # Plot the difference offset line
        ax1.plot(r, diffzero, lw=1.0, ls="--", c="black")
        # Plot the measured data
        ax1.plot(r, g, ls="None", marker="o", ms=5, mew=0.2, mfc="None", label="G(r) Data")
        ax1.plot(r, diff, lw=1.2, label="G(r) diff")
        ax1.plot(r, gcalc, 'g', label='G(r) calc')
        ax1.set_xlabel(r"r ($\mathrm{\AA}$)")
        ax1.set_ylabel(r"G ($\mathrm{\AA}$$^{-2}$)")
        ax1.tick_params(axis="both", which="major", top=True, right=True)
        ax1.set_xlim(self.rmin, self.rmax)
        ax1.legend(ncol=2)
        # Set the title before tight_layout so the layout leaves room for it.
        ax1.set_title(basename + '\n' + f'Rw={res.rw:.4f}')
        fig.tight_layout()
        # Save plot, then release the figure so repeated calls do not
        # accumulate open figures.
        fig.savefig(figdir / f"{basename}.png", format="png")
        plt.close(fig)
@@ -0,0 +1,111 @@
1
+ """
2
+ PDF Refinement Fast module for rapid structure screening.
3
+ """
4
+ from scipy.optimize import least_squares
5
+ from diffpy.srfit.fitbase import FitRecipe, FitContribution, Profile, FitResults
6
+ from diffpy.srfit.pdf import PDFParser, DebyePDFGenerator
7
+ from diffpy.structure import Structure
8
+ import numpy as np
9
+
10
+
11
class PDFRefinementFast:
    """
    Fast PDF refinement class for STRUCTURE SCREENING.

    Offers the same ``refine()`` entry point as PDFRefinement but fits only
    the scale factor and the lattice "zoomscale", on a truncated r-range
    (``rmin`` .. ``rmax_fast``) and a grid coarsened by ``rbins``, with a
    capped number of function evaluations.
    """

    def __init__(self,
                 pdffile: str,
                 strufile: str,
                 qdamp: float = 0.014,
                 qbroad: float = 0.04,
                 rbins: int = 4,
                 rmin: float = 2.0,
                 rmax_fast: float = 15.0,
                 screening_tag: bool = True):
        """
        pdffile: path to the PDF file
        strufile: path to the structure file
        qdamp: qdamp value assigned to the generator (default=0.014)
        qbroad: qbroad value assigned to the generator (default=0.04)
        rbins: factor by which the data step is multiplied (default=4)
        rmin: lower bound of the calculation range (default=2.0)
        rmax_fast: upper bound of the calculation range, limited to the
            largest r present in the data (default=15.0)
        screening_tag: stored on the instance (default=True)
        """
        self.pdffile = pdffile
        self.strufile = strufile
        self.qdamp = qdamp
        self.qbroad = qbroad
        self.rbins = rbins
        self.rmin = rmin
        self.rmax_fast = rmax_fast
        self.screening_tag = screening_tag

        self.recipe = self._make_fast_recipe()

    # ------------------------------------------------------------

    @staticmethod
    def _coarse_step(r, rbins):
        """Return the mean spacing of grid ``r`` multiplied by ``rbins``.

        Raises ValueError when ``r`` has fewer than two points or when
        ``rbins`` is smaller than 1.
        """
        r = np.asarray(r, dtype=float)
        if r.size < 2:
            raise ValueError('PDF data must contain at least two points')
        if rbins < 1:
            raise ValueError('rbins must be at least 1')
        return (np.max(r) - np.min(r)) / (r.size - 1) * rbins

    def _make_fast_recipe(self):
        """Build the reduced FitRecipe (scale + zoomscale only)."""
        # --- Structure
        stru = Structure(filename=self.strufile)

        # --- PDF data
        profile = Profile()
        parser = PDFParser()
        parser.parseFile(self.pdffile)
        profile.loadParsedData(parser)

        r = profile.x
        rmax_data = np.max(r)
        rmax = min(self.rmax_fast, rmax_data)
        if rmax <= self.rmin:
            raise ValueError('rmin must be smaller than the upper fit limit')

        # Coarsen grid (rbins). The step is derived from the spacing of the
        # data itself: dividing the truncated fit range by the point count of
        # the full data set would not coarsen the grid when the data extend
        # beyond rmax_fast.
        rstep = self._coarse_step(r, self.rbins)

        profile.setCalculationRange(
            xmin=self.rmin,
            xmax=rmax,
            dx=rstep
        )

        # --- Debye generator
        gen = DebyePDFGenerator("G")
        gen.setStructure(stru, periodic=False)
        gen.qdamp.value = self.qdamp
        gen.qbroad.value = self.qbroad

        # --- Contribution
        contrib = FitContribution("cluster")
        contrib.addProfileGenerator(gen)
        contrib.setProfile(profile, xname="r")
        contrib.setEquation("s*G")

        # --- Recipe
        recipe = FitRecipe()
        recipe.addContribution(contrib)

        # --- Minimal parameter set
        recipe.addVar(contrib.s, 1.0, tag="scale")

        phase = gen.phase
        lattice = phase.getLattice()

        recipe.newVar("zoomscale", 1.0, tag="zoomscale")
        recipe.constrain(lattice.a, "zoomscale")
        recipe.constrain(lattice.b, "zoomscale")
        recipe.constrain(lattice.c, "zoomscale")

        # Fix everything except scale + zoomscale
        recipe.fix("all")
        recipe.free("scale")
        recipe.free("zoomscale")

        # Silence output
        recipe.fithooks[0].verbose = 0

        return recipe

    # ------------------------------------------------------------

    def refine(self):
        """Run the capped least-squares fit and return the resulting Rw."""
        least_squares(
            self.recipe.residual,
            self.recipe.values,
            x_scale="jac",
            max_nfev=12
        )

        res = FitResults(self.recipe)
        return res.rw