diffpy.utils 3.6.1rc1__py3-none-any.whl → 3.7.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,15 +13,205 @@
13
13
  #
14
14
  ##############################################################################
15
15
 
16
- import os
16
+ from pathlib import Path
17
17
 
18
18
  import numpy
19
19
 
20
20
  from diffpy.utils import validators
21
+ from diffpy.utils._deprecator import build_deprecation_message, deprecated
21
22
 
23
+ base = "diffpy.utils.parsers.loaddata"
24
+ removal_version = "4.0.0"
22
25
 
26
+ loaddata_deprecation_msg = build_deprecation_message(
27
+ base,
28
+ "loadData",
29
+ "load_data",
30
+ removal_version,
31
+ new_base="diffpy.utils.parsers",
32
+ )
33
+
34
+
35
+ @deprecated(loaddata_deprecation_msg)
23
36
  def loadData(
24
37
  filename, minrows=10, headers=False, hdel="=", hignore=None, **kwargs
38
+ ):
39
+ """This function has been deprecated and will be removed in version
40
+ 4.0.0.
41
+
42
+ Please use diffpy.utils.parsers.load_data instead.
43
+ """
44
+ return load_data(filename, minrows, headers, hdel, hignore, **kwargs)
45
+
46
+
47
+ class TextDataLoader(object):
48
+ """Smart loading of a text data with possibly multiple datasets.
49
+
50
+ Parameters
51
+ ----------
52
+ minrows: int
53
+ Minimum number of rows in the first data block. (Default 10.)
54
+ usecols: tuple
55
+ Which columns in our dataset to use. Ignores all other columns. If
56
+ None (default), use all columns.
57
+ skiprows
58
+ Rows in dataset to skip. (Currently not functional.)
59
+ """
60
+
61
+ def __init__(self, minrows=10, usecols=None, skiprows=None):
62
+ if minrows is not None:
63
+ self.minrows = minrows
64
+ if usecols is not None:
65
+ self.usecols = tuple(usecols)
66
+ # FIXME: implement usage in _findDataBlocks
67
+ if skiprows is not None:
68
+ self.skiprows = skiprows
69
+ # data items
70
+ self._reset()
71
+ return
72
+
73
+ def _reset(self):
74
+ self.filename = ""
75
+ self.headers = []
76
+ self.datasets = []
77
+ self._resetvars()
78
+ return
79
+
80
+ def _resetvars(self):
81
+ self._filename = ""
82
+ self._lines = None
83
+ self._splitlines = None
84
+ self._words = None
85
+ self._linerecs = None
86
+ self._wordrecs = None
87
+ return
88
+
89
+ def read(self, filename):
90
+ """Open a file and run readfp.
91
+
92
+ Use if file is not already open for read byte.
93
+ """
94
+ with open(filename, "rb") as fp:
95
+ self.readfp(fp)
96
+ return
97
+
98
+ def readfp(self, fp, append=False):
99
+ """Get file details.
100
+
101
+ File details include:
102
+ * File name.
103
+ * All data blocks findable by load_data.
104
+ * Headers (if present) for each data block. (Generally the headers
105
+ contain column name information).
106
+ """
107
+ self._reset()
108
+ # try to read lines from fp first
109
+ self._lines = fp.readlines()
110
+ # and if good, assign filename
111
+ self.filename = getattr(fp, "name", "")
112
+ self._words = "".join(self._lines).split()
113
+ self._splitlines = [line.split() for line in self._lines]
114
+ self._findDataBlocks()
115
+ return
116
+
117
+ def _findDataBlocks(self):
118
+ mincols = 1
119
+ if self.usecols is not None and len(self.usecols):
120
+ mincols = max(mincols, max(self.usecols) + 1)
121
+ mincols = max(mincols, abs(min(self.usecols)))
122
+ nlines = len(self._lines)
123
+ nwords = len(self._words)
124
+ # idx - line index, nw0, nw1 - index of the first and last word,
125
+ # nf - number of words, ok - has data
126
+ self._linerecs = numpy.recarray(
127
+ (nlines,),
128
+ dtype=[
129
+ ("idx", int),
130
+ ("nw0", int),
131
+ ("nw1", int),
132
+ ("nf", int),
133
+ ("ok", bool),
134
+ ],
135
+ )
136
+ lr = self._linerecs
137
+ lr.idx = numpy.arange(nlines)
138
+ lr.nf = [len(sl) for sl in self._splitlines]
139
+ lr.nw1 = lr.nf.cumsum()
140
+ lr.nw0 = lr.nw1 - lr.nf
141
+ lr.ok = True
142
+ # word records
143
+ lw = self._wordrecs = numpy.recarray(
144
+ (nwords,),
145
+ dtype=[
146
+ ("idx", int),
147
+ ("line", int),
148
+ ("col", int),
149
+ ("ok", bool),
150
+ ("value", float),
151
+ ],
152
+ )
153
+ lw.idx = numpy.arange(nwords)
154
+ n1 = numpy.zeros(nwords, dtype=bool)
155
+ n1[lr.nw1[:-1]] = True
156
+ lw.line = n1.cumsum()
157
+ lw.col = lw.idx - lr.nw0[lw.line]
158
+ lw.ok = True
159
+ values = nwords * [0.0]
160
+ for i, w in enumerate(self._words):
161
+ try:
162
+ values[i] = float(w)
163
+ except ValueError:
164
+ lw.ok[i] = False
165
+ # prune lines that have a non-float values:
166
+ lw.values = values
167
+ if self.usecols is None:
168
+ badlines = lw.line[~lw.ok]
169
+ lr.ok[badlines] = False
170
+ else:
171
+ for col in self.usecols:
172
+ badlines = lw.line[(lw.col == col) & ~lw.ok]
173
+ lr.ok[badlines] = False
174
+ lr1 = lr[lr.nf >= mincols]
175
+ okb = numpy.r_[lr1.ok[:1], lr1.ok[1:] & ~lr1.ok[:-1], False]
176
+ oke = numpy.r_[False, ~lr1.ok[1:] & lr1.ok[:-1], lr1.ok[-1:]]
177
+ blockb = numpy.r_[True, lr1.nf[1:] != lr1.nf[:-1], False]
178
+ blocke = numpy.r_[False, blockb[1:-1], True]
179
+ beg = numpy.nonzero(okb | blockb)[0]
180
+ end = numpy.nonzero(oke | blocke)[0]
181
+ rowcounts = end - beg
182
+ assert not numpy.any(rowcounts < 0)
183
+ goodrows = rowcounts >= self.minrows
184
+ begend = numpy.transpose([beg, end - 1])[goodrows]
185
+ hbeg = 0
186
+ for dbeg, dend in begend:
187
+ bb1 = lr1[dbeg]
188
+ ee1 = lr1[dend]
189
+ hend = bb1.idx
190
+ header = "".join(self._lines[hbeg:hend])
191
+ hbeg = ee1.idx + 1
192
+ if self.usecols is None:
193
+ data = numpy.reshape(lw.value[bb1.nw0 : ee1.nw1], (-1, bb1.nf))
194
+ else:
195
+ tdata = numpy.empty(
196
+ (len(self.usecols), dend - dbeg), dtype=float
197
+ )
198
+ for j, trow in zip(self.usecols, tdata):
199
+ j %= bb1.nf
200
+ trow[:] = lw.value[bb1.nw0 + j : ee1.nw1 : bb1.nf]
201
+ data = tdata.transpose()
202
+ self.headers.append(header)
203
+ self.datasets.append(data)
204
+ # finish reading to a last header and empty dataset
205
+ if hbeg < len(self._lines):
206
+ header = "".join(self._lines[hbeg:])
207
+ data = numpy.empty(0, dtype=float)
208
+ self.headers.append(header)
209
+ self.datasets.append(data)
210
+ return
211
+
212
+
213
+ def load_data(
214
+ filename, minrows=10, headers=False, hdel="=", hignore=None, **kwargs
25
215
  ):
26
216
  """Find and load data from a text file.
27
217
 
@@ -31,7 +221,7 @@ def loadData(
31
221
 
32
222
  Parameters
33
223
  ----------
34
- filename
224
+ filename: Path or string
35
225
  Name of the file we want to load data from.
36
226
  minrows: int
37
227
  Minimum number of rows in the first data block. All rows must have
@@ -66,8 +256,8 @@ def loadData(
66
256
  comma-separated data blocks, set delimiter to ','.
67
257
  unpack: bool
68
258
  Return data as a sequence of columns that allows tuple unpacking such
69
- as x, y = loadData(FILENAME, unpack=True). Note transposing the
70
- loaded array as loadData(FILENAME).T has the same effect.
259
+ as x, y = load_data(FILENAME, unpack=True). Note transposing the
260
+ loaded array as load_data(FILENAME).T has the same effect.
71
261
  usecols:
72
262
  Zero-based index of columns to be loaded, by default use all detected
73
263
  columns. The reading skips data blocks that do not have the usecols-
@@ -115,10 +305,11 @@ def loadData(
115
305
  return nc, nv
116
306
 
117
307
  # Check if file exists before trying to open
118
- if not os.path.exists(filename):
308
+ filename = Path(filename)
309
+ if not filename.is_file():
119
310
  raise IOError(
120
311
  (
121
- f"File {filename} cannot be found. "
312
+ f"File {str(filename)} cannot be found. "
122
313
  "Please rerun the program specifying a valid filename."
123
314
  )
124
315
  )
@@ -196,169 +387,3 @@ def loadData(
196
387
  kwargs.setdefault("usecols", list(range(ncvblock[0])))
197
388
  data_block = loadtxt(fid, **kwargs)
198
389
  return data_block
199
-
200
-
201
- class TextDataLoader(object):
202
- """Smart loading of a text data with possibly multiple datasets.
203
-
204
- Parameters
205
- ----------
206
- minrows: int
207
- Minimum number of rows in the first data block. (Default 10.)
208
- usecols: tuple
209
- Which columns in our dataset to use. Ignores all other columns. If
210
- None (default), use all columns.
211
- skiprows
212
- Rows in dataset to skip. (Currently not functional.)
213
- """
214
-
215
- def __init__(self, minrows=10, usecols=None, skiprows=None):
216
- if minrows is not None:
217
- self.minrows = minrows
218
- if usecols is not None:
219
- self.usecols = tuple(usecols)
220
- # FIXME: implement usage in _findDataBlocks
221
- if skiprows is not None:
222
- self.skiprows = skiprows
223
- # data items
224
- self._reset()
225
- return
226
-
227
- def _reset(self):
228
- self.filename = ""
229
- self.headers = []
230
- self.datasets = []
231
- self._resetvars()
232
- return
233
-
234
- def _resetvars(self):
235
- self._filename = ""
236
- self._lines = None
237
- self._splitlines = None
238
- self._words = None
239
- self._linerecs = None
240
- self._wordrecs = None
241
- return
242
-
243
- def read(self, filename):
244
- """Open a file and run readfp.
245
-
246
- Use if file is not already open for read byte.
247
- """
248
- with open(filename, "rb") as fp:
249
- self.readfp(fp)
250
- return
251
-
252
- def readfp(self, fp, append=False):
253
- """Get file details.
254
-
255
- File details include:
256
- * File name.
257
- * All data blocks findable by loadData.
258
- * Headers (if present) for each data block. (Generally the headers
259
- contain column name information).
260
- """
261
- self._reset()
262
- # try to read lines from fp first
263
- self._lines = fp.readlines()
264
- # and if good, assign filename
265
- self.filename = getattr(fp, "name", "")
266
- self._words = "".join(self._lines).split()
267
- self._splitlines = [line.split() for line in self._lines]
268
- self._findDataBlocks()
269
- return
270
-
271
- def _findDataBlocks(self):
272
- mincols = 1
273
- if self.usecols is not None and len(self.usecols):
274
- mincols = max(mincols, max(self.usecols) + 1)
275
- mincols = max(mincols, abs(min(self.usecols)))
276
- nlines = len(self._lines)
277
- nwords = len(self._words)
278
- # idx - line index, nw0, nw1 - index of the first and last word,
279
- # nf - number of words, ok - has data
280
- self._linerecs = numpy.recarray(
281
- (nlines,),
282
- dtype=[
283
- ("idx", int),
284
- ("nw0", int),
285
- ("nw1", int),
286
- ("nf", int),
287
- ("ok", bool),
288
- ],
289
- )
290
- lr = self._linerecs
291
- lr.idx = numpy.arange(nlines)
292
- lr.nf = [len(sl) for sl in self._splitlines]
293
- lr.nw1 = lr.nf.cumsum()
294
- lr.nw0 = lr.nw1 - lr.nf
295
- lr.ok = True
296
- # word records
297
- lw = self._wordrecs = numpy.recarray(
298
- (nwords,),
299
- dtype=[
300
- ("idx", int),
301
- ("line", int),
302
- ("col", int),
303
- ("ok", bool),
304
- ("value", float),
305
- ],
306
- )
307
- lw.idx = numpy.arange(nwords)
308
- n1 = numpy.zeros(nwords, dtype=bool)
309
- n1[lr.nw1[:-1]] = True
310
- lw.line = n1.cumsum()
311
- lw.col = lw.idx - lr.nw0[lw.line]
312
- lw.ok = True
313
- values = nwords * [0.0]
314
- for i, w in enumerate(self._words):
315
- try:
316
- values[i] = float(w)
317
- except ValueError:
318
- lw.ok[i] = False
319
- # prune lines that have a non-float values:
320
- lw.values = values
321
- if self.usecols is None:
322
- badlines = lw.line[~lw.ok]
323
- lr.ok[badlines] = False
324
- else:
325
- for col in self.usecols:
326
- badlines = lw.line[(lw.col == col) & ~lw.ok]
327
- lr.ok[badlines] = False
328
- lr1 = lr[lr.nf >= mincols]
329
- okb = numpy.r_[lr1.ok[:1], lr1.ok[1:] & ~lr1.ok[:-1], False]
330
- oke = numpy.r_[False, ~lr1.ok[1:] & lr1.ok[:-1], lr1.ok[-1:]]
331
- blockb = numpy.r_[True, lr1.nf[1:] != lr1.nf[:-1], False]
332
- blocke = numpy.r_[False, blockb[1:-1], True]
333
- beg = numpy.nonzero(okb | blockb)[0]
334
- end = numpy.nonzero(oke | blocke)[0]
335
- rowcounts = end - beg
336
- assert not numpy.any(rowcounts < 0)
337
- goodrows = rowcounts >= self.minrows
338
- begend = numpy.transpose([beg, end - 1])[goodrows]
339
- hbeg = 0
340
- for dbeg, dend in begend:
341
- bb1 = lr1[dbeg]
342
- ee1 = lr1[dend]
343
- hend = bb1.idx
344
- header = "".join(self._lines[hbeg:hend])
345
- hbeg = ee1.idx + 1
346
- if self.usecols is None:
347
- data = numpy.reshape(lw.value[bb1.nw0 : ee1.nw1], (-1, bb1.nf))
348
- else:
349
- tdata = numpy.empty(
350
- (len(self.usecols), dend - dbeg), dtype=float
351
- )
352
- for j, trow in zip(self.usecols, tdata):
353
- j %= bb1.nf
354
- trow[:] = lw.value[bb1.nw0 + j : ee1.nw1 : bb1.nf]
355
- data = tdata.transpose()
356
- self.headers.append(header)
357
- self.datasets.append(data)
358
- # finish reading to a last header and empty dataset
359
- if hbeg < len(self._lines):
360
- header = "".join(self._lines[hbeg:])
361
- data = numpy.empty(0, dtype=float)
362
- self.headers.append(header)
363
- self.datasets.append(data)
364
- return
@@ -33,10 +33,11 @@ def serialize_data(
33
33
  show_path=True,
34
34
  serial_file=None,
35
35
  ):
36
- """Serialize file data into a dictionary. Can also save dictionary into a
37
- serial language file. Dictionary is formatted as {filename: data}.
36
+ """Serialize file data into a dictionary. Can also save dictionary
37
+ into a serial language file. Dictionary is formatted as {filename:
38
+ data}.
38
39
 
39
- Requires hdata and data_table (can be generated by loadData).
40
+ Requires hdata and data_table (can be generated by load_data).
40
41
 
41
42
  Parameters
42
43
  ----------
diffpy/utils/resampler.py CHANGED
@@ -30,15 +30,15 @@ def wsinterp(x, xp, fp, left=None, right=None):
30
30
 
31
31
  Parameters
32
32
  ----------
33
- x: ndarray
33
+ x: ``ndarray``
34
34
  The x values at which interpolation is computed.
35
- xp: ndarray
35
+ xp: ``ndarray``
36
36
  The array of known x values.
37
- fp: ndarray
37
+ fp: ``ndarray``
38
38
  The array of y values associated with xp.
39
39
  left: float
40
40
  If given, set fp for x < xp[0] to left. Otherwise, if left is None
41
- (default) or not given, set fp for x < xp[0] to fp evaluated at xp[-1].
41
+ (default) or not given, set fp for x < xp[0] to fp evaluated at xp[0].
42
42
  right: float
43
43
  If given, set fp for x > xp[-1] to right. Otherwise, if right is None
44
44
  (default) or not given, set fp for x > xp[-1] to fp evaluated at
@@ -46,7 +46,7 @@ def wsinterp(x, xp, fp, left=None, right=None):
46
46
 
47
47
  Returns
48
48
  -------
49
- ndarray or float
49
+ ``ndarray`` or float
50
50
  The interpolated values at points x. Returns a single float if x is a
51
51
  scalar, otherwise returns a numpy.ndarray.
52
52
  """
@@ -80,8 +80,8 @@ def wsinterp(x, xp, fp, left=None, right=None):
80
80
 
81
81
 
82
82
  def nsinterp(xp, fp, qmin=0, qmax=25, left=None, right=None):
83
- """One-dimensional Whittaker-Shannon interpolation onto the Nyquist-Shannon
84
- grid.
83
+ """One-dimensional Whittaker-Shannon interpolation onto the Nyquist-
84
+ Shannon grid.
85
85
 
86
86
  Takes a band-limited function fp and original grid xp and resamples fp on
87
87
  the NS grid. Uses the minimum number of points N required by the Nyquist
@@ -91,9 +91,9 @@ def nsinterp(xp, fp, qmin=0, qmax=25, left=None, right=None):
91
91
 
92
92
  Parameters
93
93
  ----------
94
- xp: ndarray
94
+ xp: ``ndarray``
95
95
  The array of known x values.
96
- fp: ndarray
96
+ fp: ``ndarray``
97
97
  The array of y values associated with xp.
98
98
  qmin: float
99
99
  The lower band limit in the frequency domain.
@@ -102,7 +102,7 @@ def nsinterp(xp, fp, qmin=0, qmax=25, left=None, right=None):
102
102
 
103
103
  Returns
104
104
  -------
105
- x: ndarray
105
+ x: ``ndarray``
106
106
  The Nyquist-Shannon grid computed for the given qmin and qmax.
107
107
  fp_at_x: ndarray
108
108
  The interpolated values at points x. Returns a single float if x is a
@@ -139,7 +139,7 @@ def resample(r, s, dr):
139
139
 
140
140
  Returns
141
141
  -------
142
- Returns resampled (r, s).
142
+ Returns resampled ``(r, s)``.
143
143
  """
144
144
 
145
145
  warnings.warn(