ipyvasp 0.9.84__py2.py3-none-any.whl → 0.9.86__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ipyvasp/__init__.py CHANGED
@@ -18,6 +18,7 @@ __all__ = [ # For documentation purpose
18
18
  "webshow",
19
19
  "load_results",
20
20
  "parse_text",
21
+ "get_lines",
21
22
  "summarize",
22
23
  ]
23
24
 
ipyvasp/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.9.84"
1
+ __version__ = "0.9.86"
ipyvasp/core/parser.py CHANGED
@@ -1,4 +1,4 @@
1
- __all__ = ["Vasprun", "Vaspout", "minify_vasprun", "xml2dict"]
1
+ __all__ = ["Vasprun", "Vaspout", "minify_vasprun", "xml2dict","read"]
2
2
 
3
3
  import re
4
4
  from io import StringIO
@@ -114,6 +114,35 @@ class Vaspout(DataSource):
114
114
  def __init__(self, path):
115
115
  raise NotImplementedError("Vaspout is not implemented yet.")
116
116
 
117
def read(file, start_match, stop_match=r'\n', nth_match=1, skip_last=False, apply=None):
    """Lazily yield the lines of `file` lying between a start and a stop pattern.

    The file is scanned line by line. Capturing starts at the `nth_match`-th
    line that matches `start_match` and ends at the first line, counted from
    that starting line onwards, that matches `stop_match`. With the default
    `stop_match` (a newline) only the starting line itself is produced.

    Parameters
    ----------
    file : path of the text file to read.
    start_match : regular expression marking the first captured line.
        It must not contain the '|' character.
    stop_match : regular expression marking the last captured line.
    nth_match : 1-based count of lines matching `start_match` to skip to
        before capturing begins.
    skip_last : if True, the line that matched `stop_match` is not yielded.
    apply : None or a callable used to transform each captured line;
        non-callable values are ignored.

    Yields
    ------
    Each captured line (including its line ending), or `apply(line)`.

    Raises
    ------
    ValueError : if `start_match` contains '|'. Because this is a generator,
        the error is raised when iteration starts, not when `read` is called.
    """
    if "|" in start_match:
        raise ValueError(
            "start_match should be a single match, so '|' character is not allowed."
        )

    # Compile once instead of looking the patterns up again for every line.
    start_re = re.compile(start_match, flags=re.DOTALL)
    stop_re = re.compile(stop_match, flags=re.DOTALL)
    transform = apply if callable(apply) else None

    capturing = False
    n_seen = 0  # number of lines that matched start_match so far
    with open(file, "r") as f:
        for line in f:  # a file object is already a lazy line iterator
            if not capturing and start_re.search(line):
                n_seen += 1
                capturing = n_seen == nth_match
            if not capturing:
                continue  # a stop match before the start must be ignored

            is_last = stop_re.search(line) is not None
            if is_last and skip_last:
                break
            yield transform(line) if transform else line
            if is_last:
                break  # stop reading the rest of the file
117
146
 
118
147
  class Vasprun(DataSource):
119
148
  "Reads vasprun.xml file lazily. It reads only the required data from the file when a plot or data access is requested."
@@ -129,36 +158,14 @@ class Vasprun(DataSource):
129
158
  skipk if isinstance(skipk, (int, np.integer)) else self.get_skipk()
130
159
  )
131
160
 
132
def read(self, start_match, stop_match=r'\n', nth_match=1, skip_last=False, apply=None):
    """Reads a part of the file between start_match and stop_match and returns a generator. It is lazy and fast.
    `start_match` and `stop_match` (default is end of same line) are regular expressions. `nth_match` is the occurrence of start_match at which reading starts.
    `skip_last` is used to determine whether to keep or skip last line.
    `apply` should be None or `func` to transform each captured line.
    """
    # Forward every argument by name to the module-level `read`; this avoids
    # snapshotting `locals()`, which would also forward any local variable
    # that is ever introduced above the call.
    return read(
        self.path,
        start_match,
        stop_match=stop_match,
        nth_match=nth_match,
        skip_last=skip_last,
        apply=apply,
    )
162
169
 
163
170
  def get_skipk(self):
164
171
  "Returns the number of k-points to skip in band structure plot in case of HSE calculation."
ipyvasp/misc.py CHANGED
@@ -70,10 +70,8 @@ class OUTCAR:
70
70
  def path(self):
71
71
  return self._path
72
72
 
73
@_sub_doc(vp.read)  # presumably reuses the docstring of `vp.read` — confirm against `_sub_doc`
def read(self, start_match, stop_match=r'\n', nth_match=1, skip_last=False, apply=None):
    # Delegate to the shared parser-level reader on this object's path.
    # Arguments are forwarded by name rather than through `locals()`, so a
    # local variable added later cannot leak into the call.
    return vp.read(
        self.path,
        start_match,
        stop_match=stop_match,
        nth_match=nth_match,
        skip_last=skip_last,
        apply=apply,
    )
79
77
 
ipyvasp/utils.py CHANGED
@@ -1,5 +1,6 @@
1
1
  __all__ = [
2
2
  "get_file_size",
3
+ "get_lines",
3
4
  "set_dir",
4
5
  "interpolate_data",
5
6
  "rolling_mean",
@@ -10,6 +11,7 @@ __all__ = [
10
11
 
11
12
  import re
12
13
  import os
14
+ import io
13
15
  from contextlib import contextmanager
14
16
  from pathlib import Path
15
17
  from inspect import signature, getdoc
@@ -26,13 +28,45 @@ import matplotlib.pyplot as plt
26
28
def get_file_size(path: str) -> str:
    """Return the size of the file at `path` as a human-readable string.

    The size is expressed with two decimals in the largest unit among
    B, KB, MB, GB and TB (1024-based) that keeps the number below 1024;
    anything larger is still reported in TB. An empty string is returned
    when `path` is not an existing regular file.
    """
    p = Path(path)
    if not p.is_file():
        return ""

    size = p.stat().st_size
    units = ("B", "KB", "MB", "GB", "TB")
    for unit in units:
        # The last unit is a catch-all so the function never falls through
        # and returns None for very large files.
        if size < 1024.0 or unit == units[-1]:
            return "%3.2f %s" % (size, unit)
        size /= 1024.0
    return ""  # unreachable; kept so every path visibly returns a str
38
+
39
def get_lines(f, indices):
    """Read lines by index from an opened, seekable file object `f`.

    Negative indices count from the end of the file. A single int returns
    one line (str, or bytes for a binary file); a list/tuple/range returns a
    list of lines in file order, each selected line appearing once. Indices
    beyond the file are ignored for sequences.
    This consumes a lot less memory than indexing `f.readlines()[index]`.

    >>> with open('some_file','r') as f:
    >>>     get_lines(f, -1) # last line
    >>>     get_lines(f, range(5)) # first 5 lines
    >>>     get_lines(f, range(-5,0)) # last 5 lines

    Raises
    ------
    TypeError : if `f` is not a file object or `indices` has a wrong type.
    IndexError : if a single int index is outside the file.
    """
    if not isinstance(f, io.IOBase):
        raise TypeError(f"f should be file-like object. got {type(f)}")

    return_line = isinstance(indices, int)
    if return_line:
        indices = [indices]

    if not isinstance(indices, (tuple, list, range)):
        raise TypeError(f"indices should int/list/tuple/range, got {type(indices)}")

    if not indices:  # nothing requested; also keeps min() below from failing
        return []

    f.seek(0)
    if min(indices) < 0:
        if not hasattr(f, '_nlines'):  # do this once, assuming file is not changed while reading
            f._nlines = sum(1 for _ in f)
            f.seek(0)
        indices = [i + f._nlines if i < 0 else i for i in indices]  # make all positive

    # A set gives O(1) membership per line; indices still negative after the
    # shift point before the start of the file and can never match.
    wanted = {i for i in indices if i >= 0}
    lines = []
    if wanted:
        last = max(wanted)
        for i, line in enumerate(f):
            if i in wanted:
                lines.append(line)
            if i >= last:
                break  # everything requested has been read

    if return_line:
        if not lines:
            raise IndexError("line index out of range")
        return lines[0]
    return lines
36
70
 
37
71
 
38
72
  def _sig_kwargs(from_func, skip_params=()):
ipyvasp/widgets.py CHANGED
@@ -40,7 +40,7 @@ import plotly.graph_objects as go
40
40
  from . import utils as gu
41
41
  from . import lattice as lat
42
42
  from .core import serializer, parser as vp, plot_toolkit as ptk
43
- from .utils import _sig_kwargs, _sub_doc
43
+ from .utils import _sig_kwargs, _sub_doc, get_file_size
44
44
  from ._enplots import _fmt_labels
45
45
 
46
46
 
@@ -68,7 +68,7 @@ def summarize(files, func, **kwargs):
68
68
  for name, path in files.items():
69
69
  output = func(path, **kwargs)
70
70
  if not isinstance(output, dict):
71
- raise TypeError("Function must return a dictionary.")
71
+ raise TypeError("Function must return a dictionary to create DataFrame.")
72
72
 
73
73
  if "FILE" in output:
74
74
  raise KeyError(
@@ -115,6 +115,8 @@ class Files:
115
115
  Use methods on return such as `summarize`, `with_name`, `filtered`, `interact` and others.
116
116
 
117
117
  >>> Files(root_1, glob_1,...).add(root_2, glob_2,...) # Fully flexible to chain
118
+
119
+ WARNING: Don't use write operations on paths in files in batch mode, it can cause unrecoverable data loss.
118
120
  """
119
121
  def __init__(self, path_or_files = '.', glob = '*', exclude = None,files_only = False, dirs_only=False):
120
122
  if isinstance(path_or_files, Files):
@@ -168,10 +170,6 @@ class Files:
168
170
  def __add__(self, other):
169
171
  raise NotImplementedError("Use self.add method instead!")
170
172
 
171
- def map(self,func):
172
- "Map files to a function!"
173
- return map(func, self._files)
174
-
175
173
  def with_name(self, name):
176
174
  "Change name of all files. Only keeps existing files."
177
175
  return self.__class__([f.with_name(name) for f in self._files])
@@ -420,6 +418,54 @@ class Files:
420
418
  def bands_widget(self, height='450px'):
421
419
  "Get BandsWidget instance with these files."
422
420
  return BandsWidget(files=self._files, height=height)
421
+
422
def map(self, func, to_df=False):
    """Apply `func` to every path held by this object.

    With `to_df=False` (default) a lazy generator of `(path, func(path))`
    tuples is returned. With `to_df=True` the work is handed to
    `self._try_return_df(func)`; in that case `func` may return a dict to
    create named columns, otherwise just two columns will be created.
    If you need to operate on an opened file pointer, use `.mapf` instead.

    >>> import ipyvasp as ipv
    >>> files = ipv.Files(...)
    >>> files.map(lambda path: ipv.read(path, '<pattern>',apply = lambda line: float(line.split()[0])))
    >>> files.map(lambda path: ipv.load(path), to_df=True)
    """
    if not to_df:
        # Must stay a generator so nothing is computed until it is consumed.
        return ((item, func(item)) for item in self._files)
    return self._try_return_df(func)
436
+
437
def _try_return_df(self, func):
    """Build a DataFrame from `func` applied to each path in `self._files`.

    `summarize` is tried first. If it raises, the result falls back to a
    plain DataFrame built from `(path, func(path))` pairs, which calls
    `func` on every path again.
    """
    try:
        return summarize(self._files, func)
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed by a bare except.
        return pd.DataFrame(((path, func(path)) for path in self._files))
440
+
441
def mapf(self, func, to_df=False, mode='r', encoding=None):
    """Map files to a function that takes an opened file pointer as argument.

    Each file is opened read-only with the given `mode` and `encoding`,
    passed to `func`, and closed again as soon as `func` returns.
    Returns a generator of tuples like `(path, func(open(path)))`, or the
    result of `self._try_return_df` if `to_df=True`.
    If `to_df=True`, func may return a dict to create named columns, or just two columns will be created.
    If you need to operate on just path, use `.map` instead.

    >>> import json
    >>> import ipyvasp as ipv
    >>> files = ipv.Files(...)
    >>> files.mapf(lambda fp: json.load(fp,cls=ipv.DecodeToNumpy),to_df=True) # or use ipv.load(path) in map
    >>> files.mapf(lambda fp: ipv.get_lines(fp, range(5))) # read first five lines
    >>> files.mapf(lambda fp: ipv.get_lines(fp, range(-5,0))) # read last five lines
    >>> files.mapf(lambda fp: ipv.get_lines(fp, -1)) # read last line

    Raises
    ------
    ValueError : if `mode` is anything other than 'r' or 'rb'.
    """
    # Exact membership: a substring test against 'rb' would also let '' and
    # 'b' through, which only fail later inside open().
    if mode not in ('r', 'rb'):
        raise ValueError("Only 'r'/'rb' mode is allowed in this context!")

    def loader(path):
        with open(path, mode=mode, encoding=encoding) as f:
            return func(f)

    if to_df:
        return self._try_return_df(loader)
    return ((path, loader(path)) for path in self._files)  # generator must
465
+
466
def stat(self):
    "Summarize file stats through `self.summarize`. Currently the only column is `size`."
    def _size_row(path):
        # One row per file: the formatted size string from get_file_size.
        return {"size": get_file_size(path)}

    return self.summarize(_size_row)
423
469
 
424
470
 
425
471
  @fix_signature
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ipyvasp
3
- Version: 0.9.84
3
+ Version: 0.9.86
4
4
  Summary: A processing tool for VASP DFT input/output processing in Jupyter Notebook.
5
5
  Home-page: https://github.com/massgh/ipyvasp
6
6
  Author: Abdul Saboor
@@ -1,25 +1,25 @@
1
- ipyvasp/__init__.py,sha256=rlorju9arMtHw1QRYPljday-PyZWJdSCxg4lw3g6t0Q,1409
1
+ ipyvasp/__init__.py,sha256=a9rUl8DdqIiZsj03LMJFnJKIuxmtmjmN2Dqj6hmjUls,1427
2
2
  ipyvasp/__main__.py,sha256=eJV1TZSiT8mC_VqAeksNnBI2I8mKMiPkEIlwikbtOjI,216
3
3
  ipyvasp/_enplots.py,sha256=D38paN8zqZgluNAwmCwcocd7-_h_T0HTGolI1eBkDes,37484
4
4
  ipyvasp/_lattice.py,sha256=kOseNCIWt-VCnkhFQZEcsXhyNYobjfqNfAl3seXHiVU,105584
5
- ipyvasp/_version.py,sha256=3xnjEM8AMDNSU-FRZtXZ39MIuWcHwZXBmMyzZckPxEs,24
5
+ ipyvasp/_version.py,sha256=Ni3S6NaV45wRiYfJLhXykakqV8br57IhjNd9paj9-MY,24
6
6
  ipyvasp/bsdos.py,sha256=JvYvHLqMp3eVaJ0amD-9kxp7FehQIFq3WFUxsO5dj0Q,31794
7
7
  ipyvasp/cli.py,sha256=aWFEVhNmnW8eSOp5uh95JaDwLQ9K9nlCQcbnOSuhWgw,6844
8
8
  ipyvasp/evals_dataframe.py,sha256=-sqxK7LPV6sYDO_XXmZ80FznOaXTkVdbqJKKvTUtMak,20637
9
9
  ipyvasp/lattice.py,sha256=VfyhmbpRHA3nePWUmaoMmCiAehby_VvDuDcp34OK3rA,33685
10
- ipyvasp/misc.py,sha256=SZJ_ePUR2-HEKYTEpDHVRVE7zpIQVTCjiuw0BCC9UTU,2349
10
+ ipyvasp/misc.py,sha256=gPA-71V_fuzyvinHR4P1kYMm90lJYdt24733pA5_4Kk,2340
11
11
  ipyvasp/potential.py,sha256=tzA73c5lkp6ahLSJchMrU043-QWaOV0nIOUA7VMmfKQ,11408
12
12
  ipyvasp/surface.py,sha256=MjE5oB0wW6Pca_C-xu8rN6OMH7lUEeNPNyM7Kz_Im-8,23766
13
- ipyvasp/utils.py,sha256=nw_oglztXfl0CZmo63PBstI4didTkuXhThYqhMP8DVI,14247
14
- ipyvasp/widgets.py,sha256=X4gPkTy--aFZpXGCQJVg4HByt5-mpVB6gNiFmTTIyRg,46628
13
+ ipyvasp/utils.py,sha256=_AD1LMXpy-wNHJpTx4uEahQyDWv_KY70FGxoZCEjVLY,15598
14
+ ipyvasp/widgets.py,sha256=dMMRD5mfTEPnG5j04wuQviGkKP8tcI5oqInny4qDd7I,49209
15
15
  ipyvasp/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- ipyvasp/core/parser.py,sha256=TmRoIhyIGJ4nr32tb1MAAy5clUZh0CQh-2hP8mnTaVc,38221
16
+ ipyvasp/core/parser.py,sha256=if5DsBpDS7qCIlWsl6JD0ovRBk01h0CdKYa4vE9SSzw,38945
17
17
  ipyvasp/core/plot_toolkit.py,sha256=V-IQo7MrOhmNCGpWHIMtV04TmmubgFvGgH_yd4YmoX4,36158
18
18
  ipyvasp/core/serializer.py,sha256=v0ma4htirybtQo_wFhIEjkRoZMQk9ETDz85iloaq3YY,38427
19
19
  ipyvasp/core/spatial_toolkit.py,sha256=dXowREhiFzBvvr5f_bApzFhf8IzjH2E2Ix90oCBUetY,14885
20
- ipyvasp-0.9.84.dist-info/LICENSE,sha256=F3SO5RiAZOMfmMGf1KOuk2g_c4ObvuBJhd9iBLDgXoQ,1263
21
- ipyvasp-0.9.84.dist-info/METADATA,sha256=hX-hNmmx-Yh8dpfR3ZGAigi5cKt7yLQhI6vIWDiPqnI,2421
22
- ipyvasp-0.9.84.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
23
- ipyvasp-0.9.84.dist-info/entry_points.txt,sha256=C7m0Sjmr14wFjflCkWXLzr5N6-cQj8uJC9n82mUtzt8,44
24
- ipyvasp-0.9.84.dist-info/top_level.txt,sha256=ftziWlMWu_1VpDP1sRTFrkfBnWxAi393HYDVu4wRhUk,8
25
- ipyvasp-0.9.84.dist-info/RECORD,,
20
+ ipyvasp-0.9.86.dist-info/LICENSE,sha256=F3SO5RiAZOMfmMGf1KOuk2g_c4ObvuBJhd9iBLDgXoQ,1263
21
+ ipyvasp-0.9.86.dist-info/METADATA,sha256=62X139t3R61h2ug7ZYsTQwTKSewuGwfyfS_kS7QpJhc,2421
22
+ ipyvasp-0.9.86.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
23
+ ipyvasp-0.9.86.dist-info/entry_points.txt,sha256=C7m0Sjmr14wFjflCkWXLzr5N6-cQj8uJC9n82mUtzt8,44
24
+ ipyvasp-0.9.86.dist-info/top_level.txt,sha256=ftziWlMWu_1VpDP1sRTFrkfBnWxAi393HYDVu4wRhUk,8
25
+ ipyvasp-0.9.86.dist-info/RECORD,,