PyPI - ipyvasp - Versions diffs - 0.9.84__py2.py3-none-any.whl → 0.9.86__py2.py3-none-any.whl - Mend

ipyvasp 0.9.84__py2.py3-none-any.whl → 0.9.86__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

ipyvasp/__init__.py +1 -0
ipyvasp/_version.py +1 -1
ipyvasp/core/parser.py +35 -28
ipyvasp/misc.py +4 -6
ipyvasp/utils.py +36 -2
ipyvasp/widgets.py +52 -6
{ipyvasp-0.9.84.dist-info → ipyvasp-0.9.86.dist-info}/METADATA +1 -1
{ipyvasp-0.9.84.dist-info → ipyvasp-0.9.86.dist-info}/RECORD +12 -12
{ipyvasp-0.9.84.dist-info → ipyvasp-0.9.86.dist-info}/LICENSE +0 -0
{ipyvasp-0.9.84.dist-info → ipyvasp-0.9.86.dist-info}/WHEEL +0 -0
{ipyvasp-0.9.84.dist-info → ipyvasp-0.9.86.dist-info}/entry_points.txt +0 -0
{ipyvasp-0.9.84.dist-info → ipyvasp-0.9.86.dist-info}/top_level.txt +0 -0

ipyvasp/__init__.py CHANGED Viewed

@@ -18,6 +18,7 @@ __all__ = [  # For documentation purpose
     "webshow",
     "load_results",
     "parse_text",
+    "get_lines",
     "summarize",
 ]

ipyvasp/_version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.9.84"
1	+ __version__ = "0.9.86"

ipyvasp/core/parser.py CHANGED Viewed

@@ -1,4 +1,4 @@
-__all__ = ["Vasprun", "Vaspout", "minify_vasprun", "xml2dict"]
+__all__ = ["Vasprun", "Vaspout", "minify_vasprun", "xml2dict","read"]
 import re
 from io import StringIO
@@ -114,6 +114,35 @@ class Vaspout(DataSource):
     def __init__(self, path):
         raise NotImplementedError("Vaspout is not implemented yet.")
+def read(file, start_match, stop_match=r'\n', nth_match=1, skip_last=False,apply=None):
+    """Reads a part of the file between start_match and stop_match and returns a generator. It is lazy and fast.
+    `start_match` and `stop_match`(default is end of same line) are regular expressions. `nth_match` is the number of occurence of start_match to start reading.
+    `skip_last` is used to determine whether to keep or skip last line.
+    `apply` should be None or `func` to transform each captured line.
+    """
+    if "|" in start_match:
+        raise ValueError(
+            "start_match should be a single match, so '|' character is not allowed."
+        )
+    with Path(file).open("r") as f:
+        lines = islice(f, None)  # this is fast
+        matched = False
+        n_start = 1
+        for line in lines:
+            if re.search(start_match, line, flags=re.DOTALL):
+                if nth_match != n_start:
+                    n_start += 1
+                else:
+                    matched = True
+            if matched and re.search(
+                stop_match, line, flags=re.DOTALL
+            ):  # avoid stop before start
+                matched = False
+                if not skip_last:
+                    yield apply(line) if callable(apply) else line
+                break  # stop reading
+            if matched:  # should be after break to handle last line above
+                yield apply(line) if callable(apply) else line
 class Vasprun(DataSource):
     "Reads vasprun.xml file lazily. It reads only the required data from the file when a plot or data access is requested."
@@ -129,36 +158,14 @@ class Vasprun(DataSource):
             skipk if isinstance(skipk, (int, np.integer)) else self.get_skipk()
         )
-    def read(self, start_match, stop_match, nth_match=1, skip_last=False):
+    def read(self, start_match, stop_match=r'\n', nth_match=1, skip_last=False,apply=None):
         """Reads a part of the file between start_match and stop_match and returns a generator. It is lazy and fast.
-        `start_match` and `stop_match` are regular expressions. `nth_match` is the number of occurence of start_match to start reading.
+        `start_match` and `stop_match`(default is end of same line) are regular expressions. `nth_match` is the number of occurence of start_match to start reading.
         `skip_last` is used to determine whether to keep or skip last line.
+        `apply` should be None or `func` to transform each captured line.
         """
-        if "|" in start_match:
-            raise ValueError(
-                "start_match should be a single match, so '|' character is not allowed."
-            )
-        with self.path.open("r") as f:
-            lines = islice(f, None)  # this is fast
-            matched = False
-            n_start = 1
-            for line in lines:
-                if re.search(start_match, line, flags=re.DOTALL):
-                    if nth_match != n_start:
-                        n_start += 1
-                    else:
-                        matched = True
-                if matched and re.search(
-                    stop_match, line, flags=re.DOTALL
-                ):  # avoid stop before start
-                    matched = False
-                    if not skip_last:
-                        yield line
-                    break  # stop reading
-                if matched:  # should be after break to handle last line above
-                    yield line
+        kws = {k:v for k,v in locals().items() if k !='self'}
+        return read(self.path,**kws)
     def get_skipk(self):
         "Returns the number of k-points to skip in band structure plot in case of HSE calculation."

ipyvasp/misc.py CHANGED Viewed

@@ -70,10 +70,8 @@ class OUTCAR:
     def path(self):
         return self._path
-    @_sub_doc(vp.Vasprun.read)
-    @_sig_kwargs(vp.Vasprun.read, ("self",))
-    def read(self, start_match, stop_match, **kwargs):
-        return vp.Vasprun.read(
-            self, start_match, stop_match, **kwargs
-        )  # Pass all the arguments to the function
+    @_sub_doc(vp.read)
+    def read(self, start_match, stop_match=r'\n', nth_match=1, skip_last=False,apply=None):
+        kws = {k:v for k,v in locals().items() if k !='self'}
+        return vp.read(self.path, **kws)  # Pass all the arguments to the function

ipyvasp/utils.py CHANGED Viewed

@@ -1,5 +1,6 @@
 __all__ = [
     "get_file_size",
+    "get_lines",
     "set_dir",
     "interpolate_data",
     "rolling_mean",
@@ -10,6 +11,7 @@ __all__ = [
 import re
 import os
+import io
 from contextlib import contextmanager
 from pathlib import Path
 from inspect import signature, getdoc
@@ -26,13 +28,45 @@ import matplotlib.pyplot as plt
 def get_file_size(path: str):
     """Return file size"""
     if (p := Path(path)).is_file():
-        size = p.stat.st_size
-        for unit in ["Bytes", "KB", "MB", "GB", "TB"]:
+        size = p.stat().st_size
+        for unit in ["B", "KB", "MB", "GB", "TB"]:
             if size < 1024.0:
                 return "%3.2f %s" % (size, unit)
             size /= 1024.0
     else:
         return ""
+def get_lines(f, indices):
+    """Read lines by indexing from an opened file pointer `f`. Negative indexing is supported to read lines from end.
+    Returns a single str of line if one integer given, otherwise a list of lines.
+    This consumes a lot less memory then indexing over `f.readlines()[index]`.
+    >>> with open('some_file','r') as f:
+    >>>     get_lines(f, -1) # last line
+    >>>     get_lines(f, range(5)) # first 5 lines
+    >>>     get_lines(f, range(-5,0)) # last 5 lines
+    """
+    if not isinstance(f, io.TextIOWrapper):
+        raise TypeError(f"f should be file-like object. got {type(f)}")
+    return_line = False
+    if isinstance(indices, int):
+        indices = [indices]
+        return_line = True
+    if not isinstance(indices, (tuple,list, range)):
+        raise TypeError(f"indices should int/list/tuple/range, got {type(indices)}")
+    f.seek(0)
+    if min(indices) < 0:
+        if not hasattr(f, '_nlines'): # do this once, assuming file is not changed while reading
+            f._nlines = sum(1 for _ in enumerate(f))
+            f.seek(0)
+        indices = [i + (f._nlines if i < 0 else 0) for i in indices] # make all positive
+    lines = [l for i, l in enumerate(f) if i in indices]
+    return lines[0] if return_line else lines
 def _sig_kwargs(from_func, skip_params=()):

ipyvasp/widgets.py CHANGED Viewed

@@ -40,7 +40,7 @@ import plotly.graph_objects as go
 from . import utils as gu
 from . import lattice as lat
 from .core import serializer, parser as vp, plot_toolkit as ptk
-from .utils import _sig_kwargs, _sub_doc
+from .utils import _sig_kwargs, _sub_doc, get_file_size
 from ._enplots import _fmt_labels
@@ -68,7 +68,7 @@ def summarize(files, func, **kwargs):
     for name, path in files.items():
         output = func(path, **kwargs)
         if not isinstance(output, dict):
-            raise TypeError("Function must return a dictionary.")
+            raise TypeError("Function must return a dictionary to create DataFrame.")
         if "FILE" in output:
             raise KeyError(
@@ -115,6 +115,8 @@ class Files:
     Use methods on return such as `summarize`, `with_name`, `filtered`, `interact` and others.
     >>> Files(root_1, glob_1,...).add(root_2, glob_2,...) # Fully flexible to chain
+    WARNING: Don't use write operations on paths in files in batch mode, it can cause unrecoverable data loss.
     """
     def __init__(self, path_or_files = '.', glob = '*', exclude = None,files_only = False, dirs_only=False):
         if isinstance(path_or_files, Files):
@@ -168,10 +170,6 @@ class Files:
     def __add__(self, other):
         raise NotImplementedError("Use self.add method instead!")
-    def map(self,func):
-        "Map files to a function!"
-        return map(func, self._files)
     def with_name(self, name):
         "Change name of all files. Only keeps existing files."
         return self.__class__([f.with_name(name) for f in self._files])
@@ -420,6 +418,54 @@ class Files:
     def bands_widget(self, height='450px'):
         "Get BandsWidget instance with these files."
         return BandsWidget(files=self._files, height=height)
+    def map(self,func, to_df=False):
+        """Map files to a function that takes path as argument.
+        If `to_df=True`, func may return a dict to create named columns, or just two columns will be created.
+        Otherwise returns generator of elemnets `(path, func(path))`.
+        If you need to operate on opened file pointer, use `.mapf` instead.
+        >>> import ipyvasp as ipv
+        >>> files = ipv.Files(...)
+        >>> files.map(lambda path: ipv.read(path, '<pattern>',apply = lambda line: float(line.split()[0])))
+        >>> files.map(lambda path: ipv.load(path), to_df=True)
+        """
+        if to_df:
+            return self._try_return_df(func)
+        return ((path, func(path)) for path in self._files) # generator must
+    def _try_return_df(self, func):
+        try: return summarize(self._files,func)
+        except: return pd.DataFrame(((path, func(path)) for path in self._files))
+    def mapf(self, func, to_df=False,mode='r', encoding=None):
+        """Map files to a function that takes opened file pointer as argument. Opened files are automatically closed and should be in readonly mode.
+        Load files content into a generator sequence of  tuples like `(path, func(open(path)))` or DataFrame if `to_df=True`.
+        If `to_df=True`, func may return a dict to create named columns, or just two columns will be created.
+        If you need to operate on just path, use `.map` instead.
+        >>> import json
+        >>> import ipyvasp as ipv
+        >>> files = ipv.Files(...)
+        >>> files.mapf(lambda fp: json.load(fp,cls=ipv.DecodeToNumpy),to_df=True) # or use ipv.load(path) in map
+        >>> files.mapf(lambda fp: ipv.get_lines(fp, range(5)) # read first five lines
+        >>> files.mapf(lambda fp: ipv.get_lines(fp, range(-5,0)) # read last five lines
+        >>> files.mapf(lambda fp: ipv.get_lines(fp, -1) # read last line
+        """
+        if not mode in 'rb':
+            raise ValueError("Only 'r'/'rb' mode is allowed in this context!")
+        def loader(path):
+            with open(path, mode=mode,encoding=encoding) as f:
+                return func(f)
+        if to_df:
+            return self._try_return_df(loader)
+        return ((path, loader(path)) for path in self._files) # generator must
+    def stat(self):
+        "Get files stat as DataFrame. Currently only size is supported."
+        return self.summarize(lambda path: {"size": get_file_size(path)})
 @fix_signature

{ipyvasp-0.9.84.dist-info → ipyvasp-0.9.86.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ipyvasp
-Version: 0.9.84
+Version: 0.9.86
 Summary: A processing tool for VASP DFT input/output processing in Jupyter Notebook.
 Home-page: https://github.com/massgh/ipyvasp
 Author: Abdul Saboor

{ipyvasp-0.9.84.dist-info → ipyvasp-0.9.86.dist-info}/RECORD RENAMED Viewed

@@ -1,25 +1,25 @@
-ipyvasp/__init__.py,sha256=rlorju9arMtHw1QRYPljday-PyZWJdSCxg4lw3g6t0Q,1409
+ipyvasp/__init__.py,sha256=a9rUl8DdqIiZsj03LMJFnJKIuxmtmjmN2Dqj6hmjUls,1427
 ipyvasp/__main__.py,sha256=eJV1TZSiT8mC_VqAeksNnBI2I8mKMiPkEIlwikbtOjI,216
 ipyvasp/_enplots.py,sha256=D38paN8zqZgluNAwmCwcocd7-_h_T0HTGolI1eBkDes,37484
 ipyvasp/_lattice.py,sha256=kOseNCIWt-VCnkhFQZEcsXhyNYobjfqNfAl3seXHiVU,105584
-ipyvasp/_version.py,sha256=3xnjEM8AMDNSU-FRZtXZ39MIuWcHwZXBmMyzZckPxEs,24
+ipyvasp/_version.py,sha256=Ni3S6NaV45wRiYfJLhXykakqV8br57IhjNd9paj9-MY,24
 ipyvasp/bsdos.py,sha256=JvYvHLqMp3eVaJ0amD-9kxp7FehQIFq3WFUxsO5dj0Q,31794
 ipyvasp/cli.py,sha256=aWFEVhNmnW8eSOp5uh95JaDwLQ9K9nlCQcbnOSuhWgw,6844
 ipyvasp/evals_dataframe.py,sha256=-sqxK7LPV6sYDO_XXmZ80FznOaXTkVdbqJKKvTUtMak,20637
 ipyvasp/lattice.py,sha256=VfyhmbpRHA3nePWUmaoMmCiAehby_VvDuDcp34OK3rA,33685
-ipyvasp/misc.py,sha256=SZJ_ePUR2-HEKYTEpDHVRVE7zpIQVTCjiuw0BCC9UTU,2349
+ipyvasp/misc.py,sha256=gPA-71V_fuzyvinHR4P1kYMm90lJYdt24733pA5_4Kk,2340
 ipyvasp/potential.py,sha256=tzA73c5lkp6ahLSJchMrU043-QWaOV0nIOUA7VMmfKQ,11408
 ipyvasp/surface.py,sha256=MjE5oB0wW6Pca_C-xu8rN6OMH7lUEeNPNyM7Kz_Im-8,23766
-ipyvasp/utils.py,sha256=nw_oglztXfl0CZmo63PBstI4didTkuXhThYqhMP8DVI,14247
-ipyvasp/widgets.py,sha256=X4gPkTy--aFZpXGCQJVg4HByt5-mpVB6gNiFmTTIyRg,46628
+ipyvasp/utils.py,sha256=_AD1LMXpy-wNHJpTx4uEahQyDWv_KY70FGxoZCEjVLY,15598
+ipyvasp/widgets.py,sha256=dMMRD5mfTEPnG5j04wuQviGkKP8tcI5oqInny4qDd7I,49209
 ipyvasp/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ipyvasp/core/parser.py,sha256=TmRoIhyIGJ4nr32tb1MAAy5clUZh0CQh-2hP8mnTaVc,38221
+ipyvasp/core/parser.py,sha256=if5DsBpDS7qCIlWsl6JD0ovRBk01h0CdKYa4vE9SSzw,38945
 ipyvasp/core/plot_toolkit.py,sha256=V-IQo7MrOhmNCGpWHIMtV04TmmubgFvGgH_yd4YmoX4,36158
 ipyvasp/core/serializer.py,sha256=v0ma4htirybtQo_wFhIEjkRoZMQk9ETDz85iloaq3YY,38427
 ipyvasp/core/spatial_toolkit.py,sha256=dXowREhiFzBvvr5f_bApzFhf8IzjH2E2Ix90oCBUetY,14885
-ipyvasp-0.9.84.dist-info/LICENSE,sha256=F3SO5RiAZOMfmMGf1KOuk2g_c4ObvuBJhd9iBLDgXoQ,1263
-ipyvasp-0.9.84.dist-info/METADATA,sha256=hX-hNmmx-Yh8dpfR3ZGAigi5cKt7yLQhI6vIWDiPqnI,2421
-ipyvasp-0.9.84.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
-ipyvasp-0.9.84.dist-info/entry_points.txt,sha256=C7m0Sjmr14wFjflCkWXLzr5N6-cQj8uJC9n82mUtzt8,44
-ipyvasp-0.9.84.dist-info/top_level.txt,sha256=ftziWlMWu_1VpDP1sRTFrkfBnWxAi393HYDVu4wRhUk,8
-ipyvasp-0.9.84.dist-info/RECORD,,
+ipyvasp-0.9.86.dist-info/LICENSE,sha256=F3SO5RiAZOMfmMGf1KOuk2g_c4ObvuBJhd9iBLDgXoQ,1263
+ipyvasp-0.9.86.dist-info/METADATA,sha256=62X139t3R61h2ug7ZYsTQwTKSewuGwfyfS_kS7QpJhc,2421
+ipyvasp-0.9.86.dist-info/WHEEL,sha256=iYlv5fX357PQyRT2o6tw1bN-YcKFFHKqB_LwHO5wP-g,110
+ipyvasp-0.9.86.dist-info/entry_points.txt,sha256=C7m0Sjmr14wFjflCkWXLzr5N6-cQj8uJC9n82mUtzt8,44
+ipyvasp-0.9.86.dist-info/top_level.txt,sha256=ftziWlMWu_1VpDP1sRTFrkfBnWxAi393HYDVu4wRhUk,8
+ipyvasp-0.9.86.dist-info/RECORD,,

{ipyvasp-0.9.84.dist-info → ipyvasp-0.9.86.dist-info}/LICENSE RENAMED Viewed

File without changes

{ipyvasp-0.9.84.dist-info → ipyvasp-0.9.86.dist-info}/WHEEL RENAMED Viewed

File without changes

{ipyvasp-0.9.84.dist-info → ipyvasp-0.9.86.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{ipyvasp-0.9.84.dist-info → ipyvasp-0.9.86.dist-info}/top_level.txt RENAMED Viewed

File without changes

ipyvasp 0.9.84__py2.py3-none-any.whl → 0.9.86__py2.py3-none-any.whl

ipyvasp 0.9.84__py2.py3-none-any.whl → 0.9.86__py2.py3-none-any.whl