feilian 1.1.7.tar.gz → 1.1.9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

PKG-INFO
@@ -1,11 +1,13 @@
 Metadata-Version: 2.1
 Name: feilian
-Version: 1.1.7
+Version: 1.1.9
 Summary: General data processing tool.
 Author-email: darkpeath <darkpeath@gmail.com>
 Project-URL: Homepage, https://github.com/darkpeath/feilian
 Description-Content-Type: text/markdown
+Requires-Dist: pandas
 Provides-Extra: extra
+Requires-Dist: tqdm; extra == "extra"
 
 # feilian
 
feilian/__init__.py
@@ -6,6 +6,7 @@ from .dataframe import is_empty_text, is_nonempty_text, is_blank_text, is_non_bl
 from .datetime import format_time, format_date
 from .arg import ArgValueParser
 from .json import read_json, save_json
+from .utils import flatten_dict
 from .version import __version__
 
 __all__ = [
@@ -15,5 +16,6 @@ __all__ = [
     'format_time', 'format_date',
     'ArgValueParser',
     'read_json', 'save_json',
+    'flatten_dict',
     '__version__',
 ]
feilian/_dist_ver.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
 # file generated by setuptools_scm
 # don't change, don't track in version control
-VERSION = (1, 1, 7)
-__version__ = '1.1.7'
+VERSION = (1, 1, 9)
+__version__ = '1.1.9'
feilian/arg.py
@@ -1,6 +1,10 @@
 # -*- coding: utf-8 -*-
 
-from typing import Union, List, Any, Iterable, Callable, Set, Optional, Tuple, Dict, Hashable
+from typing import (
+    Union, List, Any, Iterable,
+    Callable, Set, Optional, Tuple,
+    Dict, Hashable, Sequence,
+)
 try:
     from typing import Literal
 except ImportError:
@@ -28,12 +32,12 @@ def _get_or_default(value: Any, mapping: Dict[Hashable, Any], default_key: Any)
 
 class ArgValueParser(object):
     @classmethod
-    def split_and_parse_strs(cls, strings: Union[List[str], str, None],
+    def split_and_parse_strs(cls, strings: Union[Sequence[str], str, None],
                              func: Callable[[str], Any] = None,
                              sep=',', do_trim=True, ignore_blank=True) -> Iterable[Any]:
        """
        split and parse multi string values
-       :param strings: list of strings
+       :param strings: sequence of strings
        :param func: function to parse single string value
        :param sep: seperator to split single string
        :param do_trim: trim every word or not
@@ -51,7 +55,7 @@ class ArgValueParser(object):
             yield func(x) if func else x
 
     @classmethod
-    def split_strs_to_set(cls, values: Union[List[str], str, None],
+    def split_strs_to_set(cls, values: Union[Sequence[str], str, None],
                           func: Callable[[str], Any] = None,
                           sep=',', do_trim=True, ignore_blank=True) -> Optional[Set[Any]]:
         """
@@ -60,7 +64,7 @@ class ArgValueParser(object):
         return set(cls.split_and_parse_strs(values, func, sep, do_trim, ignore_blank))
 
     @classmethod
-    def split_strs_to_list(cls, values: Union[List[str], str, None],
+    def split_strs_to_list(cls, values: Union[Sequence[str], str, None],
                            func: Callable[[str], Any] = None,
                            sep=',', do_trim=True, ignore_blank=True) -> Optional[List[Any]]:
         """
feilian/dataframe.py
@@ -22,8 +22,11 @@ pd_version = [int(x) for x in pd.__version__.split('.')]
 if pd_version[0] < 1 or (pd_version[0] == 1 and pd_version[1] < 5):
     PD_PARAM_NEWLINE = 'line_terminator'
 
+FILE_FORMAT = Literal['csv', 'tsv', 'json', 'xlsx', 'parquet']
+COMPRESSION_FORMAT = Literal[None, 'infer', 'snappy', 'gzip', 'brotli', 'bz2', 'zip', 'xz']
+
 def read_dataframe(file: str, *args, sheet_name=0,
-                   file_format: Literal['csv', 'tsv', 'json', 'xlsx'] = None,
+                   file_format: FILE_FORMAT = None,
                    jsonl=False, dtype: type = None,
                    **kwargs) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
     """
@@ -31,7 +34,7 @@ def read_dataframe(file: str, *args, sheet_name=0,
     :param file: the file to be read
     :param args: extra args for `pd.read_xx()`
     :param sheet_name: `sheet_name` for `pd.read_excel()`
-    :param file_format: csv, tsv, json ,xlsx
+    :param file_format: csv, tsv, json ,xlsx, parquet
     :param jsonl: jsonl format or not, only used in json format
     :param dtype: `dtype` for `pd.read_xx()`
     :param kwargs: extra kwargs for `pd.read_xx()`
@@ -46,12 +49,16 @@ def read_dataframe(file: str, *args, sheet_name=0,
         if key in kwargs and kwargs.pop(key):
             jsonl = True
 
-    # if the file format is tsv, actually same as csv
+    # handle special formats
     if file_format == 'tsv':
+        # if the file format is tsv, actually same as csv
         file_format = 'csv'
         if 'sep' in kwargs:
             kwargs.pop('sep')
         kwargs['delimiter'] = '\t'
+    elif file_format == 'jsonl':
+        file_format = 'json'
+        jsonl = True
 
     if file_format == 'csv':
         return pd.read_csv(file, *args, dtype=dtype, **kwargs)
@@ -59,13 +66,16 @@
         return pd.read_excel(file, *args, sheet_name=sheet_name, dtype=dtype, **kwargs)
     elif file_format == 'json':
         return pd.read_json(file, *args, lines=jsonl, dtype=dtype, **kwargs)
+    elif file_format == 'parquet':
+        return pd.read_parquet(file, *args, **kwargs)
     else:
         raise IOError(f"Unknown file format: {file}")
 
 def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[str]'],
                    df: Union[pd.DataFrame, Iterable[Union[pd.Series, Dict[str, Any]]]],
                    *args, sheet_name='Sheet1',
-                   file_format: Literal['csv', 'tsv', 'json', 'xlsx'] = None,
+                   file_format: FILE_FORMAT = None,
+                   compression: COMPRESSION_FORMAT = None,
                    index=False, index_label=None,
                    encoding='utf-8', newline='\n',
                    force_ascii=False,
@@ -80,7 +90,9 @@ def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[st
     :param df: the data
     :param args: extra args for df.to_xx()
     :param sheet_name: `sheet_name` for excel format
-    :param file_format: csv, tsv, json, xlsx
+    :param file_format: csv, tsv, json, xlsx, parquet
+    :param compression: name of the compression to use.
+        use `None` for no compression.
     :param index: save index or not, see docs in df.to_csv();
         if set as str and `index_label` not set, `index_label` will be set as this
     :param index_label: header for the index when `index` is `True`
@@ -127,24 +139,31 @@ def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[st
     if index_label is None and isinstance(index, str):
         index, index_label = True, index
 
-    # tsv is actually a csv
+    # handle special formats
     if file_format == 'tsv':
+        # tsv is actually a csv
         file_format = 'csv'
         kwargs['sep'] = '\t'
+    elif file_format == 'jsonl':
+        file_format = 'json'
+        jsonl = True
 
     # save to file for different format
     if file_format == 'csv':
         kwargs[PD_PARAM_NEWLINE] = newline
-        df.to_csv(file, *args, index=index, index_label=index_label, encoding=encoding, **kwargs)
+        df.to_csv(file, *args, compression=compression, index=index, index_label=index_label,
+                  encoding=encoding, **kwargs)
     elif file_format == 'xlsx':
         df.to_excel(file, *args, index=index, index_label=index_label, sheet_name=sheet_name, **kwargs)
     elif file_format == 'json':
         if jsonl:
             orient = 'records'
             index = True
-        df.to_json(file, *args, index=index, force_ascii=force_ascii,
-                   orient=orient, lines=jsonl,
+        df.to_json(file, *args, compression=compression, index=index,
+                   force_ascii=force_ascii, orient=orient, lines=jsonl,
                    **kwargs)
+    elif file_format == 'parquet':
+        df.to_parquet(file, *args, compression=compression, index=index, **kwargs)
     else:
         raise IOError(f"Unknown file format: {file}")
 
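Taken together, the dataframe.py hunks add 'parquet' to the accepted formats, accept 'jsonl' as an alias that rewrites itself to json with lines enabled, and thread a compression argument through to the pandas writers. A round-trip sketch under the signatures shown above (file names and data are illustrative; pandas needs pyarrow or fastparquet installed for parquet I/O):

import pandas as pd
from feilian.dataframe import read_dataframe, save_dataframe  # module path per the hunks above

df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
# compression is forwarded to df.to_parquet(); 'gzip' is one of the COMPRESSION_FORMAT values
save_dataframe("demo.parquet", df, file_format="parquet", compression="gzip")
df2 = read_dataframe("demo.parquet", file_format="parquet")
# the new 'jsonl' alias becomes file_format='json' with one record per line
save_dataframe("demo.jsonl", df, file_format="jsonl")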
feilian/json.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+
+from typing import Dict, List, Union, Any
+import json
+from .io import ensure_parent_dir_exist
+
+def read_json(filepath: str, jsonl=False, encoding='utf-8', **kwargs):
+    """
+    An agent for `json.load()` with some default value.
+    """
+    with open(filepath, encoding=encoding) as f:
+        if jsonl:
+            return [json.loads(x) for x in f]
+        else:
+            return json.load(f, **kwargs)
+
+def save_json(filepath: str, data: Union[Dict[str, Any], List[Any]], jsonl=False,
+              encoding='utf-8', newline='\n', indent=2, ensure_ascii=False, **kwargs):
+    """
+    An agent for `json.dump()` with some default value.
+    """
+    if jsonl and not isinstance(data, list):
+        # data should be a list
+        raise ValueError("data should be a list when save as jsonl format")
+    ensure_parent_dir_exist(filepath)
+    with open(filepath, 'w', encoding=encoding, newline=newline) as f:
+        if jsonl:
+            for x in data:
+                f.write(json.dumps(x, ensure_ascii=ensure_ascii, **kwargs))
+                f.write(newline)
+        else:
+            json.dump(data, f, indent=indent, ensure_ascii=ensure_ascii, **kwargs)
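The rewritten json.py (its 1.1.7 form is shown as removed at the end of this diff) gains a jsonl switch on both helpers plus a guard that jsonl data must be a list. A short sketch, assuming the top-level exports from the __init__.py hunk:

from feilian import read_json, save_json

rows = [{"id": 1}, {"id": 2}]
save_json("out.jsonl", rows, jsonl=True)        # writes one json.dumps() per line
assert read_json("out.jsonl", jsonl=True) == rows
save_json("out.json", {"rows": rows})           # plain json.dump() with indent=2 by default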
feilian/utils.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from typing import Dict, Any, Union, Collection
+
+def flatten_dict(data: Dict[str, Any], prefix="", joiner=".",
+                 exclude: Union[None, str, Collection[str]] = None,
+                 frozen: Union[None, str, Collection[str]] = None,
+                 empty_as_default=False, empty_value=None,
+                 res: Dict[str, Any] = None) -> Dict[str, Any]:
+    """
+    flatten dict as a flat one layer dict
+    :param data: origin dict
+    :param prefix: prefix for key in the dict
+    :param joiner: join symbol for different layer key
+    :param exclude: prefix to be excluded from result
+    :param frozen: keys not to be flattened
+    :param empty_as_default: should set a default value if value is an empty dict
+    :param empty_value: if `empty_as_default` is `True`, used as the default value for empty dict
+    :param res: the result flat layer dict, create a new one if not given.
+    """
+    if res is None:
+        res = {}
+    if isinstance(exclude, str):
+        exclude = {exclude}
+    if isinstance(frozen, str):
+        frozen = {frozen}
+
+    # all keys are start with the prefix, ignore data
+    if exclude and prefix in exclude:
+        return res
+
+    # all keys in data should be frozen
+    if frozen and prefix in frozen:
+        for k, v in data.items():
+            res[prefix+k] = v
+        return res
+
+    for k, v in data.items():
+        k = prefix + k
+
+        if exclude and k in exclude:
+            # only the key should be excluded
+            continue
+
+        if frozen and k in frozen:
+            # frozen key, keep it as original value
+            res[k] = v
+            continue
+
+        if isinstance(v, dict):
+            if len(v) == 0:
+                # empty dict, set as default value if set
+                if empty_as_default:
+                    res[k] = empty_value
+            else:
+                # value is a dict, flatten recursively
+                flatten_dict(v, prefix=k+joiner, joiner=joiner, exclude=exclude, frozen=frozen, res=res)
+        else:
+            # normal value, keep it as original value
+            res[k] = v
+
+    return res
+
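A worked example of the new flatten_dict, tracing the branches above (keys and values are illustrative):

from feilian import flatten_dict  # newly exported via __all__

nested = {"a": {"b": 1, "c": {}}, "keep": {"x": 2}, "drop": {"y": 3}}
flat = flatten_dict(nested, exclude="drop", frozen="keep", empty_as_default=True)
# "a" is recursed with prefix "a.", the empty dict at "a.c" gets empty_value,
# "keep" is copied as-is, and "drop" is skipped entirely
assert flat == {"a.b": 1, "a.c": None, "keep": {"x": 2}}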
feilian/version.py
@@ -3,7 +3,7 @@
 try:
     from ._dist_ver import VERSION, __version__
 except ImportError:
-    from importlib.metadata import version, PackageNotFoundError
+    from importlib_metadata import version, PackageNotFoundError
     try:
         __version__ = version('feilian')
     except PackageNotFoundError:
feilian.egg-info/PKG-INFO
@@ -1,11 +1,13 @@
 Metadata-Version: 2.1
 Name: feilian
-Version: 1.1.7
+Version: 1.1.9
 Summary: General data processing tool.
 Author-email: darkpeath <darkpeath@gmail.com>
 Project-URL: Homepage, https://github.com/darkpeath/feilian
 Description-Content-Type: text/markdown
+Requires-Dist: pandas
 Provides-Extra: extra
+Requires-Dist: tqdm; extra == "extra"
 
 # feilian
 
feilian.egg-info/SOURCES.txt
@@ -10,6 +10,7 @@ feilian/datetime.py
 feilian/io.py
 feilian/json.py
 feilian/string.py
+feilian/utils.py
 feilian/version.py
 feilian.egg-info/PKG-INFO
 feilian.egg-info/SOURCES.txt
feilian/json.py (1.1.7 copy, removed)
@@ -1,21 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from typing import Dict, List, Union, Any
-import json
-from .io import ensure_parent_dir_exist
-
-def read_json(filepath: str, encoding='utf-8', **kwargs):
-    """
-    An agent for `json.load()` with some default value.
-    """
-    with open(filepath, encoding=encoding) as f:
-        return json.load(f, **kwargs)
-
-def save_json(filepath: str, data: Union[Dict[str, Any], List[Any]],
-              encoding='utf-8', newline='\n', indent=2, ensure_ascii=False, **kwargs):
-    """
-    An agent for `json.dump()` with some default value.
-    """
-    ensure_parent_dir_exist(filepath)
-    with open(filepath, 'w', encoding=encoding, newline=newline) as f:
-        json.dump(data, f, indent=indent, ensure_ascii=ensure_ascii, **kwargs)
8 files without changes