PyPI - feilian - Versions diffs - 1.1.7__py3-none-any.whl → 1.1.8__py3-none-any.whl - Mend

feilian-1.1.7-py3-none-any.whl → feilian-1.1.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of feilian might be problematic. Click here for more details.

Files changed (10)

feilian/_dist_ver.py +2 -2
feilian/arg.py +9 -5
feilian/dataframe.py +28 -9
feilian/json.py +15 -4
feilian/version.py +1 -1
{feilian-1.1.7.dist-info → feilian-1.1.8.dist-info}/METADATA +1 -1
feilian-1.1.8.dist-info/RECORD +13 -0
{feilian-1.1.7.dist-info → feilian-1.1.8.dist-info}/WHEEL +1 -1
feilian-1.1.7.dist-info/RECORD +0 -13
{feilian-1.1.7.dist-info → feilian-1.1.8.dist-info}/top_level.txt +0 -0

feilian/_dist_ver.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
 # file generated by setuptools_scm
 # don't change, don't track in version control
-VERSION = (1, 1, 7)
-__version__ = '1.1.7'
+VERSION = (1, 1, 8)
+__version__ = '1.1.8'

feilian/arg.py CHANGED Viewed

@@ -1,6 +1,10 @@
 # -*- coding: utf-8 -*-
-from typing import Union, List, Any, Iterable, Callable, Set, Optional, Tuple, Dict, Hashable
+from typing import (
+    Union, List, Any, Iterable,
+    Callable, Set, Optional, Tuple,
+    Dict, Hashable, Sequence,
+)
 try:
     from typing import Literal
 except ImportError:
@@ -28,12 +32,12 @@ def _get_or_default(value: Any, mapping: Dict[Hashable, Any], default_key: Any)
 class ArgValueParser(object):
     @classmethod
-    def split_and_parse_strs(cls, strings: Union[List[str], str, None],
+    def split_and_parse_strs(cls, strings: Union[Sequence[str], str, None],
                              func: Callable[[str], Any] = None,
                              sep=',', do_trim=True, ignore_blank=True) -> Iterable[Any]:
         """
         split and parse multi string values
-        :param strings:  list of strings
+        :param strings:  sequence of strings
         :param func:     function to parse single string value
         :param sep:      seperator to split single string
         :param do_trim:  trim every word or not
@@ -51,7 +55,7 @@ class ArgValueParser(object):
                     yield func(x) if func else x
     @classmethod
-    def split_strs_to_set(cls, values: Union[List[str], str, None],
+    def split_strs_to_set(cls, values: Union[Sequence[str], str, None],
                           func: Callable[[str], Any] = None,
                           sep=',', do_trim=True, ignore_blank=True) -> Optional[Set[Any]]:
         """
@@ -60,7 +64,7 @@ class ArgValueParser(object):
         return set(cls.split_and_parse_strs(values, func, sep, do_trim, ignore_blank))
     @classmethod
-    def split_strs_to_list(cls, values: Union[List[str], str, None],
+    def split_strs_to_list(cls, values: Union[Sequence[str], str, None],
                            func: Callable[[str], Any] = None,
                            sep=',', do_trim=True, ignore_blank=True) -> Optional[List[Any]]:
         """

feilian/dataframe.py CHANGED Viewed

@@ -22,8 +22,11 @@ pd_version = [int(x) for x in pd.__version__.split('.')]
 if pd_version[0] < 1 or (pd_version[0] == 1 and pd_version[1] < 5):
     PD_PARAM_NEWLINE = 'line_terminator'
+FILE_FORMAT = Literal['csv', 'tsv', 'json', 'xlsx', 'parquet']
+COMPRESSION_FORMAT = Literal[None, 'infer', 'snappy', 'gzip', 'brotli', 'bz2', 'zip', 'xz']
 def read_dataframe(file: str, *args, sheet_name=0,
-                   file_format: Literal['csv', 'tsv', 'json', 'xlsx'] = None,
+                   file_format: FILE_FORMAT = None,
                    jsonl=False, dtype: type = None,
                    **kwargs) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
     """
@@ -31,7 +34,7 @@ def read_dataframe(file: str, *args, sheet_name=0,
     :param file:        the file to be read
     :param args:        extra args for `pd.read_xx()`
     :param sheet_name:      `sheet_name` for `pd.read_excel()`
-    :param file_format:     csv, tsv, json ,xlsx
+    :param file_format:     csv, tsv, json ,xlsx, parquet
     :param jsonl:       jsonl format or not, only used in json format
     :param dtype:       `dtype` for `pd.read_xx()`
     :param kwargs:      extra kwargs for `pd.read_xx()`
@@ -46,12 +49,16 @@ def read_dataframe(file: str, *args, sheet_name=0,
         if key in kwargs and kwargs.pop(key):
             jsonl = True
-    # if the file format is tsv, actually same as csv
+    # handle special formats
     if file_format == 'tsv':
+        # if the file format is tsv, actually same as csv
         file_format = 'csv'
         if 'sep' in kwargs:
             kwargs.pop('sep')
         kwargs['delimiter'] = '\t'
+    elif file_format == 'jsonl':
+        file_format = 'json'
+        jsonl = True
     if file_format == 'csv':
         return pd.read_csv(file, *args, dtype=dtype, **kwargs)
@@ -59,13 +66,16 @@ def read_dataframe(file: str, *args, sheet_name=0,
         return pd.read_excel(file, *args, sheet_name=sheet_name, dtype=dtype, **kwargs)
     elif file_format == 'json':
         return pd.read_json(file, *args, lines=jsonl, dtype=dtype, **kwargs)
+    elif file_format == 'parquet':
+        return pd.read_parquet(file, *args, **kwargs)
     else:
         raise IOError(f"Unknown file format: {file}")
 def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]',  'pd.WriteBuffer[str]'],
                    df: Union[pd.DataFrame, Iterable[Union[pd.Series, Dict[str, Any]]]],
                    *args, sheet_name='Sheet1',
-                   file_format: Literal['csv', 'tsv', 'json', 'xlsx'] = None,
+                   file_format: FILE_FORMAT = None,
+                   compression: COMPRESSION_FORMAT = None,
                    index=False, index_label=None,
                    encoding='utf-8', newline='\n',
                    force_ascii=False,
@@ -80,7 +90,9 @@ def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]',  'pd.WriteBuffer[st
     :param df:                  the data
     :param args:                extra args for df.to_xx()
     :param sheet_name:          `sheet_name` for excel format
-    :param file_format:         csv, tsv, json, xlsx
+    :param file_format:         csv, tsv, json, xlsx, parquet
+    :param compression:         name of the compression to use.
+                                use `None` for no compression.
     :param index:               save index or not, see docs in df.to_csv();
                                 if set as str and `index_label` not set, `index_label` will be set as this
     :param index_label:         header for the index when `index` is `True`
@@ -127,24 +139,31 @@ def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]',  'pd.WriteBuffer[st
     if index_label is None and isinstance(index, str):
         index, index_label = True, index
-    # tsv is actually a csv
+    # handle special formats
     if file_format == 'tsv':
+        # tsv is actually a csv
         file_format = 'csv'
         kwargs['sep'] = '\t'
+    elif file_format == 'jsonl':
+        file_format = 'json'
+        jsonl = True
     # save to file for different format
     if file_format == 'csv':
         kwargs[PD_PARAM_NEWLINE] = newline
-        df.to_csv(file, *args, index=index, index_label=index_label, encoding=encoding, **kwargs)
+        df.to_csv(file, *args, compression=compression, index=index, index_label=index_label,
+                  encoding=encoding, **kwargs)
     elif file_format == 'xlsx':
         df.to_excel(file, *args, index=index, index_label=index_label, sheet_name=sheet_name, **kwargs)
     elif file_format == 'json':
         if jsonl:
             orient = 'records'
             index = True
-        df.to_json(file, *args, index=index, force_ascii=force_ascii,
-                   orient=orient, lines=jsonl,
+        df.to_json(file, *args, compression=compression, index=index,
+                   force_ascii=force_ascii, orient=orient, lines=jsonl,
                    **kwargs)
+    elif file_format == 'parquet':
+        df.to_parquet(file, *args, compression=compression, index=index, **kwargs)
     else:
         raise IOError(f"Unknown file format: {file}")

feilian/json.py CHANGED Viewed

@@ -4,18 +4,29 @@ from typing import Dict, List, Union, Any
 import json
 from .io import ensure_parent_dir_exist
-def read_json(filepath: str, encoding='utf-8', **kwargs):
+def read_json(filepath: str, jsonl=False, encoding='utf-8', **kwargs):
     """
     An agent for `json.load()` with some default value.
     """
     with open(filepath, encoding=encoding) as f:
-        return json.load(f, **kwargs)
+        if jsonl:
+            return [json.loads(x) for x in f]
+        else:
+            return json.load(f, **kwargs)
-def save_json(filepath: str, data: Union[Dict[str, Any], List[Any]],
+def save_json(filepath: str, data: Union[Dict[str, Any], List[Any]], jsonl=False,
               encoding='utf-8', newline='\n', indent=2, ensure_ascii=False, **kwargs):
     """
     An agent for `json.dump()` with some default value.
     """
+    if jsonl and not isinstance(data, list):
+        # data should be a list
+        raise ValueError("data should be a list when save as jsonl format")
     ensure_parent_dir_exist(filepath)
     with open(filepath, 'w', encoding=encoding, newline=newline) as f:
-        json.dump(data, f, indent=indent, ensure_ascii=ensure_ascii, **kwargs)
+        if jsonl:
+            for x in data:
+                f.write(json.dumps(x, ensure_ascii=ensure_ascii, **kwargs))
+                f.write(newline)
+        else:
+            json.dump(data, f, indent=indent, ensure_ascii=ensure_ascii, **kwargs)

feilian/version.py CHANGED Viewed

@@ -3,7 +3,7 @@
 try:
     from ._dist_ver import VERSION, __version__
 except ImportError:
-    from importlib.metadata import version, PackageNotFoundError
+    from importlib_metadata import version, PackageNotFoundError
     try:
         __version__ = version('feilian')
     except PackageNotFoundError:

{feilian-1.1.7.dist-info → feilian-1.1.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: feilian
-Version: 1.1.7
+Version: 1.1.8
 Summary: General data processing tool.
 Author-email: darkpeath <darkpeath@gmail.com>
 Project-URL: Homepage, https://github.com/darkpeath/feilian

feilian-1.1.8.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,13 @@
+feilian/__init__.py,sha256=D_OYSAfDALQnRsbaAXMnHyC0LtykaIXLTHq7XNcsYjU,763
+feilian/_dist_ver.py,sha256=nHzOoqCTSUUNWuG27m4tgLUVSw_cZ9edFvXao_UemJA,148
+feilian/arg.py,sha256=n2nIcmC_3rb9A6BOzm9C5z3-T4lnubaGzH2sFhtqwZQ,8402
+feilian/dataframe.py,sha256=G7Ai_JsMS7kNfqRNptqGOOxGWjyYwcJrT73IttDO1vo,10653
+feilian/datetime.py,sha256=IONvWhLeGEy9IVe6GWKEW3FhrfRrShyhGP8-RTf9r3c,763
+feilian/io.py,sha256=aYN3QwWcLoRKzhGMNutqdkmxArVcXfeWXzxCB07LcFc,155
+feilian/json.py,sha256=PSjDJ3MCdolKwfAOmT9DuS8KnJZo9oGABKgJDduCliU,1187
+feilian/string.py,sha256=G_X3dnR0Oxmi4hXF-6E5jm5M7GPjGoMYrSMyI1dj6Z4,370
+feilian/version.py,sha256=oH_DvE7jRCWlCCX9SSadwxwRJXFas_rIisYLBGPYZn4,350
+feilian-1.1.8.dist-info/METADATA,sha256=VIBIvtc9CJxD39A5m6be_sQqYD5zRXuWOftRPBjgvDo,902
+feilian-1.1.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+feilian-1.1.8.dist-info/top_level.txt,sha256=1Q2-B6KJrcTr7drW_kik35PTVEUJLPP4wVrn0kYKwGw,8
+feilian-1.1.8.dist-info/RECORD,,

{feilian-1.1.7.dist-info → feilian-1.1.8.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.41.2)
+Generator: bdist_wheel (0.43.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

feilian-1.1.7.dist-info/RECORD DELETED Viewed

@@ -1,13 +0,0 @@
-feilian/__init__.py,sha256=D_OYSAfDALQnRsbaAXMnHyC0LtykaIXLTHq7XNcsYjU,763
-feilian/_dist_ver.py,sha256=J13_krnbZzV1wGrNVLJT8EIwC3Tx29u3m5Bykt3ljFc,148
-feilian/arg.py,sha256=lGZ99RkU9uhE_ziaFK2SClS3vrygteNNxTm3i_68SWw,8359
-feilian/dataframe.py,sha256=ac-8RqE2YTFmE6Gk-BdqMRAC-VqSYfyC1y1gwJe3fWQ,9838
-feilian/datetime.py,sha256=IONvWhLeGEy9IVe6GWKEW3FhrfRrShyhGP8-RTf9r3c,763
-feilian/io.py,sha256=aYN3QwWcLoRKzhGMNutqdkmxArVcXfeWXzxCB07LcFc,155
-feilian/json.py,sha256=1GsnL-CASi4xBaycMN-Tw1ytxty7GeL2wmt7nfLfnB4,754
-feilian/string.py,sha256=G_X3dnR0Oxmi4hXF-6E5jm5M7GPjGoMYrSMyI1dj6Z4,370
-feilian/version.py,sha256=rwuYOfaG8mzghVTba-c5zBWNrj-wZaE8iO7fX4tcGUQ,350
-feilian-1.1.7.dist-info/METADATA,sha256=Yolz6Nlb4bd_eTpYi2BzavCnzs6OWefQWdnv5LKiR2E,902
-feilian-1.1.7.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
-feilian-1.1.7.dist-info/top_level.txt,sha256=1Q2-B6KJrcTr7drW_kik35PTVEUJLPP4wVrn0kYKwGw,8
-feilian-1.1.7.dist-info/RECORD,,

{feilian-1.1.7.dist-info → feilian-1.1.8.dist-info}/top_level.txt RENAMED Viewed

File without changes

feilian 1.1.7__py3-none-any.whl → 1.1.8__py3-none-any.whl

Potentially problematic release.

feilian-1.1.7-py3-none-any.whl → feilian-1.1.8-py3-none-any.whl