feilian 1.1.7.tar.gz → 1.1.9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

PKG-INFO
@@ -1,11 +1,13 @@
 Metadata-Version: 2.1
 Name: feilian
-Version: 1.1.7
+Version: 1.1.9
 Summary: General data processing tool.
 Author-email: darkpeath <darkpeath@gmail.com>
 Project-URL: Homepage, https://github.com/darkpeath/feilian
 Description-Content-Type: text/markdown
+Requires-Dist: pandas
 Provides-Extra: extra
+Requires-Dist: tqdm; extra == "extra"
 
 # feilian
 
feilian/__init__.py
@@ -6,6 +6,7 @@ from .dataframe import is_empty_text, is_nonempty_text, is_blank_text, is_non_bl
 from .datetime import format_time, format_date
 from .arg import ArgValueParser
 from .json import read_json, save_json
+from .utils import flatten_dict
 from .version import __version__
 
 __all__ = [
@@ -15,5 +16,6 @@ __all__ = [
     'format_time', 'format_date',
     'ArgValueParser',
     'read_json', 'save_json',
+    'flatten_dict',
     '__version__',
 ]
feilian/_dist_ver.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
 # file generated by setuptools_scm
 # don't change, don't track in version control
-VERSION = (1, 1, 7)
-__version__ = '1.1.7'
+VERSION = (1, 1, 9)
+__version__ = '1.1.9'
feilian/arg.py
@@ -1,6 +1,10 @@
 # -*- coding: utf-8 -*-
 
-from typing import Union, List, Any, Iterable, Callable, Set, Optional, Tuple, Dict, Hashable
+from typing import (
+    Union, List, Any, Iterable,
+    Callable, Set, Optional, Tuple,
+    Dict, Hashable, Sequence,
+)
 try:
     from typing import Literal
 except ImportError:
@@ -28,12 +32,12 @@ def _get_or_default(value: Any, mapping: Dict[Hashable, Any], default_key: Any)
 
 class ArgValueParser(object):
     @classmethod
-    def split_and_parse_strs(cls, strings: Union[List[str], str, None],
+    def split_and_parse_strs(cls, strings: Union[Sequence[str], str, None],
                              func: Callable[[str], Any] = None,
                              sep=',', do_trim=True, ignore_blank=True) -> Iterable[Any]:
        """
        split and parse multi string values
-       :param strings: list of strings
+       :param strings: sequence of strings
        :param func: function to parse single string value
        :param sep: seperator to split single string
        :param do_trim: trim every word or not
@@ -51,7 +55,7 @@ class ArgValueParser(object):
             yield func(x) if func else x
 
     @classmethod
-    def split_strs_to_set(cls, values: Union[List[str], str, None],
+    def split_strs_to_set(cls, values: Union[Sequence[str], str, None],
                           func: Callable[[str], Any] = None,
                           sep=',', do_trim=True, ignore_blank=True) -> Optional[Set[Any]]:
         """
@@ -60,7 +64,7 @@ class ArgValueParser(object):
         return set(cls.split_and_parse_strs(values, func, sep, do_trim, ignore_blank))
 
     @classmethod
-    def split_strs_to_list(cls, values: Union[List[str], str, None],
+    def split_strs_to_list(cls, values: Union[Sequence[str], str, None],
                            func: Callable[[str], Any] = None,
                            sep=',', do_trim=True, ignore_blank=True) -> Optional[List[Any]]:
         """
feilian/dataframe.py
@@ -22,8 +22,11 @@ pd_version = [int(x) for x in pd.__version__.split('.')]
 if pd_version[0] < 1 or (pd_version[0] == 1 and pd_version[1] < 5):
     PD_PARAM_NEWLINE = 'line_terminator'
 
+FILE_FORMAT = Literal['csv', 'tsv', 'json', 'xlsx', 'parquet']
+COMPRESSION_FORMAT = Literal[None, 'infer', 'snappy', 'gzip', 'brotli', 'bz2', 'zip', 'xz']
+
 def read_dataframe(file: str, *args, sheet_name=0,
-                   file_format: Literal['csv', 'tsv', 'json', 'xlsx'] = None,
+                   file_format: FILE_FORMAT = None,
                    jsonl=False, dtype: type = None,
                    **kwargs) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
     """
@@ -31,7 +34,7 @@ def read_dataframe(file: str, *args, sheet_name=0,
     :param file: the file to be read
     :param args: extra args for `pd.read_xx()`
     :param sheet_name: `sheet_name` for `pd.read_excel()`
-    :param file_format: csv, tsv, json ,xlsx
+    :param file_format: csv, tsv, json ,xlsx, parquet
     :param jsonl: jsonl format or not, only used in json format
     :param dtype: `dtype` for `pd.read_xx()`
     :param kwargs: extra kwargs for `pd.read_xx()`
@@ -46,12 +49,16 @@ def read_dataframe(file: str, *args, sheet_name=0,
         if key in kwargs and kwargs.pop(key):
             jsonl = True
 
-    # if the file format is tsv, actually same as csv
+    # handle special formats
     if file_format == 'tsv':
+        # if the file format is tsv, actually same as csv
         file_format = 'csv'
         if 'sep' in kwargs:
             kwargs.pop('sep')
         kwargs['delimiter'] = '\t'
+    elif file_format == 'jsonl':
+        file_format = 'json'
+        jsonl = True
 
     if file_format == 'csv':
         return pd.read_csv(file, *args, dtype=dtype, **kwargs)
@@ -59,13 +66,16 @@
         return pd.read_excel(file, *args, sheet_name=sheet_name, dtype=dtype, **kwargs)
     elif file_format == 'json':
         return pd.read_json(file, *args, lines=jsonl, dtype=dtype, **kwargs)
+    elif file_format == 'parquet':
+        return pd.read_parquet(file, *args, **kwargs)
     else:
         raise IOError(f"Unknown file format: {file}")
 
 def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[str]'],
                    df: Union[pd.DataFrame, Iterable[Union[pd.Series, Dict[str, Any]]]],
                    *args, sheet_name='Sheet1',
-                   file_format: Literal['csv', 'tsv', 'json', 'xlsx'] = None,
+                   file_format: FILE_FORMAT = None,
+                   compression: COMPRESSION_FORMAT = None,
                    index=False, index_label=None,
                    encoding='utf-8', newline='\n',
                    force_ascii=False,
@@ -80,7 +90,9 @@ def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[st
     :param df: the data
     :param args: extra args for df.to_xx()
     :param sheet_name: `sheet_name` for excel format
-    :param file_format: csv, tsv, json, xlsx
+    :param file_format: csv, tsv, json, xlsx, parquet
+    :param compression: name of the compression to use.
+        use `None` for no compression.
     :param index: save index or not, see docs in df.to_csv();
         if set as str and `index_label` not set, `index_label` will be set as this
     :param index_label: header for the index when `index` is `True`
@@ -127,24 +139,31 @@ def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[st
     if index_label is None and isinstance(index, str):
         index, index_label = True, index
 
-    # tsv is actually a csv
+    # handle special formats
     if file_format == 'tsv':
+        # tsv is actually a csv
         file_format = 'csv'
         kwargs['sep'] = '\t'
+    elif file_format == 'jsonl':
+        file_format = 'json'
+        jsonl = True
 
     # save to file for different format
     if file_format == 'csv':
         kwargs[PD_PARAM_NEWLINE] = newline
-        df.to_csv(file, *args, index=index, index_label=index_label, encoding=encoding, **kwargs)
+        df.to_csv(file, *args, compression=compression, index=index, index_label=index_label,
+                  encoding=encoding, **kwargs)
     elif file_format == 'xlsx':
         df.to_excel(file, *args, index=index, index_label=index_label, sheet_name=sheet_name, **kwargs)
     elif file_format == 'json':
         if jsonl:
             orient = 'records'
             index = True
-        df.to_json(file, *args, index=index, force_ascii=force_ascii,
-                   orient=orient, lines=jsonl,
+        df.to_json(file, *args, compression=compression, index=index,
+                   force_ascii=force_ascii, orient=orient, lines=jsonl,
                    **kwargs)
+    elif file_format == 'parquet':
+        df.to_parquet(file, *args, compression=compression, index=index, **kwargs)
     else:
         raise IOError(f"Unknown file format: {file}")
 
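Taken together, the dataframe.py hunks add 'parquet' to the accepted formats, accept 'jsonl' as an alias that rewrites itself to json with lines enabled, and thread a compression argument through to the pandas writers. A round-trip sketch under the signatures shown above (file names and data are illustrative; pandas needs pyarrow or fastparquet installed for parquet I/O):

import pandas as pd
from feilian.dataframe import read_dataframe, save_dataframe  # module path per the hunks above

df = pd.DataFrame({"a": [1, 2], "b": ["x", "y"]})
# compression is forwarded to df.to_parquet(); 'gzip' is one of the COMPRESSION_FORMAT values
save_dataframe("demo.parquet", df, file_format="parquet", compression="gzip")
df2 = read_dataframe("demo.parquet", file_format="parquet")
# the new 'jsonl' alias becomes file_format='json' with one record per line
save_dataframe("demo.jsonl", df, file_format="jsonl")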
feilian/json.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+
+from typing import Dict, List, Union, Any
+import json
+from .io import ensure_parent_dir_exist
+
+def read_json(filepath: str, jsonl=False, encoding='utf-8', **kwargs):
+    """
+    An agent for `json.load()` with some default value.
+    """
+    with open(filepath, encoding=encoding) as f:
+        if jsonl:
+            return [json.loads(x) for x in f]
+        else:
+            return json.load(f, **kwargs)
+
+def save_json(filepath: str, data: Union[Dict[str, Any], List[Any]], jsonl=False,
+              encoding='utf-8', newline='\n', indent=2, ensure_ascii=False, **kwargs):
+    """
+    An agent for `json.dump()` with some default value.
+    """
+    if jsonl and not isinstance(data, list):
+        # data should be a list
+        raise ValueError("data should be a list when save as jsonl format")
+    ensure_parent_dir_exist(filepath)
+    with open(filepath, 'w', encoding=encoding, newline=newline) as f:
+        if jsonl:
+            for x in data:
+                f.write(json.dumps(x, ensure_ascii=ensure_ascii, **kwargs))
+                f.write(newline)
+        else:
+            json.dump(data, f, indent=indent, ensure_ascii=ensure_ascii, **kwargs)
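The rewritten json.py (its 1.1.7 form is shown as removed at the end of this diff) gains a jsonl switch on both helpers plus a guard that jsonl data must be a list. A short sketch, assuming the top-level exports from the __init__.py hunk:

from feilian import read_json, save_json

rows = [{"id": 1}, {"id": 2}]
save_json("out.jsonl", rows, jsonl=True)        # writes one json.dumps() per line
assert read_json("out.jsonl", jsonl=True) == rows
save_json("out.json", {"rows": rows})           # plain json.dump() with indent=2 by default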
feilian/utils.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from typing import Dict, Any, Union, Collection
+
+def flatten_dict(data: Dict[str, Any], prefix="", joiner=".",
+                 exclude: Union[None, str, Collection[str]] = None,
+                 frozen: Union[None, str, Collection[str]] = None,
+                 empty_as_default=False, empty_value=None,
+                 res: Dict[str, Any] = None) -> Dict[str, Any]:
+    """
+    flatten dict as a flat one layer dict
+    :param data: origin dict
+    :param prefix: prefix for key in the dict
+    :param joiner: join symbol for different layer key
+    :param exclude: prefix to be excluded from result
+    :param frozen: keys not to be flattened
+    :param empty_as_default: should set a default value if value is an empty dict
+    :param empty_value: if `empty_as_default` is `True`, used as the default value for empty dict
+    :param res: the result flat layer dict, create a new one if not given.
+    """
+    if res is None:
+        res = {}
+    if isinstance(exclude, str):
+        exclude = {exclude}
+    if isinstance(frozen, str):
+        frozen = {frozen}
+
+    # all keys are start with the prefix, ignore data
+    if exclude and prefix in exclude:
+        return res
+
+    # all keys in data should be frozen
+    if frozen and prefix in frozen:
+        for k, v in data.items():
+            res[prefix+k] = v
+        return res
+
+    for k, v in data.items():
+        k = prefix + k
+
+        if exclude and k in exclude:
+            # only the key should be excluded
+            continue
+
+        if frozen and k in frozen:
+            # frozen key, keep it as original value
+            res[k] = v
+            continue
+
+        if isinstance(v, dict):
+            if len(v) == 0:
+                # empty dict, set as default value if set
+                if empty_as_default:
+                    res[k] = empty_value
+            else:
+                # value is a dict, flatten recursively
+                flatten_dict(v, prefix=k+joiner, joiner=joiner, exclude=exclude, frozen=frozen, res=res)
+        else:
+            # normal value, keep it as original value
+            res[k] = v
+
+    return res
+
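A worked example of the new flatten_dict, tracing the branches above (keys and values are illustrative):

from feilian import flatten_dict  # newly exported via __all__

nested = {"a": {"b": 1, "c": {}}, "keep": {"x": 2}, "drop": {"y": 3}}
flat = flatten_dict(nested, exclude="drop", frozen="keep", empty_as_default=True)
# "a" is recursed with prefix "a.", the empty dict at "a.c" gets empty_value,
# "keep" is copied as-is, and "drop" is skipped entirely
assert flat == {"a.b": 1, "a.c": None, "keep": {"x": 2}}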
feilian/version.py
@@ -3,7 +3,7 @@
 try:
     from ._dist_ver import VERSION, __version__
 except ImportError:
-    from importlib.metadata import version, PackageNotFoundError
+    from importlib_metadata import version, PackageNotFoundError
     try:
         __version__ = version('feilian')
     except PackageNotFoundError:
feilian.egg-info/PKG-INFO
@@ -1,11 +1,13 @@
 Metadata-Version: 2.1
 Name: feilian
-Version: 1.1.7
+Version: 1.1.9
 Summary: General data processing tool.
 Author-email: darkpeath <darkpeath@gmail.com>
 Project-URL: Homepage, https://github.com/darkpeath/feilian
 Description-Content-Type: text/markdown
+Requires-Dist: pandas
 Provides-Extra: extra
+Requires-Dist: tqdm; extra == "extra"
 
 # feilian
 
feilian.egg-info/SOURCES.txt
@@ -10,6 +10,7 @@ feilian/datetime.py
 feilian/io.py
 feilian/json.py
 feilian/string.py
+feilian/utils.py
 feilian/version.py
 feilian.egg-info/PKG-INFO
 feilian.egg-info/SOURCES.txt
feilian/json.py (1.1.7 copy, removed)
@@ -1,21 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from typing import Dict, List, Union, Any
-import json
-from .io import ensure_parent_dir_exist
-
-def read_json(filepath: str, encoding='utf-8', **kwargs):
-    """
-    An agent for `json.load()` with some default value.
-    """
-    with open(filepath, encoding=encoding) as f:
-        return json.load(f, **kwargs)
-
-def save_json(filepath: str, data: Union[Dict[str, Any], List[Any]],
-              encoding='utf-8', newline='\n', indent=2, ensure_ascii=False, **kwargs):
-    """
-    An agent for `json.dump()` with some default value.
-    """
-    ensure_parent_dir_exist(filepath)
-    with open(filepath, 'w', encoding=encoding, newline=newline) as f:
-        json.dump(data, f, indent=indent, ensure_ascii=ensure_ascii, **kwargs)
8 files without changes