feilian 1.1.7__py3-none-any.whl → 1.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of feilian might be problematic. Click here for more details.

feilian/_dist_ver.py CHANGED
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  # file generated by setuptools_scm
3
3
  # don't change, don't track in version control
4
- VERSION = (1, 1, 7)
5
- __version__ = '1.1.7'
4
+ VERSION = (1, 1, 8)
5
+ __version__ = '1.1.8'
feilian/arg.py CHANGED
@@ -1,6 +1,10 @@
1
1
  # -*- coding: utf-8 -*-
2
2
 
3
- from typing import Union, List, Any, Iterable, Callable, Set, Optional, Tuple, Dict, Hashable
3
+ from typing import (
4
+ Union, List, Any, Iterable,
5
+ Callable, Set, Optional, Tuple,
6
+ Dict, Hashable, Sequence,
7
+ )
4
8
  try:
5
9
  from typing import Literal
6
10
  except ImportError:
@@ -28,12 +32,12 @@ def _get_or_default(value: Any, mapping: Dict[Hashable, Any], default_key: Any)
28
32
 
29
33
  class ArgValueParser(object):
30
34
  @classmethod
31
- def split_and_parse_strs(cls, strings: Union[List[str], str, None],
35
+ def split_and_parse_strs(cls, strings: Union[Sequence[str], str, None],
32
36
  func: Callable[[str], Any] = None,
33
37
  sep=',', do_trim=True, ignore_blank=True) -> Iterable[Any]:
34
38
  """
35
39
  split and parse multi string values
36
- :param strings: list of strings
40
+ :param strings: sequence of strings
37
41
  :param func: function to parse single string value
38
42
  :param sep: separator to split single string
39
43
  :param do_trim: trim every word or not
@@ -51,7 +55,7 @@ class ArgValueParser(object):
51
55
  yield func(x) if func else x
52
56
 
53
57
  @classmethod
54
- def split_strs_to_set(cls, values: Union[List[str], str, None],
58
+ def split_strs_to_set(cls, values: Union[Sequence[str], str, None],
55
59
  func: Callable[[str], Any] = None,
56
60
  sep=',', do_trim=True, ignore_blank=True) -> Optional[Set[Any]]:
57
61
  """
@@ -60,7 +64,7 @@ class ArgValueParser(object):
60
64
  return set(cls.split_and_parse_strs(values, func, sep, do_trim, ignore_blank))
61
65
 
62
66
  @classmethod
63
- def split_strs_to_list(cls, values: Union[List[str], str, None],
67
+ def split_strs_to_list(cls, values: Union[Sequence[str], str, None],
64
68
  func: Callable[[str], Any] = None,
65
69
  sep=',', do_trim=True, ignore_blank=True) -> Optional[List[Any]]:
66
70
  """
feilian/dataframe.py CHANGED
@@ -22,8 +22,11 @@ pd_version = [int(x) for x in pd.__version__.split('.')]
22
22
  if pd_version[0] < 1 or (pd_version[0] == 1 and pd_version[1] < 5):
23
23
  PD_PARAM_NEWLINE = 'line_terminator'
24
24
 
25
+ FILE_FORMAT = Literal['csv', 'tsv', 'json', 'xlsx', 'parquet']
26
+ COMPRESSION_FORMAT = Literal[None, 'infer', 'snappy', 'gzip', 'brotli', 'bz2', 'zip', 'xz']
27
+
25
28
  def read_dataframe(file: str, *args, sheet_name=0,
26
- file_format: Literal['csv', 'tsv', 'json', 'xlsx'] = None,
29
+ file_format: FILE_FORMAT = None,
27
30
  jsonl=False, dtype: type = None,
28
31
  **kwargs) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
29
32
  """
@@ -31,7 +34,7 @@ def read_dataframe(file: str, *args, sheet_name=0,
31
34
  :param file: the file to be read
32
35
  :param args: extra args for `pd.read_xx()`
33
36
  :param sheet_name: `sheet_name` for `pd.read_excel()`
34
- :param file_format: csv, tsv, json ,xlsx
37
+ :param file_format: csv, tsv, json, xlsx, parquet
35
38
  :param jsonl: jsonl format or not, only used in json format
36
39
  :param dtype: `dtype` for `pd.read_xx()`
37
40
  :param kwargs: extra kwargs for `pd.read_xx()`
@@ -46,12 +49,16 @@ def read_dataframe(file: str, *args, sheet_name=0,
46
49
  if key in kwargs and kwargs.pop(key):
47
50
  jsonl = True
48
51
 
49
- # if the file format is tsv, actually same as csv
52
+ # handle special formats
50
53
  if file_format == 'tsv':
54
+ # if the file format is tsv, actually same as csv
51
55
  file_format = 'csv'
52
56
  if 'sep' in kwargs:
53
57
  kwargs.pop('sep')
54
58
  kwargs['delimiter'] = '\t'
59
+ elif file_format == 'jsonl':
60
+ file_format = 'json'
61
+ jsonl = True
55
62
 
56
63
  if file_format == 'csv':
57
64
  return pd.read_csv(file, *args, dtype=dtype, **kwargs)
@@ -59,13 +66,16 @@ def read_dataframe(file: str, *args, sheet_name=0,
59
66
  return pd.read_excel(file, *args, sheet_name=sheet_name, dtype=dtype, **kwargs)
60
67
  elif file_format == 'json':
61
68
  return pd.read_json(file, *args, lines=jsonl, dtype=dtype, **kwargs)
69
+ elif file_format == 'parquet':
70
+ return pd.read_parquet(file, *args, **kwargs)
62
71
  else:
63
72
  raise IOError(f"Unknown file format: {file}")
64
73
 
65
74
  def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[str]'],
66
75
  df: Union[pd.DataFrame, Iterable[Union[pd.Series, Dict[str, Any]]]],
67
76
  *args, sheet_name='Sheet1',
68
- file_format: Literal['csv', 'tsv', 'json', 'xlsx'] = None,
77
+ file_format: FILE_FORMAT = None,
78
+ compression: COMPRESSION_FORMAT = None,
69
79
  index=False, index_label=None,
70
80
  encoding='utf-8', newline='\n',
71
81
  force_ascii=False,
@@ -80,7 +90,9 @@ def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[st
80
90
  :param df: the data
81
91
  :param args: extra args for df.to_xx()
82
92
  :param sheet_name: `sheet_name` for excel format
83
- :param file_format: csv, tsv, json, xlsx
93
+ :param file_format: csv, tsv, json, xlsx, parquet
94
+ :param compression: name of the compression to use.
95
+ use `None` for no compression.
84
96
  :param index: save index or not, see docs in df.to_csv();
85
97
  if set as str and `index_label` not set, `index_label` will be set as this
86
98
  :param index_label: header for the index when `index` is `True`
@@ -127,24 +139,31 @@ def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[st
127
139
  if index_label is None and isinstance(index, str):
128
140
  index, index_label = True, index
129
141
 
130
- # tsv is actually a csv
142
+ # handle special formats
131
143
  if file_format == 'tsv':
144
+ # tsv is actually a csv
132
145
  file_format = 'csv'
133
146
  kwargs['sep'] = '\t'
147
+ elif file_format == 'jsonl':
148
+ file_format = 'json'
149
+ jsonl = True
134
150
 
135
151
  # save to file for different format
136
152
  if file_format == 'csv':
137
153
  kwargs[PD_PARAM_NEWLINE] = newline
138
- df.to_csv(file, *args, index=index, index_label=index_label, encoding=encoding, **kwargs)
154
+ df.to_csv(file, *args, compression=compression, index=index, index_label=index_label,
155
+ encoding=encoding, **kwargs)
139
156
  elif file_format == 'xlsx':
140
157
  df.to_excel(file, *args, index=index, index_label=index_label, sheet_name=sheet_name, **kwargs)
141
158
  elif file_format == 'json':
142
159
  if jsonl:
143
160
  orient = 'records'
144
161
  index = True
145
- df.to_json(file, *args, index=index, force_ascii=force_ascii,
146
- orient=orient, lines=jsonl,
162
+ df.to_json(file, *args, compression=compression, index=index,
163
+ force_ascii=force_ascii, orient=orient, lines=jsonl,
147
164
  **kwargs)
165
+ elif file_format == 'parquet':
166
+ df.to_parquet(file, *args, compression=compression, index=index, **kwargs)
148
167
  else:
149
168
  raise IOError(f"Unknown file format: {file}")
150
169
 
feilian/json.py CHANGED
@@ -4,18 +4,29 @@ from typing import Dict, List, Union, Any
4
4
  import json
5
5
  from .io import ensure_parent_dir_exist
6
6
 
7
- def read_json(filepath: str, encoding='utf-8', **kwargs):
7
+ def read_json(filepath: str, jsonl=False, encoding='utf-8', **kwargs):
8
8
  """
9
9
  An agent for `json.load()` with some default value.
10
10
  """
11
11
  with open(filepath, encoding=encoding) as f:
12
- return json.load(f, **kwargs)
12
+ if jsonl:
13
+ return [json.loads(x) for x in f]
14
+ else:
15
+ return json.load(f, **kwargs)
13
16
 
14
- def save_json(filepath: str, data: Union[Dict[str, Any], List[Any]],
17
+ def save_json(filepath: str, data: Union[Dict[str, Any], List[Any]], jsonl=False,
15
18
  encoding='utf-8', newline='\n', indent=2, ensure_ascii=False, **kwargs):
16
19
  """
17
20
  An agent for `json.dump()` with some default value.
18
21
  """
22
+ if jsonl and not isinstance(data, list):
23
+ # data should be a list
24
+ raise ValueError("data should be a list when save as jsonl format")
19
25
  ensure_parent_dir_exist(filepath)
20
26
  with open(filepath, 'w', encoding=encoding, newline=newline) as f:
21
- json.dump(data, f, indent=indent, ensure_ascii=ensure_ascii, **kwargs)
27
+ if jsonl:
28
+ for x in data:
29
+ f.write(json.dumps(x, ensure_ascii=ensure_ascii, **kwargs))
30
+ f.write(newline)
31
+ else:
32
+ json.dump(data, f, indent=indent, ensure_ascii=ensure_ascii, **kwargs)
feilian/version.py CHANGED
@@ -3,7 +3,7 @@
3
3
  try:
4
4
  from ._dist_ver import VERSION, __version__
5
5
  except ImportError:
6
- from importlib.metadata import version, PackageNotFoundError
6
+ from importlib_metadata import version, PackageNotFoundError
7
7
  try:
8
8
  __version__ = version('feilian')
9
9
  except PackageNotFoundError:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: feilian
3
- Version: 1.1.7
3
+ Version: 1.1.8
4
4
  Summary: General data processing tool.
5
5
  Author-email: darkpeath <darkpeath@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/darkpeath/feilian
@@ -0,0 +1,13 @@
1
+ feilian/__init__.py,sha256=D_OYSAfDALQnRsbaAXMnHyC0LtykaIXLTHq7XNcsYjU,763
2
+ feilian/_dist_ver.py,sha256=nHzOoqCTSUUNWuG27m4tgLUVSw_cZ9edFvXao_UemJA,148
3
+ feilian/arg.py,sha256=n2nIcmC_3rb9A6BOzm9C5z3-T4lnubaGzH2sFhtqwZQ,8402
4
+ feilian/dataframe.py,sha256=G7Ai_JsMS7kNfqRNptqGOOxGWjyYwcJrT73IttDO1vo,10653
5
+ feilian/datetime.py,sha256=IONvWhLeGEy9IVe6GWKEW3FhrfRrShyhGP8-RTf9r3c,763
6
+ feilian/io.py,sha256=aYN3QwWcLoRKzhGMNutqdkmxArVcXfeWXzxCB07LcFc,155
7
+ feilian/json.py,sha256=PSjDJ3MCdolKwfAOmT9DuS8KnJZo9oGABKgJDduCliU,1187
8
+ feilian/string.py,sha256=G_X3dnR0Oxmi4hXF-6E5jm5M7GPjGoMYrSMyI1dj6Z4,370
9
+ feilian/version.py,sha256=oH_DvE7jRCWlCCX9SSadwxwRJXFas_rIisYLBGPYZn4,350
10
+ feilian-1.1.8.dist-info/METADATA,sha256=VIBIvtc9CJxD39A5m6be_sQqYD5zRXuWOftRPBjgvDo,902
11
+ feilian-1.1.8.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
12
+ feilian-1.1.8.dist-info/top_level.txt,sha256=1Q2-B6KJrcTr7drW_kik35PTVEUJLPP4wVrn0kYKwGw,8
13
+ feilian-1.1.8.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.2)
2
+ Generator: bdist_wheel (0.43.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,13 +0,0 @@
1
- feilian/__init__.py,sha256=D_OYSAfDALQnRsbaAXMnHyC0LtykaIXLTHq7XNcsYjU,763
2
- feilian/_dist_ver.py,sha256=J13_krnbZzV1wGrNVLJT8EIwC3Tx29u3m5Bykt3ljFc,148
3
- feilian/arg.py,sha256=lGZ99RkU9uhE_ziaFK2SClS3vrygteNNxTm3i_68SWw,8359
4
- feilian/dataframe.py,sha256=ac-8RqE2YTFmE6Gk-BdqMRAC-VqSYfyC1y1gwJe3fWQ,9838
5
- feilian/datetime.py,sha256=IONvWhLeGEy9IVe6GWKEW3FhrfRrShyhGP8-RTf9r3c,763
6
- feilian/io.py,sha256=aYN3QwWcLoRKzhGMNutqdkmxArVcXfeWXzxCB07LcFc,155
7
- feilian/json.py,sha256=1GsnL-CASi4xBaycMN-Tw1ytxty7GeL2wmt7nfLfnB4,754
8
- feilian/string.py,sha256=G_X3dnR0Oxmi4hXF-6E5jm5M7GPjGoMYrSMyI1dj6Z4,370
9
- feilian/version.py,sha256=rwuYOfaG8mzghVTba-c5zBWNrj-wZaE8iO7fX4tcGUQ,350
10
- feilian-1.1.7.dist-info/METADATA,sha256=Yolz6Nlb4bd_eTpYi2BzavCnzs6OWefQWdnv5LKiR2E,902
11
- feilian-1.1.7.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
12
- feilian-1.1.7.dist-info/top_level.txt,sha256=1Q2-B6KJrcTr7drW_kik35PTVEUJLPP4wVrn0kYKwGw,8
13
- feilian-1.1.7.dist-info/RECORD,,