feilian 1.1.9__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of feilian might be problematic. Click here for more details.

feilian/__init__.py CHANGED
@@ -6,7 +6,8 @@ from .dataframe import is_empty_text, is_nonempty_text, is_blank_text, is_non_bl
6
6
  from .datetime import format_time, format_date
7
7
  from .arg import ArgValueParser
8
8
  from .json import read_json, save_json
9
- from .utils import flatten_dict
9
+ from .process import DataframeProcessor
10
+ from .utils import flatten_dict, flatten_list
10
11
  from .version import __version__
11
12
 
12
13
  __all__ = [
@@ -16,6 +17,7 @@ __all__ = [
16
17
  'format_time', 'format_date',
17
18
  'ArgValueParser',
18
19
  'read_json', 'save_json',
19
- 'flatten_dict',
20
+ 'DataframeProcessor',
21
+ 'flatten_dict', 'flatten_list',
20
22
  '__version__',
21
23
  ]
feilian/_dist_ver.py CHANGED
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  # file generated by setuptools_scm
3
3
  # don't change, don't track in version control
4
- VERSION = (1, 1, 9)
5
- __version__ = '1.1.9'
4
+ VERSION = (1, 2, 1)
5
+ __version__ = '1.2.1'
feilian/dataframe.py CHANGED
@@ -158,6 +158,7 @@ def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[st
158
158
  elif file_format == 'json':
159
159
  if jsonl:
160
160
  orient = 'records'
161
+ if orient not in ['split', 'table']:
161
162
  index = True
162
163
  df.to_json(file, *args, compression=compression, index=index,
163
164
  force_ascii=force_ascii, orient=orient, lines=jsonl,
feilian/json.py CHANGED
@@ -4,10 +4,16 @@ from typing import Dict, List, Union, Any
4
4
  import json
5
5
  from .io import ensure_parent_dir_exist
6
6
 
7
- def read_json(filepath: str, jsonl=False, encoding='utf-8', **kwargs):
7
def _is_jsonl(filepath: str, jsonl=None) -> bool:
    """
    Decide whether a file should be handled as JSON Lines.

    :param filepath: path of the file; a `str`, `bytes` or `os.PathLike` object
    :param jsonl: explicit choice; when `None`, it is inferred from the file extension
    :return: `True` if the file should be handled line by line, else `False`
    """
    if jsonl is not None:
        # an explicit choice always wins over the file extension
        return bool(jsonl)

    import os  # local import: only needed for the path normalisation below

    try:
        # paths may also be bytes or path-like objects (e.g. `pathlib.Path`),
        # which have no `.lower()`; normalise them to text first
        name = os.fsdecode(filepath)
    except TypeError:
        # not a path at all (e.g. an integer file descriptor): nothing to infer from
        return False
    return name.lower().endswith('.jsonl')
11
+
12
+ def read_json(filepath: str, jsonl=None, encoding='utf-8', **kwargs):
8
13
  """
9
14
  An agent for `json.load()` with some default value.
10
15
  """
16
+ jsonl = _is_jsonl(filepath, jsonl)
11
17
  with open(filepath, encoding=encoding) as f:
12
18
  if jsonl:
13
19
  return [json.loads(x) for x in f]
@@ -19,6 +25,7 @@ def save_json(filepath: str, data: Union[Dict[str, Any], List[Any]], jsonl=False
19
25
  """
20
26
  An agent for `json.dump()` with some default value.
21
27
  """
28
+ jsonl = _is_jsonl(filepath, jsonl)
22
29
  if jsonl and not isinstance(data, list):
23
30
  # data should be a list
24
31
  raise ValueError("data should be a list when save as jsonl format")
feilian/process.py ADDED
@@ -0,0 +1,66 @@
1
+ import abc
2
+ import tqdm
3
+ import pandas as pd
4
+ from typing import Any, Dict, Hashable
5
+ from .dataframe import read_dataframe, save_dataframe
6
+
7
class BaseProcessor(abc.ABC):
    """
    Base class for processing data.

    Subclasses implement `read_data`, `process` and `save_result`;
    `run` chains the three steps together.
    """

    @abc.abstractmethod
    def read_data(self, filepath: str) -> Any:
        """
        Read data from input file.
        :param filepath: path of the input file
        :return: the loaded data, which is passed to `process`
        """

    @abc.abstractmethod
    def save_result(self, filepath: str, result: Any):
        """
        Save result to output file.
        :param filepath: path of the output file
        :param result: the value returned by `process`
        """

    @abc.abstractmethod
    def process(self, data: Any) -> Any:
        """
        Process data and return result.
        :param data: the value returned by `read_data`
        :return: the result, which is passed to `save_result`
        """

    def run(self, input_path: str, output_path: str = None, write_output=True) -> Any:
        """
        Read from a file, process the data, and optionally save the result to a file.
        :param input_path: file with the data
        :param output_path: where to save the result, if not given, use input_path
        :param write_output: whether to write the result to the output file
        :return: the result of `process`
        """
        data = self.read_data(input_path)
        result = self.process(data)
        if write_output:
            # without an explicit output path the input file is overwritten
            self.save_result(output_path or input_path, result)
        # hand the result back so a run with `write_output=False` is still useful
        return result
41
+
42
class DataframeProcessor(BaseProcessor, abc.ABC):
    """
    Processor that reads a dataframe, handles it row by row and saves a new dataframe.

    Subclasses only need to implement `process_row`.
    """

    def __init__(self, input_dtype=None, progress=False):
        """
        :param input_dtype: forwarded as `dtype` to `read_dataframe` when reading the input
        :param progress: falsy for no progress bar; `True` for a bar labelled "process";
                         any other truthy value is used as the bar label
        """
        self.input_dtype = input_dtype
        self.progress = progress

    def read_data(self, filepath: str) -> pd.DataFrame:
        """
        Load the input file with `read_dataframe`, using the configured dtype.
        """
        return read_dataframe(filepath, dtype=self.input_dtype)

    def save_result(self, filepath: str, result: pd.DataFrame):
        """
        Write the result dataframe with `save_dataframe`.
        """
        save_dataframe(filepath, result)

    @abc.abstractmethod
    def process_row(self, i: Hashable, row: pd.Series) -> Dict[str, Any]:
        """
        Process a single row of data.
        :param i: index label of the row, as yielded by `DataFrame.iterrows()`
        :param row: the row content
        :return: one record of the result dataframe
        """

    def process(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Apply `process_row` to every row and collect the records into a new dataframe.
        """
        rows = data.iterrows()
        if self.progress:
            # `True` selects the default label, anything else is the label itself
            label = self.progress
            if label is True:
                label = "process"
            rows = tqdm.tqdm(rows, total=len(data), desc=label)

        records = []
        for index, row in rows:
            records.append(self.process_row(index, row))
        return pd.DataFrame(records)
66
+
feilian/utils.py CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env python
2
2
  # coding: utf-8
3
3
 
4
- from typing import Dict, Any, Union, Collection
4
+ from typing import Dict, Any, Union, Collection, List
5
5
 
6
6
  def flatten_dict(data: Dict[str, Any], prefix="", joiner=".",
7
7
  exclude: Union[None, str, Collection[str]] = None,
@@ -62,3 +62,21 @@ def flatten_dict(data: Dict[str, Any], prefix="", joiner=".",
62
62
 
63
63
  return res
64
64
 
65
+
66
def flatten_list(data: List[Any], res: List[Any] = None) -> List[Any]:
    """
    Flatten a nested list into a flat, one-layer list.

    Only `list` instances are expanded; any other value (tuple, dict, str, ...)
    is kept as a single element. The nesting is walked with an explicit stack,
    so the depth is not bounded by the interpreter recursion limit.

    :param data: a nested list
    :param res: the list to append the flattened elements to, create a new one if not given
    :return: `res`, holding the elements of `data` in depth-first, left-to-right order
    :raises RecursionError: if a list contains itself, directly or indirectly
    """
    if res is None:
        res = []

    # each stack entry is (list being walked, its partially consumed iterator)
    stack = [(data, iter(data))]
    # ids of the lists currently being walked, used to detect circular nesting
    active = {id(data)}

    while stack:
        current, items = stack[-1]
        for x in items:
            if isinstance(x, list):
                if id(x) in active:
                    raise RecursionError("circular reference detected while flattening list")
                # descend into the sub-list; the parent iterator resumes once it is done
                active.add(id(x))
                stack.append((x, iter(x)))
                break
            res.append(x)
        else:
            # iterator exhausted: this level is finished
            stack.pop()
            active.discard(id(current))

    return res
@@ -0,0 +1,210 @@
1
+ Metadata-Version: 2.1
2
+ Name: feilian
3
+ Version: 1.2.1
4
+ Summary: General data processing tool.
5
+ Author-email: darkpeath <darkpeath@gmail.com>
6
+ Project-URL: Homepage, https://github.com/darkpeath/feilian
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: pandas
9
+ Provides-Extra: extra
10
+ Requires-Dist: tqdm ; extra == 'extra'
11
+
12
+ # feilian
13
+
14
+ General data processing tool.
15
+
16
+ ## Features
17
+
18
+ - More default values, fewer required arguments.
19
+ - Encapsulation of pandas dataframe for simple usage.
20
+
21
+ ## Usage
22
+
23
+ ### Process data with pandas dataframe
24
+
25
+ #### Read a file as dataframe
26
+
27
+ ```python
28
+ import feilian
29
+
30
+ input_file = '' # file can be any csv, json, parquet or xlsx format
31
+ df = feilian.read_dataframe(input_file)
32
+ ```
33
+
34
+ #### Write dataframe to a file
35
+
36
+ ```python
37
+ import feilian
38
+ import pandas as pd
39
+
40
+ df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]))
41
+ output_file = '' # file can be any csv, json, parquet or xlsx format
42
+ feilian.save_dataframe(output_file, df)
43
+ ```
44
+
45
+ #### Iter a dataframe with a progress bar
46
+
47
+ ```python
48
+ import feilian
49
+ import pandas as pd
50
+
51
+ df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]))
52
+ feilian.iter_dataframe(data=df, progress_bar="process")
53
+ ```
54
+
55
+ #### Extract sample from a dataframe
56
+
57
+ ```python
58
+ import feilian
59
+ import pandas as pd
60
+
61
+ df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]))
62
+ sample = feilian.extract_dataframe_sample(size=2, shuffle=True)
63
+ ```
64
+
65
+ #### Test text value in dataframe
66
+
67
+ ```python
68
+ import feilian
69
+
70
+ s = ''
71
+
72
+ # test if s is na or empty string
73
+ feilian.is_empty_text(s)
74
+
75
+ # test if s is not na and non-empty string
76
+ feilian.is_nonempty_text(s)
77
+
78
+ # test if s is na or blank string
79
+ feilian.is_blank_text(s)
80
+
81
+ # test if s is not na and non-blank string
82
+ feilian.is_non_blank_text(s)
83
+ ```
84
+
85
+ #### Merge same id rows to one row
86
+
87
+ ```python
88
+ import feilian
89
+ import pandas as pd
90
+
91
+ df = pd.DataFrame([
92
+ {"a": "1", "b": "2", "c": "5"},
93
+ {"a": "2", "b": 6, "c": "8"},
94
+ {"a": "1", "b": 8, "c": "9"},
95
+ ])
96
+
97
+ res = feilian.merge_dataframe_rows(df, col_id="a", join_sep=",")
98
+ ```
99
+
100
+ ### IO for json file
101
+
102
+ #### Read a json file
103
+
104
+ ```python
105
+ import feilian
106
+
107
+ input_file = ''
108
+ data = feilian.read_json(input_file)
109
+ ```
110
+
111
+ #### Write a json file
112
+
113
+ ```python
114
+ import feilian
115
+
116
+ data = [
117
+ {"a": "1", "b": "2", "c": "5"},
118
+ {"a": "2", "b": 6, "c": "8"},
119
+ {"a": "1", "b": 8, "c": "9"},
120
+ ]
121
+ output_file = ''
122
+ feilian.save_json(output_file, data)
123
+ ```
124
+
125
+ ### Datetime format
126
+
127
+ ```python
128
+ import feilian
129
+ import datetime
130
+
131
+ d = datetime.datetime.now()
132
+
133
+ # format a date string
134
+ feilian.format_date(d, sep='-')
135
+
136
+ # format a time string
137
+ feilian.format_time(d, fmt='%H:%M:%S')
138
+ ```
139
+
140
+ ### Process dict
141
+
142
+ #### Flatten dict value
143
+
144
+ ```python
145
+ import feilian
146
+
147
+ data = {
148
+ "a": 12,
149
+ "b": ["4", "s"],
150
+ "c": {
151
+ "l": 0,
152
+ "j": {
153
+ "se": "we",
154
+ "t": 5,
155
+ }
156
+ },
157
+ "f": 7,
158
+ "g": {
159
+ "ts": "9w",
160
+ "j2": 8,
161
+ },
162
+ "w": {
163
+ "s": {
164
+ "ge": 89,
165
+ "00": "ej",
166
+ },
167
+ "r": {
168
+ "le": 33,
169
+ "03": "ef",
170
+ }
171
+ },
172
+ "sk": {
173
+ "a": "23",
174
+ "b": {
175
+ "s": 9,
176
+ "g": 0,
177
+ "p": 4,
178
+ },
179
+ "c": {
180
+ "s": 8,
181
+ "t": "w",
182
+ "j": "23",
183
+ }
184
+ },
185
+ }
186
+ res = feilian.flatten_dict(data, frozen={"g", "w.s", "sk."}, exclude="f")
187
+ ```
188
+
189
+ ### Process args
190
+
191
+ ```python
192
+ from feilian import ArgValueParser
193
+
194
+ value = ''
195
+
196
+ # split value
197
+ ArgValueParser.split_strs_to_list(value)
198
+ ArgValueParser.split_strs_to_set(value)
199
+
200
+ # bound value
201
+ ArgValueParser.bound_set_if_singleton(value)
202
+ ArgValueParser.bound_tuple_if_singleton(value)
203
+ ArgValueParser.bound_list_if_singleton(value)
204
+
205
+ # force value type
206
+ ArgValueParser.ensure_set(value)
207
+ ArgValueParser.ensure_list(value)
208
+ ArgValueParser.ensure_tuple(value)
209
+ ```
210
+
@@ -0,0 +1,15 @@
1
+ feilian/__init__.py,sha256=97Rvz2O6LknvCWInAjINH4GeqG4-QPPcep9n-V7KD8k,911
2
+ feilian/_dist_ver.py,sha256=quKRw8l2uSmWwO0SXPBC-2dE29rT4esN_0UQ9PP_-bk,148
3
+ feilian/arg.py,sha256=n2nIcmC_3rb9A6BOzm9C5z3-T4lnubaGzH2sFhtqwZQ,8402
4
+ feilian/dataframe.py,sha256=DsUiNRuVAe2H6WwqtZ6TGu6WHiLxFEN_Xjl208xvvnw,10698
5
+ feilian/datetime.py,sha256=IONvWhLeGEy9IVe6GWKEW3FhrfRrShyhGP8-RTf9r3c,763
6
+ feilian/io.py,sha256=aYN3QwWcLoRKzhGMNutqdkmxArVcXfeWXzxCB07LcFc,155
7
+ feilian/json.py,sha256=1FkQ6e4JmbccpxhMobXpsGg-f7uVrTtUmu6jDCXCFTQ,1406
8
+ feilian/process.py,sha256=GLkmogYnhkxi6qf-JX-FwIlJAHmTynmgB7zSAggEe1E,2080
9
+ feilian/string.py,sha256=G_X3dnR0Oxmi4hXF-6E5jm5M7GPjGoMYrSMyI1dj6Z4,370
10
+ feilian/utils.py,sha256=pzzGEgngidVkYvuNJWY7KWjkdgGRuH5_ENaLS6kxBtk,2648
11
+ feilian/version.py,sha256=oH_DvE7jRCWlCCX9SSadwxwRJXFas_rIisYLBGPYZn4,350
12
+ feilian-1.2.1.dist-info/METADATA,sha256=v0pXur6UQVS3a6U8irka8ucSq9M-wyQ4bgshSv6IBJo,3723
13
+ feilian-1.2.1.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
14
+ feilian-1.2.1.dist-info/top_level.txt,sha256=1Q2-B6KJrcTr7drW_kik35PTVEUJLPP4wVrn0kYKwGw,8
15
+ feilian-1.2.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: setuptools (71.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,41 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: feilian
3
- Version: 1.1.9
4
- Summary: General data processing tool.
5
- Author-email: darkpeath <darkpeath@gmail.com>
6
- Project-URL: Homepage, https://github.com/darkpeath/feilian
7
- Description-Content-Type: text/markdown
8
- Requires-Dist: pandas
9
- Provides-Extra: extra
10
- Requires-Dist: tqdm ; extra == 'extra'
11
-
12
- # feilian
13
-
14
- General data processing tool.
15
-
16
- ## Features
17
-
18
- - More default values, less necessary arg.
19
- - Encapsulation of panda dataframe for simple usage.
20
-
21
- ## Usage
22
-
23
- ### Read a file as dataframe
24
-
25
- ```python
26
- from feilian import read_dataframe
27
-
28
- input_file = '' # file can be any csv, json or xlsx format
29
- df = read_dataframe(input_file)
30
- ```
31
-
32
- ### Write dataframe to a file
33
-
34
- ```python
35
- import pandas as pd
36
- from feilian import save_dataframe
37
-
38
- df = pd.DataFrame(dict(a=[1, 2, 3], b=[4, 5, 6]))
39
- output_file = '' # file can be any csv, json or xlsx format
40
- save_dataframe(output_file, df)
41
- ```
@@ -1,14 +0,0 @@
1
- feilian/__init__.py,sha256=Flac-edWcAozHlL4aAdSJX0rphgSdplFrUUlkr5q7ro,815
2
- feilian/_dist_ver.py,sha256=gRZX3slfIQ_HsndEWgYKrFuTPKqi9wcoGNPZZAaGPTI,148
3
- feilian/arg.py,sha256=n2nIcmC_3rb9A6BOzm9C5z3-T4lnubaGzH2sFhtqwZQ,8402
4
- feilian/dataframe.py,sha256=G7Ai_JsMS7kNfqRNptqGOOxGWjyYwcJrT73IttDO1vo,10653
5
- feilian/datetime.py,sha256=IONvWhLeGEy9IVe6GWKEW3FhrfRrShyhGP8-RTf9r3c,763
6
- feilian/io.py,sha256=aYN3QwWcLoRKzhGMNutqdkmxArVcXfeWXzxCB07LcFc,155
7
- feilian/json.py,sha256=PSjDJ3MCdolKwfAOmT9DuS8KnJZo9oGABKgJDduCliU,1187
8
- feilian/string.py,sha256=G_X3dnR0Oxmi4hXF-6E5jm5M7GPjGoMYrSMyI1dj6Z4,370
9
- feilian/utils.py,sha256=DqBKjpRBbSNipRDau9sYnoCfSDR_Of-xTOCoNxGWUJk,2180
10
- feilian/version.py,sha256=oH_DvE7jRCWlCCX9SSadwxwRJXFas_rIisYLBGPYZn4,350
11
- feilian-1.1.9.dist-info/METADATA,sha256=KA03FrgOCulwqzLrodlh76uKxVnISbxpchq8GGDQfvw,902
12
- feilian-1.1.9.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
13
- feilian-1.1.9.dist-info/top_level.txt,sha256=1Q2-B6KJrcTr7drW_kik35PTVEUJLPP4wVrn0kYKwGw,8
14
- feilian-1.1.9.dist-info/RECORD,,