feilian 1.2.3.tar.gz → 1.2.5.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of feilian might be problematic.
- {feilian-1.2.3 → feilian-1.2.5}/PKG-INFO +1 -1
- {feilian-1.2.3 → feilian-1.2.5}/feilian/_dist_ver.py +2 -2
- {feilian-1.2.3 → feilian-1.2.5}/feilian/dataframe.py +11 -3
- {feilian-1.2.3 → feilian-1.2.5}/feilian/process.py +4 -2
- {feilian-1.2.3 → feilian-1.2.5}/feilian.egg-info/PKG-INFO +1 -1
- {feilian-1.2.3 → feilian-1.2.5}/README.md +0 -0
- {feilian-1.2.3 → feilian-1.2.5}/build.sh +0 -0
- {feilian-1.2.3 → feilian-1.2.5}/feilian/__init__.py +0 -0
- {feilian-1.2.3 → feilian-1.2.5}/feilian/arg.py +0 -0
- {feilian-1.2.3 → feilian-1.2.5}/feilian/datetime.py +0 -0
- {feilian-1.2.3 → feilian-1.2.5}/feilian/io.py +0 -0
- {feilian-1.2.3 → feilian-1.2.5}/feilian/json.py +0 -0
- {feilian-1.2.3 → feilian-1.2.5}/feilian/string.py +0 -0
- {feilian-1.2.3 → feilian-1.2.5}/feilian/utils.py +0 -0
- {feilian-1.2.3 → feilian-1.2.5}/feilian/version.py +0 -0
- {feilian-1.2.3 → feilian-1.2.5}/feilian.egg-info/SOURCES.txt +0 -0
- {feilian-1.2.3 → feilian-1.2.5}/feilian.egg-info/dependency_links.txt +0 -0
- {feilian-1.2.3 → feilian-1.2.5}/feilian.egg-info/requires.txt +0 -0
- {feilian-1.2.3 → feilian-1.2.5}/pyproject.toml +0 -0
- {feilian-1.2.3 → feilian-1.2.5}/requirements.txt +0 -0
- {feilian-1.2.3 → feilian-1.2.5}/setup.cfg +0 -0
@@ -76,7 +76,14 @@ def read_dataframe(file: str, *args, sheet_name=0,
     elif file_format == 'xlsx':
         df = pd.read_excel(file, *args, sheet_name=sheet_name, dtype=dtype, **kwargs)
     elif file_format == 'json':
-        df = pd.read_json(file, *args, lines=jsonl, dtype=dtype, **kwargs)
+        try:
+            df = pd.read_json(file, *args, lines=jsonl, dtype=dtype, **kwargs)
+        except Exception as e:
+            # if failed, try again with different arg `lines`
+            try:
+                df = pd.read_json(file, *args, lines=not jsonl, dtype=dtype, **kwargs)
+            except Exception:
+                raise e
     elif file_format == 'parquet':
         df = pd.read_parquet(file, *args, **kwargs)
     else:
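With this change a caller no longer has to know up front whether a .json file is a single JSON document or JSON Lines: if the first pd.read_json call fails, read_dataframe retries with `lines` flipped and only re-raises the original error if both attempts fail. A minimal usage sketch, assuming read_dataframe is importable from feilian.dataframe as the file layout suggests, that `jsonl` is accepted as a keyword argument (as the body implies), and that the format is inferred from the .json extension; the file names are hypothetical:

from feilian.dataframe import read_dataframe

# Hypothetical files: one plain JSON array, one JSON Lines file.
# Both load in 1.2.5 even if `jsonl` does not match the actual layout,
# because the reader retries with `lines` flipped before giving up.
df_array = read_dataframe('records_array.json', jsonl=True)   # falls back to lines=False
df_lines = read_dataframe('records_lines.json', jsonl=False)  # falls back to lines=True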
@@ -97,7 +104,7 @@ def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[st
                    index=False, index_label=None,
                    encoding='utf-8', newline='\n',
                    force_ascii=False,
-                   orient='records', jsonl=True,
+                   orient='records', jsonl=True, indent=None,
                    column_mapper: Union[Dict[str, str], Sequence[str]] = None,
                    include_columns: Sequence[str] = None,
                    exclude_columns: Sequence[str] = None,
@@ -119,6 +126,7 @@ def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[st
     :param force_ascii: `force_ascii` for json format
     :param orient: `orient` for json format
     :param jsonl: jsonl format or not
+    :param indent: indent for json format
     :param column_mapper: rename columns; if set, columns not list here will be ignored
     :param include_columns: if set, columns not list here will be ignored
     :param exclude_columns: if set, columns list here will be ignored
@@ -180,7 +188,7 @@ def save_dataframe(file: Union[str, 'pd.WriteBuffer[bytes]', 'pd.WriteBuffer[st
             index = True
         df.to_json(file, *args, compression=compression, index=index,
                    force_ascii=force_ascii, orient=orient, lines=jsonl,
-                   **kwargs)
+                   indent=indent, **kwargs)
     elif file_format == 'parquet':
         df.to_parquet(file, *args, compression=compression, index=index, **kwargs)
     else:
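The new `indent` parameter is simply forwarded to pandas' DataFrame.to_json, so pretty-printed JSON output can now be requested through save_dataframe. A minimal sketch, assuming save_dataframe is importable from feilian.dataframe and that the DataFrame is the second positional argument, as the save_result call in process.py suggests:

import pandas as pd
from feilian.dataframe import save_dataframe

df = pd.DataFrame({'name': ['a', 'b'], 'value': [1, 2]})

# `indent` is forwarded to DataFrame.to_json (new in 1.2.5);
# jsonl=False writes a single pretty-printed JSON document.
save_dataframe('out.json', df, jsonl=False, indent=2)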
@@ -61,11 +61,13 @@ class BaseProcessor(abc.ABC):
         self.save_result(output_path or input_path, result)
 
 class DataframeProcessor(BaseProcessor, abc.ABC):
-    def __init__(self, input_dtype=None, progress=False, read_args: Dict[str, Any] = None
+    def __init__(self, input_dtype=None, progress=False, read_args: Dict[str, Any] = None,
+                 write_args: Dict[str, Any] = None):
         self.progress = progress
         self.read_args = read_args or {}
         if input_dtype is not None:
             self.read_args['dtype'] = input_dtype
+        self.write_args = write_args or {}
 
     def read_single_file(self, filepath: str) -> pd.DataFrame:
         return read_dataframe(filepath, **self.read_args)
@@ -77,7 +79,7 @@ class DataframeProcessor(BaseProcessor, abc.ABC):
         return super().read_data(filepath)
 
     def save_result(self, filepath: str, result: pd.DataFrame):
-        save_dataframe(filepath, result)
+        save_dataframe(filepath, result, **self.write_args)
 
     @abc.abstractmethod
     def process_row(self, i: Hashable, row: pd.Series) -> Optional[Dict[str, Any]]:
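Together, these two hunks let a DataframeProcessor forward keyword arguments (such as the new `indent` or `jsonl`) to save_dataframe when results are written back, mirroring the existing read_args. A minimal subclass sketch, assuming DataframeProcessor is importable from feilian.process per the file layout and that process_row is the only abstract method left to implement; the class and column names are hypothetical:

from typing import Any, Dict, Hashable, Optional
import pandas as pd
from feilian.process import DataframeProcessor

class UppercaseNames(DataframeProcessor):
    """Hypothetical processor that upper-cases a 'name' column."""

    def process_row(self, i: Hashable, row: pd.Series) -> Optional[Dict[str, Any]]:
        return {**row.to_dict(), 'name': str(row['name']).upper()}

# read_args / write_args are forwarded to read_dataframe / save_dataframe;
# write_args is the 1.2.5 addition, here requesting JSON Lines output.
processor = UppercaseNames(read_args={'jsonl': True},
                           write_args={'jsonl': True, 'force_ascii': False})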