lionagi 0.0.206__py3-none-any.whl → 0.0.208__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- lionagi/_services/ollama.py +2 -2
- lionagi/core/branch/branch.py +517 -265
- lionagi/core/branch/branch_manager.py +0 -1
- lionagi/core/branch/conversation.py +640 -337
- lionagi/core/core_util.py +0 -59
- lionagi/core/sessions/session.py +137 -64
- lionagi/tools/tool_manager.py +39 -62
- lionagi/utils/__init__.py +3 -2
- lionagi/utils/call_util.py +9 -7
- lionagi/utils/sys_util.py +287 -255
- lionagi/version.py +1 -1
- {lionagi-0.0.206.dist-info → lionagi-0.0.208.dist-info}/METADATA +1 -1
- {lionagi-0.0.206.dist-info → lionagi-0.0.208.dist-info}/RECORD +16 -17
- lionagi/utils/pd_util.py +0 -57
- {lionagi-0.0.206.dist-info → lionagi-0.0.208.dist-info}/LICENSE +0 -0
- {lionagi-0.0.206.dist-info → lionagi-0.0.208.dist-info}/WHEEL +0 -0
- {lionagi-0.0.206.dist-info → lionagi-0.0.208.dist-info}/top_level.txt +0 -0
lionagi/utils/sys_util.py
CHANGED
@@ -10,79 +10,124 @@ import platform
|
|
10
10
|
import json
|
11
11
|
import logging
|
12
12
|
|
13
|
+
import pandas as pd
|
13
14
|
from typing import Any, List, Dict, Union
|
14
15
|
|
15
16
|
|
17
|
+
def as_dict(input_: Any) -> Dict[Any, Any]:
    """
    Return a dictionary from either a JSON string or an existing dictionary.

    Args:
        input_ (Any): A JSON-encoded string, or a dictionary.

    Returns:
        Dict[Any, Any]: For a string, whatever ``json.loads`` produces from it;
            for a dictionary, the very same object (no copy is made).

    Raises:
        ValueError: If the input is a string that ``json.loads`` cannot parse.
        TypeError: If the input is neither a string nor a dictionary.
    """
    # Dictionaries pass straight through untouched.
    if isinstance(input_, dict):
        return input_

    if not isinstance(input_, str):
        raise TypeError(f"Could not convert input to dict: {type(input_).__name__} given.")

    try:
        parsed = json.loads(input_)
    except Exception as e:
        raise ValueError(f"Could not convert input to dict: {e}") from e
    return parsed
|
40
|
+
|
41
|
+
def clear_dir(dir_path: str) -> None:
    """
    Delete every regular file and symbolic link directly inside a directory.

    Sub-directories are left in place; only entries for which
    ``os.path.isfile`` or ``os.path.islink`` is true are unlinked.

    Args:
        dir_path (str): The path of the directory to clear.

    Raises:
        FileNotFoundError: If ``dir_path`` does not exist.
        Exception: Any error raised while deleting an entry is logged and re-raised.
    """
    if not os.path.exists(dir_path):
        raise FileNotFoundError("The specified directory does not exist.")

    for entry in os.listdir(dir_path):
        target = os.path.join(dir_path, entry)
        try:
            removable = os.path.isfile(target) or os.path.islink(target)
            if not removable:
                # Directories (and anything else) are skipped silently.
                continue
            os.unlink(target)
            logging.info(f'Successfully deleted {target}')
        except Exception as err:
            logging.error(f'Failed to delete {target}. Reason: {err}')
            raise
|
38
63
|
|
64
|
+
def change_dict_key(dict_: Dict[Any, Any], old_key: str, new_key: str) -> None:
    """
    Rename a key of a dictionary in place.

    Args:
        dict_ (Dict[Any, Any]): The dictionary to modify.
        old_key (str): The key to remove.
        new_key (str): The key under which the removed value is stored again.

    Returns:
        None: The dictionary is mutated in place. Nothing happens when
            ``old_key`` is not present.
    """
    if old_key not in dict_:
        return

    # Pop first, then insert, so the value ends up under the new key only.
    moved_value = dict_.pop(old_key)
    dict_[new_key] = moved_value
|
78
|
+
|
39
79
|
def create_copy(input: Any, n: int = 1) -> Any:
    """
    Deep-copy the input once, or several times.

    Args:
        input (Any): The object to copy.
        n (int, optional): How many copies to produce. Defaults to 1.

    Returns:
        Any: A single deep copy when n == 1, otherwise a list holding n
            independent deep copies.

    Raises:
        ValueError: If 'n' is not a positive integer.
    """
    is_positive_int = isinstance(n, int) and n >= 1
    if not is_positive_int:
        raise ValueError(f"'n' must be a positive integer: {n}")

    if n == 1:
        return copy.deepcopy(input)

    copies = []
    for _ in range(n):
        copies.append(copy.deepcopy(input))
    return copies
|
63
96
|
|
97
|
+
def create_id(n: int = 32) -> str:
    """
    Build an identifier from the current time and random bytes, hashed with SHA-256.

    Args:
        n (int, optional): The number of leading hex characters to keep. Defaults to 32.

    Returns:
        str: The first ``n`` characters of the hex digest. A SHA-256 hex digest
            has 64 characters, so the result is never longer than that.
    """
    hasher = hashlib.sha256()
    # Feeding the two parts in sequence hashes the same bytes as their concatenation.
    hasher.update(datetime.now().isoformat().encode('utf-8'))
    hasher.update(os.urandom(32))
    digest = hasher.hexdigest()
    return digest[:n]
|
110
|
+
|
64
111
|
def create_path(
|
65
112
|
dir: str, filename: str, timestamp: bool = True, dir_exist_ok: bool = True,
|
66
113
|
time_prefix: bool = False
|
67
114
|
) -> str:
|
68
115
|
"""
|
69
|
-
|
116
|
+
Create a file path with optional timestamp inclusion and directory creation.
|
70
117
|
|
71
118
|
Args:
|
72
|
-
|
119
|
+
dir_path (str): The directory path where the file will be located.
|
73
120
|
filename (str): The name of the file.
|
74
121
|
timestamp (bool, optional): Flag to include a timestamp in the filename. Defaults to True.
|
75
|
-
dir_exist_ok (bool, optional): Flag to
|
76
|
-
time_prefix (bool, optional): Flag to place the timestamp as a prefix. Defaults to False.
|
122
|
+
dir_exist_ok (bool, optional): Flag to allow existing directories. Defaults to True.
|
123
|
+
time_prefix (bool, optional): Flag to place the timestamp as a prefix or suffix. Defaults to False.
|
77
124
|
|
78
125
|
Returns:
|
79
|
-
str: The
|
126
|
+
str: The constructed file path including the directory, filename, and optional timestamp.
|
80
127
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
>>> isinstance(create_path("/tmp", "report", time_prefix=True), str)
|
85
|
-
True
|
128
|
+
Notes:
|
129
|
+
If the directory does not exist, it will be created unless dir_exist_ok is False.
|
130
|
+
The timestamp format is YYYYMMDD_HHMMSS.
|
86
131
|
"""
|
87
132
|
dir = dir + '/' if not dir.endswith('/') else dir
|
88
133
|
if '.' in filename:
|
@@ -94,82 +139,19 @@ def create_path(
|
|
94
139
|
filename = f"{timestamp_str}_{name}" if time_prefix else f"{name}_{timestamp_str}"
|
95
140
|
return f"{dir}{filename}.{ext}" if ext else f"{dir}{filename}"
|
96
141
|
|
97
|
-
def split_path(path: str) -> tuple:
|
98
|
-
"""
|
99
|
-
Splits a file path into its directory name and file name.
|
100
|
-
|
101
|
-
Args:
|
102
|
-
path (str): The file path to split.
|
103
|
-
|
104
|
-
Returns:
|
105
|
-
tuple: A tuple containing the directory name and file name.
|
106
|
-
|
107
|
-
Examples:
|
108
|
-
>>> split_path("/home/user/document.txt")
|
109
|
-
('/home/user', 'document.txt')
|
110
|
-
>>> split_path("document.txt")
|
111
|
-
('', 'document.txt')
|
112
|
-
"""
|
113
|
-
folder_name = os.path.dirname(path)
|
114
|
-
file_name = os.path.basename(path)
|
115
|
-
return (folder_name, file_name)
|
116
|
-
|
117
|
-
def str_to_num(
|
118
|
-
input: str, upper_bound: float = None, lower_bound: float = None,
|
119
|
-
num_type: type = int, precision: int = None
|
120
|
-
) -> Any:
|
121
|
-
"""
|
122
|
-
Converts the first number in a string to a specified numeric type and checks if it
|
123
|
-
falls within specified bounds.
|
124
|
-
|
125
|
-
Args:
|
126
|
-
input (str): The string containing the number.
|
127
|
-
upper_bound (float, optional): The upper limit for the number. Defaults to None.
|
128
|
-
lower_bound (float, optional): The lower limit for the number. Defaults to None.
|
129
|
-
num_type (type): The numeric type to which the number will be converted. Default is int.
|
130
|
-
precision (int, optional): The precision for floating point numbers. Defaults to None.
|
131
|
-
|
132
|
-
Returns:
|
133
|
-
Any: The converted number in the specified type.
|
134
|
-
|
135
|
-
Raises:
|
136
|
-
ValueError: If no numeric value is found, or the number is outside the specified bounds.
|
137
|
-
|
138
|
-
Examples:
|
139
|
-
>>> str_to_num("Value is 20.5", upper_bound=30, num_type=float)
|
140
|
-
20.5
|
141
|
-
>>> str_to_num("Temperature -5 degrees", lower_bound=0)
|
142
|
-
ValueError: Number -5 is less than the lower bound of 0.
|
143
|
-
"""
|
144
|
-
number_str = _extract_first_number(input)
|
145
|
-
if number_str is None:
|
146
|
-
raise ValueError(f"No numeric values found in the string: {input}")
|
147
|
-
|
148
|
-
number = _convert_to_num(number_str, num_type, precision)
|
149
|
-
|
150
|
-
if upper_bound is not None and number > upper_bound:
|
151
|
-
raise ValueError(f"Number {number} is greater than the upper bound of {upper_bound}.")
|
152
|
-
if lower_bound is not None and number < lower_bound:
|
153
|
-
raise ValueError(f"Number {number} is less than the lower bound of {lower_bound}.")
|
154
|
-
|
155
|
-
return number
|
156
|
-
|
157
142
|
def get_bins(input: List[str], upper: int) -> List[List[int]]:
|
158
143
|
"""
|
159
|
-
|
160
|
-
bin is less than a specified upper limit.
|
144
|
+
Organize indices of the input list into bins where the cumulative length of strings does not exceed the upper limit.
|
161
145
|
|
162
146
|
Args:
|
163
|
-
input (List[str]): The list of strings
|
164
|
-
upper (int): The upper limit for the
|
147
|
+
input (List[str]): The input list of strings.
|
148
|
+
upper (int): The upper limit for the cumulative length of strings in each bin.
|
165
149
|
|
166
150
|
Returns:
|
167
|
-
List[List[int]]: A list of
|
168
|
-
input list that make up a bin.
|
151
|
+
List[List[int]]: A list of bins, each bin is a list of indices from the input list.
|
169
152
|
|
170
|
-
|
171
|
-
|
172
|
-
[[0, 1], [2], [3, 4]]
|
153
|
+
Notes:
|
154
|
+
This function can be used to partition data into chunks where each chunk has a maximum cumulative length.
|
173
155
|
"""
|
174
156
|
current = 0
|
175
157
|
bins = []
|
@@ -190,227 +172,277 @@ def get_bins(input: List[str], upper: int) -> List[List[int]]:
|
|
190
172
|
|
191
173
|
return bins
|
192
174
|
|
193
|
-
def
|
175
|
+
def get_cpu_architecture() -> str:
    """
    Classify the CPU architecture reported by ``platform.machine()``.

    Returns:
        str: 'apple_silicon' when the machine string contains 'arm' or 'aarch64'
            (compared case-insensitively), and 'other_cpu' otherwise. Note that
            the check looks only at the architecture name, so any ARM machine
            is reported as 'apple_silicon'.
    """
    # Normalise case: some platforms report the machine type in upper case
    # (e.g. 'ARM64'), which a case-sensitive substring test would miss.
    arch = platform.machine().lower()
    if 'arm' in arch or 'aarch64' in arch:
        return 'apple_silicon'
    return 'other_cpu'
|
188
|
+
|
189
|
+
def get_timestamp() -> str:
    """
    Produce the current local time as an ISO-format string without ':' or '.'.

    Returns:
        str: The value of ``datetime.now().isoformat()`` with every colon and
            period replaced by an underscore, e.g. 'YYYY-MM-DDTHH_MM_SS_ffffff'.
    """
    iso_now = datetime.now().isoformat()
    # One translation pass swaps both separator characters for underscores.
    separators = str.maketrans({":": "_", ".": "_"})
    return iso_now.translate(separators)
|
202
197
|
|
203
|
-
|
204
|
-
>>> len(create_id())
|
205
|
-
32
|
206
|
-
>>> len(create_id(16))
|
207
|
-
16
|
198
|
+
def split_path(path: str) -> tuple:
    """
    Break a path into its directory part and its final component.

    Args:
        path (str): The path to split.

    Returns:
        tuple: ``(folder_name, file_name)``, i.e. the directory name and the
            base name of ``path``.
    """
    # os.path.split yields exactly the (dirname, basename) pair.
    folder_name, file_name = os.path.split(path)
    return (folder_name, file_name)
|
211
|
+
|
212
|
+
def str_to_num(
    input: str, upper_bound: float = None, lower_bound: float = None,
    num_type: type = int, precision: int = None
) -> Any:
    """
    Pull a number out of a string, convert it, and check it against optional bounds.

    Extraction is delegated to ``_extract_first_number`` and conversion to
    ``_convert_to_num``.

    Args:
        input (str): The string that contains the number.
        upper_bound (float, optional): Largest value allowed. No check when None.
        lower_bound (float, optional): Smallest value allowed. No check when None.
        num_type (type): The numeric type passed on to the conversion helper.
        precision (int, optional): The precision passed on to the conversion helper.

    Returns:
        Any: The converted number.

    Raises:
        ValueError: If no number is found in the string, or if the number lies
            above ``upper_bound`` or below ``lower_bound``.
    """
    extracted = _extract_first_number(input)
    if extracted is None:
        raise ValueError(f"No numeric values found in the string: {input}")

    number = _convert_to_num(extracted, num_type, precision)

    # The upper bound is checked before the lower bound.
    exceeds_upper = upper_bound is not None and number > upper_bound
    if exceeds_upper:
        raise ValueError(f"Number {number} is greater than the upper bound of {upper_bound}.")

    below_lower = lower_bound is not None and number < lower_bound
    if below_lower:
        raise ValueError(f"Number {number} is less than the lower bound of {lower_bound}.")

    return number
|
240
244
|
|
241
245
|
def strip_lower(input: Any) -> str:
    """
    Convert an input to a string, strip surrounding whitespace, and lowercase it.

    Args:
        input (Any): The input to be processed.

    Returns:
        str: ``str(input)`` with leading/trailing whitespace removed, in lowercase.

    Raises:
        ValueError: If the input cannot be converted to a string.
    """
    try:
        return str(input).strip().lower()
    except Exception as e:
        # Report only the type name: formatting the object itself would call
        # the same failing conversion again and mask this ValueError.
        raise ValueError(
            f"Could not convert input to string: {type(input).__name__} given."
        ) from e
|
264
262
|
|
265
|
-
def
|
263
|
+
def install_import(package_name, module_name=None, import_name=None, pip_name=None):
    """
    Dynamically import a module or package, installing it via pip if the import fails.

    Args:
        package_name (str): The base name of the package.
        module_name (str, optional): The name of a module within the package, if applicable.
        import_name (str, optional): A specific name to look up on the imported module or package.
        pip_name (str, optional): The name to pass to ``pip install``. Defaults to ``package_name``.

    Returns:
        None: Nothing is returned; progress is reported with ``print``.

    Raises:
        subprocess.CalledProcessError: If the ``pip install`` command fails.
        ImportError: If the import still fails after installation.
        AttributeError: If ``import_name`` is not found on the imported module.
    """
    # Function-scope import so this block does not rely on the module's import list.
    import importlib

    if pip_name is None:
        pip_name = package_name  # Defaults to package_name if pip_name is not explicitly provided

    full_import_path = package_name if module_name is None else f"{package_name}.{module_name}"
    try:
        _import_or_raise(full_import_path, import_name)
        print(f"Successfully imported {import_name or full_import_path}.")
    except ImportError:
        print(f"Module {full_import_path} or attribute {import_name} not found. Installing {pip_name}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name])
        # The import system caches directory listings; refresh them so the
        # freshly installed package can be found by the retry below.
        importlib.invalidate_caches()
        _import_or_raise(full_import_path, import_name)


def _import_or_raise(full_import_path, import_name=None):
    """Import ``full_import_path`` and, when given, look up ``import_name`` on it."""
    if import_name:
        # A non-empty fromlist makes __import__ return the leaf module and also
        # import ``import_name`` when it is a submodule.
        module = __import__(full_import_path, fromlist=[import_name])
        getattr(module, import_name)
    else:
        __import__(full_import_path)
|
301
|
+
|
302
|
+
def is_schema(dict_: Dict, schema: Dict) -> bool:
    """
    Validate if a dictionary matches a given schema.

    Args:
        dict_ (Dict): The dictionary to validate.
        schema (Dict): A mapping of required keys to their expected types.

    Returns:
        bool: True if every key in the schema is present in the dictionary with
            a value of the expected type, False otherwise. Keys in the
            dictionary that the schema does not mention are ignored.
    """
    for key, expected_type in schema.items():
        # A missing key is a mismatch, not an error.
        if key not in dict_:
            return False
        if not isinstance(dict_[key], expected_type):
            return False
    return True
|
312
320
|
|
313
|
-
def
|
321
|
+
def is_package_installed(package_name: str) -> bool:
    """
    Check if a package can be found by the import system in the current environment.

    Args:
        package_name (str): The name of the package to check. May be a dotted
            submodule name.

    Returns:
        bool: True if ``importlib.util.find_spec`` locates the package, False otherwise.
    """
    try:
        package_spec = importlib.util.find_spec(package_name)
    except ModuleNotFoundError:
        # For a dotted name find_spec imports the parent first; a missing
        # parent means the requested module is not installed either.
        return False
    return package_spec is not None
|
324
333
|
|
325
|
-
def
|
334
|
+
def is_same_dtype(input_: Any, dtype: type = None) -> bool:
    """
    Check if all elements in a collection are instances of one data type.

    Args:
        input_ (Any): A list, a dictionary, or a single value.
        dtype (type, optional): The data type to check against. If not provided,
            the type of the first element (or of the value itself) is used.

    Returns:
        bool: True if every element is an instance of the data type, False
            otherwise. An empty list or dictionary yields True.

    Note:
        For dictionaries, the values are checked, not the keys. For any other
        non-list input, the input itself is checked.
    """
    if isinstance(input_, list):
        elements = input_
    elif isinstance(input_, dict):
        elements = list(input_.values())
    else:
        dtype = dtype or type(input_)
        return isinstance(input_, dtype)

    # Nothing to compare: avoid indexing into an empty collection.
    if not elements:
        return True

    dtype = dtype or type(elements[0])
    return all(isinstance(element, dtype) for element in elements)
|
347
360
|
|
348
|
-
def
|
361
|
+
def to_df(
    item: Any, how: str = 'all', drop_kwargs: Dict = None, reset_index: bool = True,
    **kwargs
) -> pd.DataFrame:
    """
    Convert a list or a pandas DataFrame into a cleaned pandas DataFrame.

    A non-empty list made up solely of DataFrames is concatenated; any other
    list is passed to the ``pd.DataFrame`` constructor; a DataFrame is used as is.
    Rows with missing values are then dropped and, optionally, the index is reset.

    Args:
        item (Any): The item to be converted. Can be a list or a DataFrame.
        how (str, optional): The ``how`` argument for ``dropna``. Defaults to 'all'.
        drop_kwargs (Dict, optional): Additional keyword arguments for
            ``pd.DataFrame.dropna()``. The dictionary is not modified. Defaults to None.
        reset_index (bool, optional): Whether to reset the index of the result. Defaults to True.
        **kwargs: Additional keyword arguments for ``pd.DataFrame.reset_index()``.
            ``drop`` defaults to True. ``inplace`` is accepted for backward
            compatibility but ignored: the reset DataFrame is always returned.

    Returns:
        pd.DataFrame: The resulting DataFrame after conversion and cleaning.

    Raises:
        ValueError: If the item type is unsupported or any step of the
            conversion fails; the underlying error is chained as the cause.
    """
    try:
        if isinstance(item, pd.DataFrame):
            dfs = item
        elif isinstance(item, list):
            if item and all(isinstance(entry, pd.DataFrame) for entry in item):
                dfs = pd.concat(item)
            else:
                dfs = pd.DataFrame(item)
        else:
            raise TypeError(f"unsupported item type: {type(item).__name__}")

        # Build a fresh options dict so neither the caller's dict nor a shared
        # default is mutated; the explicit ``how`` argument takes precedence.
        dropna_options = {**(drop_kwargs or {}), 'how': how}
        dfs = dfs.dropna(**dropna_options)

        if reset_index:
            drop = kwargs.pop('drop', True)
            # Always rebind the result so the reset is never silently discarded.
            kwargs.pop('inplace', None)
            dfs = dfs.reset_index(drop=drop, **kwargs)

        return dfs

    except Exception as e:
        raise ValueError(f'Error converting items to DataFrame: {e}') from e
|
373
404
|
|
405
|
+
def timestamp_to_datetime(timestamp: float) -> datetime:
    """
    Turn a numeric timestamp into a datetime object.

    Args:
        timestamp (float): The timestamp to convert, as accepted by
            ``datetime.fromtimestamp``.

    Returns:
        datetime: The result of ``datetime.fromtimestamp(timestamp)``; no time
            zone is passed to the conversion.
    """
    converted = datetime.fromtimestamp(timestamp)
    return converted
|
385
416
|
|
386
|
-
|
387
|
-
install_and_import('xx', 'yy', 'zz', 'different_pip_name')
|
417
|
+
def _extract_first_number(inputstr: str) -> str:
|
388
418
|
"""
|
419
|
+
Extract the first number from a string.
|
389
420
|
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
421
|
+
Args:
|
422
|
+
input (str): The input string to search for numbers.
|
423
|
+
|
424
|
+
Returns:
|
425
|
+
str: The first number found in the string, or None if no numbers are found.
|
426
|
+
"""
|
427
|
+
numbers = re.findall(r'-?\d+\.?\d*', inputstr)
|
428
|
+
return numbers[0] if numbers else None
|
429
|
+
|
430
|
+
def _convert_to_num(number_str: str, num_type: type = int, precision: int = None) -> Any:
|
431
|
+
"""
|
432
|
+
Convert a string to a specified numeric type, optionally rounding to a given precision.
|
394
433
|
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
getattr(module, import_name)
|
411
|
-
else:
|
412
|
-
__import__(full_import_path)
|
413
|
-
|
414
|
-
def is_package_installed(package_name):
|
415
|
-
package_spec = importlib.util.find_spec(package_name)
|
416
|
-
return package_spec is not None
|
434
|
+
Args:
|
435
|
+
number_str (str): The number in string format to convert.
|
436
|
+
num_type (type): The type of number to convert to, e.g., int or float.
|
437
|
+
precision (int, optional): The number of decimal places to round to, for floating-point conversions.
|
438
|
+
|
439
|
+
Returns:
|
440
|
+
Any: The converted number, either as an int or float, depending on num_type.
|
441
|
+
"""
|
442
|
+
if num_type is int:
|
443
|
+
return int(float(number_str))
|
444
|
+
elif num_type is float:
|
445
|
+
return round(float(number_str), precision) if precision is not None else float(number_str)
|
446
|
+
else:
|
447
|
+
raise ValueError(f"Invalid number type: {num_type}")
|
448
|
+
|