praisonaiagents 0.0.23__py3-none-any.whl → 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- praisonaiagents/tools/__init__.py +165 -2
- praisonaiagents/tools/arxiv_tools.py +292 -0
- praisonaiagents/tools/calculator_tools.py +278 -0
- praisonaiagents/tools/csv_tools.py +266 -0
- praisonaiagents/tools/duckdb_tools.py +268 -0
- praisonaiagents/tools/duckduckgo_tools.py +52 -0
- praisonaiagents/tools/excel_tools.py +310 -0
- praisonaiagents/tools/file_tools.py +274 -0
- praisonaiagents/tools/json_tools.py +515 -0
- praisonaiagents/tools/newspaper_tools.py +354 -0
- praisonaiagents/tools/pandas_tools.py +326 -0
- praisonaiagents/tools/python_tools.py +423 -0
- praisonaiagents/tools/shell_tools.py +278 -0
- praisonaiagents/tools/spider_tools.py +431 -0
- praisonaiagents/tools/test.py +56 -0
- praisonaiagents/tools/tools.py +5 -36
- praisonaiagents/tools/wikipedia_tools.py +272 -0
- praisonaiagents/tools/xml_tools.py +498 -0
- praisonaiagents/tools/yaml_tools.py +417 -0
- praisonaiagents/tools/yfinance_tools.py +213 -0
- {praisonaiagents-0.0.23.dist-info → praisonaiagents-0.0.24.dist-info}/METADATA +1 -1
- praisonaiagents-0.0.24.dist-info/RECORD +42 -0
- praisonaiagents-0.0.23.dist-info/RECORD +0 -24
- {praisonaiagents-0.0.23.dist-info → praisonaiagents-0.0.24.dist-info}/WHEEL +0 -0
- {praisonaiagents-0.0.23.dist-info → praisonaiagents-0.0.24.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,268 @@
|
|
1
|
+
"""Tools for working with DuckDB databases.
|
2
|
+
|
3
|
+
Usage:
|
4
|
+
from praisonaiagents.tools import duckdb_tools
|
5
|
+
df = duckdb_tools.query("SELECT * FROM my_table")
|
6
|
+
|
7
|
+
or
|
8
|
+
from praisonaiagents.tools import query_db, create_table, load_data
|
9
|
+
df = query_db("SELECT * FROM my_table")
|
10
|
+
"""
|
11
|
+
|
12
|
+
import logging
|
13
|
+
from typing import List, Dict, Any, Optional, Union, TYPE_CHECKING
|
14
|
+
from importlib import util
|
15
|
+
import json
|
16
|
+
|
17
|
+
if TYPE_CHECKING:
|
18
|
+
import duckdb
|
19
|
+
import pandas as pd
|
20
|
+
|
21
|
+
class DuckDBTools:
    """Tools for working with DuckDB databases.

    The connection is opened lazily on first use and then reused by every
    method until :meth:`close` is called.
    """

    def __init__(self, database: str = ':memory:'):
        """Initialize DuckDBTools.

        Args:
            database: Path to database file or ':memory:' for in-memory database
        """
        self.database = database
        self._conn = None

    def _get_duckdb(self) -> Optional['duckdb']:
        """Return the duckdb module, or None if it is not installed.

        Nothing is installed automatically; a missing package is only logged.
        """
        if util.find_spec('duckdb') is None:
            error_msg = "duckdb package is not available. Please install it using: pip install duckdb"
            logging.error(error_msg)
            return None
        import duckdb
        return duckdb

    def _get_pandas(self) -> Optional['pd']:
        """Return the pandas module, or None if it is not installed.

        Nothing is installed automatically; a missing package is only logged.
        """
        if util.find_spec('pandas') is None:
            error_msg = "pandas package is not available. Please install it using: pip install pandas"
            logging.error(error_msg)
            return None
        import pandas as pd
        return pd

    def _get_connection(self) -> Optional['duckdb.DuckDBPyConnection']:
        """Return the cached connection, opening it on first use.

        Returns:
            The connection, or None if duckdb is missing or connecting failed
            (the failure is logged).
        """
        if self._conn is None:
            duckdb = self._get_duckdb()
            if duckdb is None:
                return None
            try:
                self._conn = duckdb.connect(self.database)
            except Exception as e:
                error_msg = f"Error connecting to database {self.database}: {str(e)}"
                logging.error(error_msg)
                return None
        return self._conn

    def execute_query(
        self,
        query: str,
        params: Optional[Union[tuple, dict]] = None,
        return_df: bool = True
    ) -> Union[List[Dict[str, Any]], Dict[str, str]]:
        """Execute a SQL query.

        Args:
            query: SQL query to execute
            params: Query parameters
            return_df: If True, build the rows through a pandas DataFrame;
                if False, build them from the cursor without needing pandas

        Returns:
            Query results as list of dicts (one per row, keyed by column
            name), or a dict with an "error" key on failure
        """
        try:
            conn = self._get_connection()
            if conn is None:
                return {"error": "Could not connect to database"}

            if params:
                result = conn.execute(query, params)
            else:
                result = conn.execute(query)

            if return_df:
                pd = self._get_pandas()
                if pd is None:
                    return {"error": "pandas package not available"}
                return result.df().to_dict('records')

            # fetchall() yields plain tuples, so the column names have to come
            # from the cursor description (DB-API: first item of each entry).
            columns = [column[0] for column in (result.description or [])]
            return [dict(zip(columns, row)) for row in result.fetchall()]

        except Exception as e:
            error_msg = f"Error executing query: {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}

    def load_csv(
        self,
        table_name: str,
        filepath: str,
        schema: Optional[Dict[str, str]] = None,
        if_exists: str = 'replace'
    ) -> bool:
        """Load a CSV file into a table.

        Args:
            table_name: Name of table to create. It is interpolated into the
                SQL verbatim, so it must be a trusted identifier.
            filepath: Path to CSV file
            schema: Optional column definitions (name -> SQL type), also
                interpolated verbatim
            if_exists: What to do if table exists ('fail', 'replace', 'append')

        Returns:
            bool: Success status (failures are logged, not raised)
        """
        try:
            # Validate up front so a bad value is rejected even when the
            # table does not exist yet.
            if if_exists not in ('fail', 'replace', 'append'):
                raise ValueError("if_exists must be 'fail', 'replace', or 'append'")

            conn = self._get_connection()
            if conn is None:
                return False

            # Double any single quote so the path stays one SQL string literal.
            csv_literal = "'" + str(filepath).replace("'", "''") + "'"

            # Check if table exists (the name is bound as a parameter here,
            # because in this statement it is a value, not an identifier).
            exists = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                [table_name]
            ).fetchone() is not None

            if exists:
                if if_exists == 'fail':
                    raise ValueError(f"Table {table_name} already exists")
                if if_exists == 'replace':
                    conn.execute(f"DROP TABLE IF EXISTS {table_name}")

            # Create table if needed
            if not exists or if_exists == 'replace':
                if schema:
                    # Create table with schema
                    columns = ', '.join(f"{k} {v}" for k, v in schema.items())
                    conn.execute(f"CREATE TABLE {table_name} ({columns})")
                else:
                    # Infer schema from CSV; WHERE 1=0 copies the column
                    # layout without copying any rows.
                    conn.execute(
                        f"CREATE TABLE {table_name} AS "
                        f"SELECT * FROM read_csv_auto({csv_literal}) WHERE 1=0"
                    )

            # Load data
            conn.execute(
                f"INSERT INTO {table_name} SELECT * FROM read_csv_auto({csv_literal})"
            )
            return True

        except Exception as e:
            error_msg = f"Error loading CSV file {filepath}: {str(e)}"
            logging.error(error_msg)
            return False

    def export_csv(
        self,
        query: str,
        filepath: str,
        params: Optional[Union[tuple, dict]] = None
    ) -> bool:
        """Export query results to CSV.

        Args:
            query: SQL query to execute
            filepath: Output file path
            params: Optional query parameters

        Returns:
            bool: Success status (failures are logged, not raised)
        """
        try:
            # Execute query and get results as a list of row dicts
            results = self.execute_query(query, params)
            if isinstance(results, dict) and 'error' in results:
                return False

            pd = self._get_pandas()
            if pd is None:
                return False

            # Convert to DataFrame and save
            pd.DataFrame(results).to_csv(filepath, index=False)
            return True

        except Exception as e:
            error_msg = f"Error exporting to CSV file {filepath}: {str(e)}"
            logging.error(error_msg)
            return False

    def close(self):
        """Close database connection (a later call reopens it lazily)."""
        if self._conn:
            self._conn.close()
            self._conn = None
|
210
|
+
|
211
|
+
# Create instance for direct function access
_duckdb_tools = DuckDBTools()
execute_query = _duckdb_tools.execute_query
load_csv = _duckdb_tools.load_csv
export_csv = _duckdb_tools.export_csv

if __name__ == "__main__":
    # Demonstration: load a small CSV, query it, and export the result.
    import os
    import tempfile

    print("\n==================================================")
    print("DuckDBTools Demonstration")
    print("==================================================\n")

    # Paths are recorded as soon as the files exist so that the finally
    # block can remove them even when a later step fails.
    temp_paths = []
    try:
        # Create a temporary file for testing, filled with sample data
        with tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False) as temp:
            temp_file = temp.name
            temp_paths.append(temp_file)
            temp.write("name,age,city\n")
            temp.write("Alice,25,New York\n")
            temp.write("Bob,30,San Francisco\n")
            temp.write("Charlie,35,Chicago\n")

        print("1. Loading CSV File")
        print("------------------------------")
        result = load_csv('users', temp_file)
        print(f"CSV loaded successfully: {result}")
        print()

        print("2. Executing Query")
        print("------------------------------")
        query = "SELECT * FROM users WHERE age > 25"
        results = execute_query(query)
        print("Query results:")
        if isinstance(results, dict):
            # execute_query reports failures as {"error": ...}, not as rows
            print(results.get("error"))
        else:
            for row in results:
                print(row)
        print()

        print("3. Exporting Query Results")
        print("------------------------------")
        with tempfile.NamedTemporaryFile(suffix='.csv', delete=False) as temp2:
            temp_file2 = temp2.name
            temp_paths.append(temp_file2)
        result = export_csv(query, temp_file2)
        print(f"Results exported successfully: {result}")
        if result:
            print("\nExported file contents:")
            with open(temp_file2) as f:
                print(f.read())
    finally:
        # Clean up temporary files and release the shared connection
        for path in temp_paths:
            if os.path.exists(path):
                os.unlink(path)
        _duckdb_tools.close()

    print("==================================================")
    print("Demonstration Complete")
    print("==================================================\n")
|
@@ -0,0 +1,52 @@
|
|
1
|
+
"""DuckDuckGo search functionality.
|
2
|
+
|
3
|
+
Usage:
|
4
|
+
from praisonaiagents.tools import internet_search
|
5
|
+
results = internet_search("AI news")
|
6
|
+
|
7
|
+
or
|
8
|
+
from praisonaiagents.tools import duckduckgo
|
9
|
+
results = duckduckgo("AI news")
|
10
|
+
"""
|
11
|
+
|
12
|
+
from typing import List, Dict
|
13
|
+
import logging
|
14
|
+
from importlib import util
|
15
|
+
import sys
|
16
|
+
|
17
|
+
def internet_search(query: str, max_results: int = 5) -> List[Dict]:
    """Perform an internet search using DuckDuckGo.

    Args:
        query: Search keywords.
        max_results: Maximum number of results to request (default 5, the
            value that used to be hard-coded).

    Returns:
        A list of dicts with "title", "url" and "snippet" keys. On failure
        (package missing or the search raising) the list holds a single
        dict with an "error" key instead.
    """
    # Check if duckduckgo_search is installed
    if util.find_spec("duckduckgo_search") is None:
        error_msg = "DuckDuckGo search is not available. Please install duckduckgo_search package using: pip install duckduckgo_search"
        logging.error(error_msg)
        return [{"error": error_msg}]

    try:
        # Import only when needed so the package stays an optional dependency
        from duckduckgo_search import DDGS

        ddgs = DDGS()
        return [
            {
                "title": hit.get("title", ""),
                "url": hit.get("href", ""),
                "snippet": hit.get("body", ""),
            }
            for hit in ddgs.text(keywords=query, max_results=max_results)
        ]

    except Exception as e:
        error_msg = f"Error during DuckDuckGo search: {e}"
        logging.error(error_msg)
        return [{"error": error_msg}]
|
42
|
+
|
43
|
+
# Make the module callable.
# Calling an object looks up __call__ on its type, not on the instance, so
# setting a __call__ attribute on the module object has no effect. Swapping
# the module's class for a ModuleType subclass is the supported way to do it.
import types


class _CallableModule(types.ModuleType):
    """Module type whose instances forward calls to internet_search."""

    def __call__(self, *args, **kwargs):
        return internet_search(*args, **kwargs)


sys.modules[__name__].__class__ = _CallableModule

if __name__ == "__main__":
    # Example usage
    results = internet_search("Python programming")
    for result in results:
        if "error" in result:
            # Failures come back as a single {"error": ...} entry
            print(f"\nError: {result['error']}")
            continue
        print(f"\nTitle: {result.get('title')}")
        print(f"URL: {result.get('url')}")
        print(f"Snippet: {result.get('snippet')}")
|
@@ -0,0 +1,310 @@
|
|
1
|
+
"""Tools for working with Excel files.
|
2
|
+
|
3
|
+
Usage:
|
4
|
+
from praisonaiagents.tools import excel_tools
|
5
|
+
df = excel_tools.read_excel("data.xlsx")
|
6
|
+
|
7
|
+
or
|
8
|
+
from praisonaiagents.tools import read_excel, write_excel, merge_excel
|
9
|
+
df = read_excel("data.xlsx")
|
10
|
+
"""
|
11
|
+
|
12
|
+
import logging
|
13
|
+
from typing import List, Dict, Union, Optional, Any, TYPE_CHECKING, Tuple
|
14
|
+
from importlib import util
|
15
|
+
import json
|
16
|
+
from pathlib import Path
|
17
|
+
import tempfile
|
18
|
+
import os
|
19
|
+
|
20
|
+
if TYPE_CHECKING:
|
21
|
+
import pandas as pd
|
22
|
+
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
23
|
+
from openpyxl.chart import BarChart, LineChart, PieChart, Reference
|
24
|
+
from openpyxl.chart.label import DataLabelList
|
25
|
+
|
26
|
+
class ExcelTools:
    """Tools for working with Excel files."""

    def __init__(self):
        """Initialize ExcelTools."""
        pass

    def _get_pandas(self) -> Optional['pd']:
        """Return the pandas module, or None if pandas or openpyxl is missing.

        Nothing is installed automatically; a missing package is only logged.
        openpyxl is checked as well because every read and write in this
        class uses it as the engine.
        """
        if util.find_spec('pandas') is None:
            error_msg = "pandas package is not available. Please install it using: pip install pandas"
            logging.error(error_msg)
            return None
        if util.find_spec('openpyxl') is None:
            error_msg = "openpyxl package is not available. Please install it using: pip install openpyxl"
            logging.error(error_msg)
            return None
        import pandas as pd
        return pd

    def read_excel(
        self,
        filepath: str,
        sheet_name: Optional[Union[str, int, List[Union[str, int]]]] = 0,
        header: Optional[int] = 0,
        usecols: Optional[List[str]] = None,
        skiprows: Optional[Union[int, List[int]]] = None,
        na_values: Optional[List[str]] = None,
        dtype: Optional[Dict[str, str]] = None
    ) -> Union[Dict[str, List[Dict[str, Any]]], List[Dict[str, Any]]]:
        """Read an Excel file with advanced options.

        Args:
            filepath: Path to Excel file
            sheet_name: Sheet name(s) or index(es)
            header: Row number(s) to use as column names
            usecols: Columns to read
            skiprows: Line numbers to skip
            na_values: Additional strings to recognize as NA/NaN
            dtype: Dict of column dtypes

        Returns:
            Dict of sheet names to data if multiple sheets, else list of row
            dicts. On failure a dict with a single "error" key is returned.
        """
        try:
            pd = self._get_pandas()
            if pd is None:
                return {"error": "Required packages not available"}

            # Read Excel file
            df = pd.read_excel(
                filepath,
                sheet_name=sheet_name,
                header=header,
                usecols=usecols,
                skiprows=skiprows,
                na_values=na_values,
                dtype=dtype,
                engine='openpyxl'
            )

            # pandas hands back a dict of DataFrames when several sheets
            # were requested; convert each one to row dicts.
            if isinstance(df, dict):
                return {
                    name: frame.to_dict('records')
                    for name, frame in df.items()
                }
            return df.to_dict('records')

        except Exception as e:
            error_msg = f"Error reading Excel file {filepath}: {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}

    def write_excel(
        self,
        filepath: str,
        data: Union[Dict[str, List[Dict[str, Any]]], List[Dict[str, Any]]],
        sheet_name: Optional[str] = None,
        index: bool = False,
        header: bool = True,
        mode: str = 'w'
    ) -> bool:
        """Write data to an Excel file.

        Args:
            filepath: Path to Excel file
            data: Data to write (dict of sheet names to data or list of row dicts)
            sheet_name: Sheet name if data is a list (defaults to 'Sheet1')
            index: Whether to write row indices
            header: Whether to write column headers
            mode: Write mode ('w' for write, 'a' for append). In append mode
                the sheets already in the file are kept; a sheet whose name
                is written again is replaced by the new data.

        Returns:
            bool: Success status (failures are logged, not raised)
        """
        try:
            pd = self._get_pandas()
            if pd is None:
                return False

            # Normalize both input shapes to {sheet name: DataFrame}. This
            # happens before the target file is touched, so bad input
            # cannot leave a truncated workbook behind.
            if isinstance(data, dict):
                new_sheets = {
                    name: pd.DataFrame(sheet_data)
                    for name, sheet_data in data.items()
                }
            else:
                new_sheets = {sheet_name or 'Sheet1': pd.DataFrame(data)}

            # In append mode, load every existing sheet fully into memory
            # BEFORE the writer opens the same path for writing. header=None
            # keeps each sheet as a raw cell grid so it is copied verbatim.
            existing_sheets = {}
            if mode == 'a' and os.path.exists(filepath):
                existing_sheets = pd.read_excel(
                    filepath, sheet_name=None, header=None, engine='openpyxl'
                )

            with pd.ExcelWriter(filepath, engine='openpyxl') as writer:
                # Copy existing sheets
                for name, frame in existing_sheets.items():
                    if name in new_sheets:
                        continue  # replaced by the new data below
                    frame.to_excel(
                        writer, sheet_name=name, index=False, header=False
                    )
                # Add new sheets
                for name, frame in new_sheets.items():
                    frame.to_excel(
                        writer, sheet_name=name, index=index, header=header
                    )

            return True

        except Exception as e:
            error_msg = f"Error writing Excel file {filepath}: {str(e)}"
            logging.error(error_msg)
            return False

    def merge_excel(
        self,
        files: List[str],
        output_file: str,
        how: str = 'inner',
        on: Optional[Union[str, List[str]]] = None,
        suffixes: Optional[Tuple[str, str]] = None
    ) -> bool:
        """Merge multiple Excel files.

        The first sheet of each file is read, and the files are merged left
        to right with ``pandas.merge``.

        Args:
            files: List of Excel files to merge
            output_file: Output file path
            how: Merge method ('inner', 'outer', 'left', 'right')
            on: Column(s) to merge on
            suffixes: Suffixes for overlapping columns (defaults to
                ('_1', '_2'))

        Returns:
            bool: Success status (failures are logged, not raised)
        """
        try:
            if len(files) < 2:
                error_msg = "At least two files are required for merging"
                logging.error(error_msg)
                return False

            pd = self._get_pandas()
            if pd is None:
                return False

            merged = None
            for file in files:
                rows = self.read_excel(file)
                # With the default single sheet, read_excel returns a list;
                # a dict here is its {"error": ...} failure value, which it
                # has already logged.
                if isinstance(rows, dict):
                    return False

                frame = pd.DataFrame(rows)
                if merged is None:
                    merged = frame
                    continue
                merged = pd.merge(
                    merged,
                    frame,
                    how=how,
                    on=on,
                    suffixes=suffixes or ('_1', '_2')
                )

            # Write merged result
            return self.write_excel(output_file, merged.to_dict('records'))

        except Exception as e:
            error_msg = f"Error merging Excel files: {str(e)}"
            logging.error(error_msg)
            return False
|
239
|
+
|
240
|
+
# Create instance for direct function access
_excel_tools = ExcelTools()
read_excel = _excel_tools.read_excel
write_excel = _excel_tools.write_excel
merge_excel = _excel_tools.merge_excel

if __name__ == "__main__":
    # Demonstration: write two workbooks, read one back, and merge them.
    print("\n==================================================")
    print("ExcelTools Demonstration")
    print("==================================================\n")

    # Paths are recorded as soon as the files exist so that the finally
    # block can remove them even when a later step fails.
    temp_paths = []

    def _new_temp_xlsx() -> str:
        """Create an empty temporary .xlsx file and return its path."""
        with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as handle:
            temp_paths.append(handle.name)
            return handle.name

    def _print_rows(rows) -> None:
        """Print row dicts, or the message when rows is an error dict."""
        if isinstance(rows, dict) and "error" in rows:
            print(rows["error"])
            return
        for row in rows:
            print(row)

    try:
        # Create a temporary file for testing
        temp_file = _new_temp_xlsx()

        print("1. Writing Excel File")
        print("------------------------------")
        data = [
            {"name": "Alice", "age": 25, "city": "New York"},
            {"name": "Bob", "age": 30, "city": "San Francisco"},
            {"name": "Charlie", "age": 35, "city": "Chicago"}
        ]
        result = write_excel(temp_file, data, "People")
        print(f"Data written successfully: {result}")
        print()

        print("2. Reading Excel File")
        print("------------------------------")
        read_data = read_excel(temp_file)
        print("Contents of Excel file:")
        _print_rows(read_data)
        print()

        print("3. Merging Excel Files")
        print("------------------------------")
        # Create a second file for merging
        temp_file2 = _new_temp_xlsx()
        data2 = [
            {"name": "Alice", "salary": 75000},
            {"name": "Bob", "salary": 85000},
            {"name": "Charlie", "salary": 95000}
        ]
        write_excel(temp_file2, data2, "Salaries")

        # Merge files
        temp_file3 = _new_temp_xlsx()
        result = merge_excel(
            [temp_file, temp_file2],
            temp_file3,
            how='inner',
            on='name'
        )
        print(f"Files merged successfully: {result}")
        if result:
            merged_data = read_excel(temp_file3)
            print("\nMerged contents:")
            _print_rows(merged_data)
        print()
    finally:
        # Clean up temporary files
        for path in temp_paths:
            if os.path.exists(path):
                os.unlink(path)

    print("==================================================")
    print("Demonstration Complete")
    print("==================================================\n")
|