praisonaiagents 0.0.23__py3-none-any.whl → 0.0.24__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- praisonaiagents/tools/__init__.py +165 -2
- praisonaiagents/tools/arxiv_tools.py +292 -0
- praisonaiagents/tools/calculator_tools.py +278 -0
- praisonaiagents/tools/csv_tools.py +266 -0
- praisonaiagents/tools/duckdb_tools.py +268 -0
- praisonaiagents/tools/duckduckgo_tools.py +52 -0
- praisonaiagents/tools/excel_tools.py +310 -0
- praisonaiagents/tools/file_tools.py +274 -0
- praisonaiagents/tools/json_tools.py +515 -0
- praisonaiagents/tools/newspaper_tools.py +354 -0
- praisonaiagents/tools/pandas_tools.py +326 -0
- praisonaiagents/tools/python_tools.py +423 -0
- praisonaiagents/tools/shell_tools.py +278 -0
- praisonaiagents/tools/spider_tools.py +431 -0
- praisonaiagents/tools/test.py +56 -0
- praisonaiagents/tools/tools.py +5 -36
- praisonaiagents/tools/wikipedia_tools.py +272 -0
- praisonaiagents/tools/xml_tools.py +498 -0
- praisonaiagents/tools/yaml_tools.py +417 -0
- praisonaiagents/tools/yfinance_tools.py +213 -0
- {praisonaiagents-0.0.23.dist-info → praisonaiagents-0.0.24.dist-info}/METADATA +1 -1
- praisonaiagents-0.0.24.dist-info/RECORD +42 -0
- praisonaiagents-0.0.23.dist-info/RECORD +0 -24
- {praisonaiagents-0.0.23.dist-info → praisonaiagents-0.0.24.dist-info}/WHEEL +0 -0
- {praisonaiagents-0.0.23.dist-info → praisonaiagents-0.0.24.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,268 @@
|
|
1
|
+
"""Tools for working with DuckDB databases.
|
2
|
+
|
3
|
+
Usage:
|
4
|
+
from praisonaiagents.tools import duckdb_tools
|
5
|
+
rows = duckdb_tools.execute_query("SELECT * FROM my_table")
|
6
|
+
|
7
|
+
or
|
8
|
+
from praisonaiagents.tools.duckdb_tools import execute_query, load_csv, export_csv
|
9
|
+
rows = execute_query("SELECT * FROM my_table")
|
10
|
+
"""
|
11
|
+
|
12
|
+
import logging
|
13
|
+
from typing import List, Dict, Any, Optional, Union, TYPE_CHECKING
|
14
|
+
from importlib import util
|
15
|
+
import json
|
16
|
+
|
17
|
+
if TYPE_CHECKING:
|
18
|
+
import duckdb
|
19
|
+
import pandas as pd
|
20
|
+
|
21
|
+
class DuckDBTools:
    """Convenience wrapper around a lazily opened DuckDB connection.

    ``duckdb`` and ``pandas`` are imported on first use, so this class can be
    created even when they are not installed. In that case (and on any other
    failure) the public methods log the problem and report it through their
    return value instead of raising.
    """

    def __init__(self, database: str = ':memory:'):
        """Initialize DuckDBTools.

        Args:
            database: Path to database file or ':memory:' for in-memory database
        """
        self.database = database
        # Opened on first use by _get_connection(); reset to None by close().
        self._conn = None

    @staticmethod
    def _quote_identifier(name: str) -> str:
        """Return *name* as a double-quoted SQL identifier."""
        return '"' + str(name).replace('"', '""') + '"'

    @classmethod
    def _quote_table_name(cls, name: str) -> str:
        """Quote a table name, keeping ``schema.table`` qualification intact."""
        return '.'.join(cls._quote_identifier(part) for part in str(name).split('.'))

    @staticmethod
    def _quote_literal(value: str) -> str:
        """Return *value* as a single-quoted SQL string literal."""
        return "'" + str(value).replace("'", "''") + "'"

    def _get_duckdb(self) -> Optional['duckdb']:
        """Return the duckdb module, or None (after logging) if it is not installed."""
        if util.find_spec('duckdb') is None:
            error_msg = "duckdb package is not available. Please install it using: pip install duckdb"
            logging.error(error_msg)
            return None
        import duckdb
        return duckdb

    def _get_pandas(self) -> Optional['pd']:
        """Return the pandas module, or None (after logging) if it is not installed."""
        if util.find_spec('pandas') is None:
            error_msg = "pandas package is not available. Please install it using: pip install pandas"
            logging.error(error_msg)
            return None
        import pandas as pd
        return pd

    def _get_connection(self) -> Optional['duckdb.DuckDBPyConnection']:
        """Return the cached connection, opening it on first use.

        Returns:
            The connection, or None if duckdb is missing or connecting failed
            (the failure is logged).
        """
        if self._conn is None:
            duckdb = self._get_duckdb()
            if duckdb is None:
                return None
            try:
                self._conn = duckdb.connect(self.database)
            except Exception as e:
                error_msg = f"Error connecting to database {self.database}: {str(e)}"
                logging.error(error_msg)
                return None
        return self._conn

    def execute_query(
        self,
        query: str,
        params: Optional[Union[tuple, dict]] = None,
        return_df: bool = True
    ) -> Union[List[Dict[str, Any]], Dict[str, str]]:
        """Execute a SQL query.

        Args:
            query: SQL query to execute
            params: Query parameters
            return_df: If True, rows are materialized through a pandas
                DataFrame (requires pandas). If False, rows are built
                directly from the cursor and pandas is not needed.

        Returns:
            Query results as list of dicts (column name -> value), or a dict
            with a single "error" key describing the failure
        """
        try:
            conn = self._get_connection()
            if conn is None:
                return {"error": "Could not connect to database"}

            if params:
                result = conn.execute(query, params)
            else:
                result = conn.execute(query)

            if return_df:
                pd = self._get_pandas()
                if pd is None:
                    return {"error": "pandas package not available"}
                df = result.df()
                return df.to_dict('records')

            # fetchall() yields plain tuples, so column names have to come
            # from the cursor description (first field of each entry).
            columns = [column[0] for column in (result.description or [])]
            return [dict(zip(columns, row)) for row in result.fetchall()]

        except Exception as e:
            error_msg = f"Error executing query: {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}

    def load_csv(
        self,
        table_name: str,
        filepath: str,
        schema: Optional[Dict[str, str]] = None,
        if_exists: str = 'replace'
    ) -> bool:
        """Load a CSV file into a table.

        Table and column names are emitted as quoted identifiers and the file
        path as an escaped string literal, so unusual characters in them
        cannot change the structure of the generated SQL. Column *types* from
        ``schema`` are inserted verbatim and must come from a trusted source.

        Args:
            table_name: Name of table to create (``schema.table`` is accepted)
            filepath: Path to CSV file
            schema: Optional column definitions (column name -> SQL type)
            if_exists: What to do if table exists ('fail', 'replace', 'append')

        Returns:
            bool: Success status (failures are logged, not raised)
        """
        try:
            # Validate up front so a bad value is rejected even when the
            # table does not exist yet.
            if if_exists not in ('fail', 'replace', 'append'):
                raise ValueError("if_exists must be 'fail', 'replace', or 'append'")

            conn = self._get_connection()
            if conn is None:
                return False

            table = self._quote_table_name(table_name)
            csv_source = f"read_csv_auto({self._quote_literal(filepath)})"

            # Check if table exists
            exists = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name = ?",
                [table_name],
            ).fetchone() is not None

            if exists and if_exists == 'fail':
                raise ValueError(f"Table {table_name} already exists")
            if exists and if_exists == 'replace':
                conn.execute(f"DROP TABLE IF EXISTS {table}")

            # Create table if needed
            if not exists or if_exists == 'replace':
                if schema:
                    # Create table with schema
                    columns = ', '.join(
                        f"{self._quote_identifier(column)} {column_type}"
                        for column, column_type in schema.items()
                    )
                    conn.execute(f"CREATE TABLE {table} ({columns})")
                else:
                    # Infer schema from CSV; WHERE 1=0 copies the column
                    # layout without copying any rows.
                    conn.execute(
                        f"CREATE TABLE {table} AS SELECT * FROM {csv_source} WHERE 1=0"
                    )

            # Load data
            conn.execute(f"INSERT INTO {table} SELECT * FROM {csv_source}")

            return True

        except Exception as e:
            error_msg = f"Error loading CSV file {filepath}: {str(e)}"
            logging.error(error_msg)
            return False

    def export_csv(
        self,
        query: str,
        filepath: str,
        params: Optional[Union[tuple, dict]] = None
    ) -> bool:
        """Export query results to CSV.

        Args:
            query: SQL query to execute
            filepath: Output file path
            params: Optional query parameters

        Returns:
            bool: Success status (failures are logged, not raised)
        """
        try:
            # Execute query and get results as a list of row dicts
            results = self.execute_query(query, params)
            if isinstance(results, dict) and 'error' in results:
                return False

            pd = self._get_pandas()
            if pd is None:
                return False

            # Convert to DataFrame and save
            df = pd.DataFrame(results)
            df.to_csv(filepath, index=False)
            return True

        except Exception as e:
            error_msg = f"Error exporting to CSV file {filepath}: {str(e)}"
            logging.error(error_msg)
            return False

    def close(self):
        """Close the database connection, if one is open."""
        if self._conn:
            self._conn.close()
            self._conn = None
|
210
|
+
|
211
|
+
# Shared default instance (in-memory database) whose bound methods are
# re-exported as plain module-level functions.
_duckdb_tools = DuckDBTools()
execute_query, load_csv, export_csv = (
    _duckdb_tools.execute_query,
    _duckdb_tools.load_csv,
    _duckdb_tools.export_csv,
)
|
216
|
+
|
217
|
+
if __name__ == "__main__":
    # Self-contained demo: load a small CSV, query it, export the result.
    print("\n==================================================")
    print("DuckDBTools Demonstration")
    print("==================================================\n")

    import tempfile
    import os

    # Reserve both temporary paths first so the finally-block below can
    # always remove them, even if a step in between raises.
    with tempfile.NamedTemporaryFile(suffix='.csv', delete=False) as temp:
        temp_file = temp.name
    with tempfile.NamedTemporaryFile(suffix='.csv', delete=False) as temp2:
        temp_file2 = temp2.name

    try:
        # Create sample data
        with open(temp_file, 'w') as f:
            f.write("name,age,city\n")
            f.write("Alice,25,New York\n")
            f.write("Bob,30,San Francisco\n")
            f.write("Charlie,35,Chicago\n")

        print("1. Loading CSV File")
        print("------------------------------")
        result = load_csv('users', temp_file)
        print(f"CSV loaded successfully: {result}")
        print()

        print("2. Executing Query")
        print("------------------------------")
        query = "SELECT * FROM users WHERE age > 25"
        results = execute_query(query)
        print("Query results:")
        if isinstance(results, dict):
            # execute_query reports failures as {"error": ...}; iterating
            # that dict would only print its key.
            print(results.get("error"))
        else:
            for row in results:
                print(row)
        print()

        print("3. Exporting Query Results")
        print("------------------------------")
        result = export_csv(query, temp_file2)
        print(f"Results exported successfully: {result}")
        if result:
            print("\nExported file contents:")
            with open(temp_file2) as f:
                print(f.read())
    finally:
        # Clean up temporary files
        for path in (temp_file, temp_file2):
            if os.path.exists(path):
                os.unlink(path)

    print("==================================================")
    print("Demonstration Complete")
    print("==================================================\n")
|
@@ -0,0 +1,52 @@
|
|
1
|
+
"""DuckDuckGo search functionality.
|
2
|
+
|
3
|
+
Usage:
|
4
|
+
from praisonaiagents.tools import internet_search
|
5
|
+
results = internet_search("AI news")
|
6
|
+
|
7
|
+
or
|
8
|
+
from praisonaiagents.tools import duckduckgo
|
9
|
+
results = duckduckgo("AI news")
|
10
|
+
"""
|
11
|
+
|
12
|
+
from typing import List, Dict
|
13
|
+
import logging
|
14
|
+
from importlib import util
|
15
|
+
import sys
|
16
|
+
|
17
|
+
def internet_search(query: str) -> List[Dict]:
    """Run a DuckDuckGo text search for *query*.

    Returns:
        A list of dicts with "title", "url" and "snippet" keys (at most five
        results are requested). If the ``duckduckgo_search`` package is not
        installed or the search raises, a one-element list holding an
        {"error": message} dict is returned instead and the message is logged.
    """
    # Bail out early when the optional dependency is absent.
    if util.find_spec("duckduckgo_search") is None:
        error_msg = "DuckDuckGo search is not available. Please install duckduckgo_search package using: pip install duckduckgo_search"
        logging.error(error_msg)
        return [{"error": error_msg}]

    try:
        # Deferred import: only pay for the dependency when a search runs.
        from duckduckgo_search import DDGS

        client = DDGS()
        return [
            {
                "title": hit.get("title", ""),
                "url": hit.get("href", ""),
                "snippet": hit.get("body", ""),
            }
            for hit in client.text(keywords=query, max_results=5)
        ]
    except Exception as e:
        error_msg = f"Error during DuckDuckGo search: {e}"
        logging.error(error_msg)
        return [{"error": error_msg}]
|
42
|
+
|
43
|
+
# Make the module callable.
# Special methods such as __call__ are looked up on an object's type, not on
# the instance, so setting __call__ on the module object alone does not make
# `module(...)` work. Swapping the module's class for a module subclass that
# defines __call__ does.
class _CallableModule(type(sys)):
    """Module type whose instances forward calls to ``internet_search``."""

    def __call__(self, query: str) -> List[Dict]:
        return internet_search(query)


sys.modules[__name__].__class__ = _CallableModule
# Kept so code that reads the attribute explicitly keeps working.
sys.modules[__name__].__call__ = internet_search
|
45
|
+
|
46
|
+
if __name__ == "__main__":
    # Example usage: print each hit returned for a sample query
    for hit in internet_search("Python programming"):
        title, url, snippet = hit.get('title'), hit.get('url'), hit.get('snippet')
        print(f"\nTitle: {title}")
        print(f"URL: {url}")
        print(f"Snippet: {snippet}")
|
@@ -0,0 +1,310 @@
|
|
1
|
+
"""Tools for working with Excel files.
|
2
|
+
|
3
|
+
Usage:
|
4
|
+
from praisonaiagents.tools import excel_tools
|
5
|
+
df = excel_tools.read_excel("data.xlsx")
|
6
|
+
|
7
|
+
or
|
8
|
+
from praisonaiagents.tools import read_excel, write_excel, merge_excel
|
9
|
+
df = read_excel("data.xlsx")
|
10
|
+
"""
|
11
|
+
|
12
|
+
import logging
|
13
|
+
from typing import List, Dict, Union, Optional, Any, TYPE_CHECKING, Tuple
|
14
|
+
from importlib import util
|
15
|
+
import json
|
16
|
+
from pathlib import Path
|
17
|
+
import tempfile
|
18
|
+
import os
|
19
|
+
|
20
|
+
if TYPE_CHECKING:
|
21
|
+
import pandas as pd
|
22
|
+
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
23
|
+
from openpyxl.chart import BarChart, LineChart, PieChart, Reference
|
24
|
+
from openpyxl.chart.label import DataLabelList
|
25
|
+
|
26
|
+
class ExcelTools:
    """Tools for reading, writing and merging Excel files.

    ``pandas`` and ``openpyxl`` are looked up on first use. When either is
    missing, or an operation fails, the methods log the problem and report it
    through their return value instead of raising.
    """

    def __init__(self):
        """Initialize ExcelTools."""
        pass

    def _get_pandas(self) -> Optional['pd']:
        """Return the pandas module, or None if pandas or openpyxl is not installed.

        The missing package is logged. Nothing is installed automatically.
        """
        if util.find_spec('pandas') is None:
            error_msg = "pandas package is not available. Please install it using: pip install pandas"
            logging.error(error_msg)
            return None
        if util.find_spec('openpyxl') is None:
            error_msg = "openpyxl package is not available. Please install it using: pip install openpyxl"
            logging.error(error_msg)
            return None
        import pandas as pd
        return pd

    def read_excel(
        self,
        filepath: str,
        sheet_name: Optional[Union[str, int, List[Union[str, int]]]] = 0,
        header: Optional[int] = 0,
        usecols: Optional[List[str]] = None,
        skiprows: Optional[Union[int, List[int]]] = None,
        na_values: Optional[List[str]] = None,
        dtype: Optional[Dict[str, str]] = None
    ) -> Union[Dict[str, List[Dict[str, Any]]], List[Dict[str, Any]]]:
        """Read an Excel file with advanced options.

        Args:
            filepath: Path to Excel file
            sheet_name: Sheet name(s) or index(es)
            header: Row number(s) to use as column names
            usecols: Columns to read
            skiprows: Line numbers to skip
            na_values: Additional strings to recognize as NA/NaN
            dtype: Dict of column dtypes

        Returns:
            Dict of sheet names to data if multiple sheets, else list of row
            dicts. On failure a dict with a single "error" key is returned.
        """
        try:
            pd = self._get_pandas()
            if pd is None:
                return {"error": "Required packages not available"}

            # Read Excel file
            df = pd.read_excel(
                filepath,
                sheet_name=sheet_name,
                header=header,
                usecols=usecols,
                skiprows=skiprows,
                na_values=na_values,
                dtype=dtype,
                engine='openpyxl'
            )

            # pandas returns a dict of DataFrames when several sheets were
            # requested, otherwise a single DataFrame.
            if isinstance(df, dict):
                return {name: frame.to_dict('records') for name, frame in df.items()}
            return df.to_dict('records')

        except Exception as e:
            error_msg = f"Error reading Excel file {filepath}: {str(e)}"
            logging.error(error_msg)
            return {"error": error_msg}

    def write_excel(
        self,
        filepath: str,
        data: Union[Dict[str, List[Dict[str, Any]]], List[Dict[str, Any]]],
        sheet_name: Optional[str] = None,
        index: bool = False,
        header: bool = True,
        mode: str = 'w'
    ) -> bool:
        """Write data to an Excel file.

        In append mode the sheets already in the file are carried over
        unchanged (always with their header row and without an index column,
        regardless of ``index``/``header``). A sheet whose name is also
        present in the new data is replaced by the new data.

        Args:
            filepath: Path to Excel file
            data: Data to write (dict of sheet names to data or list of row dicts)
            sheet_name: Sheet name if data is a list (defaults to 'Sheet1')
            index: Whether to write row indices for the new data
            header: Whether to write column headers for the new data
            mode: Write mode ('w' for write, 'a' for append)

        Returns:
            bool: Success status (failures are logged, not raised)
        """
        try:
            pd = self._get_pandas()
            if pd is None:
                return False

            # Normalize both accepted input shapes to {sheet name: rows}.
            if isinstance(data, dict):
                new_sheets = data
            else:
                new_sheets = {sheet_name or 'Sheet1': data}

            # Load the existing sheets completely before the file is
            # reopened for writing, so no read handle is left open.
            existing_sheets = {}
            if mode == 'a' and os.path.exists(filepath):
                existing_sheets = pd.read_excel(
                    filepath, sheet_name=None, engine='openpyxl'
                )

            with pd.ExcelWriter(filepath, engine='openpyxl') as writer:
                # Copy existing sheets that are not being replaced
                for name, frame in existing_sheets.items():
                    if name not in new_sheets:
                        frame.to_excel(
                            writer, sheet_name=name, index=False, header=True
                        )
                # Add new sheets
                for name, rows in new_sheets.items():
                    pd.DataFrame(rows).to_excel(
                        writer, sheet_name=name, index=index, header=header
                    )

            return True

        except Exception as e:
            error_msg = f"Error writing Excel file {filepath}: {str(e)}"
            logging.error(error_msg)
            return False

    def merge_excel(
        self,
        files: List[str],
        output_file: str,
        how: str = 'inner',
        on: Optional[Union[str, List[str]]] = None,
        suffixes: Optional[Tuple[str, str]] = None
    ) -> bool:
        """Merge multiple Excel files.

        The first sheet of each file is read and the files are merged
        left to right.

        Args:
            files: List of Excel files to merge
            output_file: Output file path
            how: Merge method ('inner', 'outer', 'left', 'right')
            on: Column(s) to merge on
            suffixes: Suffixes for overlapping columns (defaults to ('_1', '_2'))

        Returns:
            bool: Success status (failures are logged, not raised)
        """
        try:
            if len(files) < 2:
                error_msg = "At least two files are required for merging"
                logging.error(error_msg)
                return False

            pd = self._get_pandas()
            if pd is None:
                return False

            merged = None
            for file in files:
                rows = self.read_excel(file)
                # With the default sheet_name, read_excel only returns a dict
                # to report an error; stop instead of merging the message.
                if isinstance(rows, dict):
                    error_msg = f"Error merging Excel files: {rows.get('error', f'could not read {file}')}"
                    logging.error(error_msg)
                    return False

                frame = pd.DataFrame(rows)
                if merged is None:
                    merged = frame
                else:
                    merged = pd.merge(
                        merged,
                        frame,
                        how=how,
                        on=on,
                        suffixes=suffixes or ('_1', '_2')
                    )

            # Write merged result
            return self.write_excel(output_file, merged.to_dict('records'))

        except Exception as e:
            error_msg = f"Error merging Excel files: {str(e)}"
            logging.error(error_msg)
            return False
|
239
|
+
|
240
|
+
# Shared default instance whose bound methods are re-exported as plain
# module-level functions.
_excel_tools = ExcelTools()
read_excel, write_excel, merge_excel = (
    _excel_tools.read_excel,
    _excel_tools.write_excel,
    _excel_tools.merge_excel,
)
|
245
|
+
|
246
|
+
if __name__ == "__main__":
    # Self-contained demo: write two workbooks, read one back, merge them.
    print("\n==================================================")
    print("ExcelTools Demonstration")
    print("==================================================\n")

    def _print_rows(rows):
        """Print each row, or the error message if reading failed."""
        if isinstance(rows, dict) and "error" in rows:
            print(rows["error"])
            return
        for row in rows:
            print(row)

    # Reserve all temporary paths first so the finally-block below can
    # always remove them, even if a step in between raises.
    temp_paths = []
    for _ in range(3):
        with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as temp:
            temp_paths.append(temp.name)
    temp_file, temp_file2, temp_file3 = temp_paths

    try:
        print("1. Writing Excel File")
        print("------------------------------")
        data = [
            {"name": "Alice", "age": 25, "city": "New York"},
            {"name": "Bob", "age": 30, "city": "San Francisco"},
            {"name": "Charlie", "age": 35, "city": "Chicago"}
        ]
        result = write_excel(temp_file, data, "People")
        print(f"Data written successfully: {result}")
        print()

        print("2. Reading Excel File")
        print("------------------------------")
        read_data = read_excel(temp_file)
        print("Contents of Excel file:")
        _print_rows(read_data)
        print()

        print("3. Merging Excel Files")
        print("------------------------------")
        # Create a second file for merging
        data2 = [
            {"name": "Alice", "salary": 75000},
            {"name": "Bob", "salary": 85000},
            {"name": "Charlie", "salary": 95000}
        ]
        write_excel(temp_file2, data2, "Salaries")

        # Merge files
        result = merge_excel(
            [temp_file, temp_file2],
            temp_file3,
            how='inner',
            on='name'
        )
        print(f"Files merged successfully: {result}")
        if result:
            merged_data = read_excel(temp_file3)
            print("\nMerged contents:")
            _print_rows(merged_data)
        print()
    finally:
        # Clean up temporary files
        for path in temp_paths:
            if os.path.exists(path):
                os.unlink(path)

    print("==================================================")
    print("Demonstration Complete")
    print("==================================================\n")
|