ai-data-science-team 0.0.0.9005__py3-none-any.whl → 0.0.0.9007__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- ai_data_science_team/_version.py +1 -1
- ai_data_science_team/agents/__init__.py +3 -1
- ai_data_science_team/agents/data_cleaning_agent.py +213 -20
- ai_data_science_team/agents/data_visualization_agent.py +331 -0
- ai_data_science_team/agents/data_wrangling_agent.py +66 -24
- ai_data_science_team/agents/feature_engineering_agent.py +50 -13
- ai_data_science_team/agents/sql_database_agent.py +397 -0
- ai_data_science_team/templates/__init__.py +8 -0
- ai_data_science_team/templates/agent_templates.py +154 -37
- ai_data_science_team/tools/logging.py +1 -1
- ai_data_science_team/tools/metadata.py +230 -0
- ai_data_science_team/tools/regex.py +7 -1
- {ai_data_science_team-0.0.0.9005.dist-info → ai_data_science_team-0.0.0.9007.dist-info}/METADATA +43 -22
- ai_data_science_team-0.0.0.9007.dist-info/RECORD +21 -0
- {ai_data_science_team-0.0.0.9005.dist-info → ai_data_science_team-0.0.0.9007.dist-info}/WHEEL +1 -1
- ai_data_science_team/tools/data_analysis.py +0 -116
- ai_data_science_team-0.0.0.9005.dist-info/RECORD +0 -19
- {ai_data_science_team-0.0.0.9005.dist-info → ai_data_science_team-0.0.0.9007.dist-info}/LICENSE +0 -0
- {ai_data_science_team-0.0.0.9005.dist-info → ai_data_science_team-0.0.0.9007.dist-info}/top_level.txt +0 -0
@@ -1,116 +0,0 @@
|
|
1
|
-
import io
|
2
|
-
import pandas as pd
|
3
|
-
from typing import Union, List, Dict
|
4
|
-
|
5
|
-
def summarize_dataframes(
    dataframes: Union[pd.DataFrame, List[pd.DataFrame], Dict[str, pd.DataFrame]]
) -> List[str]:
    """
    Build a text summary for one or more DataFrames.

    Parameters
    ----------
    dataframes : pandas.DataFrame or list of pandas.DataFrame or dict of (str -> pandas.DataFrame)
        - A single DataFrame is summarized under the name ``"Single_Dataset"``.
        - A list yields one summary per element, named ``"Dataset_0"``, ``"Dataset_1"``, ...
        - A dict yields one summary per entry, named by its key.

    Returns
    -------
    list of str
        One summary string per DataFrame (shape, dtypes, missing-value
        percentages, unique counts, head, describe, and ``info`` output).

    Raises
    ------
    TypeError
        If the argument is none of the three accepted shapes.

    Example
    -------
    ``` python
    import pandas as pd
    from sklearn.datasets import load_iris
    data = load_iris(as_frame=True)
    dataframes = {
        "iris": data.frame,
        "iris_target": data.target,
    }
    summaries = summarize_dataframes(dataframes)
    print(summaries[0])
    ```
    """
    # Dict case: keys become the dataset names.
    if isinstance(dataframes, dict):
        return [_summarize_dataframe(df, name) for name, df in dataframes.items()]

    # Single-DataFrame case: fixed placeholder name, one-element result.
    if isinstance(dataframes, pd.DataFrame):
        return [_summarize_dataframe(dataframes, "Single_Dataset")]

    # List case: synthesize index-based names.
    if isinstance(dataframes, list):
        return [
            _summarize_dataframe(df, f"Dataset_{idx}")
            for idx, df in enumerate(dataframes)
        ]

    raise TypeError(
        "Input must be a single DataFrame, a list of DataFrames, or a dictionary of DataFrames."
    )
69
|
-
|
70
|
-
|
71
|
-
def _summarize_dataframe(df: pd.DataFrame, dataset_name: str) -> str:
|
72
|
-
"""Generate a summary string for a single DataFrame."""
|
73
|
-
# 1. Convert dictionary-type cells to strings
|
74
|
-
# This prevents unhashable dict errors during df.nunique().
|
75
|
-
df = df.apply(lambda col: col.map(lambda x: str(x) if isinstance(x, dict) else x))
|
76
|
-
|
77
|
-
# 2. Capture df.info() output
|
78
|
-
buffer = io.StringIO()
|
79
|
-
df.info(buf=buffer)
|
80
|
-
info_text = buffer.getvalue()
|
81
|
-
|
82
|
-
# 3. Calculate missing value stats
|
83
|
-
missing_stats = (df.isna().sum() / len(df) * 100).sort_values(ascending=False)
|
84
|
-
missing_summary = "\n".join([f"{col}: {val:.2f}%" for col, val in missing_stats.items()])
|
85
|
-
|
86
|
-
# 4. Get column data types
|
87
|
-
column_types = "\n".join([f"{col}: {dtype}" for col, dtype in df.dtypes.items()])
|
88
|
-
|
89
|
-
# 5. Get unique value counts
|
90
|
-
unique_counts = df.nunique() # Will no longer fail on unhashable dict
|
91
|
-
unique_counts_summary = "\n".join([f"{col}: {count}" for col, count in unique_counts.items()])
|
92
|
-
|
93
|
-
summary_text = f"""
|
94
|
-
Dataset Name: {dataset_name}
|
95
|
-
----------------------------
|
96
|
-
Shape: {df.shape[0]} rows x {df.shape[1]} columns
|
97
|
-
|
98
|
-
Column Data Types:
|
99
|
-
{column_types}
|
100
|
-
|
101
|
-
Missing Value Percentage:
|
102
|
-
{missing_summary}
|
103
|
-
|
104
|
-
Unique Value Counts:
|
105
|
-
{unique_counts_summary}
|
106
|
-
|
107
|
-
Data (first 30 rows):
|
108
|
-
{df.head(30).to_string()}
|
109
|
-
|
110
|
-
Data Description:
|
111
|
-
{df.describe().to_string()}
|
112
|
-
|
113
|
-
Data Info:
|
114
|
-
{info_text}
|
115
|
-
"""
|
116
|
-
return summary_text.strip()
|
@@ -1,19 +0,0 @@
|
|
1
|
-
ai_data_science_team/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
ai_data_science_team/_version.py,sha256=7tA8TocqCCzLkcB4ptV6bn3k5ni-0TGZvGnVBzmbeIc,26
|
3
|
-
ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
|
4
|
-
ai_data_science_team/agents/__init__.py,sha256=DtwQgyeG3Q4rQ-NrMbva-jshVQyULaWW1RrnETQGZOY,270
|
5
|
-
ai_data_science_team/agents/data_cleaning_agent.py,sha256=0K-CgngGjamRk_QzMqNkplrI-ddCbtruQ7kjGrsRIN8,14390
|
6
|
-
ai_data_science_team/agents/data_wrangling_agent.py,sha256=uQBJ8vQwrXubQgaI9_UoNZnVQjIEBUOh3dTmNdg326k,14581
|
7
|
-
ai_data_science_team/agents/feature_engineering_agent.py,sha256=QEqXTsfjllUj4Wgsw4nNGUT6r9Y6q629ZNgqGy3Dbbk,15921
|
8
|
-
ai_data_science_team/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
9
|
-
ai_data_science_team/templates/agent_templates.py,sha256=gT48Pq9KlrrrF0yigodGl_BdptmowTJ2rEWUqh7g5E0,15410
|
10
|
-
ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
|
-
ai_data_science_team/tools/data_analysis.py,sha256=V7e6_fZA01mosFf5VcLwBcpiMVf7fClZMjTrj-egK-o,3715
|
12
|
-
ai_data_science_team/tools/logging.py,sha256=EU5EMg4Y0-Yhqf1vAEFg0eRvSTx8uF0LTOAKss8-T2M,2073
|
13
|
-
ai_data_science_team/tools/parsers.py,sha256=BAi-fJT7BBt9nRS3w5n9LDTsu7JAJsH8CAI9-Qf7jCs,2086
|
14
|
-
ai_data_science_team/tools/regex.py,sha256=KTH2SXPJT8Tzmj7CufyeET-FbA9BMhRzFlPKr4Tan3g,2320
|
15
|
-
ai_data_science_team-0.0.0.9005.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
|
16
|
-
ai_data_science_team-0.0.0.9005.dist-info/METADATA,sha256=PC6rJR965hPu02LtZrzHICkd3QeWzh2A35axTLjE9hM,5840
|
17
|
-
ai_data_science_team-0.0.0.9005.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
18
|
-
ai_data_science_team-0.0.0.9005.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
|
19
|
-
ai_data_science_team-0.0.0.9005.dist-info/RECORD,,
|
{ai_data_science_team-0.0.0.9005.dist-info → ai_data_science_team-0.0.0.9007.dist-info}/LICENSE
RENAMED
File without changes
|
File without changes
|