ai-data-science-team 0.0.0.9005__py3-none-any.whl → 0.0.0.9007__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,116 +0,0 @@
1
- import io
2
- import pandas as pd
3
- from typing import Union, List, Dict
4
-
5
def summarize_dataframes(
    dataframes: Union[pd.DataFrame, List[pd.DataFrame], Dict[str, pd.DataFrame]]
) -> List[str]:
    """
    Produce a text summary for one or more DataFrames.

    Accepts a single DataFrame, a list of DataFrames, or a dictionary that
    maps dataset names to DataFrames.

    Parameters
    ----------
    dataframes : pandas.DataFrame or list of pandas.DataFrame or dict of (str -> pandas.DataFrame)
        - Single DataFrame: summarized under the name ``"Single_Dataset"``.
        - List: each element is summarized under ``"Dataset_<index>"``.
        - Dict: each value is summarized under its key.

    Example:
    --------
    ``` python
    import pandas as pd
    from sklearn.datasets import load_iris
    data = load_iris(as_frame=True)
    dataframes = {
        "iris": data.frame,
        "iris_target": data.target,
    }
    summaries = summarize_dataframes(dataframes)
    print(summaries[0])
    ```

    Returns
    -------
    list of str
        One summary string per provided DataFrame, covering shape, dtypes,
        missing-value percentages, unique counts, the first 30 rows,
        descriptive statistics, and the ``df.info()`` output.

    Raises
    ------
    TypeError
        If the input is none of the three accepted shapes.
    """
    # Normalize every accepted input shape into a name -> DataFrame mapping,
    # then summarize each entry with a single pass below.
    if isinstance(dataframes, dict):
        named = dataframes
    elif isinstance(dataframes, pd.DataFrame):
        named = {"Single_Dataset": dataframes}
    elif isinstance(dataframes, list):
        named = {f"Dataset_{idx}": frame for idx, frame in enumerate(dataframes)}
    else:
        raise TypeError(
            "Input must be a single DataFrame, a list of DataFrames, or a dictionary of DataFrames."
        )

    return [_summarize_dataframe(frame, name) for name, frame in named.items()]
69
-
70
-
71
- def _summarize_dataframe(df: pd.DataFrame, dataset_name: str) -> str:
72
- """Generate a summary string for a single DataFrame."""
73
- # 1. Convert dictionary-type cells to strings
74
- # This prevents unhashable dict errors during df.nunique().
75
- df = df.apply(lambda col: col.map(lambda x: str(x) if isinstance(x, dict) else x))
76
-
77
- # 2. Capture df.info() output
78
- buffer = io.StringIO()
79
- df.info(buf=buffer)
80
- info_text = buffer.getvalue()
81
-
82
- # 3. Calculate missing value stats
83
- missing_stats = (df.isna().sum() / len(df) * 100).sort_values(ascending=False)
84
- missing_summary = "\n".join([f"{col}: {val:.2f}%" for col, val in missing_stats.items()])
85
-
86
- # 4. Get column data types
87
- column_types = "\n".join([f"{col}: {dtype}" for col, dtype in df.dtypes.items()])
88
-
89
- # 5. Get unique value counts
90
- unique_counts = df.nunique() # Will no longer fail on unhashable dict
91
- unique_counts_summary = "\n".join([f"{col}: {count}" for col, count in unique_counts.items()])
92
-
93
- summary_text = f"""
94
- Dataset Name: {dataset_name}
95
- ----------------------------
96
- Shape: {df.shape[0]} rows x {df.shape[1]} columns
97
-
98
- Column Data Types:
99
- {column_types}
100
-
101
- Missing Value Percentage:
102
- {missing_summary}
103
-
104
- Unique Value Counts:
105
- {unique_counts_summary}
106
-
107
- Data (first 30 rows):
108
- {df.head(30).to_string()}
109
-
110
- Data Description:
111
- {df.describe().to_string()}
112
-
113
- Data Info:
114
- {info_text}
115
- """
116
- return summary_text.strip()
@@ -1,19 +0,0 @@
1
- ai_data_science_team/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- ai_data_science_team/_version.py,sha256=7tA8TocqCCzLkcB4ptV6bn3k5ni-0TGZvGnVBzmbeIc,26
3
- ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
4
- ai_data_science_team/agents/__init__.py,sha256=DtwQgyeG3Q4rQ-NrMbva-jshVQyULaWW1RrnETQGZOY,270
5
- ai_data_science_team/agents/data_cleaning_agent.py,sha256=0K-CgngGjamRk_QzMqNkplrI-ddCbtruQ7kjGrsRIN8,14390
6
- ai_data_science_team/agents/data_wrangling_agent.py,sha256=uQBJ8vQwrXubQgaI9_UoNZnVQjIEBUOh3dTmNdg326k,14581
7
- ai_data_science_team/agents/feature_engineering_agent.py,sha256=QEqXTsfjllUj4Wgsw4nNGUT6r9Y6q629ZNgqGy3Dbbk,15921
8
- ai_data_science_team/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- ai_data_science_team/templates/agent_templates.py,sha256=gT48Pq9KlrrrF0yigodGl_BdptmowTJ2rEWUqh7g5E0,15410
10
- ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- ai_data_science_team/tools/data_analysis.py,sha256=V7e6_fZA01mosFf5VcLwBcpiMVf7fClZMjTrj-egK-o,3715
12
- ai_data_science_team/tools/logging.py,sha256=EU5EMg4Y0-Yhqf1vAEFg0eRvSTx8uF0LTOAKss8-T2M,2073
13
- ai_data_science_team/tools/parsers.py,sha256=BAi-fJT7BBt9nRS3w5n9LDTsu7JAJsH8CAI9-Qf7jCs,2086
14
- ai_data_science_team/tools/regex.py,sha256=KTH2SXPJT8Tzmj7CufyeET-FbA9BMhRzFlPKr4Tan3g,2320
15
- ai_data_science_team-0.0.0.9005.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
16
- ai_data_science_team-0.0.0.9005.dist-info/METADATA,sha256=PC6rJR965hPu02LtZrzHICkd3QeWzh2A35axTLjE9hM,5840
17
- ai_data_science_team-0.0.0.9005.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
18
- ai_data_science_team-0.0.0.9005.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
19
- ai_data_science_team-0.0.0.9005.dist-info/RECORD,,