ai-data-science-team 0.0.0.9005__py3-none-any.whl → 0.0.0.9007__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,116 +0,0 @@
1
- import io
2
- import pandas as pd
3
- from typing import Union, List, Dict
4
-
5
def summarize_dataframes(
    dataframes: Union[pd.DataFrame, List[pd.DataFrame], Dict[str, pd.DataFrame]]
) -> List[str]:
    """
    Generate a summary for one or more DataFrames. Accepts a single DataFrame, a list of DataFrames,
    or a dictionary mapping names to DataFrames.

    Parameters
    ----------
    dataframes : pandas.DataFrame or list of pandas.DataFrame or dict of (str -> pandas.DataFrame)
        - Single DataFrame: produce a single summary (returned within a one-element list).
        - List of DataFrames: produce a summary for each DataFrame, using index-based names.
        - Dictionary of DataFrames: produce a summary for each DataFrame, using dictionary keys as names.
        A pandas.Series inside a list or dict is also accepted and summarized as a
        one-column DataFrame.

    Returns
    -------
    list of str
        A list of summaries, one for each provided DataFrame. Each summary includes:
        - Shape of the DataFrame (rows, columns)
        - Column data types
        - Missing value percentage
        - Unique value counts
        - First 30 rows
        - Descriptive statistics
        - DataFrame info output

    Raises
    ------
    TypeError
        If the input is not a DataFrame, a list of DataFrames, or a dict of DataFrames.

    Example:
    --------
    ``` python
    import pandas as pd
    from sklearn.datasets import load_iris
    data = load_iris(as_frame=True)
    dataframes = {
        "iris": data.frame,
        "iris_target": data.target,
    }
    summaries = summarize_dataframes(dataframes)
    print(summaries[0])
    ```
    """
    summaries = []

    # --- Dictionary Case: dict keys become the dataset names ---
    if isinstance(dataframes, dict):
        for dataset_name, df in dataframes.items():
            summaries.append(_summarize_dataframe(df, dataset_name))

    # --- Single DataFrame Case ---
    elif isinstance(dataframes, pd.DataFrame):
        summaries.append(_summarize_dataframe(dataframes, "Single_Dataset"))

    # --- List of DataFrames Case: names are derived from list position ---
    elif isinstance(dataframes, list):
        for idx, df in enumerate(dataframes):
            summaries.append(_summarize_dataframe(df, f"Dataset_{idx}"))

    else:
        raise TypeError(
            "Input must be a single DataFrame, a list of DataFrames, or a dictionary of DataFrames."
        )

    return summaries


def _summarize_dataframe(df: pd.DataFrame, dataset_name: str) -> str:
    """Generate a summary string for a single DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Data to summarize. A Series is promoted to a one-column DataFrame.
    dataset_name : str
        Human-readable name echoed into the summary header.

    Returns
    -------
    str
        A multi-section plain-text report (shape, dtypes, missing %, unique
        counts, head, describe, info).
    """
    # 0. Accept a Series as well: the public docstring example passes
    #    load_iris(...).target (a Series), which would otherwise break the
    #    column-wise .apply below (Series.apply hands scalars to the lambda).
    if isinstance(df, pd.Series):
        df = df.to_frame()

    # 1. Stringify unhashable cell values (dict, list, set).
    #    This prevents TypeError from df.nunique() further down; the previous
    #    version converted only dicts, so list/set cells still crashed it.
    df = df.apply(
        lambda col: col.map(
            lambda x: str(x) if isinstance(x, (dict, list, set)) else x
        )
    )

    # 2. Capture df.info() output (info writes to a buffer, not a string)
    buffer = io.StringIO()
    df.info(buf=buffer)
    info_text = buffer.getvalue()

    # 3. Calculate missing value stats.
    #    max(len(df), 1) guards the zero-row case: 0/0 would otherwise yield
    #    NaN percentages in the report.
    row_count = max(len(df), 1)
    missing_stats = (df.isna().sum() / row_count * 100).sort_values(ascending=False)
    missing_summary = "\n".join([f"{col}: {val:.2f}%" for col, val in missing_stats.items()])

    # 4. Get column data types
    column_types = "\n".join([f"{col}: {dtype}" for col, dtype in df.dtypes.items()])

    # 5. Get unique value counts (safe now that unhashable cells are strings)
    unique_counts = df.nunique()
    unique_counts_summary = "\n".join([f"{col}: {count}" for col, count in unique_counts.items()])

    summary_text = f"""
    Dataset Name: {dataset_name}
    ----------------------------
    Shape: {df.shape[0]} rows x {df.shape[1]} columns

    Column Data Types:
    {column_types}

    Missing Value Percentage:
    {missing_summary}

    Unique Value Counts:
    {unique_counts_summary}

    Data (first 30 rows):
    {df.head(30).to_string()}

    Data Description:
    {df.describe().to_string()}

    Data Info:
    {info_text}
    """
    return summary_text.strip()
@@ -1,19 +0,0 @@
1
- ai_data_science_team/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- ai_data_science_team/_version.py,sha256=7tA8TocqCCzLkcB4ptV6bn3k5ni-0TGZvGnVBzmbeIc,26
3
- ai_data_science_team/orchestration.py,sha256=xiIFOsrLwPdkSmtme7wNCCGv8XopnMTNElNzlZokL-4,303
4
- ai_data_science_team/agents/__init__.py,sha256=DtwQgyeG3Q4rQ-NrMbva-jshVQyULaWW1RrnETQGZOY,270
5
- ai_data_science_team/agents/data_cleaning_agent.py,sha256=0K-CgngGjamRk_QzMqNkplrI-ddCbtruQ7kjGrsRIN8,14390
6
- ai_data_science_team/agents/data_wrangling_agent.py,sha256=uQBJ8vQwrXubQgaI9_UoNZnVQjIEBUOh3dTmNdg326k,14581
7
- ai_data_science_team/agents/feature_engineering_agent.py,sha256=QEqXTsfjllUj4Wgsw4nNGUT6r9Y6q629ZNgqGy3Dbbk,15921
8
- ai_data_science_team/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- ai_data_science_team/templates/agent_templates.py,sha256=gT48Pq9KlrrrF0yigodGl_BdptmowTJ2rEWUqh7g5E0,15410
10
- ai_data_science_team/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- ai_data_science_team/tools/data_analysis.py,sha256=V7e6_fZA01mosFf5VcLwBcpiMVf7fClZMjTrj-egK-o,3715
12
- ai_data_science_team/tools/logging.py,sha256=EU5EMg4Y0-Yhqf1vAEFg0eRvSTx8uF0LTOAKss8-T2M,2073
13
- ai_data_science_team/tools/parsers.py,sha256=BAi-fJT7BBt9nRS3w5n9LDTsu7JAJsH8CAI9-Qf7jCs,2086
14
- ai_data_science_team/tools/regex.py,sha256=KTH2SXPJT8Tzmj7CufyeET-FbA9BMhRzFlPKr4Tan3g,2320
15
- ai_data_science_team-0.0.0.9005.dist-info/LICENSE,sha256=Xif0IRLdd2HGLATxV2EVp91aSY6KOuacRr_6BorKGzA,1084
16
- ai_data_science_team-0.0.0.9005.dist-info/METADATA,sha256=PC6rJR965hPu02LtZrzHICkd3QeWzh2A35axTLjE9hM,5840
17
- ai_data_science_team-0.0.0.9005.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
18
- ai_data_science_team-0.0.0.9005.dist-info/top_level.txt,sha256=CnoMgOphCoAdGTLueWdCVByVyjwOubaGiTB1lchdy4M,21
19
- ai_data_science_team-0.0.0.9005.dist-info/RECORD,,