analystx 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
analystx-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 AnalystX Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,99 @@
1
+ Metadata-Version: 2.4
2
+ Name: analystx
3
+ Version: 0.1.0
4
+ Summary: Advanced analytics and KPI engine for data-driven insights
5
+ Author-email: Your Name <your.email@example.com>
6
+ License: MIT
7
+ Keywords: analytics,kpi,insights,profiling,reporting
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.8
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Requires-Python: >=3.8
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: pandas>=1.0.0
20
+ Requires-Dist: numpy>=1.19.0
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest>=6.0; extra == "dev"
23
+ Requires-Dist: pytest-cov>=2.10; extra == "dev"
24
+ Requires-Dist: black>=21.0; extra == "dev"
25
+ Requires-Dist: flake8>=3.9; extra == "dev"
26
+ Dynamic: license-file
27
+
28
+ # AnalystX
29
+
30
+ Advanced analytics and KPI engine for data-driven insights.
31
+
32
+ ## Overview
33
+
34
+ AnalystX is a Python library designed to provide comprehensive data profiling, KPI calculation, insight generation, and automated reporting capabilities. It simplifies complex analytics workflows and enables data-driven decision making.
35
+
36
+ ## Features
37
+
38
+ - **Data Profiling**: Automatic data quality and statistical analysis
39
+ - **KPI Engine**: Flexible and extensible key performance indicator calculation
40
+ - **Insight Engine**: Generate actionable insights from data
41
+ - **Report Generation**: Create professional analytics reports
42
+ - **CLI Interface**: Command-line tools for easy integration
43
+
44
+ ## Installation
45
+
46
+ ```bash
47
+ pip install analystx
48
+ ```
49
+
50
+ Or from development source:
51
+
52
+ ```bash
53
+ git clone <repository-url>
54
+ cd analystx
55
+ pip install -e ".[dev]"
56
+ ```
57
+
58
+ ## Quick Start
59
+
60
+ ```python
61
+ from analystx import AnalystX
62
+
63
+ # Initialize analyzer
64
+ analyzer = AnalystX(data=df)
65
+
66
+ # Run profiling
67
+ profile = analyzer.profile()
68
+
69
+ # Calculate KPIs
70
+ kpis = analyzer.calculate_kpis()
71
+
72
+ # Generate insights
73
+ insights = analyzer.generate_insights()
74
+
75
+ # Create report
76
+ report = analyzer.create_report()
77
+ ```
78
+
79
+ ## Command Line Usage
80
+
81
+ ```bash
82
+ analystx --help
83
+ ```
84
+
85
+ ## Documentation
86
+
87
+ For detailed documentation, see the [docs](docs/) directory.
88
+
89
+ ## Contributing
90
+
91
+ Contributions are welcome! Please read our contributing guidelines.
92
+
93
+ ## License
94
+
95
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
96
+
97
+ ## Author
98
+
99
+ Your Name <your.email@example.com>
@@ -0,0 +1,72 @@
1
+ # AnalystX
2
+
3
+ Advanced analytics and KPI engine for data-driven insights.
4
+
5
+ ## Overview
6
+
7
+ AnalystX is a Python library designed to provide comprehensive data profiling, KPI calculation, insight generation, and automated reporting capabilities. It simplifies complex analytics workflows and enables data-driven decision making.
8
+
9
+ ## Features
10
+
11
+ - **Data Profiling**: Automatic data quality and statistical analysis
12
+ - **KPI Engine**: Flexible and extensible key performance indicator calculation
13
+ - **Insight Engine**: Generate actionable insights from data
14
+ - **Report Generation**: Create professional analytics reports
15
+ - **CLI Interface**: Command-line tools for easy integration
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pip install analystx
21
+ ```
22
+
23
+ Or from development source:
24
+
25
+ ```bash
26
+ git clone <repository-url>
27
+ cd analystx
28
+ pip install -e ".[dev]"
29
+ ```
30
+
31
+ ## Quick Start
32
+
33
+ ```python
34
+ from analystx import AnalystX
35
+
36
+ # Initialize analyzer (df is any pandas DataFrame, e.g. df = pd.read_csv("data.csv"))
37
+ analyzer = AnalystX(data=df)
38
+
39
+ # Run profiling
40
+ profile = analyzer.profile()
41
+
42
+ # Calculate KPIs
43
+ kpis = analyzer.calculate_kpis()
44
+
45
+ # Generate insights
46
+ insights = analyzer.generate_insights()
47
+
48
+ # Create report
49
+ report = analyzer.create_report()
50
+ ```
51
+
52
+ ## Command Line Usage
53
+
54
+ ```bash
55
+ analystx --help
56
+ ```
57
+
58
+ ## Documentation
59
+
60
+ For detailed documentation, see the [docs](docs/) directory.
61
+
62
+ ## Contributing
63
+
64
+ Contributions are welcome! Please read our contributing guidelines.
65
+
66
+ ## License
67
+
68
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
69
+
70
+ ## Author
71
+
72
+ Your Name <your.email@example.com>
@@ -0,0 +1,47 @@
1
+ [build-system]
2
+ requires = ["setuptools>=45", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "analystx"
7
+ version = "0.1.0"
8
+ description = "Advanced analytics and KPI engine for data-driven insights"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Your Name", email = "your.email@example.com"}
14
+ ]
15
+ keywords = ["analytics", "kpi", "insights", "profiling", "reporting"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.8",
22
+ "Programming Language :: Python :: 3.9",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Programming Language :: Python :: 3.11",
25
+ ]
26
+
27
+ dependencies = [
28
+ "pandas>=1.0.0",
29
+ "numpy>=1.19.0",
30
+ ]
31
+
32
+ [project.optional-dependencies]
33
+ dev = [
34
+ "pytest>=6.0",
35
+ "pytest-cov>=2.10",
36
+ "black>=21.0",
37
+ "flake8>=3.9",
38
+ ]
39
+
40
+ [project.scripts]
41
+ analystx = "analystx.cli:main"
42
+
43
+ [tool.setuptools]
44
+ package-dir = {"" = "src"}
45
+
46
+ [tool.setuptools.packages.find]
47
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,11 @@
1
+ """
2
+ AnalystX: Advanced analytics and KPI engine for data-driven insights.
3
+ """
4
+
5
+ __version__ = "0.1.0"
6
+ __author__ = "AnalystX Contributors"
7
+ __license__ = "MIT"
8
+
9
+ from .main import AnalystX
10
+
11
+ __all__ = ["AnalystX"]
@@ -0,0 +1,111 @@
1
+ """
2
+ Command-line interface for AnalystX.
3
+ """
4
+
5
+ import argparse
6
+ import sys
7
+ import pandas as pd
8
+ from .main import AnalystX
9
+
10
+
11
def main(argv=None):
    """Run the AnalystX command-line interface.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse instead of the process command line. ``None``
        (the default, used by the console script) makes argparse read
        ``sys.argv[1:]``.

    Raises
    ------
    SystemExit
        With status 1 when ``--data`` is missing, when the file extension is
        not supported, or when loading/analysis raises. argparse itself exits
        for ``--help``, ``--version`` and invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description="AnalystX: Advanced analytics and KPI engine"
    )
    parser.add_argument(
        "command",
        choices=["profile", "kpi", "insight", "report"],
        help="Command to execute",
    )
    parser.add_argument(
        "--data",
        type=str,
        help="Path to input data file (CSV or Excel)",
    )
    parser.add_argument(
        "--output",
        type=str,
        help="Path to output file",
    )
    parser.add_argument(
        "--format",
        choices=["html", "markdown", "pdf"],
        default="html",
        help="Output format for reports",
    )
    # NOTE: --config is accepted but nothing below reads it yet.
    parser.add_argument(
        "--config",
        type=str,
        help="Path to configuration file (JSON)",
    )
    parser.add_argument(
        "-v", "--version",
        action="version",
        version="%(prog)s 0.1.0",
    )

    args = parser.parse_args(argv)

    if not args.data:
        parser.print_help()
        print("\nError: --data argument is required", file=sys.stderr)
        sys.exit(1)

    # Compare extensions case-insensitively so "DATA.CSV" is accepted too.
    lowered_path = args.data.lower()
    if lowered_path.endswith(".csv"):
        reader = pd.read_csv
    elif lowered_path.endswith((".xlsx", ".xls")):
        reader = pd.read_excel
    else:
        print(f"Unsupported file format: {args.data}", file=sys.stderr)
        sys.exit(1)

    # Top-level boundary: any failure while loading or analysing is reported
    # as a one-line error and a non-zero exit status instead of a traceback.
    try:
        analyzer = AnalystX(data=reader(args.data))

        if args.command == "profile":
            print("Data Profile:")
            print(analyzer.profile())

        elif args.command == "kpi":
            print("KPIs:")
            for kpi, value in analyzer.calculate_kpis().items():
                print(f"  {kpi}: {value}")

        elif args.command == "insight":
            print("Insights:")
            for insight in analyzer.generate_insights():
                print(f"  - {insight.get('message', 'N/A')}")

        elif args.command == "report":
            result = analyzer.create_report(output_format=args.format)
            if args.output:
                # Every format is returned as text, so one write path serves
                # all of them; UTF-8 keeps non-ASCII column names intact
                # regardless of the platform's default encoding.
                with open(args.output, "w", encoding="utf-8") as f:
                    f.write(result)
                print(f"Report saved to {args.output}")
            else:
                print(result)

    except Exception as e:
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,78 @@
1
+ """
2
+ Dictionary module for managing business logic and metadata.
3
+ """
4
+
5
+
6
class DataDictionary:
    """Registry of column/metric definitions, business metrics and transformations.

    Each kind of entry lives in its own dict keyed by name; adding an entry
    under an existing name overwrites the previous one.
    """

    def __init__(self):
        """Create an empty dictionary with no definitions, metrics or transformations."""
        self.definitions = {}
        self.metrics = {}
        self.transformations = {}

    def add_definition(self, name, definition):
        """
        Add a column or metric definition.

        Parameters
        ----------
        name : str
            Name of the item
        definition : dict
            Definition details (type, description, format, etc.)
        """
        self.definitions[name] = definition

    def add_metric(self, name, formula, description=""):
        """
        Add a business metric definition.

        Parameters
        ----------
        name : str
            Metric name
        formula : str
            Metric calculation formula or expression (stored, not evaluated)
        description : str, optional
            Metric description
        """
        self.metrics[name] = {
            "formula": formula,
            "description": description,
        }

    def add_transformation(self, name, func, description=""):
        """
        Register a data transformation.

        Parameters
        ----------
        name : str
            Transformation name
        func : callable
            Transformation function (stored, not called)
        description : str, optional
            Description of the transformation
        """
        self.transformations[name] = {
            "function": func,
            "description": description,
        }

    def get_definition(self, name):
        """Return the definition stored under *name*, or ``None`` if absent."""
        return self.definitions.get(name)

    def get_metric(self, name):
        """Return the ``{"formula", "description"}`` dict for *name*, or ``None`` if absent."""
        return self.metrics.get(name)

    def get_transformation(self, name):
        """Return the ``{"function", "description"}`` dict for *name*, or ``None`` if absent."""
        return self.transformations.get(name)

    def list_all(self):
        """
        Summarise everything that has been registered.

        Returns
        -------
        dict
            ``"definitions"`` and ``"metrics"`` map to the live internal
            dicts (not copies); ``"transformations"`` is a list of the
            registered transformation names only.
        """
        return {
            "definitions": self.definitions,
            "metrics": self.metrics,
            "transformations": list(self.transformations.keys()),
        }
@@ -0,0 +1,97 @@
1
+ """
2
+ Insight Engine for generating actionable insights from data analysis.
3
+ """
4
+
5
+ import pandas as pd
6
+ import numpy as np
7
+
8
+
9
class InsightEngine:
    """Generate actionable insights from data and analysis results."""

    def generate(self, data, profile=None, kpis=None):
        """
        Generate insights from data and analysis results.

        Parameters
        ----------
        data : pd.DataFrame
            Input data
        profile : dict, optional
            Data profile results
        kpis : dict, optional
            KPI calculation results

        Returns
        -------
        list
            List of insight dictionaries, each carrying at least ``"type"``
            and ``"message"`` keys. Quality and distribution insights are
            only produced when *profile* is non-empty; KPI insights only
            when *kpis* is non-empty.
        """
        insights = []

        if profile:
            insights.extend(self._quality_insights(profile))
            insights.extend(self._distribution_insights(data))

        if kpis:
            insights.extend(self._kpi_insights(kpis))

        return insights

    @staticmethod
    def _quality_insights(profile):
        """Return warnings for completeness below 90% and for duplicate rows.

        Reads ``profile["quality_metrics"]``; missing keys are treated as
        "no problem" (100% complete, zero duplicates).
        """
        insights = []
        quality = profile.get("quality_metrics", {})

        if quality.get("completeness_pct", 100) < 90:
            insights.append({
                "type": "warning",
                "message": f"Data completeness is {quality.get('completeness_pct', 0)}%. "
                           "Consider addressing missing values.",
                "severity": "high",
            })

        duplicate_rows = quality.get("duplicate_rows", 0)
        if duplicate_rows > 0:
            insights.append({
                "type": "warning",
                "message": f"Found {duplicate_rows} duplicate rows.",
                "severity": "medium",
            })

        return insights

    @staticmethod
    def _distribution_insights(data):
        """Return an observation for every numeric column whose |skewness| exceeds 1."""
        insights = []
        numeric_cols = data.select_dtypes(include=[np.number]).columns

        for col in numeric_cols:
            skewness = data[col].skew()
            # A NaN skewness compares False here, so such columns are skipped.
            if abs(skewness) > 1:
                insights.append({
                    "type": "observation",
                    "message": f"Column '{col}' has significant skewness ({skewness:.2f}).",
                    "column": col,
                })

        return insights

    @staticmethod
    def _kpi_insights(kpis):
        """Return an observation for every numeric KPI with a negative value.

        Non-numeric values (for example error strings) are ignored.
        """
        # numpy integer scalars (e.g. the min/max of an int64 column) are not
        # instances of the builtin ``int``, so they must be listed explicitly
        # or negative integer KPIs would be missed.
        numeric_types = (int, float, np.integer, np.floating)

        return [
            {
                "type": "observation",
                "message": f"KPI '{kpi_name}' has a negative value: {kpi_value}",
                "kpi": kpi_name,
                "value": kpi_value,
            }
            for kpi_name, kpi_value in kpis.items()
            if isinstance(kpi_value, numeric_types) and kpi_value < 0
        ]
@@ -0,0 +1,71 @@
1
+ """
2
+ KPI Engine for calculating key performance indicators from data.
3
+ """
4
+
5
+ import pandas as pd
6
+ import numpy as np
7
+
8
+
9
class KPIEngine:
    """Compute per-column summary KPIs and user-registered custom KPIs."""

    def __init__(self):
        """Start with an empty registry of custom KPI functions."""
        self.custom_kpis = {}

    def calculate(self, data, config=None):
        """
        Compute KPIs for a DataFrame.

        Parameters
        ----------
        data : pd.DataFrame
            Frame whose numeric columns are summarised
        config : dict, optional
            Maps custom KPI names to the configuration passed to their
            registered function

        Returns
        -------
        dict
            ``"<column>_<stat>"`` entries (mean, median, std, min, max) for
            every numeric column, followed by any custom KPI results
        """
        reducers = ("mean", "median", "std", "min", "max")
        results = {}

        for column in data.select_dtypes(include=[np.number]).columns:
            for stat in reducers:
                results[f"{column}_{stat}"] = getattr(data[column], stat)()

        if not config:
            return results

        results.update(self._calculate_custom_kpis(data, config))
        return results

    def register_kpi(self, name, func):
        """
        Store a custom KPI function under a name.

        Parameters
        ----------
        name : str
            Key used to look the KPI up from a calculation config
        func : callable
            Called as ``func(data, kpi_config)`` during calculation
        """
        self.custom_kpis[name] = func

    def _calculate_custom_kpis(self, data, config):
        """Run each registered KPI named in *config*; failures become ``"Error: ..."`` strings."""
        outcome = {}
        for name, settings in config.items():
            if name not in self.custom_kpis:
                # Config entries without a registered function are ignored.
                continue
            kpi_func = self.custom_kpis[name]
            try:
                outcome[name] = kpi_func(data, settings)
            except Exception as err:
                outcome[name] = f"Error: {str(err)}"
        return outcome
@@ -0,0 +1,120 @@
1
+ """
2
+ Main AnalystX module containing the core analyzer class.
3
+ """
4
+
5
+ import pandas as pd
6
+ from .profiling import DataProfiler
7
+ from .kpi_engine import KPIEngine
8
+ from .insight_engine import InsightEngine
9
+ from .report import ReportGenerator
10
+
11
+
12
class AnalystX:
    """Main analyzer class for data profiling, KPI calculation, and insight generation.

    Results of ``profile()``, ``calculate_kpis()`` and ``generate_insights()``
    are cached on the instance and reused by later steps.
    """

    def __init__(self, data=None):
        """
        Initialize AnalystX analyzer.

        Parameters
        ----------
        data : pd.DataFrame, optional
            Input dataframe for analysis (stored as given, without the type
            check that ``load_data`` performs)
        """
        self.data = data
        self.profiler = DataProfiler()
        self.kpi_engine = KPIEngine()
        self.insight_engine = InsightEngine()
        self.report_generator = ReportGenerator()
        self._profile = None
        self._kpis = None
        self._insights = None

    def load_data(self, data):
        """
        Load or update data for analysis.

        Parameters
        ----------
        data : pd.DataFrame
            Dataframe to analyze

        Returns
        -------
        AnalystX
            This instance, so calls can be chained

        Raises
        ------
        TypeError
            If *data* is not a pandas DataFrame
        """
        if not isinstance(data, pd.DataFrame):
            raise TypeError("Data must be a pandas DataFrame")
        self.data = data
        # Cached results describe the previous dataset; drop them so that
        # insights and reports are recomputed from the new data.
        self._profile = None
        self._kpis = None
        self._insights = None
        return self

    def profile(self):
        """
        Generate data profile including statistical summaries and quality metrics.

        Returns
        -------
        dict
            Profile results

        Raises
        ------
        ValueError
            If no data has been loaded
        """
        if self.data is None:
            raise ValueError("No data loaded. Use load_data() first.")
        self._profile = self.profiler.analyze(self.data)
        return self._profile

    def calculate_kpis(self, config=None):
        """
        Calculate KPIs based on provided configuration.

        Parameters
        ----------
        config : dict, optional
            KPI configuration

        Returns
        -------
        dict
            Calculated KPIs

        Raises
        ------
        ValueError
            If no data has been loaded
        """
        if self.data is None:
            raise ValueError("No data loaded. Use load_data() first.")
        self._kpis = self.kpi_engine.calculate(self.data, config)
        return self._kpis

    def generate_insights(self):
        """
        Generate insights from profiling and KPI results.

        The profile is computed on demand when missing. KPIs are not: they
        contribute insights only if ``calculate_kpis()`` was called before.

        Returns
        -------
        list
            List of insights
        """
        if self._profile is None:
            self.profile()
        self._insights = self.insight_engine.generate(self.data, self._profile, self._kpis)
        return self._insights

    def create_report(self, output_format="html"):
        """
        Create a comprehensive report.

        The profile and insights are computed on demand when missing; the
        KPI section reflects whatever ``calculate_kpis()`` last produced
        (``None`` if it was never called).

        Parameters
        ----------
        output_format : str, optional
            Output format (html, pdf, markdown)

        Returns
        -------
        str
            Whatever the report generator returns for the chosen format
        """
        if self._profile is None:
            self.profile()
        if self._insights is None:
            self.generate_insights()

        return self.report_generator.generate(
            data=self.data,
            profile=self._profile,
            kpis=self._kpis,
            insights=self._insights,
            format=output_format,
        )
@@ -0,0 +1,72 @@
1
+ """
2
+ Data profiling module for comprehensive data analysis and quality assessment.
3
+ """
4
+
5
+ import pandas as pd
6
+ import numpy as np
7
+
8
+
9
class DataProfiler:
    """Generate detailed data profiles including statistics and quality metrics."""

    def analyze(self, data):
        """
        Analyze dataset and generate comprehensive profile.

        Parameters
        ----------
        data : pd.DataFrame
            Data to profile

        Returns
        -------
        dict
            Profile with the keys ``shape``, ``columns``, ``dtypes``,
            ``summary_stats``, ``missing_values``, ``unique_counts`` and
            ``quality_metrics``
        """
        return {
            "shape": data.shape,
            "columns": list(data.columns),
            "dtypes": data.dtypes.to_dict(),
            "summary_stats": self._get_summary_stats(data),
            "missing_values": self._get_missing_values(data),
            "unique_counts": self._get_unique_counts(data),
            "quality_metrics": self._get_quality_metrics(data),
        }

    @staticmethod
    def _get_summary_stats(data):
        """Return ``describe()`` output for the numeric columns, or ``{}`` if there are none."""
        numeric_data = data.select_dtypes(include=[np.number])
        if numeric_data.empty:
            return {}
        return numeric_data.describe().to_dict()

    @staticmethod
    def _get_missing_values(data):
        """Return per-column missing-value counts and percentages (rounded to 2 places)."""
        missing = data.isnull().sum()
        row_count = len(data)
        if row_count:
            missing_pct = (missing / row_count * 100).round(2)
        else:
            # No rows means nothing can be missing; report 0% instead of the
            # NaN that 0 / 0 would produce.
            missing_pct = missing.astype(float)
        return {
            "count": missing.to_dict(),
            "percentage": missing_pct.to_dict(),
        }

    @staticmethod
    def _get_unique_counts(data):
        """Return the number of distinct non-null values per column."""
        return data.nunique().to_dict()

    @staticmethod
    def _get_quality_metrics(data):
        """Return overall completeness, cell counts and the number of duplicate rows.

        A frame with no cells is reported as 100% complete, because it has
        no missing values, rather than dividing by zero.
        """
        row_count, column_count = data.shape
        total_cells = row_count * column_count
        # Convert numpy scalars to plain ints so the result stays simple to
        # compare and serialise.
        missing_cells = int(data.isnull().sum().sum())

        if total_cells:
            completeness = (total_cells - missing_cells) / total_cells * 100
        else:
            completeness = 100.0

        return {
            "completeness_pct": round(completeness, 2),
            "total_cells": total_cells,
            "missing_cells": missing_cells,
            "duplicate_rows": int(data.duplicated().sum()),
        }
@@ -0,0 +1,156 @@
1
+ """
2
+ Report generation module for creating analytics reports in various formats.
3
+ """
4
+
5
+ from datetime import datetime
6
+
7
+
8
class ReportGenerator:
    """Generate analytics reports as HTML, Markdown, or a PDF placeholder."""

    # Only this many KPIs (in dict order) are rendered; the rest are omitted.
    _MAX_KPI_ROWS = 10

    def __init__(self):
        """Initialize report generator with an empty configuration."""
        self.report_config = {}

    def generate(self, data=None, profile=None, kpis=None, insights=None, format="html"):
        """
        Generate a report with provided analysis results.

        Parameters
        ----------
        data : pd.DataFrame, optional
            Original data (passed through to the renderers, which currently
            do not read it)
        profile : dict, optional
            Data profile results
        kpis : dict, optional
            KPI calculations
        insights : list, optional
            Generated insights
        format : str, optional
            Output format (html, markdown, pdf)

        Returns
        -------
        str
            Report content; for ``"pdf"`` a fixed "not yet implemented"
            message

        Raises
        ------
        ValueError
            If *format* is not one of the supported formats
        """
        if format == "html":
            return self._generate_html_report(data, profile, kpis, insights)
        elif format == "markdown":
            return self._generate_markdown_report(data, profile, kpis, insights)
        elif format == "pdf":
            return self._generate_pdf_report(data, profile, kpis, insights)
        else:
            raise ValueError(f"Unsupported format: {format}")

    @staticmethod
    def _escape(value):
        """Return ``str(value)`` with HTML special characters escaped."""
        # Imported locally so this class does not depend on a new
        # module-level import.
        from html import escape

        return escape(str(value))

    @staticmethod
    def _generate_html_report(data, profile, kpis, insights):
        """Assemble the full HTML document from the three formatted sections."""
        generated = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        lines = [
            "<!DOCTYPE html>",
            "<html>",
            "<head>",
            "<title>AnalystX Report</title>",
            "<style>",
            "body { font-family: Arial, sans-serif; margin: 20px; }",
            ".section { margin: 20px 0; padding: 10px; border-left: 4px solid #007bff; }",
            "h2 { color: #007bff; }",
            "table { border-collapse: collapse; width: 100%; }",
            "th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }",
            "th { background-color: #f8f9fa; }",
            "</style>",
            "</head>",
            "<body>",
            "<h1>AnalystX Report</h1>",
            f"<p>Generated: {generated}</p>",
            "",
            '<div class="section">',
            "<h2>Data Profile</h2>",
            ReportGenerator._format_profile_html(profile),
            "</div>",
            "",
            '<div class="section">',
            "<h2>Key Performance Indicators</h2>",
            ReportGenerator._format_kpis_html(kpis),
            "</div>",
            "",
            '<div class="section">',
            "<h2>Insights</h2>",
            ReportGenerator._format_insights_html(insights),
            "</div>",
            "</body>",
            "</html>",
        ]
        return "\n".join(lines) + "\n"

    @staticmethod
    def _generate_markdown_report(data, profile, kpis, insights):
        """Assemble the Markdown document from the three formatted sections."""
        generated = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        lines = [
            "# AnalystX Report",
            "",
            f"Generated: {generated}",
            "",
            "## Data Profile",
            "",
            ReportGenerator._format_profile_markdown(profile),
            "",
            "## Key Performance Indicators",
            "",
            ReportGenerator._format_kpis_markdown(kpis),
            "",
            "## Insights",
            "",
            ReportGenerator._format_insights_markdown(insights),
        ]
        return "\n".join(lines) + "\n"

    @staticmethod
    def _generate_pdf_report(data, profile, kpis, insights):
        """Placeholder for PDF report generation; returns a fixed message."""
        return "PDF report generation not yet implemented."

    @staticmethod
    def _format_profile_html(profile):
        """Format profile results as HTML (currently only the shape)."""
        if not profile:
            return "<p>No profile data available.</p>"
        return f"<p>Shape: {ReportGenerator._escape(profile.get('shape', 'N/A'))}</p>"

    @staticmethod
    def _format_profile_markdown(profile):
        """Format profile results as Markdown (currently only the shape)."""
        if not profile:
            return "No profile data available."
        return f"- Shape: {profile.get('shape', 'N/A')}"

    @staticmethod
    def _format_kpis_html(kpis):
        """Format the first KPIs as an HTML table, escaping names and values."""
        if not kpis:
            return "<p>No KPI data available.</p>"
        esc = ReportGenerator._escape
        shown = list(kpis.items())[:ReportGenerator._MAX_KPI_ROWS]
        # KPI names are derived from column names in the input data, so they
        # must be escaped before being embedded in markup.
        rows = "".join(f"<tr><td>{esc(k)}</td><td>{esc(v)}</td></tr>" for k, v in shown)
        return f"<table><tr><th>KPI</th><th>Value</th></tr>{rows}</table>"

    @staticmethod
    def _format_kpis_markdown(kpis):
        """Format the first KPIs as a Markdown table."""
        if not kpis:
            return "No KPI data available."
        shown = list(kpis.items())[:ReportGenerator._MAX_KPI_ROWS]
        rows = "\n".join(f"| {k} | {v} |" for k, v in shown)
        return f"| KPI | Value |\n|-----|-------|\n{rows}"

    @staticmethod
    def _format_insights_html(insights):
        """Format insight messages as an HTML list, escaping each message."""
        if not insights:
            return "<p>No insights available.</p>"
        esc = ReportGenerator._escape
        # Insight messages embed column names from the input data.
        items = "".join(f"<li>{esc(i.get('message', 'N/A'))}</li>" for i in insights)
        return f"<ul>{items}</ul>"

    @staticmethod
    def _format_insights_markdown(insights):
        """Format insight messages as a Markdown bullet list."""
        if not insights:
            return "No insights available."
        return "\n".join(f"- {i.get('message', 'N/A')}" for i in insights)
@@ -0,0 +1,99 @@
1
+ Metadata-Version: 2.4
2
+ Name: analystx
3
+ Version: 0.1.0
4
+ Summary: Advanced analytics and KPI engine for data-driven insights
5
+ Author-email: Your Name <your.email@example.com>
6
+ License: MIT
7
+ Keywords: analytics,kpi,insights,profiling,reporting
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.8
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Requires-Python: >=3.8
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: pandas>=1.0.0
20
+ Requires-Dist: numpy>=1.19.0
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest>=6.0; extra == "dev"
23
+ Requires-Dist: pytest-cov>=2.10; extra == "dev"
24
+ Requires-Dist: black>=21.0; extra == "dev"
25
+ Requires-Dist: flake8>=3.9; extra == "dev"
26
+ Dynamic: license-file
27
+
28
+ # AnalystX
29
+
30
+ Advanced analytics and KPI engine for data-driven insights.
31
+
32
+ ## Overview
33
+
34
+ AnalystX is a Python library designed to provide comprehensive data profiling, KPI calculation, insight generation, and automated reporting capabilities. It simplifies complex analytics workflows and enables data-driven decision making.
35
+
36
+ ## Features
37
+
38
+ - **Data Profiling**: Automatic data quality and statistical analysis
39
+ - **KPI Engine**: Flexible and extensible key performance indicator calculation
40
+ - **Insight Engine**: Generate actionable insights from data
41
+ - **Report Generation**: Create professional analytics reports
42
+ - **CLI Interface**: Command-line tools for easy integration
43
+
44
+ ## Installation
45
+
46
+ ```bash
47
+ pip install analystx
48
+ ```
49
+
50
+ Or from development source:
51
+
52
+ ```bash
53
+ git clone <repository-url>
54
+ cd analystx
55
+ pip install -e ".[dev]"
56
+ ```
57
+
58
+ ## Quick Start
59
+
60
+ ```python
61
+ from analystx import AnalystX
62
+
63
+ # Initialize analyzer
64
+ analyzer = AnalystX(data=df)
65
+
66
+ # Run profiling
67
+ profile = analyzer.profile()
68
+
69
+ # Calculate KPIs
70
+ kpis = analyzer.calculate_kpis()
71
+
72
+ # Generate insights
73
+ insights = analyzer.generate_insights()
74
+
75
+ # Create report
76
+ report = analyzer.create_report()
77
+ ```
78
+
79
+ ## Command Line Usage
80
+
81
+ ```bash
82
+ analystx --help
83
+ ```
84
+
85
+ ## Documentation
86
+
87
+ For detailed documentation, see the [docs](docs/) directory.
88
+
89
+ ## Contributing
90
+
91
+ Contributions are welcome! Please read our contributing guidelines.
92
+
93
+ ## License
94
+
95
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
96
+
97
+ ## Author
98
+
99
+ Your Name <your.email@example.com>
@@ -0,0 +1,18 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ src/analystx/__init__.py
5
+ src/analystx/cli.py
6
+ src/analystx/dictionary.py
7
+ src/analystx/insight_engine.py
8
+ src/analystx/kpi_engine.py
9
+ src/analystx/main.py
10
+ src/analystx/profiling.py
11
+ src/analystx/report.py
12
+ src/analystx.egg-info/PKG-INFO
13
+ src/analystx.egg-info/SOURCES.txt
14
+ src/analystx.egg-info/dependency_links.txt
15
+ src/analystx.egg-info/entry_points.txt
16
+ src/analystx.egg-info/requires.txt
17
+ src/analystx.egg-info/top_level.txt
18
+ tests/test_basic.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ analystx = analystx.cli:main
@@ -0,0 +1,8 @@
1
+ pandas>=1.0.0
2
+ numpy>=1.19.0
3
+
4
+ [dev]
5
+ pytest>=6.0
6
+ pytest-cov>=2.10
7
+ black>=21.0
8
+ flake8>=3.9
@@ -0,0 +1 @@
1
+ analystx
@@ -0,0 +1,142 @@
1
+ """
2
+ Basic tests for AnalystX functionality.
3
+ """
4
+
5
+ import pytest
6
+ import pandas as pd
7
+ import numpy as np
8
+ from analystx import AnalystX
9
+
10
+
11
@pytest.fixture
def sample_data():
    """Return a five-row frame with integer, float, string and boolean columns."""
    columns = {
        "id": [1, 2, 3, 4, 5],
        "value": [10.5, 20.3, 15.8, 30.1, 25.6],
        "category": ["A", "B", "A", "B", "A"],
        "flag": [True, False, True, False, True],
    }
    return pd.DataFrame(columns)
20
+
21
+
22
class TestAnalystXBasic:
    """Smoke tests for the public AnalystX workflow."""

    def test_initialization(self):
        """A bare analyzer starts without data."""
        assert AnalystX().data is None

    def test_load_data(self, sample_data):
        """load_data stores the frame that was passed in."""
        subject = AnalystX()
        subject.load_data(sample_data)
        assert subject.data is not None
        assert len(subject.data) == 5

    def test_initialization_with_data(self, sample_data):
        """Data handed to the constructor is kept."""
        assert AnalystX(data=sample_data).data is not None

    def test_profiling(self, sample_data):
        """profile() returns a dict exposing the core sections."""
        result = AnalystX(data=sample_data).profile()

        assert result is not None
        for section in ("shape", "columns", "dtypes", "missing_values"):
            assert section in result

    def test_kpi_calculation(self, sample_data):
        """calculate_kpis() returns a dict containing the mean of 'value'."""
        result = AnalystX(data=sample_data).calculate_kpis()

        assert result is not None
        assert isinstance(result, dict)
        assert "value_mean" in result

    def test_insight_generation(self, sample_data):
        """generate_insights() returns a list."""
        result = AnalystX(data=sample_data).generate_insights()

        assert result is not None
        assert isinstance(result, list)

    def test_report_generation(self, sample_data):
        """The HTML report is a string carrying the report title."""
        rendered = AnalystX(data=sample_data).create_report(output_format="html")

        assert rendered is not None
        assert isinstance(rendered, str)
        assert "AnalystX Report" in rendered

    def test_report_markdown(self, sample_data):
        """The Markdown report is a string carrying the title heading."""
        rendered = AnalystX(data=sample_data).create_report(output_format="markdown")

        assert rendered is not None
        assert isinstance(rendered, str)
        assert "# AnalystX Report" in rendered

    def test_error_on_missing_data(self):
        """Profiling without data raises ValueError."""
        subject = AnalystX()
        with pytest.raises(ValueError):
            subject.profile()

    def test_invalid_data_type(self):
        """Loading something other than a DataFrame raises TypeError."""
        subject = AnalystX()
        with pytest.raises(TypeError):
            subject.load_data([1, 2, 3])
99
+
100
+
101
class TestDataProfiler:
    """Checks on the contents of the generated profile."""

    def test_profile_columns(self, sample_data):
        """The profile lists all four columns, including 'id' and 'value'."""
        listed = AnalystX(data=sample_data).profile()["columns"]

        assert len(listed) == 4
        assert "id" in listed
        assert "value" in listed

    def test_profile_dtypes(self, sample_data):
        """The dtype mapping has an entry for 'id'."""
        dtype_map = AnalystX(data=sample_data).profile()["dtypes"]

        assert "id" in dtype_map
119
+
120
+
121
class TestKPIEngine:
    """Numeric checks on the built-in KPIs."""

    def test_kpi_mean(self, sample_data):
        """'value_mean' matches pandas' own mean to within 0.01."""
        computed = AnalystX(data=sample_data).calculate_kpis()["value_mean"]
        reference = sample_data["value"].mean()

        assert abs(computed - reference) < 0.01

    def test_kpi_median(self, sample_data):
        """'value_median' matches pandas' own median to within 0.01."""
        computed = AnalystX(data=sample_data).calculate_kpis()["value_median"]
        reference = sample_data["value"].median()

        assert abs(computed - reference) < 0.01
139
+
140
+
141
+ if __name__ == "__main__":
142
+ pytest.main([__file__, "-v"])