analystx 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analystx-0.1.0/LICENSE +21 -0
- analystx-0.1.0/PKG-INFO +99 -0
- analystx-0.1.0/README.md +72 -0
- analystx-0.1.0/pyproject.toml +47 -0
- analystx-0.1.0/setup.cfg +4 -0
- analystx-0.1.0/src/analystx/__init__.py +11 -0
- analystx-0.1.0/src/analystx/cli.py +111 -0
- analystx-0.1.0/src/analystx/dictionary.py +78 -0
- analystx-0.1.0/src/analystx/insight_engine.py +97 -0
- analystx-0.1.0/src/analystx/kpi_engine.py +71 -0
- analystx-0.1.0/src/analystx/main.py +120 -0
- analystx-0.1.0/src/analystx/profiling.py +72 -0
- analystx-0.1.0/src/analystx/report.py +156 -0
- analystx-0.1.0/src/analystx.egg-info/PKG-INFO +99 -0
- analystx-0.1.0/src/analystx.egg-info/SOURCES.txt +18 -0
- analystx-0.1.0/src/analystx.egg-info/dependency_links.txt +1 -0
- analystx-0.1.0/src/analystx.egg-info/entry_points.txt +2 -0
- analystx-0.1.0/src/analystx.egg-info/requires.txt +8 -0
- analystx-0.1.0/src/analystx.egg-info/top_level.txt +1 -0
- analystx-0.1.0/tests/test_basic.py +142 -0
analystx-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 AnalystX Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
analystx-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: analystx
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Advanced analytics and KPI engine for data-driven insights
|
|
5
|
+
Author-email: Your Name <your.email@example.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: analytics,kpi,insights,profiling,reporting
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Requires-Python: >=3.8
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: pandas>=1.0.0
|
|
20
|
+
Requires-Dist: numpy>=1.19.0
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest>=6.0; extra == "dev"
|
|
23
|
+
Requires-Dist: pytest-cov>=2.10; extra == "dev"
|
|
24
|
+
Requires-Dist: black>=21.0; extra == "dev"
|
|
25
|
+
Requires-Dist: flake8>=3.9; extra == "dev"
|
|
26
|
+
Dynamic: license-file
|
|
27
|
+
|
|
28
|
+
# AnalystX
|
|
29
|
+
|
|
30
|
+
Advanced analytics and KPI engine for data-driven insights.
|
|
31
|
+
|
|
32
|
+
## Overview
|
|
33
|
+
|
|
34
|
+
AnalystX is a Python library designed to provide comprehensive data profiling, KPI calculation, insight generation, and automated reporting capabilities. It simplifies complex analytics workflows and enables data-driven decision making.
|
|
35
|
+
|
|
36
|
+
## Features
|
|
37
|
+
|
|
38
|
+
- **Data Profiling**: Automatic data quality and statistical analysis
|
|
39
|
+
- **KPI Engine**: Flexible and extensible key performance indicator calculation
|
|
40
|
+
- **Insight Engine**: Generate actionable insights from data
|
|
41
|
+
- **Report Generation**: Create professional analytics reports
|
|
42
|
+
- **CLI Interface**: Command-line tools for easy integration
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install analystx
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Or from development source:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
git clone <repository-url>
|
|
54
|
+
cd analystx
|
|
55
|
+
pip install -e ".[dev]"
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from analystx import AnalystX
|
|
62
|
+
|
|
63
|
+
# Initialize analyzer
|
|
64
|
+
analyzer = AnalystX(data=df)
|
|
65
|
+
|
|
66
|
+
# Run profiling
|
|
67
|
+
profile = analyzer.profile()
|
|
68
|
+
|
|
69
|
+
# Calculate KPIs
|
|
70
|
+
kpis = analyzer.calculate_kpis()
|
|
71
|
+
|
|
72
|
+
# Generate insights
|
|
73
|
+
insights = analyzer.generate_insights()
|
|
74
|
+
|
|
75
|
+
# Create report
|
|
76
|
+
report = analyzer.create_report()
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Command Line Usage
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
analystx --help
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Documentation
|
|
86
|
+
|
|
87
|
+
For detailed documentation, see the [docs](docs/) directory.
|
|
88
|
+
|
|
89
|
+
## Contributing
|
|
90
|
+
|
|
91
|
+
Contributions are welcome! Please read our contributing guidelines.
|
|
92
|
+
|
|
93
|
+
## License
|
|
94
|
+
|
|
95
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
96
|
+
|
|
97
|
+
## Author
|
|
98
|
+
|
|
99
|
+
Your Name <your.email@example.com>
|
analystx-0.1.0/README.md
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# AnalystX
|
|
2
|
+
|
|
3
|
+
Advanced analytics and KPI engine for data-driven insights.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
AnalystX is a Python library designed to provide comprehensive data profiling, KPI calculation, insight generation, and automated reporting capabilities. It simplifies complex analytics workflows and enables data-driven decision making.
|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- **Data Profiling**: Automatic data quality and statistical analysis
|
|
12
|
+
- **KPI Engine**: Flexible and extensible key performance indicator calculation
|
|
13
|
+
- **Insight Engine**: Generate actionable insights from data
|
|
14
|
+
- **Report Generation**: Create professional analytics reports
|
|
15
|
+
- **CLI**: Command-line tools for easy integration
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install analystx
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Or from development source:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
git clone <repository-url>
|
|
27
|
+
cd analystx
|
|
28
|
+
pip install -e ".[dev]"
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from analystx import AnalystX
|
|
35
|
+
|
|
36
|
+
# Initialize analyzer
|
|
37
|
+
analyzer = AnalystX(data=df)
|
|
38
|
+
|
|
39
|
+
# Run profiling
|
|
40
|
+
profile = analyzer.profile()
|
|
41
|
+
|
|
42
|
+
# Calculate KPIs
|
|
43
|
+
kpis = analyzer.calculate_kpis()
|
|
44
|
+
|
|
45
|
+
# Generate insights
|
|
46
|
+
insights = analyzer.generate_insights()
|
|
47
|
+
|
|
48
|
+
# Create report
|
|
49
|
+
report = analyzer.create_report()
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Command Line Usage
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
analystx --help
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Documentation
|
|
59
|
+
|
|
60
|
+
For detailed documentation, see the [docs](docs/) directory.
|
|
61
|
+
|
|
62
|
+
## Contributing
|
|
63
|
+
|
|
64
|
+
Contributions are welcome! Please read our contributing guidelines.
|
|
65
|
+
|
|
66
|
+
## License
|
|
67
|
+
|
|
68
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
69
|
+
|
|
70
|
+
## Author
|
|
71
|
+
|
|
72
|
+
Your Name <your.email@example.com>
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "analystx"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Advanced analytics and KPI engine for data-driven insights"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "Your Name", email = "your.email@example.com"}
|
|
14
|
+
]
|
|
15
|
+
keywords = ["analytics", "kpi", "insights", "profiling", "reporting"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.8",
|
|
22
|
+
"Programming Language :: Python :: 3.9",
|
|
23
|
+
"Programming Language :: Python :: 3.10",
|
|
24
|
+
"Programming Language :: Python :: 3.11",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
dependencies = [
|
|
28
|
+
"pandas>=1.0.0",
|
|
29
|
+
"numpy>=1.19.0",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.optional-dependencies]
|
|
33
|
+
dev = [
|
|
34
|
+
"pytest>=6.0",
|
|
35
|
+
"pytest-cov>=2.10",
|
|
36
|
+
"black>=21.0",
|
|
37
|
+
"flake8>=3.9",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
[project.scripts]
|
|
41
|
+
analystx = "analystx.cli:main"
|
|
42
|
+
|
|
43
|
+
[tool.setuptools]
|
|
44
|
+
package-dir = {"" = "src"}
|
|
45
|
+
|
|
46
|
+
[tool.setuptools.packages.find]
|
|
47
|
+
where = ["src"]
|
analystx-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Command-line interface for AnalystX.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
import pandas as pd
|
|
8
|
+
from .main import AnalystX
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def main():
    """Run the AnalystX command-line interface.

    Parses ``sys.argv``, loads the file given by ``--data`` into a pandas
    DataFrame (CSV or Excel, selected by file extension) and executes one of
    the ``profile``, ``kpi``, ``insight`` or ``report`` commands. Results are
    printed to stdout; for ``report`` with ``--output`` the report text is
    written to that file instead.

    The process exits with status 1 when ``--data`` is missing, when the file
    extension is not supported, or when any exception is raised while loading
    or analysing the data. All diagnostics are written to stderr so that
    stdout only ever carries command results.
    """
    parser = argparse.ArgumentParser(
        description="AnalystX: Advanced analytics and KPI engine"
    )

    parser.add_argument(
        "command",
        choices=["profile", "kpi", "insight", "report"],
        help="Command to execute",
    )

    parser.add_argument(
        "--data",
        type=str,
        help="Path to input data file (CSV, Excel, etc.)",
    )

    parser.add_argument(
        "--output",
        type=str,
        help="Path to output file",
    )

    parser.add_argument(
        "--format",
        choices=["html", "markdown", "pdf"],
        default="html",
        help="Output format for reports",
    )

    # NOTE(review): --config is accepted but not consumed by any command below.
    parser.add_argument(
        "--config",
        type=str,
        help="Path to configuration file (JSON)",
    )

    parser.add_argument(
        "-v", "--version",
        action="version",
        version="%(prog)s 0.1.0",
    )

    args = parser.parse_args()

    if not args.data:
        parser.print_help(sys.stderr)
        print("\nError: --data argument is required", file=sys.stderr)
        sys.exit(1)

    try:
        # Load data; compare the extension case-insensitively so that
        # e.g. "DATA.CSV" is treated like "data.csv".
        lowered_path = args.data.lower()
        if lowered_path.endswith(".csv"):
            data = pd.read_csv(args.data)
        elif lowered_path.endswith((".xlsx", ".xls")):
            data = pd.read_excel(args.data)
        else:
            print(f"Unsupported file format: {args.data}", file=sys.stderr)
            sys.exit(1)

        # Initialize analyzer
        analyzer = AnalystX(data=data)

        # Execute command
        if args.command == "profile":
            result = analyzer.profile()
            print("Data Profile:")
            print(result)

        elif args.command == "kpi":
            result = analyzer.calculate_kpis()
            print("KPIs:")
            for kpi, value in result.items():
                print(f"  {kpi}: {value}")

        elif args.command == "insight":
            result = analyzer.generate_insights()
            print("Insights:")
            for insight in result:
                print(f"  - {insight.get('message', 'N/A')}")

        elif args.command == "report":
            result = analyzer.create_report(output_format=args.format)
            if args.output:
                # Every format is returned as text, so one write path suffices.
                # An explicit encoding keeps non-ASCII column names from
                # failing on platforms whose default encoding is not UTF-8.
                with open(args.output, "w", encoding="utf-8") as f:
                    f.write(result)
                print(f"Report saved to {args.output}")
            else:
                print(result)

    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
if __name__ == "__main__":
|
|
111
|
+
main()
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dictionary module for managing business logic and metadata.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DataDictionary:
    """Registry of column definitions, business metrics and transformations."""

    def __init__(self):
        """Start with empty definition, metric and transformation tables."""
        self.definitions = {}
        self.metrics = {}
        self.transformations = {}

    def add_definition(self, name, definition):
        """
        Store (or overwrite) the definition registered under ``name``.

        Parameters
        ----------
        name : str
            Name of the item
        definition : dict
            Definition details (type, description, format, etc.)
        """
        self.definitions.update({name: definition})

    def add_metric(self, name, formula, description=""):
        """
        Store (or overwrite) a business metric under ``name``.

        Parameters
        ----------
        name : str
            Metric name
        formula : str
            Metric calculation formula or expression
        description : str, optional
            Metric description
        """
        entry = dict(formula=formula, description=description)
        self.metrics[name] = entry

    def add_transformation(self, name, func, description=""):
        """
        Store (or overwrite) a data transformation under ``name``.

        Parameters
        ----------
        name : str
            Transformation name
        func : callable
            Transformation function
        description : str, optional
            Description of the transformation
        """
        entry = dict(function=func, description=description)
        self.transformations[name] = entry

    def get_definition(self, name):
        """Return the definition stored under ``name``, or None if absent."""
        try:
            return self.definitions[name]
        except KeyError:
            return None

    def get_metric(self, name):
        """Return the metric entry stored under ``name``, or None if absent."""
        try:
            return self.metrics[name]
        except KeyError:
            return None

    def list_all(self):
        """Return the definitions, the metrics and the transformation names."""
        transformation_names = [*self.transformations]
        return dict(
            definitions=self.definitions,
            metrics=self.metrics,
            transformations=transformation_names,
        )
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Insight Engine for generating actionable insights from data analysis.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class InsightEngine:
    """Generate actionable insights from data and analysis results."""

    def generate(self, data, profile=None, kpis=None):
        """
        Generate insights from data and analysis results.

        Quality and distribution insights are produced only when a non-empty
        ``profile`` is supplied; KPI insights only when ``kpis`` is non-empty.

        Parameters
        ----------
        data : pd.DataFrame
            Input data
        profile : dict, optional
            Data profile results
        kpis : dict, optional
            KPI calculation results

        Returns
        -------
        list
            List of insight dictionaries
        """
        insights = []

        if profile:
            insights.extend(self._quality_insights(profile))
            insights.extend(self._distribution_insights(data))

        if kpis:
            insights.extend(self._kpi_insights(kpis))

        return insights

    @staticmethod
    def _quality_insights(profile):
        """Return warnings for completeness below 90% and for duplicate rows.

        Reads ``profile["quality_metrics"]``; missing keys are treated as
        "no problem" (100% complete, zero duplicates).
        """
        insights = []
        quality = profile.get("quality_metrics", {})

        if quality.get("completeness_pct", 100) < 90:
            insights.append({
                "type": "warning",
                "message": f"Data completeness is {quality.get('completeness_pct', 0)}%. "
                           "Consider addressing missing values.",
                "severity": "high",
            })

        if quality.get("duplicate_rows", 0) > 0:
            insights.append({
                "type": "warning",
                "message": f"Found {quality.get('duplicate_rows', 0)} duplicate rows.",
                "severity": "medium",
            })

        return insights

    @staticmethod
    def _distribution_insights(data):
        """Return one observation per numeric column whose |skewness| exceeds 1."""
        insights = []
        numeric_cols = data.select_dtypes(include=[np.number]).columns

        for col in numeric_cols:
            skewness = data[col].skew()
            # A NaN skewness compares False here, so such a column is skipped.
            if abs(skewness) > 1:
                insights.append({
                    "type": "observation",
                    "message": f"Column '{col}' has significant skewness ({skewness:.2f}).",
                    "column": col,
                })

        return insights

    @staticmethod
    def _kpi_insights(kpis):
        """Return one observation per KPI whose numeric value is negative.

        Non-numeric KPI values (for example error strings) are ignored.
        """
        insights = []

        # pandas reductions on integer columns return NumPy integer scalars,
        # which are not instances of the builtin ``int``; accept them
        # explicitly so negative integer KPIs are not silently skipped.
        real_types = (int, float, np.integer, np.floating)

        # Example: Flag anomalies or trends
        for kpi_name, kpi_value in kpis.items():
            if isinstance(kpi_value, real_types) and kpi_value < 0:
                insights.append({
                    "type": "observation",
                    "message": f"KPI '{kpi_name}' has a negative value: {kpi_value}",
                    "kpi": kpi_name,
                    "value": kpi_value,
                })

        return insights
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""
|
|
2
|
+
KPI Engine for calculating key performance indicators from data.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class KPIEngine:
    """Compute built-in descriptive KPIs and user-registered custom KPIs."""

    def __init__(self):
        """Create an engine with no custom KPI functions registered."""
        self.custom_kpis = {}

    def calculate(self, data, config=None):
        """
        Compute KPIs for ``data``.

        For every numeric column the mean, median, standard deviation,
        minimum and maximum are stored under ``"<column>_<stat>"``. When a
        non-empty ``config`` is given, the results of the matching registered
        custom KPIs are merged on top.

        Parameters
        ----------
        data : pd.DataFrame
            Data to calculate KPIs from
        config : dict, optional
            Configuration for KPI calculation

        Returns
        -------
        dict
            Dictionary of calculated KPIs
        """
        results = {}

        # Basic numeric KPIs, emitted in a fixed per-column order.
        statistic_names = ("mean", "median", "std", "min", "max")
        for column in data.select_dtypes(include=[np.number]).columns:
            series = data[column]
            for statistic in statistic_names:
                results[f"{column}_{statistic}"] = getattr(series, statistic)()

        if not config:
            return results

        # Custom KPIs may overwrite a built-in entry with the same name.
        results.update(self._calculate_custom_kpis(data, config))
        return results

    def register_kpi(self, name, func):
        """
        Register (or replace) the custom KPI function stored under ``name``.

        Parameters
        ----------
        name : str
            Name of the KPI
        func : callable
            Function that calculates the KPI
        """
        self.custom_kpis.update({name: func})

    def _calculate_custom_kpis(self, data, config):
        """Run each registered KPI named in ``config``; failures become error strings."""
        outcome = {}
        for name, settings in config.items():
            if name not in self.custom_kpis:
                # Config entries without a registered function are ignored.
                continue
            handler = self.custom_kpis[name]
            try:
                outcome[name] = handler(data, settings)
            except Exception as exc:
                outcome[name] = f"Error: {str(exc)}"
        return outcome
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Main AnalystX module containing the core analyzer class.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from .profiling import DataProfiler
|
|
7
|
+
from .kpi_engine import KPIEngine
|
|
8
|
+
from .insight_engine import InsightEngine
|
|
9
|
+
from .report import ReportGenerator
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AnalystX:
    """Main analyzer class for data profiling, KPI calculation, and insight generation.

    Results of ``profile()``, ``calculate_kpis()`` and ``generate_insights()``
    are cached on the instance and reused by later steps.
    """

    def __init__(self, data=None):
        """
        Initialize AnalystX analyzer.

        Parameters
        ----------
        data : pd.DataFrame, optional
            Input dataframe for analysis. Stored as given, without the type
            check that ``load_data`` performs.
        """
        self.data = data
        self.profiler = DataProfiler()
        self.kpi_engine = KPIEngine()
        self.insight_engine = InsightEngine()
        self.report_generator = ReportGenerator()
        self._profile = None
        self._kpis = None
        self._insights = None

    def load_data(self, data):
        """
        Load or update data for analysis.

        Any cached profile, KPIs and insights are discarded, because they
        describe the previously loaded data.

        Parameters
        ----------
        data : pd.DataFrame
            Dataframe to analyze

        Returns
        -------
        AnalystX
            This instance, to allow call chaining

        Raises
        ------
        TypeError
            If ``data`` is not a pandas DataFrame
        """
        if not isinstance(data, pd.DataFrame):
            raise TypeError("Data must be a pandas DataFrame")
        self.data = data
        # Invalidate results derived from the old dataset.
        self._profile = None
        self._kpis = None
        self._insights = None
        return self

    def profile(self):
        """
        Generate data profile including statistical summaries and quality metrics.

        Returns
        -------
        dict
            Profile results

        Raises
        ------
        ValueError
            If no data has been loaded
        """
        if self.data is None:
            raise ValueError("No data loaded. Use load_data() first.")
        self._profile = self.profiler.analyze(self.data)
        return self._profile

    def calculate_kpis(self, config=None):
        """
        Calculate KPIs based on provided configuration.

        Parameters
        ----------
        config : dict, optional
            KPI configuration

        Returns
        -------
        dict
            Calculated KPIs

        Raises
        ------
        ValueError
            If no data has been loaded
        """
        if self.data is None:
            raise ValueError("No data loaded. Use load_data() first.")
        self._kpis = self.kpi_engine.calculate(self.data, config)
        return self._kpis

    def generate_insights(self):
        """
        Generate insights from profiling and KPI results.

        The profile is computed first if it is not cached yet. KPIs are used
        only if ``calculate_kpis()`` has already been called.

        Returns
        -------
        list
            List of insights
        """
        if self._profile is None:
            self.profile()
        self._insights = self.insight_engine.generate(self.data, self._profile, self._kpis)
        return self._insights

    def create_report(self, output_format="html"):
        """
        Create a comprehensive report.

        Missing profile and insights are computed on demand; KPIs are
        included only if they were calculated beforehand.

        Parameters
        ----------
        output_format : str, optional
            Output format (html, pdf, markdown)

        Returns
        -------
        str
            Report content or file path
        """
        if self._profile is None:
            self.profile()
        if self._insights is None:
            self.generate_insights()

        report = self.report_generator.generate(
            data=self.data,
            profile=self._profile,
            kpis=self._kpis,
            insights=self._insights,
            format=output_format,
        )
        return report
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Data profiling module for comprehensive data analysis and quality assessment.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class DataProfiler:
    """Generate detailed data profiles including statistics and quality metrics."""

    def analyze(self, data):
        """
        Analyze dataset and generate comprehensive profile.

        Parameters
        ----------
        data : pd.DataFrame
            Data to profile

        Returns
        -------
        dict
            Profile with the keys ``shape``, ``columns``, ``dtypes``,
            ``summary_stats``, ``missing_values``, ``unique_counts`` and
            ``quality_metrics``
        """
        profile = {
            "shape": data.shape,
            "columns": list(data.columns),
            "dtypes": data.dtypes.to_dict(),
            "summary_stats": self._get_summary_stats(data),
            "missing_values": self._get_missing_values(data),
            "unique_counts": self._get_unique_counts(data),
            "quality_metrics": self._get_quality_metrics(data),
        }
        return profile

    @staticmethod
    def _get_summary_stats(data):
        """Return ``describe()`` statistics for numeric columns, or {} if there are none."""
        numeric_data = data.select_dtypes(include=[np.number])
        if numeric_data.empty:
            return {}
        return numeric_data.describe().to_dict()

    @staticmethod
    def _get_missing_values(data):
        """Return per-column missing-value counts and percentages (2 decimals)."""
        missing = data.isnull().sum()
        row_count = len(data)
        if row_count:
            missing_pct = (missing / row_count * 100).round(2)
        else:
            # No rows means nothing can be missing; avoid 0/0 -> NaN.
            missing_pct = missing * 0.0
        return {
            "count": missing.to_dict(),
            "percentage": missing_pct.to_dict(),
        }

    @staticmethod
    def _get_unique_counts(data):
        """Get unique value counts for all columns."""
        return data.nunique().to_dict()

    @staticmethod
    def _get_quality_metrics(data):
        """Return completeness percentage, cell counts and duplicate-row count.

        Counts are converted to builtin ``int`` so the result does not carry
        NumPy scalar types. A frame without any cells is reported as 100%
        complete.
        """
        total_cells = data.shape[0] * data.shape[1]
        missing_cells = int(data.isnull().sum().sum())
        if total_cells:
            completeness = (total_cells - missing_cells) / total_cells * 100
        else:
            # Empty frame: nothing is missing, and dividing by zero is avoided.
            completeness = 100.0

        return {
            "completeness_pct": round(completeness, 2),
            "total_cells": total_cells,
            "missing_cells": missing_cells,
            "duplicate_rows": int(data.duplicated().sum()),
        }
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Report generation module for creating analytics reports in various formats.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ReportGenerator:
|
|
9
|
+
"""Generate comprehensive analytics reports."""
|
|
10
|
+
|
|
11
|
+
def __init__(self):
|
|
12
|
+
"""Initialize report generator."""
|
|
13
|
+
self.report_config = {}
|
|
14
|
+
|
|
15
|
+
def generate(self, data=None, profile=None, kpis=None, insights=None, format="html"):
|
|
16
|
+
"""
|
|
17
|
+
Generate a report with provided analysis results.
|
|
18
|
+
|
|
19
|
+
Parameters
|
|
20
|
+
----------
|
|
21
|
+
data : pd.DataFrame, optional
|
|
22
|
+
Original data
|
|
23
|
+
profile : dict, optional
|
|
24
|
+
Data profile results
|
|
25
|
+
kpis : dict, optional
|
|
26
|
+
KPI calculations
|
|
27
|
+
insights : list, optional
|
|
28
|
+
Generated insights
|
|
29
|
+
format : str, optional
|
|
30
|
+
Output format (html, markdown, pdf)
|
|
31
|
+
|
|
32
|
+
Returns
|
|
33
|
+
-------
|
|
34
|
+
str
|
|
35
|
+
Report content or file path
|
|
36
|
+
"""
|
|
37
|
+
if format == "html":
|
|
38
|
+
return self._generate_html_report(data, profile, kpis, insights)
|
|
39
|
+
elif format == "markdown":
|
|
40
|
+
return self._generate_markdown_report(data, profile, kpis, insights)
|
|
41
|
+
elif format == "pdf":
|
|
42
|
+
return self._generate_pdf_report(data, profile, kpis, insights)
|
|
43
|
+
else:
|
|
44
|
+
raise ValueError(f"Unsupported format: {format}")
|
|
45
|
+
|
|
46
|
+
@staticmethod
|
|
47
|
+
def _generate_html_report(data, profile, kpis, insights):
|
|
48
|
+
"""Generate HTML report."""
|
|
49
|
+
html = f"""
|
|
50
|
+
<!DOCTYPE html>
|
|
51
|
+
<html>
|
|
52
|
+
<head>
|
|
53
|
+
<title>AnalystX Report</title>
|
|
54
|
+
<style>
|
|
55
|
+
body {{ font-family: Arial, sans-serif; margin: 20px; }}
|
|
56
|
+
.section {{ margin: 20px 0; padding: 10px; border-left: 4px solid #007bff; }}
|
|
57
|
+
h2 {{ color: #007bff; }}
|
|
58
|
+
table {{ border-collapse: collapse; width: 100%; }}
|
|
59
|
+
th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
|
|
60
|
+
th {{ background-color: #f8f9fa; }}
|
|
61
|
+
</style>
|
|
62
|
+
</head>
|
|
63
|
+
<body>
|
|
64
|
+
<h1>AnalystX Report</h1>
|
|
65
|
+
<p>Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
|
|
66
|
+
|
|
67
|
+
<div class="section">
|
|
68
|
+
<h2>Data Profile</h2>
|
|
69
|
+
{ReportGenerator._format_profile_html(profile)}
|
|
70
|
+
</div>
|
|
71
|
+
|
|
72
|
+
<div class="section">
|
|
73
|
+
<h2>Key Performance Indicators</h2>
|
|
74
|
+
{ReportGenerator._format_kpis_html(kpis)}
|
|
75
|
+
</div>
|
|
76
|
+
|
|
77
|
+
<div class="section">
|
|
78
|
+
<h2>Insights</h2>
|
|
79
|
+
{ReportGenerator._format_insights_html(insights)}
|
|
80
|
+
</div>
|
|
81
|
+
</body>
|
|
82
|
+
</html>
|
|
83
|
+
"""
|
|
84
|
+
return html
|
|
85
|
+
|
|
86
|
+
@staticmethod
|
|
87
|
+
def _generate_markdown_report(data, profile, kpis, insights):
|
|
88
|
+
"""Generate Markdown report."""
|
|
89
|
+
md = f"""# AnalystX Report
|
|
90
|
+
|
|
91
|
+
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
|
|
92
|
+
|
|
93
|
+
## Data Profile
|
|
94
|
+
|
|
95
|
+
{ReportGenerator._format_profile_markdown(profile)}
|
|
96
|
+
|
|
97
|
+
## Key Performance Indicators
|
|
98
|
+
|
|
99
|
+
{ReportGenerator._format_kpis_markdown(kpis)}
|
|
100
|
+
|
|
101
|
+
## Insights
|
|
102
|
+
|
|
103
|
+
{ReportGenerator._format_insights_markdown(insights)}
|
|
104
|
+
"""
|
|
105
|
+
return md
|
|
106
|
+
|
|
107
|
+
@staticmethod
|
|
108
|
+
def _generate_pdf_report(data, profile, kpis, insights):
|
|
109
|
+
"""Placeholder for PDF report generation."""
|
|
110
|
+
return "PDF report generation not yet implemented."
|
|
111
|
+
|
|
112
|
+
@staticmethod
|
|
113
|
+
def _format_profile_html(profile):
|
|
114
|
+
"""Format profile results as HTML."""
|
|
115
|
+
if not profile:
|
|
116
|
+
return "<p>No profile data available.</p>"
|
|
117
|
+
return f"<p>Shape: {profile.get('shape', 'N/A')}</p>"
|
|
118
|
+
|
|
119
|
+
@staticmethod
|
|
120
|
+
def _format_profile_markdown(profile):
|
|
121
|
+
"""Format profile results as Markdown."""
|
|
122
|
+
if not profile:
|
|
123
|
+
return "No profile data available."
|
|
124
|
+
return f"- Shape: {profile.get('shape', 'N/A')}"
|
|
125
|
+
|
|
126
|
+
@staticmethod
|
|
127
|
+
def _format_kpis_html(kpis):
    """Render up to the first ten KPIs as an HTML table.

    KPI names and values are HTML-escaped before being placed in the
    table cells, so a name or value containing ``<``, ``>``, ``&`` or
    quotes is shown literally instead of being interpreted as markup.

    Args:
        kpis: Mapping of KPI name to value. A falsy value (``None`` or
            an empty mapping) yields a placeholder paragraph.

    Returns:
        A ``<table>`` with a header row and at most ten data rows, in
        the mapping's iteration order.
    """
    # Local imports keep this block independent of the file-level imports.
    from html import escape
    from itertools import islice

    if not kpis:
        return "<p>No KPI data available.</p>"

    # Only the first ten entries are rendered; islice avoids building
    # rows that would be discarded.
    rows = "".join(
        f"<tr><td>{escape(str(name))}</td><td>{escape(str(value))}</td></tr>"
        for name, value in islice(kpis.items(), 10)
    )
    return f"<table><tr><th>KPI</th><th>Value</th></tr>{rows}</table>"
|
|
133
|
+
|
|
134
|
+
@staticmethod
|
|
135
|
+
def _format_kpis_markdown(kpis):
    """Render up to the first ten KPIs as a Markdown table.

    Args:
        kpis: Mapping of KPI name to value. A falsy value (``None`` or
            an empty mapping) yields a placeholder sentence.

    Returns:
        A two-column Markdown table (header, separator, then at most
        ten rows in the mapping's iteration order).
    """
    if not kpis:
        return "No KPI data available."

    table_rows = []
    for name, value in kpis.items():
        table_rows.append(f"| {name} | {value} |")

    # Keep only the first ten rows.
    body = "\n".join(table_rows[:10])
    header = "| KPI | Value |\n|-----|-------|\n"
    return header + body
|
|
141
|
+
|
|
142
|
+
@staticmethod
|
|
143
|
+
def _format_insights_html(insights):
    """Render insights as an HTML unordered list.

    Each insight's message is HTML-escaped before being placed inside
    its ``<li>``, so text containing ``<``, ``>``, ``&`` or quotes is
    shown literally instead of being interpreted as markup.

    Args:
        insights: Iterable of mappings; each item's ``'message'`` entry
            is rendered, falling back to ``N/A`` when the key is absent.
            A falsy value (``None`` or an empty list) yields a
            placeholder paragraph.

    Returns:
        A ``<ul>`` with one ``<li>`` per insight, in input order.
    """
    # Local import keeps this block independent of the file-level imports.
    from html import escape

    if not insights:
        return "<p>No insights available.</p>"

    items = "".join(
        f"<li>{escape(str(insight.get('message', 'N/A')))}</li>"
        for insight in insights
    )
    return f"<ul>{items}</ul>"
|
|
149
|
+
|
|
150
|
+
@staticmethod
|
|
151
|
+
def _format_insights_markdown(insights):
    """Render insights as a Markdown bullet list.

    Args:
        insights: Iterable of mappings; each item's ``'message'`` entry
            is rendered, falling back to ``N/A`` when the key is absent.
            A falsy value (``None`` or an empty list) yields a
            placeholder sentence.

    Returns:
        One ``- message`` line per insight, joined by newlines.
    """
    if not insights:
        return "No insights available."

    bullets = []
    for insight in insights:
        message = insight.get('message', 'N/A')
        bullets.append(f"- {message}")
    return "\n".join(bullets)
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: analystx
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Advanced analytics and KPI engine for data-driven insights
|
|
5
|
+
Author-email: Your Name <your.email@example.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: analytics,kpi,insights,profiling,reporting
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Requires-Python: >=3.8
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: pandas>=1.0.0
|
|
20
|
+
Requires-Dist: numpy>=1.19.0
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest>=6.0; extra == "dev"
|
|
23
|
+
Requires-Dist: pytest-cov>=2.10; extra == "dev"
|
|
24
|
+
Requires-Dist: black>=21.0; extra == "dev"
|
|
25
|
+
Requires-Dist: flake8>=3.9; extra == "dev"
|
|
26
|
+
Dynamic: license-file
|
|
27
|
+
|
|
28
|
+
# AnalystX
|
|
29
|
+
|
|
30
|
+
Advanced analytics and KPI engine for data-driven insights.
|
|
31
|
+
|
|
32
|
+
## Overview
|
|
33
|
+
|
|
34
|
+
AnalystX is a Python library designed to provide comprehensive data profiling, KPI calculation, insight generation, and automated reporting capabilities. It simplifies complex analytics workflows and enables data-driven decision making.
|
|
35
|
+
|
|
36
|
+
## Features
|
|
37
|
+
|
|
38
|
+
- **Data Profiling**: Automatic data quality and statistical analysis
|
|
39
|
+
- **KPI Engine**: Flexible and extensible key performance indicator calculation
|
|
40
|
+
- **Insight Engine**: Generate actionable insights from data
|
|
41
|
+
- **Report Generation**: Create professional analytics reports
|
|
42
|
+
- **CLI Interface**: Command-line tools for easy integration
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install analystx
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Or install from source for development:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
git clone <repository-url>
|
|
54
|
+
cd analystx
|
|
55
|
+
pip install -e ".[dev]"
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from analystx import AnalystX
|
|
62
|
+
|
|
63
|
+
# Initialize analyzer
|
|
64
|
+
analyzer = AnalystX(data=df)
|
|
65
|
+
|
|
66
|
+
# Run profiling
|
|
67
|
+
profile = analyzer.profile()
|
|
68
|
+
|
|
69
|
+
# Calculate KPIs
|
|
70
|
+
kpis = analyzer.calculate_kpis()
|
|
71
|
+
|
|
72
|
+
# Generate insights
|
|
73
|
+
insights = analyzer.generate_insights()
|
|
74
|
+
|
|
75
|
+
# Create report
|
|
76
|
+
report = analyzer.create_report()
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Command Line Usage
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
analystx --help
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Documentation
|
|
86
|
+
|
|
87
|
+
For detailed documentation, see the [docs](docs/) directory.
|
|
88
|
+
|
|
89
|
+
## Contributing
|
|
90
|
+
|
|
91
|
+
Contributions are welcome! Please read our contributing guidelines.
|
|
92
|
+
|
|
93
|
+
## License
|
|
94
|
+
|
|
95
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
96
|
+
|
|
97
|
+
## Author
|
|
98
|
+
|
|
99
|
+
Your Name <your.email@example.com>
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/analystx/__init__.py
|
|
5
|
+
src/analystx/cli.py
|
|
6
|
+
src/analystx/dictionary.py
|
|
7
|
+
src/analystx/insight_engine.py
|
|
8
|
+
src/analystx/kpi_engine.py
|
|
9
|
+
src/analystx/main.py
|
|
10
|
+
src/analystx/profiling.py
|
|
11
|
+
src/analystx/report.py
|
|
12
|
+
src/analystx.egg-info/PKG-INFO
|
|
13
|
+
src/analystx.egg-info/SOURCES.txt
|
|
14
|
+
src/analystx.egg-info/dependency_links.txt
|
|
15
|
+
src/analystx.egg-info/entry_points.txt
|
|
16
|
+
src/analystx.egg-info/requires.txt
|
|
17
|
+
src/analystx.egg-info/top_level.txt
|
|
18
|
+
tests/test_basic.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
analystx
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Basic tests for AnalystX functionality.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
import pandas as pd
|
|
7
|
+
import numpy as np
|
|
8
|
+
from analystx import AnalystX
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest.fixture
|
|
12
|
+
def sample_data():
    """Build the five-row, four-column frame shared by the tests.

    Columns: integer ``id``, float ``value``, string ``category`` and
    boolean ``flag``.
    """
    columns = {
        "id": [1, 2, 3, 4, 5],
        "value": [10.5, 20.3, 15.8, 30.1, 25.6],
        "category": ["A", "B", "A", "B", "A"],
        "flag": [True, False, True, False, True],
    }
    return pd.DataFrame(columns)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TestAnalystXBasic:
    """Smoke tests for the public AnalystX entry points."""

    def test_initialization(self):
        """An analyzer built without arguments holds no data."""
        subject = AnalystX()
        assert subject.data is None

    def test_load_data(self, sample_data):
        """After load_data, ``data`` is set and has five rows."""
        subject = AnalystX()
        subject.load_data(sample_data)

        assert subject.data is not None
        assert len(subject.data) == 5

    def test_initialization_with_data(self, sample_data):
        """Passing ``data=`` to the constructor sets ``data``."""
        subject = AnalystX(data=sample_data)
        assert subject.data is not None

    def test_profiling(self, sample_data):
        """profile() returns a result exposing the expected keys."""
        result = AnalystX(data=sample_data).profile()

        assert result is not None
        for expected_key in ("shape", "columns", "dtypes", "missing_values"):
            assert expected_key in result

    def test_kpi_calculation(self, sample_data):
        """calculate_kpis() returns a dict containing ``value_mean``."""
        result = AnalystX(data=sample_data).calculate_kpis()

        assert result is not None
        assert isinstance(result, dict)
        assert "value_mean" in result

    def test_insight_generation(self, sample_data):
        """generate_insights() returns a list."""
        result = AnalystX(data=sample_data).generate_insights()

        assert result is not None
        assert isinstance(result, list)

    def test_report_generation(self, sample_data):
        """The HTML report is a string containing the report title."""
        subject = AnalystX(data=sample_data)
        html_report = subject.create_report(output_format="html")

        assert html_report is not None
        assert isinstance(html_report, str)
        assert "AnalystX Report" in html_report

    def test_report_markdown(self, sample_data):
        """The Markdown report is a string containing the title heading."""
        subject = AnalystX(data=sample_data)
        md_report = subject.create_report(output_format="markdown")

        assert md_report is not None
        assert isinstance(md_report, str)
        assert "# AnalystX Report" in md_report

    def test_error_on_missing_data(self):
        """profile() raises ValueError when no data has been loaded."""
        empty = AnalystX()
        with pytest.raises(ValueError):
            empty.profile()

    def test_invalid_data_type(self):
        """load_data raises TypeError when given a plain list."""
        empty = AnalystX()
        with pytest.raises(TypeError):
            empty.load_data([1, 2, 3])
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class TestDataProfiler:
    """Checks on the contents of the profile result."""

    def test_profile_columns(self, sample_data):
        """The profile lists all four columns, including id and value."""
        result = AnalystX(data=sample_data).profile()

        assert len(result["columns"]) == 4
        for column_name in ("id", "value"):
            assert column_name in result["columns"]

    def test_profile_dtypes(self, sample_data):
        """The profile's dtypes entry has an item for the id column."""
        result = AnalystX(data=sample_data).profile()

        assert "id" in result["dtypes"]
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class TestKPIEngine:
    """Checks KPI values against pandas' own aggregates."""

    def test_kpi_mean(self, sample_data):
        """``value_mean`` is within 0.01 of the column's pandas mean."""
        result = AnalystX(data=sample_data).calculate_kpis()

        reference = sample_data["value"].mean()
        deviation = abs(result["value_mean"] - reference)
        assert deviation < 0.01

    def test_kpi_median(self, sample_data):
        """``value_median`` is within 0.01 of the column's pandas median."""
        result = AnalystX(data=sample_data).calculate_kpis()

        reference = sample_data["value"].median()
        deviation = abs(result["value_median"] - reference)
        assert deviation < 0.01
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
if __name__ == "__main__":
    # pytest.main returns the run's exit code; propagate it so that running
    # this file as a script exits non-zero when a test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))
|