azure77 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- azure77-0.1.0/LICENSE +21 -0
- azure77-0.1.0/PKG-INFO +110 -0
- azure77-0.1.0/README.md +79 -0
- azure77-0.1.0/pyproject.toml +44 -0
- azure77-0.1.0/setup.cfg +4 -0
- azure77-0.1.0/src/azure77/__init__.py +86 -0
- azure77-0.1.0/src/azure77/benchmark/__init__.py +25 -0
- azure77-0.1.0/src/azure77/benchmark/advisor.py +128 -0
- azure77-0.1.0/src/azure77/benchmark/models.py +112 -0
- azure77-0.1.0/src/azure77/benchmark/report_builder.py +143 -0
- azure77-0.1.0/src/azure77/benchmark/service.py +430 -0
- azure77-0.1.0/src/azure77/benchmark/stats.py +88 -0
- azure77-0.1.0/src/azure77/benchmark/store.py +165 -0
- azure77-0.1.0/src/azure77/benchmark/timer.py +61 -0
- azure77-0.1.0/src/azure77/cli/__init__.py +23 -0
- azure77-0.1.0/src/azure77/cli/benchmark.py +417 -0
- azure77-0.1.0/src/azure77/cli/client.py +171 -0
- azure77-0.1.0/src/azure77/cli/connection.py +107 -0
- azure77-0.1.0/src/azure77/cli/datasets.py +274 -0
- azure77-0.1.0/src/azure77/cli/upload.py +317 -0
- azure77-0.1.0/src/azure77/client_exceptions.py +48 -0
- azure77-0.1.0/src/azure77/client_models.py +42 -0
- azure77-0.1.0/src/azure77/client_service.py +255 -0
- azure77-0.1.0/src/azure77/config_loader.py +71 -0
- azure77-0.1.0/src/azure77/connection_manager.py +293 -0
- azure77-0.1.0/src/azure77/database.py +119 -0
- azure77-0.1.0/src/azure77/datasets/__init__.py +23 -0
- azure77-0.1.0/src/azure77/datasets/exceptions.py +28 -0
- azure77-0.1.0/src/azure77/datasets/models.py +95 -0
- azure77-0.1.0/src/azure77/datasets/queries.py +276 -0
- azure77-0.1.0/src/azure77/datasets/service.py +657 -0
- azure77-0.1.0/src/azure77/datasets/snapshots.py +224 -0
- azure77-0.1.0/src/azure77/exceptions.py +25 -0
- azure77-0.1.0/src/azure77/history_store.py +146 -0
- azure77-0.1.0/src/azure77/metadata_repository.py +264 -0
- azure77-0.1.0/src/azure77/models.py +74 -0
- azure77-0.1.0/src/azure77/query_service.py +314 -0
- azure77-0.1.0/src/azure77/result_exporter.py +140 -0
- azure77-0.1.0/src/azure77/safety.py +73 -0
- azure77-0.1.0/src/azure77/saved_query_store.py +173 -0
- azure77-0.1.0/src/azure77/schema_manager.py +45 -0
- azure77-0.1.0/src/azure77/slug_generator.py +81 -0
- azure77-0.1.0/src/azure77/sql_parser.py +146 -0
- azure77-0.1.0/src/azure77/upload/__init__.py +25 -0
- azure77-0.1.0/src/azure77/upload/exceptions.py +17 -0
- azure77-0.1.0/src/azure77/upload/import_logger.py +222 -0
- azure77-0.1.0/src/azure77/upload/models.py +69 -0
- azure77-0.1.0/src/azure77/upload/normalizer.py +88 -0
- azure77-0.1.0/src/azure77/upload/parser.py +279 -0
- azure77-0.1.0/src/azure77/upload/preview.py +127 -0
- azure77-0.1.0/src/azure77/upload/service.py +407 -0
- azure77-0.1.0/src/azure77/upload/type_inferrer.py +265 -0
- azure77-0.1.0/src/azure77.egg-info/PKG-INFO +110 -0
- azure77-0.1.0/src/azure77.egg-info/SOURCES.txt +72 -0
- azure77-0.1.0/src/azure77.egg-info/dependency_links.txt +1 -0
- azure77-0.1.0/src/azure77.egg-info/entry_points.txt +2 -0
- azure77-0.1.0/src/azure77.egg-info/requires.txt +8 -0
- azure77-0.1.0/src/azure77.egg-info/top_level.txt +1 -0
- azure77-0.1.0/tests/test_advisor.py +259 -0
- azure77-0.1.0/tests/test_benchmark_service.py +350 -0
- azure77-0.1.0/tests/test_client_exceptions.py +161 -0
- azure77-0.1.0/tests/test_connection_manager.py +334 -0
- azure77-0.1.0/tests/test_dataset_service.py +736 -0
- azure77-0.1.0/tests/test_exceptions.py +39 -0
- azure77-0.1.0/tests/test_history_store.py +156 -0
- azure77-0.1.0/tests/test_import_logger.py +342 -0
- azure77-0.1.0/tests/test_import_service.py +633 -0
- azure77-0.1.0/tests/test_metadata_repository.py +551 -0
- azure77-0.1.0/tests/test_preview_service.py +357 -0
- azure77-0.1.0/tests/test_query_service.py +211 -0
- azure77-0.1.0/tests/test_report_builder.py +183 -0
- azure77-0.1.0/tests/test_snapshots.py +255 -0
- azure77-0.1.0/tests/test_stats.py +121 -0
- azure77-0.1.0/tests/test_type_inferrer.py +287 -0
azure77-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 77 Indicadores
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
azure77-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: azure77
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Upload, query and manage data on Azure SQL Server from Python
|
|
5
|
+
Author: 77 Indicadores
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Keywords: azure,sql,database,upload,data
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Database
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: pyodbc>=4.0.30
|
|
23
|
+
Requires-Dist: python-dotenv>=0.19
|
|
24
|
+
Requires-Dist: unidecode>=1.3
|
|
25
|
+
Requires-Dist: chardet>=5.0
|
|
26
|
+
Requires-Dist: openpyxl>=3.1
|
|
27
|
+
Requires-Dist: xlrd>=2.0
|
|
28
|
+
Requires-Dist: click>=8.0
|
|
29
|
+
Requires-Dist: rich>=12.0
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# azure77
|
|
33
|
+
|
|
34
|
+
Upload, query and manage data on Azure SQL Server from Python.
|
|
35
|
+
|
|
36
|
+
## Installation
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install azure77
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Quick Start
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from azure77 import ConnectionManager
|
|
46
|
+
|
|
47
|
+
# Connect to Azure SQL
|
|
48
|
+
cm = ConnectionManager(
|
|
49
|
+
server="your-server.database.windows.net",
|
|
50
|
+
database="your-db",
|
|
51
|
+
user="your-user",
|
|
52
|
+
password="your-pass",
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
# Test connection
|
|
56
|
+
result = cm.test_connection()
|
|
57
|
+
print(result.success, result.message)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Features
|
|
61
|
+
|
|
62
|
+
- **Connection management** — Azure SQL via pyodbc (installed by default) or pymssql (not a declared dependency; install it separately)
|
|
63
|
+
- **Client/schema organization** — each client maps to a SQL schema
|
|
64
|
+
- **File upload** — CSV, XLSX and XLS, with automatic encoding detection
|
|
65
|
+
- **Column normalization** — lowercase, accent-free, SQL-safe names
|
|
66
|
+
- **Type inference** — automatic INTEGER, DECIMAL, DATE, NVARCHAR
|
|
67
|
+
- **SQL query execution** — with safety validation (blocks DROP, DELETE, etc.)
|
|
68
|
+
- **Query history** — stored in local SQLite
|
|
69
|
+
- **Dataset browsing** — metadata, import logs, schema diff
|
|
70
|
+
- **Benchmark** — measure upload performance across strategies
|
|
71
|
+
|
|
72
|
+
## CLI
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
azure77 connection test # Test Azure SQL connection
|
|
76
|
+
azure77 connection status # Show config status
|
|
77
|
+
azure77 client list # List clients
|
|
78
|
+
azure77 client create "Nome" # Create client
|
|
79
|
+
azure77 upload preview file.csv # Preview file
|
|
80
|
+
azure77 upload import file.csv --client adl --table vendas
|
|
81
|
+
azure77 datasets list adl # List datasets
|
|
82
|
+
azure77 benchmark run file.csv # Run benchmark
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Configuration
|
|
86
|
+
|
|
87
|
+
Create a `.env` file:
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
AZURE77_SERVER=server.database.windows.net
|
|
91
|
+
AZURE77_DATABASE=dbname
|
|
92
|
+
AZURE77_USER=username
|
|
93
|
+
AZURE77_PASSWORD=password
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Or pass directly:
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
cm = ConnectionManager(
|
|
100
|
+
server="...",
|
|
101
|
+
database="...",
|
|
102
|
+
user="...",
|
|
103
|
+
password="...",
|
|
104
|
+
driver="pymssql", # or "pyodbc"
|
|
105
|
+
)
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## License
|
|
109
|
+
|
|
110
|
+
MIT
|
azure77-0.1.0/README.md
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# azure77
|
|
2
|
+
|
|
3
|
+
Upload, query and manage data on Azure SQL Server from Python.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install azure77
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from azure77 import ConnectionManager
|
|
15
|
+
|
|
16
|
+
# Connect to Azure SQL
|
|
17
|
+
cm = ConnectionManager(
|
|
18
|
+
server="your-server.database.windows.net",
|
|
19
|
+
database="your-db",
|
|
20
|
+
user="your-user",
|
|
21
|
+
password="your-pass",
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# Test connection
|
|
25
|
+
result = cm.test_connection()
|
|
26
|
+
print(result.success, result.message)
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Features
|
|
30
|
+
|
|
31
|
+
- **Connection management** — Azure SQL via pyodbc (installed by default) or pymssql (not a declared dependency; install it separately)
|
|
32
|
+
- **Client/schema organization** — each client maps to a SQL schema
|
|
33
|
+
- **File upload** — CSV, XLSX and XLS, with automatic encoding detection
|
|
34
|
+
- **Column normalization** — lowercase, accent-free, SQL-safe names
|
|
35
|
+
- **Type inference** — automatic INTEGER, DECIMAL, DATE, NVARCHAR
|
|
36
|
+
- **SQL query execution** — with safety validation (blocks DROP, DELETE, etc.)
|
|
37
|
+
- **Query history** — stored in local SQLite
|
|
38
|
+
- **Dataset browsing** — metadata, import logs, schema diff
|
|
39
|
+
- **Benchmark** — measure upload performance across strategies
|
|
40
|
+
|
|
41
|
+
## CLI
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
azure77 connection test # Test Azure SQL connection
|
|
45
|
+
azure77 connection status # Show config status
|
|
46
|
+
azure77 client list # List clients
|
|
47
|
+
azure77 client create "Nome" # Create client
|
|
48
|
+
azure77 upload preview file.csv # Preview file
|
|
49
|
+
azure77 upload import file.csv --client adl --table vendas
|
|
50
|
+
azure77 datasets list adl # List datasets
|
|
51
|
+
azure77 benchmark run file.csv # Run benchmark
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Configuration
|
|
55
|
+
|
|
56
|
+
Create a `.env` file:
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
AZURE77_SERVER=server.database.windows.net
|
|
60
|
+
AZURE77_DATABASE=dbname
|
|
61
|
+
AZURE77_USER=username
|
|
62
|
+
AZURE77_PASSWORD=password
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Or pass directly:
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
cm = ConnectionManager(
|
|
69
|
+
server="...",
|
|
70
|
+
database="...",
|
|
71
|
+
user="...",
|
|
72
|
+
password="...",
|
|
73
|
+
driver="pymssql", # or "pyodbc"
|
|
74
|
+
)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## License
|
|
78
|
+
|
|
79
|
+
MIT
|
|
azure77-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=64"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "azure77"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Upload, query and manage data on Azure SQL Server from Python"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "77 Indicadores" },
|
|
14
|
+
]
|
|
15
|
+
keywords = ["azure", "sql", "database", "upload", "data"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.9",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Programming Language :: Python :: 3.13",
|
|
26
|
+
"Topic :: Database",
|
|
27
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
28
|
+
]
|
|
29
|
+
dependencies = [
|
|
30
|
+
"pyodbc>=4.0.30",
|
|
31
|
+
"python-dotenv>=0.19",
|
|
32
|
+
"unidecode>=1.3",
|
|
33
|
+
"chardet>=5.0",
|
|
34
|
+
"openpyxl>=3.1",
|
|
35
|
+
"xlrd>=2.0",
|
|
36
|
+
"click>=8.0",
|
|
37
|
+
"rich>=12.0",
|
|
38
|
+
]
|
|
39
|
+
|
|
40
|
+
[project.scripts]
|
|
41
|
+
azure77 = "azure77.cli:cli"
|
|
42
|
+
|
|
43
|
+
[tool.setuptools.packages.find]
|
|
44
|
+
where = ["src"]
|
azure77-0.1.0/setup.cfg
ADDED
|
azure77-0.1.0/src/azure77/__init__.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""azure77 -- Azure SQL Connection Management for Python."""
|
|
2
|
+
|
|
3
|
+
from azure77.connection_manager import ConnectionManager
|
|
4
|
+
from azure77.datasets.exceptions import (
|
|
5
|
+
ClientNotFoundError as DatasetsClientNotFoundError,
|
|
6
|
+
DatasetServiceError,
|
|
7
|
+
MetadataQueryError,
|
|
8
|
+
TableNotFoundError,
|
|
9
|
+
)
|
|
10
|
+
from azure77.datasets.models import (
|
|
11
|
+
ClientSummary,
|
|
12
|
+
ColumnMetadata,
|
|
13
|
+
DatasetMetadata,
|
|
14
|
+
DatasetSummary,
|
|
15
|
+
SchemaDiff,
|
|
16
|
+
VersionEvent,
|
|
17
|
+
)
|
|
18
|
+
from azure77.datasets.service import DatasetService
|
|
19
|
+
from azure77.exceptions import ConfigError, ConnectionTestError
|
|
20
|
+
from azure77.models import (
|
|
21
|
+
ConfigStatus,
|
|
22
|
+
ConfigTestResult,
|
|
23
|
+
QueryHistoryEntry,
|
|
24
|
+
QueryResult,
|
|
25
|
+
SavedQuery,
|
|
26
|
+
)
|
|
27
|
+
from azure77.query_service import QueryService
|
|
28
|
+
from azure77.safety import SafetyValidator
|
|
29
|
+
from azure77.sql_parser import extract_command_tokens
|
|
30
|
+
|
|
31
|
+
from azure77.client_service import ClientService
|
|
32
|
+
from azure77.client_models import client_record, schema_validation_result
|
|
33
|
+
from azure77.client_exceptions import (
|
|
34
|
+
Azure77Error,
|
|
35
|
+
ClientNotFoundError,
|
|
36
|
+
ClientValidationError,
|
|
37
|
+
DuplicateSlugError,
|
|
38
|
+
SchemaOperationError,
|
|
39
|
+
SlugValidationError,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
__all__ = [
|
|
43
|
+
# connection_manager
|
|
44
|
+
"ConnectionManager",
|
|
45
|
+
# exceptions
|
|
46
|
+
"ConfigError",
|
|
47
|
+
"ConnectionTestError",
|
|
48
|
+
# models
|
|
49
|
+
"ConfigTestResult",
|
|
50
|
+
"ConfigStatus",
|
|
51
|
+
"QueryHistoryEntry",
|
|
52
|
+
"QueryResult",
|
|
53
|
+
"SavedQuery",
|
|
54
|
+
# query_service
|
|
55
|
+
"QueryService",
|
|
56
|
+
# safety
|
|
57
|
+
"SafetyValidator",
|
|
58
|
+
# sql_parser
|
|
59
|
+
"extract_command_tokens",
|
|
60
|
+
# datasets.service
|
|
61
|
+
"DatasetService",
|
|
62
|
+
# datasets.models
|
|
63
|
+
"DatasetSummary",
|
|
64
|
+
"ColumnMetadata",
|
|
65
|
+
"DatasetMetadata",
|
|
66
|
+
"ClientSummary",
|
|
67
|
+
"VersionEvent",
|
|
68
|
+
"SchemaDiff",
|
|
69
|
+
# datasets.exceptions
|
|
70
|
+
"DatasetServiceError",
|
|
71
|
+
"DatasetsClientNotFoundError",
|
|
72
|
+
"TableNotFoundError",
|
|
73
|
+
"MetadataQueryError",
|
|
74
|
+
# client_service
|
|
75
|
+
"ClientService",
|
|
76
|
+
# client_models
|
|
77
|
+
"client_record",
|
|
78
|
+
"schema_validation_result",
|
|
79
|
+
# client_exceptions
|
|
80
|
+
"Azure77Error",
|
|
81
|
+
"ClientNotFoundError",
|
|
82
|
+
"ClientValidationError",
|
|
83
|
+
"DuplicateSlugError",
|
|
84
|
+
"SchemaOperationError",
|
|
85
|
+
"SlugValidationError",
|
|
86
|
+
]
|
|
azure77-0.1.0/src/azure77/benchmark/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""azure77.benchmark -- Performance benchmarking for upload strategies."""
|
|
2
|
+
|
|
3
|
+
from azure77.benchmark.models import (
|
|
4
|
+
AggregateResult,
|
|
5
|
+
BenchmarkConfig,
|
|
6
|
+
BenchmarkReport,
|
|
7
|
+
CategorySuggestion,
|
|
8
|
+
EnvironmentInfo,
|
|
9
|
+
OptimizationSuggestions,
|
|
10
|
+
StrategyResult,
|
|
11
|
+
TimingSample,
|
|
12
|
+
VarianceWarning,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"AggregateResult",
|
|
17
|
+
"BenchmarkConfig",
|
|
18
|
+
"BenchmarkReport",
|
|
19
|
+
"CategorySuggestion",
|
|
20
|
+
"EnvironmentInfo",
|
|
21
|
+
"OptimizationSuggestions",
|
|
22
|
+
"StrategyResult",
|
|
23
|
+
"TimingSample",
|
|
24
|
+
"VarianceWarning",
|
|
25
|
+
]
|
|
azure77-0.1.0/src/azure77/benchmark/advisor.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Rule-based optimization advisor for benchmark reports."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
|
|
7
|
+
from azure77.benchmark.models import (
|
|
8
|
+
BenchmarkReport,
|
|
9
|
+
CategorySuggestion,
|
|
10
|
+
OptimizationSuggestions,
|
|
11
|
+
VarianceWarning,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
# ---------------------------------------------------------------------------
|
|
15
|
+
# Thresholds
|
|
16
|
+
# ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
SMALL_MAX_BYTES = 1_000_000  # sizes strictly below 1 MB are "small"
LARGE_MIN_BYTES = 50_000_000  # sizes strictly above 50 MB are "large"
CV_WARNING_THRESHOLD = 0.15  # coefficient of variation above which a warning is emitted


def _categorize(file_size_bytes: int) -> str:
    """Classify a file size as ``'small'``, ``'medium'`` or ``'large'``.

    A size below ``SMALL_MAX_BYTES`` is small and a size above
    ``LARGE_MIN_BYTES`` is large.  Everything else -- including both
    threshold values themselves -- is medium.
    """
    if file_size_bytes < SMALL_MAX_BYTES:
        label = "small"
    elif file_size_bytes > LARGE_MIN_BYTES:
        label = "large"
    else:
        label = "medium"
    return label
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class OptimizationAdvisor:
    """Turn a BenchmarkReport into rule-based OptimizationSuggestions."""

    def analyze(self, report: BenchmarkReport) -> OptimizationSuggestions:
        """Derive recommendations from a completed benchmark report.

        The report's strategy results are processed as follows:

        * Results are grouped into 'small', 'medium' and 'large' by
          ``file_size_bytes``.
        * Within each non-empty group, the strategy with the lowest average
          ``mean_wall_clock`` is recommended; its estimated saving is the
          gap to the slowest strategy average in that group.
        * Across all results, each strategy's estimated saving is the gap
          between its average and the slowest strategy average.
        * Every result whose coefficient of variation exceeds
          ``CV_WARNING_THRESHOLD`` yields a VarianceWarning.
        * The overall recommendation is the strategy with the lowest
          overall average, or ``None`` when there are no strategy results.
        """

        def mean_time_by_strategy(entries) -> dict[str, float]:
            # Keys keep first-appearance order, so min()/max() ties resolve
            # in the order the strategies occur in the report.
            grouped: dict[str, list[float]] = defaultdict(list)
            for entry in entries:
                grouped[entry.strategy_id].append(entry.aggregate.mean_wall_clock)
            return {
                name: sum(samples) / len(samples)
                for name, samples in grouped.items()
            }

        # Group the results by file-size category.
        by_category: dict[str, list] = defaultdict(list)
        for entry in report.strategy_results:
            by_category[_categorize(entry.file_size_bytes)].append(entry)

        # Fastest strategy per category; categories without results are skipped.
        category_recommendations: dict[str, CategorySuggestion] = {}
        for label in ("small", "medium", "large"):
            members = by_category.get(label)
            if not members:
                continue
            means = mean_time_by_strategy(members)
            fastest = min(means, key=means.get)
            category_recommendations[label] = CategorySuggestion(
                category=label,
                recommended_strategy=fastest,
                estimated_savings=max(means.values()) - means[fastest],
            )

        # Overall averages drive both the savings table and the final pick.
        overall_means = mean_time_by_strategy(report.strategy_results)
        if overall_means:
            slowest_mean = max(overall_means.values())
            overall_recommendation = min(overall_means, key=overall_means.get)
        else:
            slowest_mean = 0.0
            overall_recommendation = None
        estimated_savings = {
            name: slowest_mean - mean for name, mean in overall_means.items()
        }

        # Flag strategy/file pairs whose timings were too noisy.
        variance_warnings: list[VarianceWarning] = [
            VarianceWarning(
                strategy_id=entry.strategy_id,
                file_path=entry.file_path,
                coefficient_of_variation=entry.aggregate.coefficient_of_variation,
            )
            for entry in report.strategy_results
            if entry.aggregate.coefficient_of_variation > CV_WARNING_THRESHOLD
        ]

        return OptimizationSuggestions(
            category_recommendations=category_recommendations,
            overall_recommendation=overall_recommendation,
            estimated_savings=estimated_savings,
            variance_warnings=variance_warnings,
        )
|
|
azure77-0.1.0/src/azure77/benchmark/models.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Data models for the benchmark module.
|
|
2
|
+
|
|
3
|
+
All structures are stdlib dataclasses designed for easy serialization
|
|
4
|
+
via ``dataclasses.asdict()`` and JSON persistence.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class TimingSample:
    """One raw measurement taken during a single benchmark iteration."""

    # Wall-clock duration of the iteration, in seconds.
    wall_clock_seconds: float
    # Peak memory in bytes; stays None unless a value is supplied.
    peak_memory_bytes: int | None = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class AggregateResult:
    """Summary statistics computed over a set of TimingSamples."""

    # Wall-clock statistics.
    min_wall_clock: float
    max_wall_clock: float
    mean_wall_clock: float  # the advisor ranks strategies on this value
    median_wall_clock: float
    std_dev_wall_clock: float
    coefficient_of_variation: float  # the advisor warns when this exceeds its threshold
    # Throughput figures.
    rows_per_sec: float
    bytes_per_sec: float
    # Count of iterations that failed.
    failed_iterations: int
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class StrategyResult:
    """Outcome of benchmarking a single strategy against a single file."""

    # Which strategy was run, and against which file.
    strategy_id: str
    file_path: str
    # Dimensions of the file; the advisor buckets results on file_size_bytes.
    row_count: int
    column_count: int
    file_size_bytes: int
    # Per-iteration samples and the statistics summarising them.
    iterations: list[TimingSample]
    aggregate: AggregateResult
    # Failure reporting; both default to the "no problem" state.
    timed_out: bool = False
    error_message: str | None = None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class BenchmarkConfig:
    """Tunable parameters of a benchmark run; every field has a default."""

    iterations: int = 3  # iterations per run
    warmup_iterations: int = 1  # warm-up iterations per run
    timeout_seconds: int = 1800  # 30 minutes
    memory_tracking: bool = False  # off unless explicitly enabled
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
class EnvironmentInfo:
    """Descriptive metadata about the runtime environment.

    Every field is a plain string and all of them are required.
    """

    # Interpreter and operating system.
    python_version: str
    os_info: str
    platform_name: str
    # Version of the pyodbc driver package.
    pyodbc_version: str
    # Hardware identification.
    machine: str
    processor: str
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclass
class BenchmarkReport:
    """Top-level result object for one completed benchmark run."""

    # Identification of the run.
    run_id: str
    timestamp: str
    mode: str
    # Settings and environment the run was executed with.
    config: BenchmarkConfig
    environment: EnvironmentInfo
    # One entry per strategy/file pair; this is what the advisor analyses.
    strategy_results: list[StrategyResult]
    # Ranking as a list of strings.
    rankings: list[str]
    # Optional aggregate for the whole run; None unless supplied.
    aggregate: AggregateResult | None = None
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@dataclass
class CategorySuggestion:
    """Strategy recommendation for one file-size category."""

    category: str  # the advisor uses 'small', 'medium' or 'large'
    recommended_strategy: str  # strategy_id of the recommended strategy
    estimated_savings: float  # the advisor stores slowest-minus-fastest average time here
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@dataclass
class VarianceWarning:
    """Flags a strategy/file pair whose coefficient of variation is high."""

    # The pair the warning refers to.
    strategy_id: str
    file_path: str
    # The coefficient of variation that triggered the warning.
    coefficient_of_variation: float
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@dataclass
class OptimizationSuggestions:
    """Rule-based advice produced from a BenchmarkReport."""

    # Recommendation per file-size category, keyed by category name.
    category_recommendations: dict[str, CategorySuggestion]
    # Recommended strategy across all data, or None.
    overall_recommendation: str | None
    # Estimated saving per strategy, keyed by strategy_id.
    estimated_savings: dict[str, float]
    # Strategy/file pairs flagged for high variance.
    variance_warnings: list[VarianceWarning]
|