azure77 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. azure77-0.1.0/LICENSE +21 -0
  2. azure77-0.1.0/PKG-INFO +110 -0
  3. azure77-0.1.0/README.md +79 -0
  4. azure77-0.1.0/pyproject.toml +44 -0
  5. azure77-0.1.0/setup.cfg +4 -0
  6. azure77-0.1.0/src/azure77/__init__.py +86 -0
  7. azure77-0.1.0/src/azure77/benchmark/__init__.py +25 -0
  8. azure77-0.1.0/src/azure77/benchmark/advisor.py +128 -0
  9. azure77-0.1.0/src/azure77/benchmark/models.py +112 -0
  10. azure77-0.1.0/src/azure77/benchmark/report_builder.py +143 -0
  11. azure77-0.1.0/src/azure77/benchmark/service.py +430 -0
  12. azure77-0.1.0/src/azure77/benchmark/stats.py +88 -0
  13. azure77-0.1.0/src/azure77/benchmark/store.py +165 -0
  14. azure77-0.1.0/src/azure77/benchmark/timer.py +61 -0
  15. azure77-0.1.0/src/azure77/cli/__init__.py +23 -0
  16. azure77-0.1.0/src/azure77/cli/benchmark.py +417 -0
  17. azure77-0.1.0/src/azure77/cli/client.py +171 -0
  18. azure77-0.1.0/src/azure77/cli/connection.py +107 -0
  19. azure77-0.1.0/src/azure77/cli/datasets.py +274 -0
  20. azure77-0.1.0/src/azure77/cli/upload.py +317 -0
  21. azure77-0.1.0/src/azure77/client_exceptions.py +48 -0
  22. azure77-0.1.0/src/azure77/client_models.py +42 -0
  23. azure77-0.1.0/src/azure77/client_service.py +255 -0
  24. azure77-0.1.0/src/azure77/config_loader.py +71 -0
  25. azure77-0.1.0/src/azure77/connection_manager.py +293 -0
  26. azure77-0.1.0/src/azure77/database.py +119 -0
  27. azure77-0.1.0/src/azure77/datasets/__init__.py +23 -0
  28. azure77-0.1.0/src/azure77/datasets/exceptions.py +28 -0
  29. azure77-0.1.0/src/azure77/datasets/models.py +95 -0
  30. azure77-0.1.0/src/azure77/datasets/queries.py +276 -0
  31. azure77-0.1.0/src/azure77/datasets/service.py +657 -0
  32. azure77-0.1.0/src/azure77/datasets/snapshots.py +224 -0
  33. azure77-0.1.0/src/azure77/exceptions.py +25 -0
  34. azure77-0.1.0/src/azure77/history_store.py +146 -0
  35. azure77-0.1.0/src/azure77/metadata_repository.py +264 -0
  36. azure77-0.1.0/src/azure77/models.py +74 -0
  37. azure77-0.1.0/src/azure77/query_service.py +314 -0
  38. azure77-0.1.0/src/azure77/result_exporter.py +140 -0
  39. azure77-0.1.0/src/azure77/safety.py +73 -0
  40. azure77-0.1.0/src/azure77/saved_query_store.py +173 -0
  41. azure77-0.1.0/src/azure77/schema_manager.py +45 -0
  42. azure77-0.1.0/src/azure77/slug_generator.py +81 -0
  43. azure77-0.1.0/src/azure77/sql_parser.py +146 -0
  44. azure77-0.1.0/src/azure77/upload/__init__.py +25 -0
  45. azure77-0.1.0/src/azure77/upload/exceptions.py +17 -0
  46. azure77-0.1.0/src/azure77/upload/import_logger.py +222 -0
  47. azure77-0.1.0/src/azure77/upload/models.py +69 -0
  48. azure77-0.1.0/src/azure77/upload/normalizer.py +88 -0
  49. azure77-0.1.0/src/azure77/upload/parser.py +279 -0
  50. azure77-0.1.0/src/azure77/upload/preview.py +127 -0
  51. azure77-0.1.0/src/azure77/upload/service.py +407 -0
  52. azure77-0.1.0/src/azure77/upload/type_inferrer.py +265 -0
  53. azure77-0.1.0/src/azure77.egg-info/PKG-INFO +110 -0
  54. azure77-0.1.0/src/azure77.egg-info/SOURCES.txt +72 -0
  55. azure77-0.1.0/src/azure77.egg-info/dependency_links.txt +1 -0
  56. azure77-0.1.0/src/azure77.egg-info/entry_points.txt +2 -0
  57. azure77-0.1.0/src/azure77.egg-info/requires.txt +8 -0
  58. azure77-0.1.0/src/azure77.egg-info/top_level.txt +1 -0
  59. azure77-0.1.0/tests/test_advisor.py +259 -0
  60. azure77-0.1.0/tests/test_benchmark_service.py +350 -0
  61. azure77-0.1.0/tests/test_client_exceptions.py +161 -0
  62. azure77-0.1.0/tests/test_connection_manager.py +334 -0
  63. azure77-0.1.0/tests/test_dataset_service.py +736 -0
  64. azure77-0.1.0/tests/test_exceptions.py +39 -0
  65. azure77-0.1.0/tests/test_history_store.py +156 -0
  66. azure77-0.1.0/tests/test_import_logger.py +342 -0
  67. azure77-0.1.0/tests/test_import_service.py +633 -0
  68. azure77-0.1.0/tests/test_metadata_repository.py +551 -0
  69. azure77-0.1.0/tests/test_preview_service.py +357 -0
  70. azure77-0.1.0/tests/test_query_service.py +211 -0
  71. azure77-0.1.0/tests/test_report_builder.py +183 -0
  72. azure77-0.1.0/tests/test_snapshots.py +255 -0
  73. azure77-0.1.0/tests/test_stats.py +121 -0
  74. azure77-0.1.0/tests/test_type_inferrer.py +287 -0
azure77-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 77 Indicadores
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
azure77-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.4
2
+ Name: azure77
3
+ Version: 0.1.0
4
+ Summary: Upload, query and manage data on Azure SQL Server from Python
5
+ Author: 77 Indicadores
6
+ License-Expression: MIT
7
+ Keywords: azure,sql,database,upload,data
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Database
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Requires-Python: >=3.9
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: pyodbc>=4.0.30
23
+ Requires-Dist: python-dotenv>=0.19
24
+ Requires-Dist: unidecode>=1.3
25
+ Requires-Dist: chardet>=5.0
26
+ Requires-Dist: openpyxl>=3.1
27
+ Requires-Dist: xlrd>=2.0
28
+ Requires-Dist: click>=8.0
29
+ Requires-Dist: rich>=12.0
30
+ Dynamic: license-file
31
+
32
+ # azure77
33
+
34
+ Upload, query and manage data on Azure SQL Server from Python.
35
+
36
+ ## Installation
37
+
38
+ ```bash
39
+ pip install azure77
40
+ ```
41
+
42
+ ## Quick Start
43
+
44
+ ```python
45
+ from azure77 import ConnectionManager
46
+
47
+ # Connect to Azure SQL
48
+ cm = ConnectionManager(
49
+ server="your-server.database.windows.net",
50
+ database="your-db",
51
+ user="your-user",
52
+ password="your-pass",
53
+ )
54
+
55
+ # Test connection
56
+ result = cm.test_connection()
57
+ print(result.success, result.message)
58
+ ```
59
+
60
+ ## Features
61
+
62
+ - **Connection management** — Azure SQL via pyodbc (installed by default) or pymssql (not a declared dependency; install it separately)
63
+ - **Client/schema organization** — each client maps to a SQL schema
64
+ - **File upload** — CSV, XLSX, XLS with automatic encoding detection
65
+ - **Column normalization** — lowercase, accent-free, SQL-safe names
66
+ - **Type inference** — automatic INTEGER, DECIMAL, DATE, NVARCHAR
67
+ - **SQL query execution** — with safety validation (blocks DROP, DELETE, etc.)
68
+ - **Query history** — stored in local SQLite
69
+ - **Dataset browsing** — metadata, import logs, schema diff
70
+ - **Benchmark** — measure upload performance across strategies
71
+
72
+ ## CLI
73
+
74
+ ```bash
75
+ azure77 connection test # Test Azure SQL connection
76
+ azure77 connection status # Show config status
77
+ azure77 client list # List clients
78
+ azure77 client create "Nome" # Create client
79
+ azure77 upload preview file.csv # Preview file
80
+ azure77 upload import file.csv --client adl --table vendas
81
+ azure77 datasets list adl # List datasets
82
+ azure77 benchmark run file.csv # Run benchmark
83
+ ```
84
+
85
+ ## Configuration
86
+
87
+ Create a `.env` file:
88
+
89
+ ```
90
+ AZURE77_SERVER=server.database.windows.net
91
+ AZURE77_DATABASE=dbname
92
+ AZURE77_USER=username
93
+ AZURE77_PASSWORD=password
94
+ ```
95
+
96
+ Or pass directly:
97
+
98
+ ```python
99
+ cm = ConnectionManager(
100
+ server="...",
101
+ database="...",
102
+ user="...",
103
+ password="...",
104
+ driver="pymssql", # or "pyodbc"
105
+ )
106
+ ```
107
+
108
+ ## License
109
+
110
+ MIT
@@ -0,0 +1,79 @@
1
+ # azure77
2
+
3
+ Upload, query and manage data on Azure SQL Server from Python.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install azure77
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```python
14
+ from azure77 import ConnectionManager
15
+
16
+ # Connect to Azure SQL
17
+ cm = ConnectionManager(
18
+ server="your-server.database.windows.net",
19
+ database="your-db",
20
+ user="your-user",
21
+ password="your-pass",
22
+ )
23
+
24
+ # Test connection
25
+ result = cm.test_connection()
26
+ print(result.success, result.message)
27
+ ```
28
+
29
+ ## Features
30
+
31
+ - **Connection management** — Azure SQL via pyodbc (installed by default) or pymssql (not a declared dependency; install it separately)
32
+ - **Client/schema organization** — each client maps to a SQL schema
33
+ - **File upload** — CSV, XLSX, XLS with automatic encoding detection
34
+ - **Column normalization** — lowercase, accent-free, SQL-safe names
35
+ - **Type inference** — automatic INTEGER, DECIMAL, DATE, NVARCHAR
36
+ - **SQL query execution** — with safety validation (blocks DROP, DELETE, etc.)
37
+ - **Query history** — stored in local SQLite
38
+ - **Dataset browsing** — metadata, import logs, schema diff
39
+ - **Benchmark** — measure upload performance across strategies
40
+
41
+ ## CLI
42
+
43
+ ```bash
44
+ azure77 connection test # Test Azure SQL connection
45
+ azure77 connection status # Show config status
46
+ azure77 client list # List clients
47
+ azure77 client create "Nome" # Create client
48
+ azure77 upload preview file.csv # Preview file
49
+ azure77 upload import file.csv --client adl --table vendas
50
+ azure77 datasets list adl # List datasets
51
+ azure77 benchmark run file.csv # Run benchmark
52
+ ```
53
+
54
+ ## Configuration
55
+
56
+ Create a `.env` file:
57
+
58
+ ```
59
+ AZURE77_SERVER=server.database.windows.net
60
+ AZURE77_DATABASE=dbname
61
+ AZURE77_USER=username
62
+ AZURE77_PASSWORD=password
63
+ ```
64
+
65
+ Or pass directly:
66
+
67
+ ```python
68
+ cm = ConnectionManager(
69
+ server="...",
70
+ database="...",
71
+ user="...",
72
+ password="...",
73
+ driver="pymssql", # or "pyodbc"
74
+ )
75
+ ```
76
+
77
+ ## License
78
+
79
+ MIT
@@ -0,0 +1,44 @@
1
+ [build-system]
2
+ requires = ["setuptools>=64"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "azure77"
7
+ version = "0.1.0"
8
+ description = "Upload, query and manage data on Azure SQL Server from Python"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.9"
12
+ authors = [
13
+ { name = "77 Indicadores" },
14
+ ]
15
+ keywords = ["azure", "sql", "database", "upload", "data"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "Operating System :: OS Independent",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Topic :: Database",
27
+ "Topic :: Software Development :: Libraries :: Python Modules",
28
+ ]
29
+ dependencies = [
30
+ "pyodbc>=4.0.30",
31
+ "python-dotenv>=0.19",
32
+ "unidecode>=1.3",
33
+ "chardet>=5.0",
34
+ "openpyxl>=3.1",
35
+ "xlrd>=2.0",
36
+ "click>=8.0",
37
+ "rich>=12.0",
38
+ ]
39
+
40
+ [project.scripts]
41
+ azure77 = "azure77.cli:cli"
42
+
43
+ [tool.setuptools.packages.find]
44
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,86 @@
1
+ """azure77 -- Azure SQL Connection Management for Python."""
2
+
3
+ from azure77.connection_manager import ConnectionManager
4
+ from azure77.datasets.exceptions import (
5
+ ClientNotFoundError as DatasetsClientNotFoundError,
6
+ DatasetServiceError,
7
+ MetadataQueryError,
8
+ TableNotFoundError,
9
+ )
10
+ from azure77.datasets.models import (
11
+ ClientSummary,
12
+ ColumnMetadata,
13
+ DatasetMetadata,
14
+ DatasetSummary,
15
+ SchemaDiff,
16
+ VersionEvent,
17
+ )
18
+ from azure77.datasets.service import DatasetService
19
+ from azure77.exceptions import ConfigError, ConnectionTestError
20
+ from azure77.models import (
21
+ ConfigStatus,
22
+ ConfigTestResult,
23
+ QueryHistoryEntry,
24
+ QueryResult,
25
+ SavedQuery,
26
+ )
27
+ from azure77.query_service import QueryService
28
+ from azure77.safety import SafetyValidator
29
+ from azure77.sql_parser import extract_command_tokens
30
+
31
+ from azure77.client_service import ClientService
32
+ from azure77.client_models import client_record, schema_validation_result
33
+ from azure77.client_exceptions import (
34
+ Azure77Error,
35
+ ClientNotFoundError,
36
+ ClientValidationError,
37
+ DuplicateSlugError,
38
+ SchemaOperationError,
39
+ SlugValidationError,
40
+ )
41
+
42
+ __all__ = [
43
+ # connection_manager
44
+ "ConnectionManager",
45
+ # exceptions
46
+ "ConfigError",
47
+ "ConnectionTestError",
48
+ # models
49
+ "ConfigTestResult",
50
+ "ConfigStatus",
51
+ "QueryHistoryEntry",
52
+ "QueryResult",
53
+ "SavedQuery",
54
+ # query_service
55
+ "QueryService",
56
+ # safety
57
+ "SafetyValidator",
58
+ # sql_parser
59
+ "extract_command_tokens",
60
+ # datasets.service
61
+ "DatasetService",
62
+ # datasets.models
63
+ "DatasetSummary",
64
+ "ColumnMetadata",
65
+ "DatasetMetadata",
66
+ "ClientSummary",
67
+ "VersionEvent",
68
+ "SchemaDiff",
69
+ # datasets.exceptions
70
+ "DatasetServiceError",
71
+ "DatasetsClientNotFoundError",
72
+ "TableNotFoundError",
73
+ "MetadataQueryError",
74
+ # client_service
75
+ "ClientService",
76
+ # client_models
77
+ "client_record",
78
+ "schema_validation_result",
79
+ # client_exceptions
80
+ "Azure77Error",
81
+ "ClientNotFoundError",
82
+ "ClientValidationError",
83
+ "DuplicateSlugError",
84
+ "SchemaOperationError",
85
+ "SlugValidationError",
86
+ ]
@@ -0,0 +1,25 @@
1
+ """azure77.benchmark -- Performance benchmarking for upload strategies."""
2
+
3
+ from azure77.benchmark.models import (
4
+ AggregateResult,
5
+ BenchmarkConfig,
6
+ BenchmarkReport,
7
+ CategorySuggestion,
8
+ EnvironmentInfo,
9
+ OptimizationSuggestions,
10
+ StrategyResult,
11
+ TimingSample,
12
+ VarianceWarning,
13
+ )
14
+
15
+ __all__ = [
16
+ "AggregateResult",
17
+ "BenchmarkConfig",
18
+ "BenchmarkReport",
19
+ "CategorySuggestion",
20
+ "EnvironmentInfo",
21
+ "OptimizationSuggestions",
22
+ "StrategyResult",
23
+ "TimingSample",
24
+ "VarianceWarning",
25
+ ]
@@ -0,0 +1,128 @@
1
+ """Rule-based optimization advisor for benchmark reports."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections import defaultdict
6
+
7
+ from azure77.benchmark.models import (
8
+ BenchmarkReport,
9
+ CategorySuggestion,
10
+ OptimizationSuggestions,
11
+ VarianceWarning,
12
+ )
13
+
14
+ # ---------------------------------------------------------------------------
15
+ # Thresholds
16
+ # ---------------------------------------------------------------------------
17
+
18
+ SMALL_MAX_BYTES = 1_000_000 # < 1 MB
19
+ LARGE_MIN_BYTES = 50_000_000 # > 50 MB
20
+ CV_WARNING_THRESHOLD = 0.15 # coefficient of variation
21
+
22
+
23
+ def _categorize(file_size_bytes: int) -> str:
24
+ """Return 'small', 'medium', or 'large' for a given file size."""
25
+ if file_size_bytes < SMALL_MAX_BYTES:
26
+ return "small"
27
+ if file_size_bytes > LARGE_MIN_BYTES:
28
+ return "large"
29
+ return "medium"
30
+
31
+
32
class OptimizationAdvisor:
    """Analyze a BenchmarkReport and produce OptimizationSuggestions."""

    @staticmethod
    def _mean_time_by_strategy(results) -> dict[str, float]:
        """Average ``aggregate.mean_wall_clock`` per ``strategy_id``.

        Keys appear in the order each strategy is first seen in *results*,
        so tie-breaking by ``min``/``max`` over the mapping follows input order.
        """
        samples: dict[str, list[float]] = defaultdict(list)
        for sr in results:
            samples[sr.strategy_id].append(sr.aggregate.mean_wall_clock)
        # Every list has at least one entry (it is created by an append),
        # so the division below cannot hit a zero length.
        return {sid: sum(times) / len(times) for sid, times in samples.items()}

    def analyze(self, report: BenchmarkReport) -> OptimizationSuggestions:
        """Produce rule-based recommendations from a completed report.

        Steps:
        1. Categorize StrategyResults by file_size_bytes.
        2. Recommend the strategy with lowest mean_wall_clock per category.
        3. Compute estimated_savings per strategy vs the slowest strategy.
        4. Emit VarianceWarning for any strategy/file pair with
           CV > CV_WARNING_THRESHOLD.
        5. Determine overall_recommendation as best strategy across all data.

        Args:
            report: The benchmark report whose ``strategy_results`` are analyzed.

        Returns:
            An OptimizationSuggestions instance. When the report has no
            strategy results, ``category_recommendations`` and
            ``estimated_savings`` are empty, ``variance_warnings`` is empty
            and ``overall_recommendation`` is None.
        """
        # NOTE(review): results flagged ``timed_out`` or carrying an
        # ``error_message`` are not filtered out here; confirm that their
        # aggregate values cannot make a failed strategy look like the fastest.

        # 1. Categorize StrategyResults by size bucket.
        bucketed: dict[str, list] = defaultdict(list)
        for sr in report.strategy_results:
            bucketed[_categorize(sr.file_size_bytes)].append(sr)

        # 2. Per-category recommendations (categories with no results are skipped).
        category_recommendations: dict[str, CategorySuggestion] = {}
        for cat_name in ("small", "medium", "large"):
            results = bucketed.get(cat_name)
            if not results:
                continue

            avg_times = self._mean_time_by_strategy(results)
            best_strategy = min(avg_times, key=avg_times.get)
            # Savings are measured against the slowest strategy in the category.
            savings = max(avg_times.values()) - avg_times[best_strategy]

            category_recommendations[cat_name] = CategorySuggestion(
                category=cat_name,
                recommended_strategy=best_strategy,
                estimated_savings=savings,
            )

        # 3. Overall estimated savings per strategy vs. the slowest one.
        overall_avg = self._mean_time_by_strategy(report.strategy_results)
        overall_worst = max(overall_avg.values()) if overall_avg else 0.0
        estimated_savings = {
            sid: overall_worst - avg for sid, avg in overall_avg.items()
        }

        # 4. Variance warnings for noisy strategy/file pairs.
        variance_warnings: list[VarianceWarning] = [
            VarianceWarning(
                strategy_id=sr.strategy_id,
                file_path=sr.file_path,
                coefficient_of_variation=sr.aggregate.coefficient_of_variation,
            )
            for sr in report.strategy_results
            if sr.aggregate.coefficient_of_variation > CV_WARNING_THRESHOLD
        ]

        # 5. Overall recommendation: lowest average across all files, if any.
        overall_recommendation: str | None = (
            min(overall_avg, key=overall_avg.get) if overall_avg else None
        )

        # 6. Build and return.
        return OptimizationSuggestions(
            category_recommendations=category_recommendations,
            overall_recommendation=overall_recommendation,
            estimated_savings=estimated_savings,
            variance_warnings=variance_warnings,
        )
@@ -0,0 +1,112 @@
1
+ """Data models for the benchmark module.
2
+
3
+ All structures are stdlib dataclasses designed for easy serialization
4
+ via ``dataclasses.asdict()`` and JSON persistence.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass, field
10
+ from typing import Any
11
+
12
+
13
@dataclass
class TimingSample:
    """Raw measurement from a single benchmark iteration.

    Carries the elapsed wall-clock time and an optional peak-memory figure.
    """

    # Elapsed wall-clock time for the iteration, in seconds.
    wall_clock_seconds: float
    # Peak memory in bytes; None (the default) when no value was recorded.
    # NOTE(review): presumably only populated when memory tracking is
    # enabled in the benchmark configuration — confirm against the caller.
    peak_memory_bytes: int | None = None
19
+
20
+
21
@dataclass
class AggregateResult:
    """Statistical summary computed across multiple TimingSamples.

    All fields are required; the class itself performs no computation and
    only stores values supplied by the caller.
    """

    # Smallest, largest, mean and median wall-clock values of the samples.
    min_wall_clock: float
    max_wall_clock: float
    mean_wall_clock: float
    median_wall_clock: float
    # Standard deviation of the wall-clock values.
    std_dev_wall_clock: float
    # NOTE(review): presumably std_dev_wall_clock / mean_wall_clock —
    # confirm against the statistics helper that fills this in.
    coefficient_of_variation: float
    # Throughput figures (rows and bytes processed per second).
    rows_per_sec: float
    bytes_per_sec: float
    # Number of iterations that failed.
    failed_iterations: int
34
+
35
+
36
@dataclass
class StrategyResult:
    """Aggregated results for one strategy tested against one file."""

    # Identifier of the strategy that was benchmarked.
    strategy_id: str
    # Path of the input file the strategy was run against.
    file_path: str
    # Shape and size of the input file.
    row_count: int
    column_count: int
    file_size_bytes: int
    # Raw per-iteration measurements.
    iterations: list[TimingSample]
    # Statistical summary for this strategy/file pair.
    aggregate: AggregateResult
    # Timeout flag; False by default.
    timed_out: bool = False
    # Error description; None by default.
    # NOTE(review): presumably None means the run completed without error —
    # confirm against the benchmark service.
    error_message: str | None = None
49
+
50
+
51
@dataclass
class BenchmarkConfig:
    """Configuration parameters for a benchmark run.

    Every field has a default, so ``BenchmarkConfig()`` is a valid
    configuration.
    """

    # Number of iterations; defaults to 3.
    iterations: int = 3
    # Number of warm-up iterations; defaults to 1.
    # NOTE(review): presumably warm-up runs are excluded from the measured
    # samples — confirm against the benchmark service.
    warmup_iterations: int = 1
    # Timeout in seconds; defaults to 1800 (30 minutes).
    timeout_seconds: int = 1800
    # Whether memory tracking is enabled; off by default.
    memory_tracking: bool = False
59
+
60
+
61
@dataclass
class EnvironmentInfo:
    """Metadata about the runtime environment.

    All fields are plain strings and all are required.
    """

    # Python interpreter version.
    python_version: str
    # Operating system description.
    os_info: str
    # Platform name.
    platform_name: str
    # Version string of the pyodbc driver.
    pyodbc_version: str
    # Machine and processor descriptions.
    # NOTE(review): presumably taken from the stdlib ``platform`` module —
    # confirm where instances are built.
    machine: str
    processor: str
71
+
72
+
73
@dataclass
class BenchmarkReport:
    """Top-level report produced by a completed benchmark run."""

    # Identifier of the run.
    run_id: str
    # Timestamp of the run, stored as a string.
    # NOTE(review): format (e.g. ISO 8601) is not visible here — confirm.
    timestamp: str
    # Benchmark mode label.
    mode: str
    # Configuration used for the run.
    config: BenchmarkConfig
    # Runtime environment the run executed in.
    environment: EnvironmentInfo
    # One entry per strategy/file combination that was benchmarked.
    strategy_results: list[StrategyResult]
    # NOTE(review): presumably strategy ids ordered best-first — confirm
    # against the code that builds the report.
    rankings: list[str]
    # Optional report-level summary; None by default.
    aggregate: AggregateResult | None = None
85
+
86
+
87
@dataclass
class CategorySuggestion:
    """Recommended strategy for a file size category."""

    # Name of the file-size category this suggestion applies to.
    category: str
    # Identifier of the strategy recommended for the category.
    recommended_strategy: str
    # Estimated time saved by using the recommended strategy.
    # NOTE(review): presumably expressed in seconds of wall-clock time —
    # confirm against the code that fills this in.
    estimated_savings: float
94
+
95
+
96
@dataclass
class VarianceWarning:
    """Warning for a strategy-file pair with high coefficient of variation."""

    # Strategy and file the warning refers to.
    strategy_id: str
    file_path: str
    # The coefficient of variation that triggered the warning.
    coefficient_of_variation: float
103
+
104
+
105
@dataclass
class OptimizationSuggestions:
    """Rule-based recommendations derived from a BenchmarkReport."""

    # Suggestions keyed by file-size category name.
    category_recommendations: dict[str, CategorySuggestion]
    # Best strategy across all data, or None when there is nothing to recommend.
    overall_recommendation: str | None
    # Estimated savings keyed by strategy id.
    estimated_savings: dict[str, float]
    # Warnings for strategy/file pairs with a high coefficient of variation.
    variance_warnings: list[VarianceWarning]