excel-orm 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ .ruff_cache/
13
+ .pytest_cache/
14
+ .git/
15
+ .env
@@ -0,0 +1,273 @@
1
+ Metadata-Version: 2.4
2
+ Name: excel-orm
3
+ Version: 0.1.0
4
+ Summary: A lightweight Excel ORM for generating templates and parsing typed row models.
5
+ Project-URL: Homepage, https://github.com/acdelrusso/excel-orm
6
+ Project-URL: Repository, https://github.com/acdelrusso/excel-orm
7
+ Project-URL: Issues, https://github.com/acdelrusso/excel-orm/issues
8
+ Author: Anthony Del Russo
9
+ License: MIT
10
+ Keywords: etl,excel,openpyxl,orm
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Requires-Python: >=3.13
18
+ Requires-Dist: openpyxl>=3.1.5
19
+ Description-Content-Type: text/markdown
20
+
21
+ # Excel ORM
22
+
23
+ A lightweight, typed “Excel ORM” for generating Excel templates and parsing Excel workbooks into Python objects using column descriptors.
24
+
25
+ This project is designed for the common enterprise pattern where you:
26
+ 1) generate a structured `.xlsx` template for users,
27
+ 2) let users fill it in,
28
+ 3) load the workbook back into Python, producing typed objects grouped by model.
29
+
30
+ It uses `openpyxl` for reading/writing Excel files and supports multiple model “tables” on the same worksheet.
31
+
32
+ ---
33
+
34
+ ## Features
35
+
36
+ - **Typed column descriptors** (`text_column`, `int_column`, `bool_column`, `date_column`)
37
+ - **Template generation** with:
38
+ - merged **table title cells** (pluralized model name)
39
+ - bold headers
40
+ - sensible column widths
41
+ - multiple tables laid out horizontally on the same sheet with a configurable gap
42
+ - **Workbook parsing** into model-specific repositories:
43
+ - `excel_file.cars.all()` → `list[Car]`
44
+ - `excel_file.manufacturing_plants.all()` → `list[ManufacturingPlant]`
45
+ - **Validation hooks**
46
+ - column-level `not_null`
47
+ - optional row exclusion rules via `excludes`
48
+ - optional model-level `validate()` method
49
+
50
+ ---
51
+
52
+ ## Installation
53
+
54
+ ### From PyPI (once published)
55
+ ```bash
56
+ pip install excel-orm
57
+ ````
58
+
59
+ ### From source (uv)
60
+
61
+ ```bash
62
+ git clone <your-repo-url>
63
+ cd excel-orm
64
+ uv sync
65
+ ```
66
+
67
+ ---
68
+
69
+ ## Quick Start
70
+
71
+ ### 1) Define models using `Column[...]` descriptors
72
+
73
+ ```python
74
+ from excel_orm.column import Column, text_column, int_column
75
+ from excel_orm.orm import ExcelFile, SheetSpec
76
+
77
+ class Car:
78
+ make: Column[str] = text_column(header="Make", not_null=True)
79
+ model: Column[str] = text_column(header="Model", not_null=True)
80
+ year: Column[int] = int_column(header="Year", not_null=True)
81
+
82
+ class ManufacturingPlant:
83
+ name: Column[str] = text_column(header="Factory Name", not_null=True)
84
+ location: Column[str] = text_column(header="Location")
85
+ ```
86
+
87
+ ### 2) Declare a sheet containing multiple models
88
+
89
+ Each model becomes its own table on the same worksheet.
90
+
91
+ ```python
92
+ sheet = SheetSpec(
93
+ name="Cars",
94
+ models=[Car, ManufacturingPlant],
95
+
96
+ # Layout rows
97
+ title_row=1,
98
+ header_row=2,
99
+ data_start_row=3,
100
+
101
+ # Horizontal spacing between model tables
102
+ template_table_gap=2,
103
+ )
104
+ ```
105
+
106
+ ### 3) Create an `ExcelFile`, generate a template, then load data
107
+
108
+ ```python
109
+ excel_file = ExcelFile(sheets=[sheet])
110
+
111
+ # Generate a blank template workbook
112
+ excel_file.generate_template("car_inventory_template.xlsx")
113
+
114
+ # Users fill in data in Excel...
115
+
116
+ # Load the filled workbook into repositories
117
+ excel_file.load_data("car_inventory_data.xlsx")
118
+
119
+ cars = excel_file.cars.all()
120
+ plants = excel_file.manufacturing_plants.all()
121
+
122
+ print(cars[0].make, cars[0].year)
123
+ print(plants[0].name, plants[0].location)
124
+ ```
125
+
126
+ ---
127
+
128
+ ## How It Works
129
+
130
+ ### Repositories
131
+
132
+ For each model you register, `ExcelFile` creates a repository attribute on the instance using a snake_case pluralized name:
133
+
134
+ * `Car` → `excel_file.cars`
135
+ * `ManufacturingPlant` → `excel_file.manufacturing_plants`
136
+
137
+ Repositories are simple list-like containers with an `all()` helper:
138
+
139
+ ```python
140
+ cars = excel_file.cars.all() # list[Car]
141
+ ```
142
+
143
+ ### Multi-table Sheets
144
+
145
+ A single worksheet can host multiple model tables. During template generation:
146
+
147
+ * A merged title cell is written above each table (pluralized class name in title case).
148
+ * Headers appear under the title.
149
+ * Data rows begin at `data_start_row`.
150
+ * Tables are placed horizontally with `template_table_gap` blank columns between them.
151
+
152
+ During parsing:
153
+
154
+ * The library locates each model table by matching the expected header sequence.
155
+ * It reads contiguous rows until a blank row is encountered.
156
+
157
+ ---
158
+
159
+ ## Column Types
160
+
161
+ ### Text
162
+
163
+ ```python
164
+ from excel_orm.column import Column, text_column
165
+
166
+ class Example:
167
+ name: Column[str] = text_column(header="Name", not_null=True, strip=True)
168
+ ```
169
+
170
+ * `None` parses to `""` (empty string).
171
+ * `strip=True` trims whitespace.
172
+
173
+ ### Integer
174
+
175
+ ```python
176
+ from excel_orm.column import Column, int_column
177
+
178
+ class Example:
179
+ qty: Column[int] = int_column(header="Qty", not_null=True)
180
+ ```
181
+
182
+ * `None` or `""` parses to `0`.
183
+
184
+ ### Boolean
185
+
186
+ ```python
187
+ from excel_orm.column import Column, bool_column
188
+
189
+ class Example:
190
+ active: Column[bool] = bool_column(header="Active")
191
+ ```
192
+
193
+ Accepted values include:
194
+
195
+ * True: `true, t, yes, y, 1` (case-insensitive)
196
+ * False: `false, f, no, n, 0`
197
+ * `None` / empty parses to `False`
198
+
199
+ Invalid values raise `ValueError`.
200
+
201
+ ### Date
202
+
203
+ ```python
204
+ from excel_orm.column import Column, date_column
205
+
206
+ class Example:
207
+ start_date: Column[date] = date_column(header="Start Date")
208
+ ```
209
+
210
+ The date parser supports:
211
+
212
+ * Excel-native `datetime`/`date` values from `openpyxl`
213
+ * ISO strings like `2025-06-01` and `2025-06-01T13:45:00`
214
+ * Common business formats including `01-JUN-2025`
215
+
216
+ Invalid/empty values raise `ValueError`.
217
+
218
+ ---
219
+
220
+ ## Validation
221
+
222
+ ### Column-level: `not_null`
223
+
224
+ ```python
225
+ class Car:
226
+ make: Column[str] = text_column(header="Make", not_null=True)
227
+ ```
228
+
229
+ If a `not_null=True` column parses to `None` or `""`, a `ValueError` is raised.
230
+
231
+ ### Row exclusion: `excludes`
232
+
233
+ If you set `excludes`, rows matching those raw values in that column will be skipped.
234
+
235
+ ```python
236
+ status: Column[str] = text_column(header="Status")
237
+ status.spec.excludes = {"IGNORE", "SKIP"} # example pattern
238
+ ```
239
+
240
+ (If you want a nicer API for excludes, consider adding it directly to the column factory signature.)
241
+
242
+ ### Model-level: `validate()`
243
+
244
+ If your model defines a `validate(self)` method, it is called after a row is parsed.
245
+
246
+ ```python
247
+ class Car:
248
+ make: Column[str] = text_column(header="Make", not_null=True)
249
+ year: Column[int] = int_column(header="Year", not_null=True)
250
+
251
+ def validate(self) -> None:
252
+ if self.year < 1886:
253
+ raise ValueError("Invalid car year")
254
+ ```
255
+
256
+ ---
257
+
258
+ ## Development
259
+
260
+ ### Run tests
261
+
262
+ ```bash
263
+ uv run pytest
264
+ ```
265
+
266
+ ### Lint/format (example)
267
+
268
+ If you use Ruff:
269
+
270
+ ```bash
271
+ uv run ruff check .
272
+ uv run ruff format .
273
+ ```
@@ -0,0 +1,253 @@
1
+ # Excel ORM
2
+
3
+ A lightweight, typed “Excel ORM” for generating Excel templates and parsing Excel workbooks into Python objects using column descriptors.
4
+
5
+ This project is designed for the common enterprise pattern where you:
6
+ 1) generate a structured `.xlsx` template for users,
7
+ 2) let users fill it in,
8
+ 3) load the workbook back into Python, producing typed objects grouped by model.
9
+
10
+ It uses `openpyxl` for reading/writing Excel files and supports multiple model “tables” on the same worksheet.
11
+
12
+ ---
13
+
14
+ ## Features
15
+
16
+ - **Typed column descriptors** (`text_column`, `int_column`, `bool_column`, `date_column`)
17
+ - **Template generation** with:
18
+ - merged **table title cells** (pluralized model name)
19
+ - bold headers
20
+ - sensible column widths
21
+ - multiple tables laid out horizontally on the same sheet with a configurable gap
22
+ - **Workbook parsing** into model-specific repositories:
23
+ - `excel_file.cars.all()` → `list[Car]`
24
+ - `excel_file.manufacturing_plants.all()` → `list[ManufacturingPlant]`
25
+ - **Validation hooks**
26
+ - column-level `not_null`
27
+ - optional row exclusion rules via `excludes`
28
+ - optional model-level `validate()` method
29
+
30
+ ---
31
+
32
+ ## Installation
33
+
34
+ ### From PyPI (once published)
35
+ ```bash
36
+ pip install excel-orm
37
+ ```
38
+
39
+ ### From source (uv)
40
+
41
+ ```bash
42
+ git clone https://github.com/acdelrusso/excel-orm.git
43
+ cd excel-orm
44
+ uv sync
45
+ ```
46
+
47
+ ---
48
+
49
+ ## Quick Start
50
+
51
+ ### 1) Define models using `Column[...]` descriptors
52
+
53
+ ```python
54
+ from excel_orm.column import Column, text_column, int_column
55
+ from excel_orm.orm import ExcelFile, SheetSpec
56
+
57
+ class Car:
58
+ make: Column[str] = text_column(header="Make", not_null=True)
59
+ model: Column[str] = text_column(header="Model", not_null=True)
60
+ year: Column[int] = int_column(header="Year", not_null=True)
61
+
62
+ class ManufacturingPlant:
63
+ name: Column[str] = text_column(header="Factory Name", not_null=True)
64
+ location: Column[str] = text_column(header="Location")
65
+ ```
66
+
67
+ ### 2) Declare a sheet containing multiple models
68
+
69
+ Each model becomes its own table on the same worksheet.
70
+
71
+ ```python
72
+ sheet = SheetSpec(
73
+ name="Cars",
74
+ models=[Car, ManufacturingPlant],
75
+
76
+ # Layout rows
77
+ title_row=1,
78
+ header_row=2,
79
+ data_start_row=3,
80
+
81
+ # Horizontal spacing between model tables
82
+ template_table_gap=2,
83
+ )
84
+ ```
85
+
86
+ ### 3) Create an `ExcelFile`, generate a template, then load data
87
+
88
+ ```python
89
+ excel_file = ExcelFile(sheets=[sheet])
90
+
91
+ # Generate a blank template workbook
92
+ excel_file.generate_template("car_inventory_template.xlsx")
93
+
94
+ # Users fill in data in Excel...
95
+
96
+ # Load the filled workbook into repositories
97
+ excel_file.load_data("car_inventory_data.xlsx")
98
+
99
+ cars = excel_file.cars.all()
100
+ plants = excel_file.manufacturing_plants.all()
101
+
102
+ print(cars[0].make, cars[0].year)
103
+ print(plants[0].name, plants[0].location)
104
+ ```
105
+
106
+ ---
107
+
108
+ ## How It Works
109
+
110
+ ### Repositories
111
+
112
+ For each model you register, `ExcelFile` creates a repository attribute on the instance using a snake_case pluralized name:
113
+
114
+ * `Car` → `excel_file.cars`
115
+ * `ManufacturingPlant` → `excel_file.manufacturing_plants`
116
+
117
+ Repositories are simple list-like containers with an `all()` helper:
118
+
119
+ ```python
120
+ cars = excel_file.cars.all() # list[Car]
121
+ ```
122
+
123
+ ### Multi-table Sheets
124
+
125
+ A single worksheet can host multiple model tables. During template generation:
126
+
127
+ * A merged title cell is written above each table (pluralized class name in title case).
128
+ * Headers appear under the title.
129
+ * Data rows begin at `data_start_row`.
130
+ * Tables are placed horizontally with `template_table_gap` blank columns between them.
131
+
132
+ During parsing:
133
+
134
+ * The library locates each model table by matching the expected header sequence.
135
+ * It reads contiguous rows until a blank row is encountered.
136
+
137
+ ---
138
+
139
+ ## Column Types
140
+
141
+ ### Text
142
+
143
+ ```python
144
+ from excel_orm.column import Column, text_column
145
+
146
+ class Example:
147
+ name: Column[str] = text_column(header="Name", not_null=True, strip=True)
148
+ ```
149
+
150
+ * `None` parses to `""` (empty string).
151
+ * `strip=True` trims whitespace.
152
+
153
+ ### Integer
154
+
155
+ ```python
156
+ from excel_orm.column import Column, int_column
157
+
158
+ class Example:
159
+ qty: Column[int] = int_column(header="Qty", not_null=True)
160
+ ```
161
+
162
+ * `None` or `""` parses to `0`.
163
+
164
+ ### Boolean
165
+
166
+ ```python
167
+ from excel_orm.column import Column, bool_column
168
+
169
+ class Example:
170
+ active: Column[bool] = bool_column(header="Active")
171
+ ```
172
+
173
+ Accepted values include:
174
+
175
+ * True: `true, t, yes, y, 1` (case-insensitive)
176
+ * False: `false, f, no, n, 0`
177
+ * `None` / empty parses to `False`
178
+
179
+ Invalid values raise `ValueError`.
180
+
181
+ ### Date
182
+
183
+ ```python
184
+ from datetime import date
+
+ from excel_orm.column import Column, date_column
185
+
186
+ class Example:
187
+ start_date: Column[date] = date_column(header="Start Date")
188
+ ```
189
+
190
+ The date parser supports:
191
+
192
+ * Excel-native `datetime`/`date` values from `openpyxl`
193
+ * ISO strings like `2025-06-01` and `2025-06-01T13:45:00`
194
+ * Common business formats including `01-JUN-2025`
195
+
196
+ Invalid/empty values raise `ValueError`.
197
+
198
+ ---
199
+
200
+ ## Validation
201
+
202
+ ### Column-level: `not_null`
203
+
204
+ ```python
205
+ class Car:
206
+ make: Column[str] = text_column(header="Make", not_null=True)
207
+ ```
208
+
209
+ If a `not_null=True` column parses to `None` or `""`, a `ValueError` is raised.
210
+
211
+ ### Row exclusion: `excludes`
212
+
213
+ If you set `excludes`, rows matching those raw values in that column will be skipped.
214
+
215
+ ```python
216
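+ status: Column[str] = text_column(header="Status")
217
+ # ColumnSpec is a frozen dataclass, so swap in a modified copy (requires `import dataclasses`)
+ status.spec = dataclasses.replace(status.spec, excludes={"IGNORE", "SKIP"})  # example pattern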
218
+ ```
219
+
220
+ (If you want a nicer API for excludes, consider adding it directly to the column factory signature.)
221
+
222
+ ### Model-level: `validate()`
223
+
224
+ If your model defines a `validate(self)` method, it is called after a row is parsed.
225
+
226
+ ```python
227
+ class Car:
228
+ make: Column[str] = text_column(header="Make", not_null=True)
229
+ year: Column[int] = int_column(header="Year", not_null=True)
230
+
231
+ def validate(self) -> None:
232
+ if self.year < 1886:
233
+ raise ValueError("Invalid car year")
234
+ ```
235
+
236
+ ---
237
+
238
+ ## Development
239
+
240
+ ### Run tests
241
+
242
+ ```bash
243
+ uv run pytest
244
+ ```
245
+
246
+ ### Lint/format (example)
247
+
248
+ If you use Ruff:
249
+
250
+ ```bash
251
+ uv run ruff check .
252
+ uv run ruff format .
253
+ ```
@@ -0,0 +1,14 @@
1
+ from .column import Column, ColumnSpec, bool_column, date_column, int_column, text_column
2
+ from .orm import ExcelFile, PivotSheetSpec, SheetSpec
3
+
4
+ __all__ = [
5
+ "Column",
6
+ "ColumnSpec",
7
+ "ExcelFile",
8
+ "PivotSheetSpec",
9
+ "SheetSpec",
10
+ "bool_column",
11
+ "date_column",
12
+ "int_column",
13
+ "text_column",
14
+ ]
@@ -0,0 +1,175 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable
4
+ from dataclasses import dataclass
5
+ from datetime import date, datetime
6
+ from typing import Any, TypeVar
7
+
8
+ T = TypeVar("T")
9
+
10
+
11
+ @dataclass(frozen=True)
12
+ class ColumnSpec[T]:
13
+ header: str | None = None # header string in Excel
14
+ default: T | None = None
15
+ not_null: bool = False # parsed value cannot be None/empty
16
+ excludes: set[Any] | None = None # raw values that mark row as excluded
17
+ parser: Callable[[Any], T] = lambda x: x # raw -> parsed
18
+ renderer: Callable[[T | None], Any] = lambda x: x # parsed -> raw
19
+ validator: Callable[[T | None], None] = lambda _: None
20
+
21
+
22
+ class Column[T]:
23
+ def __init__(self, spec: ColumnSpec[T]):
24
+ self.spec = spec
25
+
26
+ def __set_name__(self, owner, name: str):
27
+ self.name = name
28
+ # register in definition order
29
+ reg = owner.__dict__.get("__columns__")
30
+ if reg is None:
31
+ owner.__columns__ = []
32
+ owner.__columns__.append(self)
33
+
34
+ def __get__(self, obj, objtype=None) -> T | Column | None:
35
+ if obj is None:
36
+ return self
37
+ return obj._values.get(self.name) # centralized storage
38
+
39
+ def __set__(self, obj, value: T | None):
40
+ self.validate(value)
41
+ obj._values[self.name] = value
42
+
43
+ def parse_cell(self, raw: Any) -> T | None:
44
+ return self.spec.parser(raw)
45
+
46
+ def validate(self, value: T | None) -> None:
47
+ if self.spec.not_null and (value is None or value == ""):
48
+ raise ValueError(f"{self.name} cannot be null/empty")
49
+ self.spec.validator(value)
50
+
51
+
52
def text_column(
    header: str | None = None,
    *,
    default: str | None = None,
    strip: bool = True,
    not_null: bool = False,
    excludes: set[Any] | None = None,
) -> Column[str]:
    """Build a string column.

    Args:
        header: Header text for the column in Excel.
        default: Value a freshly instantiated model holds for this column.
        strip: Trim surrounding whitespace from parsed values.
        not_null: Reject ``None``/empty parsed values.
        excludes: Raw cell values that cause the whole row to be skipped.
            ``ColumnSpec`` is frozen, so this has to be supplied here rather
            than assigned on the spec afterwards.

    Parsing maps ``None`` to ``""``; everything else goes through ``str``.
    """

    def parse(raw: Any) -> str:
        if raw is None:
            return ""
        text = str(raw)
        return text.strip() if strip else text

    return Column(
        ColumnSpec[str](
            header=header,
            default=default,
            not_null=not_null,
            # Copy so later mutation of the caller's set cannot change the spec.
            excludes=None if excludes is None else set(excludes),
            parser=parse,
            renderer=lambda v: "" if v is None else v,
        )
    )
76
+
77
+
78
def int_column(
    header: str | None = None,
    *,
    default: int | None = None,
    not_null: bool = False,
    excludes: set[Any] | None = None,
) -> Column[int]:
    """Build an integer column.

    Args:
        header: Header text for the column in Excel.
        default: Value a freshly instantiated model holds for this column.
        not_null: Reject ``None``/empty parsed values.
        excludes: Raw cell values that cause the whole row to be skipped.

    Parsing maps ``None`` and ``""`` to ``0``. A float with a fractional
    part raises ``ValueError`` instead of being truncated silently.
    """

    def parse(raw: Any) -> int:
        if raw is None or raw == "":
            return 0
        # int(2.7) would quietly become 2; treat that as bad data instead.
        # NaN and infinity are rejected here too (is_integer() is False).
        if isinstance(raw, float) and not raw.is_integer():
            raise ValueError(f"Invalid integer: {raw!r}")
        return int(raw)

    return Column(
        ColumnSpec[int](
            header=header,
            default=default,
            not_null=not_null,
            excludes=None if excludes is None else set(excludes),
            parser=parse,
        )
    )
97
+
98
+
99
def bool_column(
    header: str | None = None,
    *,
    default: bool | None = None,
    excludes: set[Any] | None = None,
) -> Column[bool]:
    """Build a boolean column.

    Args:
        header: Header text for the column in Excel.
        default: Value a freshly instantiated model holds for this column.
        excludes: Raw cell values that cause the whole row to be skipped.

    Parsing accepts real booleans, the numbers 0 and 1 (int or float), and
    the case-insensitive strings true/t/yes/y/1 and false/f/no/n/0.
    ``None`` and ``""`` parse to ``False``; anything else raises
    ``ValueError``.
    """
    truthy = {"true", "t", "yes", "y", "1"}
    falsy = {"false", "f", "no", "n", "0"}

    def parse(raw: Any) -> bool:
        if raw is None or raw == "":
            return False
        if isinstance(raw, bool):
            return raw
        # A numeric cell may arrive as 1.0 rather than 1; str(1.0) == "1.0"
        # would otherwise be rejected as an invalid boolean.
        if isinstance(raw, (int, float)) and raw in (0, 1):
            return bool(raw)
        text = str(raw).strip().lower()
        if text in truthy:
            return True
        if text in falsy:
            return False
        raise ValueError(f"Invalid boolean: {raw}")

    return Column(
        ColumnSpec[bool](
            header=header,
            default=default,
            excludes=None if excludes is None else set(excludes),
            parser=parse,
        )
    )
119
+
120
+
121
def date_column(
    header: str | None = None,
    *,
    default: date | None = None,
    excludes: set[Any] | None = None,
) -> Column[date]:
    """Build a date column.

    Args:
        header: Header text for the column in Excel.
        default: Value a freshly instantiated model holds for this column.
        excludes: Raw cell values that cause the whole row to be skipped.

    Parsing accepts ``date``/``datetime`` objects, ISO-8601 strings, and the
    textual formats listed below. Empty or unrecognised input raises
    ``ValueError``.
    """
    # Tried in order, so an ambiguous value such as 01/02/2025 resolves as
    # month/day before day/month.
    date_formats: tuple[str, ...] = (
        "%d-%b-%Y",  # 01-JUN-2025
        "%d-%b-%y",  # 01-JUN-25
        "%d %b %Y",  # 01 JUN 2025
        "%d %b %y",  # 01 JUN 25
        "%d/%b/%Y",  # 01/JUN/2025
        "%Y-%m-%d",  # 2025-06-01
        "%Y/%m/%d",  # 2025/06/01
        "%m/%d/%Y",  # 06/01/2025
        "%m/%d/%y",  # 06/01/25
        "%d/%m/%Y",  # 01/06/2025
        "%d/%m/%y",  # 01/06/25
    )

    def parse(raw: Any) -> date:
        if raw is None or raw == "":
            raise ValueError("Date Value was empty")

        # datetime subclasses date, so it has to be tested first.
        if isinstance(raw, datetime):
            return raw.date()
        if isinstance(raw, date):
            return raw

        text = str(raw).strip()
        if text == "":
            raise ValueError("Date Value was empty")

        # 1) ISO-8601 fast path ("2025-06-01", "2025-06-01T13:45:00", ...).
        try:
            return datetime.fromisoformat(text).date()
        except ValueError:
            pass

        # 2) Known patterns, matched against the upper-cased text.
        normalized = text.upper()
        for fmt in date_formats:
            try:
                return datetime.strptime(normalized, fmt).date()
            except ValueError:
                continue

        raise ValueError(f"Invalid date value: {raw!r}")

    return Column(
        ColumnSpec[date](
            header=header,
            default=default,
            excludes=None if excludes is None else set(excludes),
            parser=parse,
            renderer=lambda d: None if d is None else d,  # openpyxl handles date types
        )
    )
@@ -0,0 +1,353 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import dataclass
5
+ from typing import Any, TypeVar
6
+
7
+ from openpyxl import Workbook, load_workbook
8
+ from openpyxl.styles import Alignment, Font
9
+ from openpyxl.worksheet.worksheet import Worksheet
10
+
11
+ from .column import Column
12
+
13
+ M = TypeVar("M")
14
+
15
+
16
+ def _camel_to_snake(name: str) -> str:
17
+ s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
18
+ s2 = re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1)
19
+ return s2.lower()
20
+
21
+
22
+ def _pluralize(s: str) -> str:
23
+ # keep deliberately simple; can be swapped for inflect later
24
+ if s.endswith("s"):
25
+ return s
26
+ return s + "s"
27
+
28
+
29
def _repo_name_for_model(model: type[Any]) -> str:
    """Return the repository attribute name for *model*: snake_case, pluralized."""
    snake_name = _camel_to_snake(model.__name__)
    return _pluralize(snake_name)
31
+
32
+
33
def _display_name_for_model(model: type[Any]) -> str:
    """Return the title-cased table name, e.g. "Manufacturing Plants"."""
    words = _repo_name_for_model(model).split("_")
    return " ".join(words).title()
36
+
37
+
38
+ def _get_model_columns(model: type[Any]) -> list[Column[Any]]:
39
+ return list(getattr(model, "__columns__", []))
40
+
41
+
42
+ def _normalize_header(v: Any) -> str:
43
+ if v is None:
44
+ return ""
45
+ return str(v).strip()
46
+
47
+
48
+ def _row_is_blank(values: list[Any]) -> bool:
49
+ return all(_normalize_header(v) == "" for v in values)
50
+
51
+
52
+ def _instantiate_model[M](model: type[M]) -> M:
53
+ obj = model.__new__(model)
54
+ obj._values = {}
55
+ # defaults
56
+ for col in _get_model_columns(model):
57
+ if col.name is None:
58
+ raise RuntimeError("Column __set_name__ did not run.")
59
+ obj._values[col.name] = col.spec.default
60
+ return obj
61
+
62
+
63
class Repository(list[M]):
    """List of parsed model instances with an ``all()`` convenience accessor."""

    def all(self) -> list[M]:
        """Return the contents as a new plain ``list``."""
        return [*self]
66
+
67
+
68
@dataclass(frozen=True)
class SheetSpec:
    """Layout of a worksheet holding one table per model, placed side by side."""

    # Worksheet title; used both when creating the sheet and when looking it
    # up in a loaded workbook.
    name: str
    # Model classes, one table each, laid out left to right in this order.
    models: list[type[Any]]

    # Row of the merged table title written by template generation.
    title_row: int = 1
    # Row holding the column headers; parsing searches this row for them.
    header_row: int = 2
    # First row read as data when parsing.
    data_start_row: int = 3

    # Number of blank columns left between adjacent tables in the template.
    template_table_gap: int = 2
78
+
79
+
80
@dataclass(frozen=True)
class PivotSheetSpec:
    """Layout of a worksheet holding a single model as a pivot grid.

    Row keys run down one column, pivot values run across the header row,
    and each populated grid cell is loaded as one model instance.
    """

    name: str  # worksheet title
    model: type[Any]  # single model only

    # field names on the model
    pivot_field: str  # receives the parsed pivot header of the cell's column
    row_field: str  # receives the parsed row key of the cell's row
    value_field: str  # receives the parsed cell value

    # layout
    title_row: int = 1
    header_row: int = 2  # pivot headers
    row_header_col: int = 1  # column holding the row keys
    data_start_row: int = 3
    data_start_col: int = 2  # first column of pivot headers / grid values

    # template: define the pivot column values (dates) to render across the top
    pivot_values: list[Any] | None = None  # e.g., list[date]; required for generation

    # optional: seed row keys (regions) on template
    row_values: list[Any] | None = None

    include_blanks: bool = False  # whether to load blank cells as data points
104
+
105
+
106
+ AnySheetSpec = PivotSheetSpec | SheetSpec
107
+
108
+
109
class ExcelFile:
    """Workbook-level entry point for template generation and data loading.

    For every model referenced by the given sheets, a :class:`Repository` is
    created and exposed as an attribute named after the model (snake_case,
    pluralized), e.g. ``Car`` -> ``excel_file.cars``.
    """

    def __init__(self, *, sheets: list[AnySheetSpec]):
        """Register the sheets and create one repository per model.

        Raises:
            ValueError: if a repository name is already taken on this
                instance (for example the same model listed twice).
        """
        self.sheets = sheets

        self._repos: dict[type[Any], Repository[Any]] = {}

        for sheet in sheets:
            models = [sheet.model] if isinstance(sheet, PivotSheetSpec) else sheet.models

            for model in models:
                repo_name = _repo_name_for_model(model)
                # hasattr also guards against clobbering existing attributes
                # of this object, not only earlier repositories.
                if hasattr(self, repo_name):
                    raise ValueError(
                        f"Duplicate repo name '{repo_name}' for model {model.__name__}"
                    )
                repo = Repository()
                self._repos[model] = repo
                setattr(self, repo_name, repo)

    def generate_template(self, filename: str) -> None:
        """Write a blank template workbook with one worksheet per sheet spec."""
        wb = Workbook()
        default_ws = wb.active
        # Drop the sheet openpyxl creates automatically, but only when it is
        # going to be replaced by at least one declared sheet.
        if self.sheets:
            wb.remove(default_ws)

        for sheet in self.sheets:
            ws = wb.create_sheet(title=sheet.name)
            if isinstance(sheet, PivotSheetSpec):
                self._write_pivot_sheet_template(ws, sheet)
            else:
                self._write_sheet_template(ws, sheet)

        wb.save(filename)

    def _write_sheet_template(self, ws: Worksheet, spec: SheetSpec) -> None:
        """Write title and header rows for every model table on *ws*."""
        current_col = 1  # 1-based index

        title_font = Font(bold=True)
        title_alignment = Alignment(horizontal="center", vertical="center")

        header_font = Font(bold=True)

        for model in spec.models:
            cols = _get_model_columns(model)
            if not cols:
                # Nothing to lay out; merging a zero-width range would put the
                # end column before the start column. Parsing skips such
                # models as well.
                continue

            headers = [c.spec.header or c.name for c in cols]
            width = len(headers)

            start_col = current_col
            end_col = current_col + width - 1

            # ---- merged title row ----
            ws.merge_cells(
                start_row=spec.title_row,
                start_column=start_col,
                end_row=spec.title_row,
                end_column=end_col,
            )
            title_cell = ws.cell(
                row=spec.title_row, column=start_col, value=_display_name_for_model(model)
            )
            title_cell.font = title_font
            title_cell.alignment = title_alignment

            # ---- header row ----
            for j, h in enumerate(headers):
                c = start_col + j
                cell = ws.cell(row=spec.header_row, column=c, value=h)
                cell.font = header_font

                # Width follows the header text, clamped to 12..40.
                col_letter = ws.cell(row=spec.header_row, column=c).column_letter
                ws.column_dimensions[col_letter].width = max(12, min(40, len(str(h)) + 4))

            current_col = end_col + 1 + spec.template_table_gap

    def load_data(self, filename: str) -> None:
        """Load *filename* into the repositories, replacing their contents.

        Raises:
            ValueError: if the workbook lacks a declared sheet; parser,
                column and model validation errors propagate as raised.
        """
        wb = load_workbook(filename=filename, data_only=True)
        for repo in self._repos.values():
            repo.clear()

        for sheet_spec in self.sheets:
            if sheet_spec.name not in wb.sheetnames:
                raise ValueError(f"Workbook missing sheet '{sheet_spec.name}'")
            ws = wb[sheet_spec.name]
            if isinstance(sheet_spec, PivotSheetSpec):
                self._parse_pivot_sheet(ws, sheet_spec)
            else:
                self._parse_sheet(ws, sheet_spec)

    def _parse_sheet(self, ws: Worksheet, spec: SheetSpec) -> None:
        """Read each model table on *ws* into its repository.

        A model whose header sequence is not found on the header row is
        skipped without error. Reading a table stops at its first blank row.
        """
        for model in spec.models:
            found = self._find_header(ws, spec, model)
            if found is None:
                continue

            _, start_col = found
            cols = _get_model_columns(model)
            width = len(cols)

            repo: Repository[Any] = self._repos[model]

            r = spec.data_start_row
            while r <= ws.max_row:
                row_vals = [ws.cell(row=r, column=start_col + j).value for j in range(width)]
                if _row_is_blank(row_vals):
                    break

                # excludes (raw-value based)
                if any(
                    col.spec.excludes and row_vals[i] in col.spec.excludes
                    for i, col in enumerate(cols)
                ):
                    r += 1
                    continue

                obj = _instantiate_model(model)
                for i, col in enumerate(cols):
                    raw = row_vals[i]
                    parsed = col.parse_cell(raw)
                    # Goes through Column.__set__, which validates the value.
                    setattr(obj, col.name, parsed)

                validate = getattr(obj, "validate", None)
                if callable(validate):
                    validate()

                repo.append(obj)
                r += 1

    def _find_header(
        self, ws: Worksheet, spec: SheetSpec, model: type[Any]
    ) -> tuple[int, int] | None:
        """Locate *model*'s table by its header sequence on the header row.

        Returns:
            ``(header_row, start_column)`` of the first match, or ``None``
            when the model has no columns or no match exists.
        """
        cols = _get_model_columns(model)
        # Same fallback as template generation: a column without an explicit
        # header is written under its attribute name, so it has to be looked
        # up under that name too. Expecting "" would never match a generated
        # template and could match a run of blank cells instead.
        expected = [_normalize_header(c.spec.header or c.name) for c in cols]
        if not expected:
            return None

        r = spec.header_row
        width = len(expected)
        max_c = ws.max_column or 0

        for start_col in range(1, max_c - width + 2):
            actual = [
                _normalize_header(ws.cell(row=r, column=start_col + j).value) for j in range(width)
            ]
            if actual == expected:
                return (r, start_col)

        return None

    def _write_pivot_sheet_template(self, ws: Worksheet, spec: PivotSheetSpec) -> None:
        """Write the title, corner header, pivot headers and optional row keys.

        Raises:
            ValueError: if ``spec.pivot_values`` is missing or empty.
        """
        if not spec.pivot_values:
            raise ValueError("PivotSheetSpec.pivot_values is required for template generation.")

        title_font = Font(bold=True)
        title_alignment = Alignment(horizontal="center", vertical="center")
        header_font = Font(bold=True)

        # Title merged across the pivot header span
        end_col = spec.data_start_col + len(spec.pivot_values) - 1

        ws.merge_cells(
            start_row=spec.title_row,
            start_column=spec.row_header_col,
            end_row=spec.title_row,
            end_column=end_col,
        )
        tcell = ws.cell(spec.title_row, spec.row_header_col, _display_name_for_model(spec.model))
        tcell.font = title_font
        tcell.alignment = title_alignment

        # Top-left corner header (row field name)
        corner = ws.cell(spec.header_row, spec.row_header_col, spec.row_field.title())
        corner.font = header_font

        # Pivot headers across the top
        for j, pv in enumerate(spec.pivot_values):
            c = spec.data_start_col + j
            cell = ws.cell(spec.header_row, c, pv)
            cell.font = header_font

            col_letter = ws.cell(spec.header_row, c).column_letter
            ws.column_dimensions[col_letter].width = 14

        # Seed row keys (optional)
        if spec.row_values:
            for i, rv in enumerate(spec.row_values):
                r = spec.data_start_row + i
                ws.cell(r, spec.row_header_col, rv)

    def _parse_pivot_sheet(self, ws: Worksheet, spec: PivotSheetSpec) -> None:
        """Unpivot the grid on *ws* into one model instance per populated cell.

        Raises:
            ValueError: if the model lacks one of the fields named by *spec*.
        """
        model = spec.model
        cols = {c.name: c for c in _get_model_columns(model)}  # Column descriptors by field name

        # Validate fields exist
        for fname in (spec.pivot_field, spec.row_field, spec.value_field):
            if fname not in cols:
                raise ValueError(
                    f"{model.__name__} is missing Column field '{fname}' required by PivotSheetSpec."
                )

        pivot_col = cols[spec.pivot_field]
        row_col = cols[spec.row_field]
        val_col = cols[spec.value_field]

        # Pivot headers are read from the sheet itself, left to right, up to
        # the first blank header cell.
        pivot_headers: list[Any] = []
        j = 0
        while True:
            c = spec.data_start_col + j
            raw = ws.cell(spec.header_row, c).value
            if raw is None or str(raw).strip() == "":
                break
            pivot_headers.append(pivot_col.parse_cell(raw))
            j += 1

        if not pivot_headers:
            return

        repo: Repository[Any] = self._repos[model]

        r = spec.data_start_row
        while r <= ws.max_row:
            raw_row_key = ws.cell(r, spec.row_header_col).value
            # The first blank row key ends the grid.
            if raw_row_key is None or str(raw_row_key).strip() == "":
                break

            row_key = row_col.parse_cell(raw_row_key)

            for j, pivot_value in enumerate(pivot_headers):
                c = spec.data_start_col + j
                raw_val = ws.cell(r, c).value
                if not spec.include_blanks and (raw_val is None or raw_val == ""):
                    continue

                obj = _instantiate_model(model)

                setattr(obj, spec.row_field, row_key)
                setattr(obj, spec.pivot_field, pivot_value)
                setattr(obj, spec.value_field, val_col.parse_cell(raw_val))

                validate = getattr(obj, "validate", None)
                if callable(validate):
                    validate()

                repo.append(obj)

            r += 1
@@ -0,0 +1,62 @@
1
+ [project]
2
+ name = "excel-orm"
3
+ version = "0.1.0"
4
+ description = "A lightweight Excel ORM for generating templates and parsing typed row models."
5
+ readme = "README.md"
6
+ requires-python = ">=3.13"
7
+ license = { text = "MIT" }
8
+ authors = [{ name = "Anthony Del Russo" }]
9
+ keywords = ["excel", "orm", "openpyxl", "etl"]
10
+ classifiers = [
11
+ "Development Status :: 3 - Alpha",
12
+ "Programming Language :: Python :: 3",
13
+ "Programming Language :: Python :: 3.13",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Operating System :: OS Independent",
17
+ ]
18
+ dependencies = [
19
+ "openpyxl>=3.1.5",
20
+ ]
21
+
22
+ [dependency-groups]
23
+ dev = [
24
+ "build>=1.3.0",
25
+ "pre-commit>=4.5.1",
26
+ "pytest>=9.0.2",
27
+ "ruff>=0.14.10",
28
+ "twine>=6.2.0",
29
+ "ty>=0.0.8",
30
+ ]
31
+
32
+ [tool.ruff]
33
+ target-version = "py312"
34
+ line-length = 100
35
+
36
+ [tool.ruff.lint]
37
+ select = ["E", "F", "I", "B", "UP", "SIM", "RUF"]
38
+ ignore = ["E501"]
39
+
40
+ [tool.ruff.format]
41
+ quote-style = "double"
42
+ indent-style = "space"
43
+
44
+ [tool.ty]
45
+
46
+ [tool.ty.rules]
47
+
48
+ [tool.pytest.ini_options]
49
+ addopts = "-q"
50
+ testpaths = ["tests"]
51
+
52
+ [project.urls]
53
+ Homepage = "https://github.com/acdelrusso/excel-orm"
54
+ Repository = "https://github.com/acdelrusso/excel-orm"
55
+ Issues = "https://github.com/acdelrusso/excel-orm/issues"
56
+
57
+ [build-system]
58
+ requires = ["hatchling>=1.25"]
59
+ build-backend = "hatchling.build"
60
+
61
+ [tool.hatch.build]
62
+ packages = ["src/excel_orm"]