phaethon 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. phaethon-0.2.1/LICENSE +21 -0
  2. phaethon-0.2.1/PKG-INFO +245 -0
  3. phaethon-0.2.1/README.md +212 -0
  4. phaethon-0.2.1/pyproject.toml +59 -0
  5. phaethon-0.2.1/setup.cfg +4 -0
  6. phaethon-0.2.1/src/chisa/__init__.py +90 -0
  7. phaethon-0.2.1/src/chisa/core/__init__.py +8 -0
  8. phaethon-0.2.1/src/chisa/core/axioms.py +445 -0
  9. phaethon-0.2.1/src/chisa/core/base.py +547 -0
  10. phaethon-0.2.1/src/chisa/core/constants.py +275 -0
  11. phaethon-0.2.1/src/chisa/core/engine.py +485 -0
  12. phaethon-0.2.1/src/chisa/core/registry.py +187 -0
  13. phaethon-0.2.1/src/chisa/core/schema.py +342 -0
  14. phaethon-0.2.1/src/chisa/core/vmath.py +123 -0
  15. phaethon-0.2.1/src/chisa/exceptions.py +89 -0
  16. phaethon-0.2.1/src/chisa/u.py +43 -0
  17. phaethon-0.2.1/src/chisa/units/__init__.py +10 -0
  18. phaethon-0.2.1/src/chisa/units/area.py +106 -0
  19. phaethon-0.2.1/src/chisa/units/data.py +123 -0
  20. phaethon-0.2.1/src/chisa/units/density.py +54 -0
  21. phaethon-0.2.1/src/chisa/units/energy.py +122 -0
  22. phaethon-0.2.1/src/chisa/units/force.py +120 -0
  23. phaethon-0.2.1/src/chisa/units/frequency.py +53 -0
  24. phaethon-0.2.1/src/chisa/units/length.py +181 -0
  25. phaethon-0.2.1/src/chisa/units/mass.py +243 -0
  26. phaethon-0.2.1/src/chisa/units/power.py +96 -0
  27. phaethon-0.2.1/src/chisa/units/pressure.py +212 -0
  28. phaethon-0.2.1/src/chisa/units/speed.py +146 -0
  29. phaethon-0.2.1/src/chisa/units/temperature.py +97 -0
  30. phaethon-0.2.1/src/chisa/units/time.py +206 -0
  31. phaethon-0.2.1/src/chisa/units/volume.py +185 -0
  32. phaethon-0.2.1/src/phaethon.egg-info/PKG-INFO +245 -0
  33. phaethon-0.2.1/src/phaethon.egg-info/SOURCES.txt +34 -0
  34. phaethon-0.2.1/src/phaethon.egg-info/dependency_links.txt +1 -0
  35. phaethon-0.2.1/src/phaethon.egg-info/requires.txt +2 -0
  36. phaethon-0.2.1/src/phaethon.egg-info/top_level.txt +1 -0
phaethon-0.2.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 rannd1nt
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,245 @@
1
+ Metadata-Version: 2.4
2
+ Name: phaethon
3
+ Version: 0.2.1
4
+ Summary: Unit-Safe Data Pipeline Schema and Dimensional Algebra Framework
5
+ Author-email: Zahraan Dzakii Tsaqiif <zahraandzakiits@gmail.com>
6
+ License: MIT
7
+ Keywords: data engineering,schema validation,pandas,physics,dimensional analysis,data pipeline,numpy,unit conversion,machine learning
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Intended Audience :: Education
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Intended Audience :: Information Technology
13
+ Classifier: Topic :: Scientific/Engineering
14
+ Classifier: Topic :: Scientific/Engineering :: Physics
15
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
16
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
17
+ Classifier: Topic :: Database
18
+ Classifier: License :: OSI Approved :: MIT License
19
+ Classifier: Operating System :: OS Independent
20
+ Classifier: Programming Language :: Python :: 3
21
+ Classifier: Programming Language :: Python :: 3.8
22
+ Classifier: Programming Language :: Python :: 3.9
23
+ Classifier: Programming Language :: Python :: 3.10
24
+ Classifier: Programming Language :: Python :: 3.11
25
+ Classifier: Programming Language :: Python :: 3.12
26
+ Classifier: Programming Language :: Python :: 3.13
27
+ Requires-Python: >=3.8
28
+ Description-Content-Type: text/markdown
29
+ License-File: LICENSE
30
+ Requires-Dist: numpy>=1.26.0
31
+ Requires-Dist: pandas>=2.0.0
32
+ Dynamic: license-file
33
+
34
+ <div align="center">
35
+
36
+ <h1>Chisa — Unit-Safe Data Pipeline Schema</h1>
37
+
38
+ <p>
39
+ <img src="https://img.shields.io/badge/MADE_WITH-PYTHON-blue?style=for-the-badge&logo=python&logoColor=white" alt="Python">
40
+ <img src="https://img.shields.io/badge/INTEGRATION-NUMPY-013243?style=for-the-badge&logo=numpy&logoColor=white" alt="NumPy">
41
+ <img src="https://img.shields.io/badge/INTEGRATION-PANDAS-150458?style=for-the-badge&logo=pandas&logoColor=white" alt="Pandas">
42
+ <img src="https://img.shields.io/badge/LICENSE-MIT-red?style=for-the-badge" alt="License">
43
+ </p>
44
+
45
+ <p>
46
+ <i>Normalize messy heterogeneous units and enforce physical integrity before your data hits ML or production systems.</i>
47
+ </p>
48
+
49
+ </div>
50
+
51
+ **Chisa** is a declarative schema validation and semantic data transformation tool designed for Data Engineers. It rescues your data pipelines from the nightmare of mixed units, bizarre abbreviations, and impossible physical values.
52
+
53
+ While standard schema tools (like Pydantic or Pandera) only validate *data types* (e.g., ensuring a value is a `float`), Chisa validates **physical reality**. If you are ingesting IoT sensor streams, parsing messy logistics CSVs, or processing manufacturing Excel sheets, Chisa ensures your numbers obey the laws of physics before they enter your database.
54
+
55
+ ---
56
+
57
+ ## 🚀 The Nightmare vs. The Chisa Way
58
+
59
+ Real-world data is rarely clean. A single dataset might contain `"1.5e3 lbs"`, `" -5 kg "`, missing values, and typos like `"20 pallets"`. Standard pandas workflows force you to write fragile regex and manual `if-else` blocks.
60
+
61
+ **Chisa solves this declaratively.**
62
+
63
+ ```python
64
+ import pandas as pd
65
+ import chisa as cs
66
+ from chisa import u
67
+
68
+ class GlobalFreightSchema(cs.Schema):
69
+ gross_weight: u.Kilogram = cs.Field(
70
+ source="Weight_Log",
71
+ parse_string=True,
72
+ on_error='coerce',
73
+ round=2,
74
+ min=0 # Axiom Bound: Cargo mass cannot be negative!
75
+ )
76
+ cargo_volume: u.CubicMeter = cs.Field(
77
+ source="Volume_Log",
78
+ parse_string=True,
79
+ on_error='coerce'
80
+ )
81
+
82
+ df_messy = pd.DataFrame({
83
+ 'Weight_Log': ["1.5e3 lbs", " -5 kg ", "20 pallets", "150", "kg"],
84
+ 'Volume_Log': ["100 m^3", "500 cu_ft", "1000", "", "NaN"]
85
+ })
86
+
87
+ # Execute the pipeline instantly via vectorized masking
88
+ clean_df = GlobalFreightSchema.normalize(df_messy)
89
+ ```
90
+
91
+ **The Output:**
92
+ Chisa cleanly parses `"1.5e3 lbs"` to `680.39 kg`, accurately converts `"cu_ft"` to Cubic Meters, and safely nullifies physical anomalies (like `-5 kg`), bare numbers, and vague inputs (`"20 pallets"`) to `NaN`—all automatically.
93
+
94
+ ---
95
+
96
+ ## 🧠 Smart Error Intelligence
97
+
98
+ Data pipelines shouldn't just crash; they should tell you *how* to fix them. If you enforce strict data rules (`on_error='raise'`), Chisa provides unparalleled Developer Experience (DX) for debugging massive DataFrames:
99
+
100
+ ```text
101
+ NormalizationError: Normalization failed for field 'gross_weight' at index [2].
102
+ ► Issue : Unrecognized unit 'pallets'
103
+ ► Expected Dimension : mass
104
+ ► Raw Value Sample : '20 pallets'
105
+ ► Suggestion : Fix the raw data, register the unit, or set Field(on_error='coerce').
106
+ ```
107
+
108
+ ---
109
+
110
+ ## ⚡ Performance: The Vectorization Advantage
111
+
112
+ Standard unit libraries (like Pint) struggle with **heterogeneous strings** (mixed units in the same column), forcing developers to use slow `pandas.apply()` loops to parse row-by-row. Chisa bypasses this entirely using native NumPy vectorization and Pandas Boolean masking.
113
+
114
+ When stress-tested against 100,000 rows of heterogeneous data (e.g., a mix of `lbs` and `oz` targeting `kg`):
115
+ * *Traditional (Pint + Pandas Apply):* ~14.71 seconds
116
+ * *Chisa (Vectorized Schema):* **~0.046 seconds** *(>316x Faster)*
117
+
118
+ > *Transparency Note: You can reproduce this 99.6% reduction in latency using the `benchmarks/benchmark_vs_pint.py` script included in this repository.*
119
+
120
+ ---
121
+
122
+ ## 🪝 Pipeline Hooks (Inversion of Control)
123
+
124
+ Need to filter offline sensors before parsing, or trigger an alarm if a physical threshold is breached? Inject your own domain logic directly into the validation lifecycle.
125
+
126
+ ```python
127
+ class ColdChainPipeline(cs.Schema):
128
+ temp: u.Celsius = cs.Field(source="raw_temp", parse_string=True)
129
+
130
+ @cs.pre_normalize
131
+ def drop_calibration_pings(cls, raw_df):
132
+ """Runs BEFORE Chisa parses the strings. Removes sensor test pings."""
133
+ return raw_df[raw_df['status'] != 'CALIBRATION']
134
+
135
+ @cs.post_normalize
136
+ def enforce_spoilage_check(cls, clean_df):
137
+ """Runs AFTER all temperatures (e.g., Fahrenheit) are standardized to Celsius."""
138
+ if clean_df['temp'].max() > -20.0:
139
+ raise ValueError("CRITICAL: Vaccine shipment spoiled! Temp exceeded -20°C.")
140
+ return clean_df
141
+ ```
142
+
143
+ ---
144
+ ## 🏎️ The Fluent API (Quick Inline Conversions)
145
+
146
+ For simple scripts, logging, or UI components where you don't need full declarative schemas, Chisa provides a highly readable, chainable Fluent API.
147
+
148
+ ```python
149
+ import chisa as cs
150
+
151
+ # Simple scalar conversion
152
+ speed = cs.convert(120, 'km/h').to('m/s').resolve()
153
+ print(speed) # 33.333333333
154
+
155
+ # Powerful cosmetic formatting for logs
156
+ text = cs.convert(1000, 'm').to('cm').use(format='verbose', delim=True).resolve()
157
+ print(text) # "1,000 m = 100,000 cm"
158
+ ```
159
+ ---
160
+
161
+ ## 📚 Examples & Tutorials
162
+
163
+ To help you integrate Chisa into your existing workflows, we provide a comprehensive suite of examples in the `examples/` directory.
164
+
165
+ ### Interactive Crash Course (Google Colab)
166
+ The fastest way to learn Chisa is through our interactive notebooks. No local installation required!
167
+
168
+ | Tutorial | Description | Link |
169
+ | :--- | :--- | :--- |
170
+ | **01. Fundamentals** | Core concepts, Axiom Engine, and Type Safety. | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rannd1nt/chisa/blob/main/examples/T01_Chisa_Fundamentals.ipynb) |
171
+ | **02. Workflow Demo** | Real-world engineering with Pandas & Matplotlib. | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rannd1nt/chisa/blob/main/examples/T02_Chisa_RealWorld_Workflow.ipynb) |
172
+
173
+ ### Python Scripts Reference
174
+ For detailed, standalone script implementations, explore our `examples/` directory:
175
+
176
+ * **Phase 1: Declarative Data Pipelines (Data Ingestion)**
177
+ * `01_wearable_health_data.py`: Standardizing messy smartwatch exports (BPM, kcal vs cal, body temperature).
178
+ * `02_food_manufacturing_scale.py`: Safely converting industrial recipe batches across cups, tablespoons, grams, and fluid ounces.
179
+ * `03_multi_region_tariffs.py`: Parsing mixed currency and weight strings (lbs, oz, kg) in a single pass to calculate global shipping costs.
180
+ * `04_energy_grid_audits.py`: Normalizing utility bill chaos (MMBtu, kWh, Joules) into a single unified Pandas cost report.
181
+
182
+ * **Phase 2: High-Performance Vectorization & Algebra**
183
+ * `05_f1_telemetry_vectorization.py`: Array math on RPM, Speed, and Tire Pressure operating on millions of rows in milliseconds.
184
+ * `06_structural_stress_testing.py`: Cross-unit algebra combining Kips, Newtons, and Pound-force over Square Meters for civil engineering loads.
185
+ * `07_financial_billing_precision.py`: Understanding when to use `.mag` (fast Python floats for Math/ML) vs `.exact` (high-precision Decimals for strict financial audits).
186
+
187
+ * **Phase 3: The Axiom Engine (Domain-Driven Engineering)**
188
+ * `08_gas_pipeline_thermodynamics.py`: Using Contextual Shifts to dynamically calculate industrial gas volume expansion based on real-time temperature and pressure (PV=nRT).
189
+ * `09_end_to_end_esg_pipeline.py`: The Grand Unified Theory of Chisa. Synthesizing a custom dimension (Carbon Intensity), cleaning data into it via Schema, and guarding algorithms with `@require` and `@prepare`.
190
+
191
+ * **Phase 4: Real-World Ecosystem Integration**
192
+ * `10_pandas_groupby_physics.py`: Integrating Chisa arrays directly with Pandas `GroupBy` to aggregate daily IoT power production into monthly summaries.
193
+ * `11_scikit_learn_transformer.py`: Building a custom ML `BaseEstimator` to autonomously normalize heterogeneous unit arrays before training a Random Forest.
194
+ * `12_handling_sensor_drift.py`: Using NumPy array masks and vectorization to neutralize factory machine calibration errors without slow `for` loops.
195
+ * `13_dynamic_alert_thresholds.py`: Simulating an IoT streaming pipeline where safety limits (`@axiom.bound`) change dynamically based on the machine's operating context.
196
+ * `14_cloud_compute_costs.py`: Utilizing extreme Metaclass algebra (`Currency / (RAM * Time)`) to synthesize and calculate abstract Server Compute billing rates ($ / GB-Hour).
197
+
198
+ ---
199
+
200
+ ## 🔬 The Engine: Explicit Dimensional Algebra
201
+
202
+ While Chisa's Schema is built for Data Engineering pipelines, underneath it lies a highly strict, Metaclass-driven Object-Oriented physics engine. If you are a Data Scientist, you can extract your clean data into Chisa Arrays for cross-dimensional mathematics with zero memory leaks.
203
+
204
+ ```python
205
+ import numpy as np
206
+ import chisa as cs
207
+ from chisa import u
208
+
209
+ # Seamless cross-unit Metaclass Vectorized Synthesis (Mass * Acceleration = Force)
210
+ Mass = u.Kilogram(np.random.uniform(10, 100, 1_000_000))
211
+ Acceleration = (u.Meter / (u.Second ** 2))(np.random.uniform(0.5, 9.8, 1_000_000))
212
+
213
+ Force = Mass * Acceleration
214
+ Force_kN_array = Force.to(u.Newton * 1000).mag
215
+ ```
216
+ > 📖 **Deep Dive:** For advanced features like Dynamic Contextual Scaling (Mach), Axiom Bound derivation, and Registry Introspection, please refer to our **[Advanced Physics Documentation](https://github.com/rannd1nt/chisa/blob/main/docs/advanced_physics.md)**.
217
+
218
+ ---
219
+
220
+ ## 📦 Installation
221
+ **Install via pip:**
222
+ ```bash
223
+ pip install phaethon
224
+ ```
225
+ **Requirements:**
226
+ - Python 3.8+
227
+ - numpy >= 1.26.0
228
+ - pandas >= 2.0.0
229
+
230
+ ---
231
+
232
+ ## 🛠 Roadmap & TODOs
233
+ - **String Expression Parser:** Upgrading the registry to autonomously parse complex composite strings (e.g., `"kg * m / s^2"`).
234
+ - **Global Context Manager:** Introduce `chisa.conf()` to temporarily force data types or ignore boundary rules.
235
+ - **Polars Integration:** Expanding `Schema.normalize()` to support Polars DataFrames for ultra-fast Rust-based data processing.
236
+
237
+ ---
238
+
239
+ ## 🤝 Contributing
240
+ Contributions are what make the open-source community an amazing place to learn, inspire, and create. Any contributions you make to Chisa are **greatly appreciated**.
241
+
242
+ ---
243
+
244
+ ## License
245
+ Distributed under the MIT License. See the `LICENSE` file for more information.
@@ -0,0 +1,212 @@
1
+ <div align="center">
2
+
3
+ <h1>Chisa — Unit-Safe Data Pipeline Schema</h1>
4
+
5
+ <p>
6
+ <img src="https://img.shields.io/badge/MADE_WITH-PYTHON-blue?style=for-the-badge&logo=python&logoColor=white" alt="Python">
7
+ <img src="https://img.shields.io/badge/INTEGRATION-NUMPY-013243?style=for-the-badge&logo=numpy&logoColor=white" alt="NumPy">
8
+ <img src="https://img.shields.io/badge/INTEGRATION-PANDAS-150458?style=for-the-badge&logo=pandas&logoColor=white" alt="Pandas">
9
+ <img src="https://img.shields.io/badge/LICENSE-MIT-red?style=for-the-badge" alt="License">
10
+ </p>
11
+
12
+ <p>
13
+ <i>Normalize messy heterogeneous units and enforce physical integrity before your data hits ML or production systems.</i>
14
+ </p>
15
+
16
+ </div>
17
+
18
+ **Chisa** is a declarative schema validation and semantic data transformation tool designed for Data Engineers. It rescues your data pipelines from the nightmare of mixed units, bizarre abbreviations, and impossible physical values.
19
+
20
+ While standard schema tools (like Pydantic or Pandera) only validate *data types* (e.g., ensuring a value is a `float`), Chisa validates **physical reality**. If you are ingesting IoT sensor streams, parsing messy logistics CSVs, or processing manufacturing Excel sheets, Chisa ensures your numbers obey the laws of physics before they enter your database.
21
+
22
+ ---
23
+
24
+ ## 🚀 The Nightmare vs. The Chisa Way
25
+
26
+ Real-world data is rarely clean. A single dataset might contain `"1.5e3 lbs"`, `" -5 kg "`, missing values, and typos like `"20 pallets"`. Standard pandas workflows force you to write fragile regex and manual `if-else` blocks.
27
+
28
+ **Chisa solves this declaratively.**
29
+
30
+ ```python
31
+ import pandas as pd
32
+ import chisa as cs
33
+ from chisa import u
34
+
35
+ class GlobalFreightSchema(cs.Schema):
36
+ gross_weight: u.Kilogram = cs.Field(
37
+ source="Weight_Log",
38
+ parse_string=True,
39
+ on_error='coerce',
40
+ round=2,
41
+ min=0 # Axiom Bound: Cargo mass cannot be negative!
42
+ )
43
+ cargo_volume: u.CubicMeter = cs.Field(
44
+ source="Volume_Log",
45
+ parse_string=True,
46
+ on_error='coerce'
47
+ )
48
+
49
+ df_messy = pd.DataFrame({
50
+ 'Weight_Log': ["1.5e3 lbs", " -5 kg ", "20 pallets", "150", "kg"],
51
+ 'Volume_Log': ["100 m^3", "500 cu_ft", "1000", "", "NaN"]
52
+ })
53
+
54
+ # Execute the pipeline instantly via vectorized masking
55
+ clean_df = GlobalFreightSchema.normalize(df_messy)
56
+ ```
57
+
58
+ **The Output:**
59
+ Chisa cleanly parses `"1.5e3 lbs"` to `680.39 kg`, accurately converts `"cu_ft"` to Cubic Meters, and safely nullifies physical anomalies (like `-5 kg`), bare numbers, and vague inputs (`"20 pallets"`) to `NaN`—all automatically.
60
+
61
+ ---
62
+
63
+ ## 🧠 Smart Error Intelligence
64
+
65
+ Data pipelines shouldn't just crash; they should tell you *how* to fix them. If you enforce strict data rules (`on_error='raise'`), Chisa provides unparalleled Developer Experience (DX) for debugging massive DataFrames:
66
+
67
+ ```text
68
+ NormalizationError: Normalization failed for field 'gross_weight' at index [2].
69
+ ► Issue : Unrecognized unit 'pallets'
70
+ ► Expected Dimension : mass
71
+ ► Raw Value Sample : '20 pallets'
72
+ ► Suggestion : Fix the raw data, register the unit, or set Field(on_error='coerce').
73
+ ```
74
+
75
+ ---
76
+
77
+ ## ⚡ Performance: The Vectorization Advantage
78
+
79
+ Standard unit libraries (like Pint) struggle with **heterogeneous strings** (mixed units in the same column), forcing developers to use slow `pandas.apply()` loops to parse row-by-row. Chisa bypasses this entirely using native NumPy vectorization and Pandas Boolean masking.
80
+
81
+ When stress-tested against 100,000 rows of heterogeneous data (e.g., a mix of `lbs` and `oz` targeting `kg`):
82
+ * *Traditional (Pint + Pandas Apply):* ~14.71 seconds
83
+ * *Chisa (Vectorized Schema):* **~0.046 seconds** *(>316x Faster)*
84
+
85
+ > *Transparency Note: You can reproduce this 99.6% reduction in latency using the `benchmarks/benchmark_vs_pint.py` script included in this repository.*
86
+
87
+ ---
88
+
89
+ ## 🪝 Pipeline Hooks (Inversion of Control)
90
+
91
+ Need to filter offline sensors before parsing, or trigger an alarm if a physical threshold is breached? Inject your own domain logic directly into the validation lifecycle.
92
+
93
+ ```python
94
+ class ColdChainPipeline(cs.Schema):
95
+ temp: u.Celsius = cs.Field(source="raw_temp", parse_string=True)
96
+
97
+ @cs.pre_normalize
98
+ def drop_calibration_pings(cls, raw_df):
99
+ """Runs BEFORE Chisa parses the strings. Removes sensor test pings."""
100
+ return raw_df[raw_df['status'] != 'CALIBRATION']
101
+
102
+ @cs.post_normalize
103
+ def enforce_spoilage_check(cls, clean_df):
104
+ """Runs AFTER all temperatures (e.g., Fahrenheit) are standardized to Celsius."""
105
+ if clean_df['temp'].max() > -20.0:
106
+ raise ValueError("CRITICAL: Vaccine shipment spoiled! Temp exceeded -20°C.")
107
+ return clean_df
108
+ ```
109
+
110
+ ---
111
+ ## 🏎️ The Fluent API (Quick Inline Conversions)
112
+
113
+ For simple scripts, logging, or UI components where you don't need full declarative schemas, Chisa provides a highly readable, chainable Fluent API.
114
+
115
+ ```python
116
+ import chisa as cs
117
+
118
+ # Simple scalar conversion
119
+ speed = cs.convert(120, 'km/h').to('m/s').resolve()
120
+ print(speed) # 33.333333333
121
+
122
+ # Powerful cosmetic formatting for logs
123
+ text = cs.convert(1000, 'm').to('cm').use(format='verbose', delim=True).resolve()
124
+ print(text) # "1,000 m = 100,000 cm"
125
+ ```
126
+ ---
127
+
128
+ ## 📚 Examples & Tutorials
129
+
130
+ To help you integrate Chisa into your existing workflows, we provide a comprehensive suite of examples in the `examples/` directory.
131
+
132
+ ### Interactive Crash Course (Google Colab)
133
+ The fastest way to learn Chisa is through our interactive notebooks. No local installation required!
134
+
135
+ | Tutorial | Description | Link |
136
+ | :--- | :--- | :--- |
137
+ | **01. Fundamentals** | Core concepts, Axiom Engine, and Type Safety. | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rannd1nt/chisa/blob/main/examples/T01_Chisa_Fundamentals.ipynb) |
138
+ | **02. Workflow Demo** | Real-world engineering with Pandas & Matplotlib. | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/rannd1nt/chisa/blob/main/examples/T02_Chisa_RealWorld_Workflow.ipynb) |
139
+
140
+ ### Python Scripts Reference
141
+ For detailed, standalone script implementations, explore our `examples/` directory:
142
+
143
+ * **Phase 1: Declarative Data Pipelines (Data Ingestion)**
144
+ * `01_wearable_health_data.py`: Standardizing messy smartwatch exports (BPM, kcal vs cal, body temperature).
145
+ * `02_food_manufacturing_scale.py`: Safely converting industrial recipe batches across cups, tablespoons, grams, and fluid ounces.
146
+ * `03_multi_region_tariffs.py`: Parsing mixed currency and weight strings (lbs, oz, kg) in a single pass to calculate global shipping costs.
147
+ * `04_energy_grid_audits.py`: Normalizing utility bill chaos (MMBtu, kWh, Joules) into a single unified Pandas cost report.
148
+
149
+ * **Phase 2: High-Performance Vectorization & Algebra**
150
+ * `05_f1_telemetry_vectorization.py`: Array math on RPM, Speed, and Tire Pressure operating on millions of rows in milliseconds.
151
+ * `06_structural_stress_testing.py`: Cross-unit algebra combining Kips, Newtons, and Pound-force over Square Meters for civil engineering loads.
152
+ * `07_financial_billing_precision.py`: Understanding when to use `.mag` (fast Python floats for Math/ML) vs `.exact` (high-precision Decimals for strict financial audits).
153
+
154
+ * **Phase 3: The Axiom Engine (Domain-Driven Engineering)**
155
+ * `08_gas_pipeline_thermodynamics.py`: Using Contextual Shifts to dynamically calculate industrial gas volume expansion based on real-time temperature and pressure (PV=nRT).
156
+ * `09_end_to_end_esg_pipeline.py`: The Grand Unified Theory of Chisa. Synthesizing a custom dimension (Carbon Intensity), cleaning data into it via Schema, and guarding algorithms with `@require` and `@prepare`.
157
+
158
+ * **Phase 4: Real-World Ecosystem Integration**
159
+ * `10_pandas_groupby_physics.py`: Integrating Chisa arrays directly with Pandas `GroupBy` to aggregate daily IoT power production into monthly summaries.
160
+ * `11_scikit_learn_transformer.py`: Building a custom ML `BaseEstimator` to autonomously normalize heterogeneous unit arrays before training a Random Forest.
161
+ * `12_handling_sensor_drift.py`: Using NumPy array masks and vectorization to neutralize factory machine calibration errors without slow `for` loops.
162
+ * `13_dynamic_alert_thresholds.py`: Simulating an IoT streaming pipeline where safety limits (`@axiom.bound`) change dynamically based on the machine's operating context.
163
+ * `14_cloud_compute_costs.py`: Utilizing extreme Metaclass algebra (`Currency / (RAM * Time)`) to synthesize and calculate abstract Server Compute billing rates ($ / GB-Hour).
164
+
165
+ ---
166
+
167
+ ## 🔬 The Engine: Explicit Dimensional Algebra
168
+
169
+ While Chisa's Schema is built for Data Engineering pipelines, underneath it lies a highly strict, Metaclass-driven Object-Oriented physics engine. If you are a Data Scientist, you can extract your clean data into Chisa Arrays for cross-dimensional mathematics with zero memory leaks.
170
+
171
+ ```python
172
+ import numpy as np
173
+ import chisa as cs
174
+ from chisa import u
175
+
176
+ # Seamless cross-unit Metaclass Vectorized Synthesis (Mass * Acceleration = Force)
177
+ Mass = u.Kilogram(np.random.uniform(10, 100, 1_000_000))
178
+ Acceleration = (u.Meter / (u.Second ** 2))(np.random.uniform(0.5, 9.8, 1_000_000))
179
+
180
+ Force = Mass * Acceleration
181
+ Force_kN_array = Force.to(u.Newton * 1000).mag
182
+ ```
183
+ > 📖 **Deep Dive:** For advanced features like Dynamic Contextual Scaling (Mach), Axiom Bound derivation, and Registry Introspection, please refer to our **[Advanced Physics Documentation](https://github.com/rannd1nt/chisa/blob/main/docs/advanced_physics.md)**.
184
+
185
+ ---
186
+
187
+ ## 📦 Installation
188
+ **Install via pip:**
189
+ ```bash
190
+ pip install phaethon
191
+ ```
192
+ **Requirements:**
193
+ - Python 3.8+
194
+ - numpy >= 1.26.0
195
+ - pandas >= 2.0.0
196
+
197
+ ---
198
+
199
+ ## 🛠 Roadmap & TODOs
200
+ - **String Expression Parser:** Upgrading the registry to autonomously parse complex composite strings (e.g., `"kg * m / s^2"`).
201
+ - **Global Context Manager:** Introduce `chisa.conf()` to temporarily force data types or ignore boundary rules.
202
+ - **Polars Integration:** Expanding `Schema.normalize()` to support Polars DataFrames for ultra-fast Rust-based data processing.
203
+
204
+ ---
205
+
206
+ ## 🤝 Contributing
207
+ Contributions are what make the open-source community an amazing place to learn, inspire, and create. Any contributions you make to Chisa are **greatly appreciated**.
208
+
209
+ ---
210
+
211
+ ## License
212
+ Distributed under the MIT License. See the `LICENSE` file for more information.
@@ -0,0 +1,59 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "phaethon"
7
+ version = "0.2.1"
8
+ description = "Unit-Safe Data Pipeline Schema and Dimensional Algebra Framework"
9
+ readme = "README.md"
10
+ requires-python = ">=3.8"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Zahraan Dzakii Tsaqiif", email = "zahraandzakiits@gmail.com"}
14
+ ]
15
+
16
+ keywords = [
17
+ "data engineering", "schema validation", "pandas",
18
+ "physics", "dimensional analysis", "data pipeline",
19
+ "numpy", "unit conversion", "machine learning"
20
+ ]
21
+
22
+ dependencies = [
23
+ "numpy>=1.26.0",
24
+ "pandas>=2.0.0"
25
+ ]
26
+
27
+ classifiers = [
28
+ # Development Status
29
+ "Development Status :: 4 - Beta",
30
+
31
+ # Intended Audiences
32
+ "Intended Audience :: Developers",
33
+ "Intended Audience :: Education",
34
+ "Intended Audience :: Science/Research",
35
+ "Intended Audience :: Information Technology",
36
+
37
+ # Topics
38
+ "Topic :: Scientific/Engineering",
39
+ "Topic :: Scientific/Engineering :: Physics",
40
+ "Topic :: Scientific/Engineering :: Mathematics",
41
+ "Topic :: Software Development :: Libraries :: Python Modules",
42
+ "Topic :: Database",
43
+
44
+ # License & Environment
45
+ "License :: OSI Approved :: MIT License",
46
+ "Operating System :: OS Independent",
47
+
48
+ # Supported Python Versions
49
+ "Programming Language :: Python :: 3",
50
+ "Programming Language :: Python :: 3.8",
51
+ "Programming Language :: Python :: 3.9",
52
+ "Programming Language :: Python :: 3.10",
53
+ "Programming Language :: Python :: 3.11",
54
+ "Programming Language :: Python :: 3.12",
55
+ "Programming Language :: Python :: 3.13"
56
+ ]
57
+
58
+ [tool.setuptools.packages.find]
59
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,90 @@
1
+ """
2
+ Chisa: Unit-Safe Data Pipeline Schema
3
+ =====================================
4
+
5
+ Chisa is a declarative schema validation and semantic data transformation
6
+ tool designed for Data Engineers and Scientists. It normalizes messy
7
+ heterogeneous units and enforces physical integrity before your data hits
8
+ ML models or production databases.
9
+
10
+ While standard schema tools only validate data types, Chisa validates
11
+ physical reality using a strict, Metaclass-driven Object-Oriented physics engine.
12
+
13
+ Core Features
14
+ -------------
15
+ * Declarative Schemas : Clean and normalize mixed-unit Pandas DataFrames.
16
+ * Vectorized Engine : High-speed, array-safe transformations using NumPy.
17
+ * Axiom Engine : Build custom dimensions and enforce physical laws.
18
+ * Pipeline Hooks : Inject domain logic via pre/post normalization hooks.
19
+ * Fluent API : Quick, chainable conversions supporting both string
20
+ aliases ('km') and explicit unit classes (u.Kilometer).
21
+
22
+ Main Exported Components
23
+ ------------------------
24
+ Schema : Base class for defining declarative data pipelines.
25
+ Field : Defines column-level unit targets, bounds, and parsing rules.
26
+ u : Namespace for all built-in physical dimensions and units.
27
+ axiom : Decorators for deriving units and enforcing physical bounds.
28
+ convert : Fluent API entry point for inline conversions (scalars or arrays).
29
+ """
30
+
31
+ from chisa.exceptions import (
32
+ ChisaError, ConversionError, DimensionMismatchError, AxiomViolationError, AmbiguousUnitError,
33
+ UnitNotFoundError, NormalizationError
34
+ )
35
+
36
+ from .core.registry import baseof, dims, unitsin, dimof
37
+ from .core.schema import Schema, Field, post_normalize, pre_normalize
38
+ from .core import axioms as axiom
39
+ from .core import constants as const
40
+ from .core import vmath
41
+ from .core.axioms import C
42
+ from .core.engine import convert
43
+ from .core.base import BaseUnit
44
+
45
def _bootstrap_units() -> None:
    """Import every built-in unit module exactly once.

    Importing each module triggers the Axiom Engine's auto-registration
    hook (``__init_subclass__``), so simply loading them populates the
    global unit registry. The import order below is significant and is
    kept identical to the original bootstrap sequence.
    """
    import importlib

    for _dimension in (
        "length", "mass", "pressure", "time", "speed", "temperature",
        "volume", "data", "force", "energy", "power", "area",
        "density", "frequency",
    ):
        # Relative import anchored at this package: chisa.units.<dimension>
        importlib.import_module(f".units.{_dimension}", package=__name__)


_bootstrap_units()
del _bootstrap_units
63
from . import u

# Package version string.
# NOTE: keep in sync with [project].version in pyproject.toml — the
# distribution metadata declares 0.2.1, so this must match it.
__version__ = "0.2.1"

# Explicit public API of the chisa package (consumed by `from chisa import *`
# and by documentation tooling).
__all__ = [
    "u",
    "baseof",
    "dims",
    "unitsin",
    "dimof",
    "Schema",
    "Field",
    "post_normalize",
    "pre_normalize",
    "convert",
    "axiom",
    "C",
    "const",
    "vmath",
    "BaseUnit",
    "ChisaError",
    "ConversionError",
    "DimensionMismatchError",
    "AxiomViolationError",
    "AmbiguousUnitError",
    "UnitNotFoundError",
    "NormalizationError",
]
@@ -0,0 +1,8 @@
1
+ """
2
+ Core Engine modules for Chisa.
3
+
4
+ Contains the logic-driven Dimensional Algebra Engine, the global Unit Registry,
5
+ the Base Unit architecture, and the Axiom physics modeling modifiers. This package
6
+ forms the low-level computational backbone for strict dimensional validation,
7
+ scalar mathematics, and high-performance vectorized NumPy integration.
8
+ """