pint-cf 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pint_cf-0.1.0/LICENSE +21 -0
- pint_cf-0.1.0/PKG-INFO +65 -0
- pint_cf-0.1.0/README.md +49 -0
- pint_cf-0.1.0/pyproject.toml +53 -0
- pint_cf-0.1.0/setup.cfg +4 -0
- pint_cf-0.1.0/src/pint_cf/__init__.py +6 -0
- pint_cf-0.1.0/src/pint_cf/parser.py +337 -0
- pint_cf-0.1.0/src/pint_cf/units.py +86 -0
- pint_cf-0.1.0/src/pint_cf.egg-info/PKG-INFO +65 -0
- pint_cf-0.1.0/src/pint_cf.egg-info/SOURCES.txt +13 -0
- pint_cf-0.1.0/src/pint_cf.egg-info/dependency_links.txt +1 -0
- pint_cf-0.1.0/src/pint_cf.egg-info/requires.txt +2 -0
- pint_cf-0.1.0/src/pint_cf.egg-info/top_level.txt +1 -0
- pint_cf-0.1.0/tests/test_formatter.py +166 -0
- pint_cf-0.1.0/tests/test_parser.py +416 -0
pint_cf-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Pedro Gámez
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
pint_cf-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pint-cf
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CF-compliant unit registry and formatter for Pint
|
|
5
|
+
Author: Pedro Gámez
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
10
|
+
Requires-Python: >=3.12
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: Pint>=0.25.2
|
|
14
|
+
Requires-Dist: lark
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
|
|
17
|
+
# pint-cf
|
|
18
|
+
|
|
19
|
+
This package extends Pint with a CF-compliant unit registry and formatter.
|
|
20
|
+
|
|
21
|
+
## Features
|
|
22
|
+
|
|
23
|
+
This package provides:
|
|
24
|
+
|
|
25
|
+
- A CF-compliant Pint UnitRegistry, with explicit plurals.
|
|
26
|
+
- A custom formatter `cf`, supporting both long and short (symbol) formats.
|
|
27
|
+
|
|
28
|
+
Known limitations:
|
|
29
|
+
|
|
30
|
+
- Pint does not support **time coordinates** (e.g.
|
|
31
|
+
`days since 2001-01-01`) or **climate calendars** (e.g., `360_days`).
|
|
32
|
+
Consider using [cftime](https://unidata.github.io/cftime/) for that.
|
|
33
|
+
- Pint does not support **scaling factors** in Unit expressions (e.g.
|
|
34
|
+
`pint.Unit('0.1 m')`). You can work around this by adding your own
|
|
35
|
+
[unit definition](https://pint.readthedocs.io/en/stable/advanced/defining.html#programmatically).
|
|
36
|
+
|
|
37
|
+
## Usage
|
|
38
|
+
|
|
39
|
+
Create a CF-ready unit registry with `cf_unitregistry()`.
|
|
40
|
+
This function also registers the `cf` formatter.
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from pint_cf import cf_unitregistry
|
|
44
|
+
|
|
45
|
+
ureg = cf_unitregistry()
|
|
46
|
+
print(type(ureg)) # <class 'pint.registry.UnitRegistry'>
|
|
47
|
+
|
|
48
|
+
q = ureg('10 meters per second^2').to('km s-2')
|
|
49
|
+
|
|
50
|
+
print(f"{q:cf}") # 0.01 kilometer-second^-2
|
|
51
|
+
print(f"{q:~cf}") # 0.01 km/s2
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
<!-- If you prefer Pint's global application registry:
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
import pint
|
|
58
|
+
from pint_cf import cf_unitregistry
|
|
59
|
+
|
|
60
|
+
ureg = cf_unitregistry()
|
|
61
|
+
pint.set_application_registry(ureg)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
After that, quantities created through `pint.get_application_registry()` will
|
|
65
|
+
use the same CF configuration. -->
|
pint_cf-0.1.0/README.md
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# pint-cf
|
|
2
|
+
|
|
3
|
+
This package extends Pint with a CF-compliant unit registry and formatter.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
This package provides:
|
|
8
|
+
|
|
9
|
+
- A CF-compliant Pint UnitRegistry, with explicit plurals.
|
|
10
|
+
- A custom formatter `cf`, supporting both long and short (symbol) formats.
|
|
11
|
+
|
|
12
|
+
Known limitations:
|
|
13
|
+
|
|
14
|
+
- Pint does not support **time coordinates** (e.g.
|
|
15
|
+
`days since 2001-01-01`) or **climate calendars** (e.g., `360_day`).
|
|
16
|
+
Consider using [cftime](https://unidata.github.io/cftime/) for that.
|
|
17
|
+
- Pint does not support **scaling factors** in Unit expressions (e.g.
|
|
18
|
+
`pint.Unit('0.1 m')`). You can work around this by adding your own
|
|
19
|
+
[unit definition](https://pint.readthedocs.io/en/stable/advanced/defining.html#programmatically).
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
Create a CF-ready unit registry with `cf_unitregistry()`.
|
|
24
|
+
This function also registers the `cf` formatter.
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from pint_cf import cf_unitregistry
|
|
28
|
+
|
|
29
|
+
ureg = cf_unitregistry()
|
|
30
|
+
print(type(ureg)) # <class 'pint.registry.UnitRegistry'>
|
|
31
|
+
|
|
32
|
+
q = ureg('10 meters per second^2').to('km s-2')
|
|
33
|
+
|
|
34
|
+
print(f"{q:cf}") # 0.01 kilometer-second^-2
|
|
35
|
+
print(f"{q:~cf}") # 0.01 km/s2
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
<!-- If you prefer Pint's global application registry:
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
import pint
|
|
42
|
+
from pint_cf import cf_unitregistry
|
|
43
|
+
|
|
44
|
+
ureg = cf_unitregistry()
|
|
45
|
+
pint.set_application_registry(ureg)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
After that, quantities created through `pint.get_application_registry()` will
|
|
49
|
+
use the same CF configuration. -->
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[project]
name = "pint-cf"
authors = [{ name = "Pedro Gámez" }]
description = "CF-compliant unit registry and formatter for Pint"
dynamic = ["version"]
readme = "README.md"
requires-python = ">=3.12"
license = "MIT"
classifiers = [
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
]
dependencies = ["Pint>=0.25.2", "lark"]

[dependency-groups]
tests = ["pytest", "pytest-cov"]
build = ["build", "twine"]
tools = ["defusedxml"]
dev = [
    { include-group = "tests" },
    { include-group = "tools" },
    { include-group = "build" },
    "cf-units",
    "cfunits",
    "prek",
    "ruff",
]

[tool.setuptools.dynamic]
version = { attr = "pint_cf.__version__" }

[tool.setuptools.packages.find]
where = ["src/"]

# The Lark grammar and the UDUNITS-2 registry definitions are loaded at runtime
# (parser.get_parser and units.cf_unitregistry). They are not Python modules,
# so they must be declared explicitly or they are left out of the sdist/wheel.
[tool.setuptools.package-data]
pint_cf = ["resources/*.lark", "resources/registry/*"]

[tool.ruff.lint]
select = ["E", "F", "I", "B", "S"]

[tool.ruff.lint.per-file-ignores]
"tests/*" = ["S101"]

[tool.pytest.ini_options]
minversion = "6.0"
# addopts = ""
testpaths = ["tests"]
pythonpath = ["src"]

[tool.coverage.run]
omit = ["tests/*", "*/__init__.py"]
|
pint_cf-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
"""
|
|
2
|
+
UDUNITS-2 to Pint String Transformer
|
|
3
|
+
|
|
4
|
+
Converts Lark parse trees from udunits2.lark into strings
|
|
5
|
+
that can be parsed directly by pint.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from lark import Lark, Token, Transformer, v_args
|
|
11
|
+
|
|
12
|
+
# Unicode superscript to ASCII digit mapping
|
|
13
|
+
_SUPERSCRIPT_MAP = {
|
|
14
|
+
"⁰": "0",
|
|
15
|
+
"¹": "1",
|
|
16
|
+
"²": "2",
|
|
17
|
+
"³": "3",
|
|
18
|
+
"⁴": "4",
|
|
19
|
+
"⁵": "5",
|
|
20
|
+
"⁶": "6",
|
|
21
|
+
"⁷": "7",
|
|
22
|
+
"⁸": "8",
|
|
23
|
+
"⁹": "9",
|
|
24
|
+
"⁺": "+",
|
|
25
|
+
"⁻": "-",
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
_ANGLE_SYMBOL_REPLACEMENTS = {
|
|
29
|
+
"'": "arc_minute",
|
|
30
|
+
'"': "arc_second",
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _decode_superscript(s: str) -> str:
    """Replace every character found in ``_SUPERSCRIPT_MAP`` with its ASCII form.

    Characters that are not keys of the map are left unchanged.
    """
    translation_table = str.maketrans(_SUPERSCRIPT_MAP)
    return s.translate(translation_table)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _normalize_identifier(name: str) -> str:
    """Substitute angle symbols in *name* with their explicit unit names.

    Every occurrence of a key of ``_ANGLE_SYMBOL_REPLACEMENTS`` is replaced,
    processing the keys in the mapping's insertion order.
    """
    normalized = name
    for symbol in _ANGLE_SYMBOL_REPLACEMENTS:
        normalized = normalized.replace(symbol, _ANGLE_SYMBOL_REPLACEMENTS[symbol])
    return normalized
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _split_id_with_exponent(token_str: str) -> tuple[str, str]:
|
|
47
|
+
"""Split trailing signed integer exponent from an identifier token."""
|
|
48
|
+
idx = len(token_str)
|
|
49
|
+
while idx > 0 and token_str[idx - 1].isdigit():
|
|
50
|
+
idx -= 1
|
|
51
|
+
|
|
52
|
+
# The grammar guarantees that exponent digits exist at the end.
|
|
53
|
+
if idx == len(token_str):
|
|
54
|
+
raise ValueError(f"ID_WITH_EXP token without trailing digits: {token_str}")
|
|
55
|
+
|
|
56
|
+
if idx > 0 and token_str[idx - 1] == "-":
|
|
57
|
+
idx -= 1
|
|
58
|
+
|
|
59
|
+
base_name = token_str[:idx]
|
|
60
|
+
exponent = token_str[idx:]
|
|
61
|
+
|
|
62
|
+
if not base_name or not exponent:
|
|
63
|
+
raise ValueError(f"Invalid ID_WITH_EXP token: {token_str}")
|
|
64
|
+
|
|
65
|
+
return base_name, exponent
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@v_args(inline=True)
class UdunitsToPintTransformer(Transformer):
    """
    Turn a Lark parse tree produced from udunits2.lark into a string
    that pint can parse directly.

    The public methods are Lark callbacks (presumably one per rule or alias
    of udunits2.lark; the grammar is not part of this module). Because of
    ``v_args(inline=True)`` each callback receives the children of its node
    as positional arguments, and each returns the text for its subtree.

    Example:
        >>> parser = get_parser()
        >>> tree = parser.parse("kg.m/s2")
        >>> result = UdunitsToPintTransformer().transform(tree)
        >>> print(result)  # "kg * m / s ** 2"
    """

    # UDUNITS logarithm keyword -> (logbase, logfactor) as used in the
    # "scale; logbase: X; logfactor: Y" text emitted by logarithm().
    _LOG_PARAMS = {
        "lg": ("10", "10"),  # base 10, decibels
        "log": ("10", "10"),  # base 10, decibels (alias)
        "ln": (
            "2.71828182845904523536028747135266249775724709369995",
            "0.5",
        ),  # natural log, neper
        "lb": ("2", "1"),  # base 2, octaves
    }

    # -------------------------------------------------------------------------
    # Private helpers
    # -------------------------------------------------------------------------

    @staticmethod
    def _binary_operands(args: tuple) -> tuple[str, str]:
        """Pick the operands from ``(left, right)`` or ``(left, op, right)``."""
        if len(args) not in (2, 3):
            raise ValueError(f"Unexpected binary args: {args}")
        # With three children the middle one is the operator token.
        return args[0], args[-1]

    @staticmethod
    def _spaced(*parts) -> str:
        """Format every part and join them with single spaces."""
        return " ".join(format(part) for part in parts)

    # -------------------------------------------------------------------------
    # Numbers
    # -------------------------------------------------------------------------

    def int(self, value: Token) -> str:
        """Integer literal, returned as plain text."""
        return f"{value}"

    def real(self, value: Token) -> str:
        """Real number literal, returned as plain text."""
        return f"{value}"

    def scalar(self, value: str) -> str:
        """Scalar value; already converted, so it is returned unchanged."""
        return value

    # -------------------------------------------------------------------------
    # Identifiers
    # -------------------------------------------------------------------------

    def identifier(self, name: Token) -> str:
        """Unit identifier (e.g., 'm', 'kilogram', '°'), with angle symbols normalized."""
        raw_name = str(name)
        return _normalize_identifier(raw_name)

    def power_from_id(self, name: Token) -> str:
        """Identifier carrying its own exponent (e.g., 'm2', 's-1')."""
        return "{} ** {}".format(*_split_id_with_exponent(str(name)))

    # -------------------------------------------------------------------------
    # Arithmetic operations
    # -------------------------------------------------------------------------

    def multiply(self, *args) -> str:
        """Product of two operands, written explicitly or by juxtaposition."""
        return "{} * {}".format(*self._binary_operands(args))

    def divide(self, *args) -> str:
        """Quotient of two operands."""
        return "{} / {}".format(*self._binary_operands(args))

    def power(self, base: str, *args) -> str:
        """
        Exponentiation. Accepted child layouts:
        - base, EXPONENT           (e.g., m²)
        - base, RAISE, INTEGER     (e.g., m^2, m**2)
        """
        if len(args) not in (1, 2):
            raise ValueError(f"Unexpected power args: {args}")

        # The exponent is always last; a RAISE operator before it is ignored.
        # Decoding leaves text without superscript characters unchanged.
        exponent = _decode_superscript(str(args[-1]))
        return f"{base} ** {exponent}"

    # -------------------------------------------------------------------------
    # Grouping
    # -------------------------------------------------------------------------

    def group(self, inner: str) -> str:
        """Parenthesized sub-expression."""
        return "(" + f"{inner}" + ")"

    # -------------------------------------------------------------------------
    # Logarithms
    # -------------------------------------------------------------------------

    def logarithm(self, logref: Token, argument: str) -> str:
        """
        Logarithmic units (e.g., lg(re 1 mW), ln(re Pa)).

        Emits pint's logarithmic unit notation:
            scale; logbase: X; logfactor: Y

        The keyword is read case-insensitively from the start of *logref*;
        an unrecognized keyword is treated as "lg" (decibels).
        """
        keyword_text = str(logref).strip().lower()

        # "log" is tested before "lg", matching the original precedence.
        log_type = next(
            (kind for kind in ("log", "lg", "ln", "lb") if keyword_text.startswith(kind)),
            "lg",
        )
        logbase, logfactor = self._LOG_PARAMS.get(log_type, ("10", "10"))

        return f"{argument}; logbase: {logbase}; logfactor: {logfactor}"

    # -------------------------------------------------------------------------
    # Shifts (offset units)
    # -------------------------------------------------------------------------

    def shift_op(self, op: Token) -> str:
        """Lower-cased text of the shift operator."""
        return str(op).lower()

    def shift_by_number(self, unit: str, shift_op: str, offset: str) -> str:
        """
        Shift by a numeric offset (e.g., K @ 273.15).

        Emits pint's offset notation "unit; offset: value"; the operator
        itself does not appear in the output.
        """
        return "{}; offset: {}".format(unit, offset)

    def shift_by_time(self, unit: str, shift_op: str, timestamp: str) -> str:
        """
        Shift by a timestamp (e.g., seconds since 1970-01-01).

        Not supported: this callback always raises.

        Raises:
            NotImplementedError: Unconditionally.
        """
        raise NotImplementedError(
            "Time-based offsets are not directly supported by pint. "
            "Consider using a dedicated time handling library like cftime "
            "for this use case (see: https://unidata.github.io/cftime/)"
        )

    # -------------------------------------------------------------------------
    # Timestamps
    # -------------------------------------------------------------------------

    def date_only(self, date: Token) -> str:
        """Date without time (e.g., 1970-01-01)."""
        return self._spaced(date)

    def date_time(self, date: Token, clock: Token) -> str:
        """Date with time (e.g., 1970-01-01 00:00:00)."""
        return self._spaced(date, clock)

    def date_time_offset(self, date: Token, clock: Token, offset: Token) -> str:
        """Date with time and a timezone offset."""
        return self._spaced(date, clock, offset)

    def date_time_tz(self, date: Token, clock: Token, tz: Token) -> str:
        """Date with time and a timezone identifier."""
        return self._spaced(date, clock, tz)

    def datetime_iso(self, dt: Token) -> str:
        """ISO 8601 datetime (e.g., 1970-01-01T00:00:00)."""
        return self._spaced(dt)

    def datetime_iso_tz(self, dt: Token, tz: Token) -> str:
        """ISO 8601 datetime followed by a timezone."""
        return self._spaced(dt, tz)

    def packed_timestamp(self, ts: Token) -> str:
        """Packed timestamp (e.g., 19700101T000000)."""
        return self._spaced(ts)

    def packed_timestamp_offset(self, ts: Token, offset: Token) -> str:
        """Packed timestamp followed by an offset."""
        return self._spaced(ts, offset)

    def packed_timestamp_tz(self, ts: Token, tz: Token) -> str:
        """Packed timestamp followed by a timezone."""
        return self._spaced(ts, tz)
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
# =============================================================================
|
|
279
|
+
# Convenience functions
|
|
280
|
+
# =============================================================================
|
|
281
|
+
|
|
282
|
+
# Parser instance shared by all callers; built on the first get_parser() call.
_parser: Lark | None = None


def get_parser() -> Lark:
    """
    Return the UDUNITS-2 parser, building and caching it on first use.

    The grammar is read from ``resources/udunits2.lark`` next to this module.
    Later calls return the same ``Lark`` instance.

    Returns:
        The cached parser (LALR with a contextual lexer).

    Raises:
        OSError: If the grammar file cannot be read (for example, it is
            missing from the installation).
    """
    global _parser
    if _parser is None:
        grammar_path = Path(__file__).parent / "resources" / "udunits2.lark"
        _parser = Lark(
            # Decode explicitly as UTF-8: without an encoding, read_text()
            # falls back to the locale default, which is not UTF-8 everywhere.
            # NOTE(review): the grammar presumably contains non-ASCII symbols
            # (the transformer handles '°' and superscripts) - confirm.
            grammar_path.read_text(encoding="utf-8"),
            parser="lalr",
            lexer="contextual",
            maybe_placeholders=False,
        )
    return _parser
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def cf_string_to_pint(unit_string: str) -> str:
    """
    Convert a UDUNITS-2 unit string to pint-compatible format.

    Empty or whitespace-only input is treated as dimensionless and yields
    ``"1"`` without invoking the parser. ``"1"`` is also returned when the
    parse tree is empty or the transformation does not produce a string.

    Args:
        unit_string: A unit specification in UDUNITS-2 format.

    Returns:
        A string that pint can parse directly.

    Raises:
        Exception: Errors from the parser or the transformer are not caught
            here. Timestamp shifts such as ``"seconds since 1970-01-01"`` are
            rejected by ``UdunitsToPintTransformer.shift_by_time`` with a
            ``NotImplementedError`` (presumably surfaced by Lark wrapped in a
            ``VisitError`` - confirm against the tests).

    Examples:
        >>> cf_string_to_pint("")
        '1'
        >>> cf_string_to_pint("m")
        'm'
        >>> cf_string_to_pint("m2")
        'm ** 2'
        >>> cf_string_to_pint("kg.m/s2")
        'kg * m / s ** 2'
        >>> cf_string_to_pint("K @ 273.15")
        'K; offset: 273.15'
    """
    # Nothing to parse: dimensionless.
    if not unit_string or unit_string.isspace():
        return "1"

    tree = get_parser().parse(unit_string)

    # An empty parse result also means dimensionless.
    if tree is None or (hasattr(tree, "children") and len(tree.children) == 0):
        return "1"

    result = UdunitsToPintTransformer().transform(tree)

    # Anything other than a string means no callback produced text for the root.
    if not isinstance(result, str):
        return "1"

    return result
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from collections.abc import Iterable
|
|
2
|
+
from importlib import resources
|
|
3
|
+
from typing import Any, Unpack
|
|
4
|
+
|
|
5
|
+
import pint
|
|
6
|
+
from pint.delegates.formatter._compound_unit_helpers import (
|
|
7
|
+
BabelKwds,
|
|
8
|
+
SortFunc,
|
|
9
|
+
prepare_compount_unit,
|
|
10
|
+
)
|
|
11
|
+
from pint.delegates.formatter._spec_helpers import REGISTERED_FORMATTERS
|
|
12
|
+
from pint.delegates.formatter.plain import DefaultFormatter
|
|
13
|
+
from pint.facets.plain import PlainUnit
|
|
14
|
+
|
|
15
|
+
from .parser import cf_string_to_pint
|
|
16
|
+
|
|
17
|
+
_CF_FORMATTER_NAME = "cf"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CFFormatter(DefaultFormatter):
    """Pint formatter that renders units as CF-style strings.

    Two layouts are produced, selected by the presence of ``~`` in the spec:

    - with ``~``: ``.`` between factors, ``/`` for division, the exponent
      appended directly to the unit, rendered as a ratio;
    - without: ``-`` between factors, ``^`` before the exponent, no ratio.
    """

    def format_unit(
        self,
        unit: PlainUnit | Iterable[tuple[str, Any]],
        uspec: str = "",
        sort_func: SortFunc | None = None,
        **babel_kwds: Unpack[BabelKwds],
    ) -> str:
        """Format *unit* according to *uspec*.

        Returns:
            The formatted text with the markers ``Δ``, ``delta_`` and
            ``dimensionless`` removed, or ``"1"`` if nothing is left.
        """
        short_form = "~" in uspec
        if short_form:
            layout = {"product_fmt": ".", "division_fmt": "/", "power_fmt": "{}{}"}
        else:
            layout = {"product_fmt": "-", "division_fmt": " per ", "power_fmt": "{}^{}"}

        numerator, denominator = prepare_compount_unit(
            unit,
            uspec,
            sort_func=sort_func,
            **babel_kwds,
            registry=self._registry,
        )

        text = pint.formatter(
            numerator,
            denominator,
            as_ratio=short_form,
            **layout,
        )

        # Strip the markers one after another, in this fixed order.
        for marker in ("Δ", "delta_", "dimensionless"):
            text = text.replace(marker, "")

        # An empty result stands for a dimensionless unit.
        return text or "1"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _register_cf_formatter(ureg: pint.UnitRegistry) -> None:
    """Install a ``CFFormatter`` bound to *ureg* under the CF formatter name.

    The same instance is stored in the registry's own formatter table and in
    pint's module-level ``REGISTERED_FORMATTERS`` mapping.
    """
    cf_formatter = CFFormatter(ureg)
    for table in (ureg.formatter._formatters, REGISTERED_FORMATTERS):
        table[_CF_FORMATTER_NAME] = cf_formatter
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def cf_unitregistry() -> pint.UnitRegistry:
    """Create a CF-ready UnitRegistry with parser and formatter configured.

    The registry is loaded from the packaged ``udunits2.txt`` definitions,
    preprocesses input strings with ``cf_string_to_pint``, and has the ``cf``
    formatter registered.

    Returns:
        The configured ``pint.UnitRegistry``.
    """
    # files() + as_file() is the documented replacement for resources.path(),
    # which emits a DeprecationWarning on Python 3.12.
    definitions = resources.files("pint_cf.resources.registry").joinpath(
        "udunits2.txt"
    )
    with resources.as_file(definitions) as filename:
        ureg = pint.UnitRegistry(
            filename=str(filename),
            autoconvert_offset_to_baseunit=True,
            preprocessors=[cf_string_to_pint],
        )

    # NOTE: "cf" is not made the registry's default format; callers opt in
    # through the "cf" / "~cf" format specs.

    # Deactivate Pint's native pluralization, since UDUNITS2 already
    # defines plural forms for units
    ureg._suffixes = {"": ""}
    _register_cf_formatter(ureg)
    return ureg
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pint-cf
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CF-compliant unit registry and formatter for Pint
|
|
5
|
+
Author: Pedro Gámez
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
10
|
+
Requires-Python: >=3.12
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: Pint>=0.25.2
|
|
14
|
+
Requires-Dist: lark
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
|
|
17
|
+
# pint-cf
|
|
18
|
+
|
|
19
|
+
This package extends Pint with a CF-compliant unit registry and formatter.
|
|
20
|
+
|
|
21
|
+
## Features
|
|
22
|
+
|
|
23
|
+
This package provides:
|
|
24
|
+
|
|
25
|
+
- A CF-compliant Pint UnitRegistry, with explicit plurals.
|
|
26
|
+
- A custom formatter `cf`, supporting both long and short (symbol) formats.
|
|
27
|
+
|
|
28
|
+
Known limitations:
|
|
29
|
+
|
|
30
|
+
- Pint does not support **time coordinates** (e.g.
|
|
31
|
+
`days since 2001-01-01`) or **climate calendars** (e.g., `360_days`).
|
|
32
|
+
Consider using [cftime](https://unidata.github.io/cftime/) for that.
|
|
33
|
+
- Pint does not support **scaling factors** in Unit expressions (e.g.
|
|
34
|
+
`pint.Unit('0.1 m')`). You can work around this by adding your own
|
|
35
|
+
[unit definition](https://pint.readthedocs.io/en/stable/advanced/defining.html#programmatically).
|
|
36
|
+
|
|
37
|
+
## Usage
|
|
38
|
+
|
|
39
|
+
Create a CF-ready unit registry with `cf_unitregistry()`.
|
|
40
|
+
This function also registers the `cf` formatter.
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
from pint_cf import cf_unitregistry
|
|
44
|
+
|
|
45
|
+
ureg = cf_unitregistry()
|
|
46
|
+
print(type(ureg)) # <class 'pint.registry.UnitRegistry'>
|
|
47
|
+
|
|
48
|
+
q = ureg('10 meters per second^2').to('km s-2')
|
|
49
|
+
|
|
50
|
+
print(f"{q:cf}") # 0.01 kilometer-second^-2
|
|
51
|
+
print(f"{q:~cf}") # 0.01 km/s2
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
<!-- If you prefer Pint's global application registry:
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
import pint
|
|
58
|
+
from pint_cf import cf_unitregistry
|
|
59
|
+
|
|
60
|
+
ureg = cf_unitregistry()
|
|
61
|
+
pint.set_application_registry(ureg)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
After that, quantities created through `pint.get_application_registry()` will
|
|
65
|
+
use the same CF configuration. -->
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/pint_cf/__init__.py
|
|
5
|
+
src/pint_cf/parser.py
|
|
6
|
+
src/pint_cf/units.py
|
|
7
|
+
src/pint_cf.egg-info/PKG-INFO
|
|
8
|
+
src/pint_cf.egg-info/SOURCES.txt
|
|
9
|
+
src/pint_cf.egg-info/dependency_links.txt
|
|
10
|
+
src/pint_cf.egg-info/requires.txt
|
|
11
|
+
src/pint_cf.egg-info/top_level.txt
|
|
12
|
+
tests/test_formatter.py
|
|
13
|
+
tests/test_parser.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pint_cf
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Tests for the CF formatter.
|
|
3
|
+
|
|
4
|
+
The CF formatter supports two modes via format specifiers:
|
|
5
|
+
- "cf" : long form (product="-", power="^", no ratio)
|
|
6
|
+
- "~cf" : short form (product=".", power=implicit digits, ratio with "/")
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import pint
|
|
10
|
+
import pytest
|
|
11
|
+
|
|
12
|
+
from pint_cf import cf_unitregistry
|
|
13
|
+
|
|
14
|
+
# Test cases: (input_unit, expected_cf, expected_short_cf)
|
|
15
|
+
TEST_CASES_FORMAT = [
|
|
16
|
+
# =====================================================================
|
|
17
|
+
# Dimensionless
|
|
18
|
+
# =====================================================================
|
|
19
|
+
("1", "1", "1"),
|
|
20
|
+
# =====================================================================
|
|
21
|
+
# Base units: long name ↔ symbol
|
|
22
|
+
# =====================================================================
|
|
23
|
+
("meter", "meter", "m"),
|
|
24
|
+
("kilogram", "kilogram", "kg"),
|
|
25
|
+
("second", "second", "s"),
|
|
26
|
+
("kelvin", "kelvin", "K"),
|
|
27
|
+
("ampere", "ampere", "A"),
|
|
28
|
+
("mole", "mole", "mol"),
|
|
29
|
+
("candela", "candela", "cd"),
|
|
30
|
+
# Short-form input produces same output
|
|
31
|
+
("m", "meter", "m"),
|
|
32
|
+
("kg", "kilogram", "kg"),
|
|
33
|
+
("s", "second", "s"),
|
|
34
|
+
("K", "kelvin", "K"),
|
|
35
|
+
("A", "ampere", "A"),
|
|
36
|
+
("mol", "mole", "mol"),
|
|
37
|
+
("cd", "candela", "cd"),
|
|
38
|
+
# =====================================================================
|
|
39
|
+
# Prefixed units
|
|
40
|
+
# =====================================================================
|
|
41
|
+
("kilometer", "kilometer", "km"),
|
|
42
|
+
("centimeter", "centimeter", "cm"),
|
|
43
|
+
("millimeter", "millimeter", "mm"),
|
|
44
|
+
("micrometer", "micrometer", "µm"),
|
|
45
|
+
("milligram", "milligram", "mg"),
|
|
46
|
+
("millisecond", "millisecond", "ms"),
|
|
47
|
+
("microsecond", "microsecond", "µs"),
|
|
48
|
+
("hectopascal", "hectopascal", "hPa"),
|
|
49
|
+
# Short-form input
|
|
50
|
+
("km", "kilometer", "km"),
|
|
51
|
+
("cm", "centimeter", "cm"),
|
|
52
|
+
("mm", "millimeter", "mm"),
|
|
53
|
+
# =====================================================================
|
|
54
|
+
# Derived units
|
|
55
|
+
# =====================================================================
|
|
56
|
+
("newton", "newton", "N"),
|
|
57
|
+
("pascal", "pascal", "Pa"),
|
|
58
|
+
("joule", "joule", "J"),
|
|
59
|
+
("watt", "watt", "W"),
|
|
60
|
+
("hertz", "hertz", "Hz"),
|
|
61
|
+
("volt", "volt", "V"),
|
|
62
|
+
("ohm", "ohm", "Ω"),
|
|
63
|
+
# Short-form input
|
|
64
|
+
("N", "newton", "N"),
|
|
65
|
+
("Pa", "pascal", "Pa"),
|
|
66
|
+
("J", "joule", "J"),
|
|
67
|
+
("W", "watt", "W"),
|
|
68
|
+
("Hz", "hertz", "Hz"),
|
|
69
|
+
# =====================================================================
|
|
70
|
+
# Non-SI units
|
|
71
|
+
# =====================================================================
|
|
72
|
+
("degree", "arc_degree", "°"),
|
|
73
|
+
("radian", "radian", "rad"),
|
|
74
|
+
("steradian", "steradian", "sr"),
|
|
75
|
+
("liter", "liter", "L"),
|
|
76
|
+
("day", "day", "d"),
|
|
77
|
+
("hour", "hour", "h"),
|
|
78
|
+
("minute", "minute", "min"),
|
|
79
|
+
# =====================================================================
|
|
80
|
+
# Temperature
|
|
81
|
+
# =====================================================================
|
|
82
|
+
("degree_Celsius", "degree_Celsius", "°C"),
|
|
83
|
+
("degC", "degree_Celsius", "°C"),
|
|
84
|
+
# =====================================================================
|
|
85
|
+
# Plural forms (normalized to singular by registry)
|
|
86
|
+
# =====================================================================
|
|
87
|
+
("meters", "meter", "m"),
|
|
88
|
+
("seconds", "second", "s"),
|
|
89
|
+
("kilometers", "kilometer", "km"),
|
|
90
|
+
# =====================================================================
|
|
91
|
+
# Single-unit powers
|
|
92
|
+
# Long: "unit^exp" | Short: "unitEXP" (positive) or "1/unitEXP" (negative)
|
|
93
|
+
# =====================================================================
|
|
94
|
+
("m^2", "meter^2", "m2"),
|
|
95
|
+
("m^3", "meter^3", "m3"),
|
|
96
|
+
("s^-1", "second^-1", "1/s"),
|
|
97
|
+
("m^-2", "meter^-2", "1/m2"),
|
|
98
|
+
# =====================================================================
|
|
99
|
+
# Product of units (MULTIPLY)
|
|
100
|
+
# Long: "-" separator, no ratio | Short: "." separator with "/" ratio
|
|
101
|
+
# =====================================================================
|
|
102
|
+
("Pa s", "pascal-second", "Pa.s"),
|
|
103
|
+
# =====================================================================
|
|
104
|
+
# Division / compound (DIVIDE)
|
|
105
|
+
# Long: negative exponents with "-" | Short: "/" ratio
|
|
106
|
+
# =====================================================================
|
|
107
|
+
("m/s", "meter-second^-1", "m/s"),
|
|
108
|
+
("m/s^2", "meter-second^-2", "m/s2"),
|
|
109
|
+
("kg/m^3", "kilogram-meter^-3", "kg/m3"),
|
|
110
|
+
("W/m^2", "watt-meter^-2", "W/m2"),
|
|
111
|
+
("mol/m^3", "mole-meter^-3", "mol/m3"),
|
|
112
|
+
("K/m", "kelvin-meter^-1", "K/m"),
|
|
113
|
+
("g/kg", "gram-kilogram^-1", "g/kg"),
|
|
114
|
+
("mm/day", "millimeter-day^-1", "mm/d"),
|
|
115
|
+
("mg/kg", "milligram-kilogram^-1", "mg/kg"),
|
|
116
|
+
("1/s", "second^-1", "1/s"),
|
|
117
|
+
# =====================================================================
|
|
118
|
+
# Complex compounds: numerator and denominator
|
|
119
|
+
# Long: all terms with "-" and "^" | Short: "." and "/" with implicit exp
|
|
120
|
+
# =====================================================================
|
|
121
|
+
("kg m/s^2", "kilogram-meter-second^-2", "kg.m/s2"),
|
|
122
|
+
("kg m^2/s^2", "kilogram-meter^2-second^-2", "kg.m2/s2"),
|
|
123
|
+
("kg m^2 s^-2", "kilogram-meter^2-second^-2", "kg.m2/s2"),
|
|
124
|
+
("kg m^-1 s^-2", "kilogram-meter^-1-second^-2", "kg/m/s2"),
|
|
125
|
+
("kg/(m s^2)", "kilogram-meter^-1-second^-2", "kg/m/s2"),
|
|
126
|
+
("kg m^2 s^-2 K^-1", "kilogram-meter^2-kelvin^-1-second^-2", "kg.m2/K/s2"),
|
|
127
|
+
("mol m^-3", "mole-meter^-3", "mol/m3"),
|
|
128
|
+
("W m^-2", "watt-meter^-2", "W/m2"),
|
|
129
|
+
# =====================================================================
|
|
130
|
+
# Division with parenthesized denominator
|
|
131
|
+
# (same physical unit, just different input syntax)
|
|
132
|
+
# =====================================================================
|
|
133
|
+
("J/(kg K)", "joule-kelvin^-1-kilogram^-1", "J/K/kg"),
|
|
134
|
+
("W/(m^2 K)", "watt-kelvin^-1-meter^-2", "W/K/m2"),
|
|
135
|
+
]
|
|
136
|
+
|
|
137
|
+
# Per-mode projections of TEST_CASES_FORMAT: (input, expected long form)
# and (input, expected short form).
_TEST_CASES_FORMAT_LONG = [(source, long_form) for source, long_form, _ in TEST_CASES_FORMAT]
_TEST_CASES_FORMAT_SHORT = [(source, short_form) for source, _, short_form in TEST_CASES_FORMAT]
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@pytest.fixture(scope="module")
def ureg() -> pint.UnitRegistry:
    """Provide the registry built by ``cf_unitregistry`` to the tests.

    The fixture is module-scoped, so a single registry is created and then
    shared by every test in this module that requests it.
    """
    registry = cf_unitregistry()
    return registry
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@pytest.mark.parametrize("input_str, expected", _TEST_CASES_FORMAT_LONG)
def test_cf_format(ureg: pint.UnitRegistry, input_str: str, expected: str) -> None:
    """Verify the long-form ``cf`` format spec.

    In the long form, products are joined with ``-``, powers are written with
    ``^`` and no ratio notation is used.
    """
    result = format(ureg.Unit(input_str), "cf")
    failure_note = (
        f"format(Unit('{input_str}'), 'cf') = {result!r}, expected {expected!r}"
    )
    assert result == expected, failure_note
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@pytest.mark.parametrize("input_str, expected", _TEST_CASES_FORMAT_SHORT)
def test_cf_short_format(
    ureg: pint.UnitRegistry, input_str: str, expected: str
) -> None:
    """Verify the short-form ``~cf`` format spec.

    In the short form, products are joined with ``.``, exponents are written
    implicitly (no operator) and ratios use ``/``.
    """
    result = format(ureg.Unit(input_str), "~cf")
    failure_note = (
        f"format(Unit('{input_str}'), '~cf') = {result!r}, expected {expected!r}"
    )
    assert result == expected, failure_note
|
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TODO:
|
|
3
|
+
|
|
4
|
+
- https://docs.unidata.ucar.edu/udunits/current/udunits2lib.html#Grammar
|
|
5
|
+
- dates (they contain the "-" sign)
|
|
6
|
+
- # ("days since 1970-01-01", "a; offset: 1"), -> not implemented error
|
|
7
|
+
- The XML also needs to be tested
|
|
8
|
+
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import pytest
|
|
12
|
+
|
|
13
|
+
# from cfunits import Units
|
|
14
|
+
from lark.exceptions import UnexpectedInput, VisitError
|
|
15
|
+
from pint.delegates import ParserConfig
|
|
16
|
+
from pint.delegates.txt_defparser.plain import UnitDefinition
|
|
17
|
+
|
|
18
|
+
from pint_cf.parser import cf_string_to_pint
|
|
19
|
+
|
|
20
|
+
# Each entry is (CF unit string, pint expression the parser should produce).
# test_transform parses both sides as unit definitions before comparing them,
# so the right-hand side only has to define the same unit as the parser output.
TEST_CASES_TRANSFORM = [
    # =========================================================================
    # Unit-Spec: nothing
    # =========================================================================
    ("", ""),
    ("1", ""),
    # =========================================================================
    # Basic-Spec → ID: simple identifiers
    # =========================================================================
    ("a", "a"),
    ("m", "m"),
    ("kg", "kg"),
    ("kilogram", "kilogram"),
    ("degrees_Celsius", "degrees_Celsius"),
    ("degrees_north", "degrees_north"),
    ("degrees_east", "degrees_east"),
    ("hPa", "hPa"),
    ("dBm", "dBm"),
    # Special symbol IDs
    ("°", "°"),
    ("°F", "°F"),
    # ("µ", "µ"),
    ("'", "arc_minute"),
    ('"', "arc_second"),
    ("'/60", "arc_minute / 60"),
    # =========================================================================
    # Basic-Spec → Number: INT and REAL
    # =========================================================================
    ("2", "2"),
    ("10", "10"),
    ("100", "100"),
    ("+1", "+1"),
    ("-1", "-1"),
    ("3.14", "3.14"),
    ("1.0", "1.0"),
    ("1.", "1."),
    (".1", ".1"),
    (".5", ".5"),
    ("0.5", "0.5"),
    ("0.01", "0.01"),
    ("1e10", "1e10"),
    ("1.0e3", "1.0e3"),
    ("1.0e-3", "1.0e-3"),
    ("1e-6", "1e-6"),
    # =========================================================================
    # Basic-Spec → "(" Shift-Spec ")" : parenthesized groups
    # =========================================================================
    ("(m)", "(m)"),
    ("(kg)", "(kg)"),
    ("(m/s)", "(m / s)"),
    ("(m/s^2)", "(m / s ** 2)"),
    ("(kg/m^3)", "(kg / m ** 3)"),
    ("(m^2/s)", "(m ** 2 / s)"),
    ("(m/(s K))", "(m / (s * K))"),
    ("((m))", "((m))"),
    # =========================================================================
    # Power-Spec → Basic-Spec INT : trailing digits as exponent
    # =========================================================================
    ("a2", "a ** 2"),
    ("m3", "m ** 3"),
    ("s1", "s ** 1"),
    # Negative trailing exponent
    ("a-2", "a ** -2"),
    ("s-1", "s ** -1"),
    ("m-s-2", "m * s ** -2"),
    # =========================================================================
    # Power-Spec → Basic-Spec EXPONENT : unicode superscripts (¹²³)
    # =========================================================================
    ("a¹", "a ** 1"),
    ("a²", "a ** 2"),
    ("a³", "a ** 3"),
    ("m²", "m ** 2"),
    ("m³", "m ** 3"),
    ("kg³", "kg ** 3"),
    ("s¹", "s ** 1"),
    # Multi-digit superscript
    ("m¹²", "m ** 12"),
    # =========================================================================
    # Power-Spec → Basic-Spec RAISE INT : ^ or **
    # =========================================================================
    ("a^2", "a ** 2"),
    ("a**2", "a ** 2"),
    ("a^-2", "a ** -2"),
    ("a**-2", "a ** -2"),
    ("m^3", "m ** 3"),
    ("m**3", "m ** 3"),
    ("m^-1", "m ** -1"),
    ("m**-1", "m ** -1"),
    ("s^-2", "s ** -2"),
    ("s**-2", "s ** -2"),
    ("K^-1", "K ** -1"),
    ("m^1", "m ** 1"),
    ("m^0", "m ** 0"),
    ("m**0", "m ** 0"),
    ("m^10", "m ** 10"),
    ("m^+2", "m ** +2"),
    # Power of group
    ("(m)^2", "(m) ** 2"),
    ("(m/s)^2", "(m / s) ** 2"),
    ("(m/s)^-1", "(m / s) ** -1"),
    ("(m/s)^-2", "(m / s) ** -2"),
    ("(kg m)^2", "(kg * m) ** 2"),
    ("(kg m)^-1", "(kg * m) ** -1"),
    # =========================================================================
    # Product-Spec → MULTIPLY : all multiplication operators
    # =========================================================================
    # Dash (-)
    ("a-b", "a * b"),
    # Dot (.)
    ("a.b", "a * b"),
    # Asterisk (*)
    ("a*b", "a * b"),
    # Space / juxtaposition
    ("a b", "a * b"),
    ("a b", "a * b"),
    ("m 2", "m * 2"),
    ("kg m 2", "kg * m * 2"),
    # Centered middot (·)
    ("a·b", "a * b"),
    # Chained multiply (3+ terms)
    ("kg m s", "kg * m * s"),
    ("m.s.K", "m * s * K"),
    ("m*s*K", "m * s * K"),
    # Products with exponents
    ("m^2.s^-1", "m ** 2 * s ** -1"),
    ("kg.m.s^-2", "kg * m * s ** -2"),
    ("meter-second^-1", "meter * second ** -1"),
    ("meter-second**-2", "meter * second ** -2"),
    # =========================================================================
    # Product-Spec → DIVIDE : all division operators
    # =========================================================================
    ("a/b", "a / b"),
    ("a / b", "a / b"),
    ("a / b", "a / b"),
    ("a per b", "a / b"),
    ("a PER b", "a / b"),
    ("kg / m", "kg / m"),
    # Chained division
    ("m/s/K", "m / s / K"),
    ("kg/m/s", "kg / m / s"),
    ("m / s / K", "m / s / K"),
    # Per with exponents
    ("kg per s^2", "kg / s ** 2"),
    ("W per m^2", "W / m ** 2"),
    ("m^2 per s", "m ** 2 / s"),
    ("m^2 per s^2", "m ** 2 / s ** 2"),
    ("m^2 PER s^2", "m ** 2 / s ** 2"),
    # Division of groups
    ("(m)/(s)", "(m) / (s)"),
    # =========================================================================
    # Product-Spec → mixed multiply + divide
    # =========================================================================
    ("kg*m/s", "kg * m / s"),
    ("kg m/s", "kg * m / s"),
    ("kg m / s", "kg * m / s"),
    ("kg m / s^2", "kg * m / s ** 2"),
    ("kg m -2", "kg * m * -2"),
    ("kg.m/s2", "kg * m * s ** -2"),
    ("1/s", "1 / s"),
    ("1/m", "1 / m"),
    ("1/s^2", "1 / s ** 2"),
    ("1 m/s", "1 * m / s"),
    ("g/kg", "g / kg"),
    # =========================================================================
    # Shift-Spec → Product-Spec SHIFT INT
    # =========================================================================
    ("a @ 1", "a; offset: 1"),
    ("a after 1", "a; offset: 1"),
    ("a from 1", "a; offset: 1"),
    ("a ref 1", "a; offset: 1"),
    ("K @ 0", "K; offset: 0"),
    # =========================================================================
    # Shift-Spec → Product-Spec SHIFT REAL
    # =========================================================================
    ("K @ 273.15", "K; offset: 273.15"),
    ("K @ -273.15", "K; offset: -273.15"),
    ("a @ 1.5", "a; offset: 1.5"),
    ("K after 273.15", "K; offset: 273.15"),
    ("K from 273.15", "K; offset: 273.15"),
    ("K ref 273.15", "K; offset: 273.15"),
    ("degC @ 1.5", "degC; offset: 1.5"),
    ("°R @ 459.67", "°R; offset: 459.67"),
    # Shift on product expression
    ("m/s @ 0", "m / s; offset: 0"),
    # =========================================================================
    # Basic-Spec → LOGREF Product-Spec ")" : logarithmic units
    # =========================================================================
    # All log types with reference value
    ("log(re 1 a)", "1 * a; logbase: 10; logfactor: 10"),
    ("lg(re 1 a)", "1 * a; logbase: 10; logfactor: 10"),
    ("ln(re 1 a)", "1 * a; logbase: 2.718281828459045; logfactor: 0.5"),
    ("lb(re 1 a)", "1 * a; logbase: 2; logfactor: 1"),
    # With colon in "re:"
    ("lb ( re: 1 a)", "1 * a; logbase: 2; logfactor: 1"),
    ("ln(re: 1 Pa)", "1 * Pa; logbase: 2.718281828459045; logfactor: 0.5"),
    # With real unit names
    ("lg(re 1 mW)", "1 * mW; logbase: 10; logfactor: 10"),
    ("ln(re 1 Pa)", "1 * Pa; logbase: 2.718281828459045; logfactor: 0.5"),
    ("lb(re 1 W)", "1 * W; logbase: 2; logfactor: 1"),
    # Without reference number
    ("lg(re mW)", "mW; logbase: 10; logfactor: 10"),
    ("lb(re W)", "W; logbase: 2; logfactor: 1"),
    # Colon without number
    ("lb(re: W)", "W; logbase: 2; logfactor: 1"),
    ("lg(re: mW)", "mW; logbase: 10; logfactor: 10"),
    # =========================================================================
    # Complex / Real-world CF-convention expressions
    # =========================================================================
    # SI derived units with exponents
    ("kg m^2 s^-2", "kg * m ** 2 * s ** -2"),
    ("kg m^2 s^-2 K^-1", "kg * m ** 2 * s ** -2 * K ** -1"),
    ("kg m^-1 s^-2", "kg * m ** -1 * s ** -2"),
    ("kg^1 m^2 s^-3", "kg ** 1 * m ** 2 * s ** -3"),
    ("W m^-2", "W * m ** -2"),
    ("mol kg^-1", "mol * kg ** -1"),
    ("m^2 s^-1", "m ** 2 * s ** -1"),
    # Division with parenthesized denominators
    ("W/(m^2)", "W / (m ** 2)"),
    ("J/(kg K)", "J / (kg * K)"),
    ("kg/(m s)", "kg / (m * s)"),
    ("kg / (m s^2)", "kg * m ** -1 * s ** -2"),
    ("W/(m^2 K)", "W / (m ** 2 * K)"),
    ("m^2/(s^2 K)", "m ** 2 / (s ** 2 * K)"),
    ("kg m^2/(s^2 K mol)", "kg * m ** 2 / (s ** 2 * K * mol)"),
    # Scalar * unit
    ("0.001 m", "0.001 * m"),
    ("1000 kg", "1000 * kg"),
    ("m 1.5", "1.5 * m"),
    ("1e3 Pa", "1e3 * Pa"),
    ("Pa 1e3", "1e3 * Pa"),
    ("1e-3 m", "1e-3 * m"),
    ("1e-6 m", "1e-6 * m"),
    ("2 m^2", "2 * m ** 2"),
    ("0.5 kg^-1", "0.5 * kg ** -1"),
    ("0.1 Pa", "0.1 * Pa"),
    # Common CF convention units
    ("Pa s", "Pa * s"),
    ("N m", "N * m"),
    ("mm/day", "mm / day"),
    ("W/m^2", "W / m ** 2"),
    ("K/m", "K / m"),
    ("mol/m^3", "mol / m ** 3"),
    ("m/s^2", "m / s ** 2"),
    # =========================================================================
    # Whitespace handling
    # =========================================================================
    (" m ", "m"),
    (" m / s^2 ", "m / s ** 2"),
    (" kg / s ", "kg / s"),
]
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
# "<unit> since <timestamp>" strings in several date/time/zone spellings.
# test_time_shifts_raise_not_implemented expects cf_string_to_pint to reject
# every one of them with a VisitError.
TEST_CASES_TIME_SHIFTS = [
    "seconds since 1970-01-01",
    "seconds since 1970-01-01 12:34",
    "seconds since 1970-01-01 12:34 +01:00",
    "seconds since 1970-01-01 12:34 UTC",
    "seconds since 1970-01-01T12:34",
    "seconds since 1970-01-01T12:34 Z",
    "seconds since 19700101T1234",
    "seconds since 19700101T1234 +01:00",
    "seconds since 19700101T1234 UTC",
]
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
# Inputs that test_invalid_raises feeds to cf_string_to_pint, expecting each
# one to raise lark's UnexpectedInput.
TEST_CASES_INVALID = [
    # =========================================================================
    # Invalid operators / syntax
    # =========================================================================
    # Double slash, double plus
    "a // b",
    "a ++ b",
    # Double raise
    "a^^2",
    "a ^ ^ 2",
    # Double consecutive operators
    "a * * b",
    "a / / b",
    # Mixed invalid operators
    "a*/b",
    "a/*b",
    "m ^^ 2",
    # =========================================================================
    # Trailing operators (incomplete expressions)
    # =========================================================================
    "a/",
    "a*",
    "a^",
    "a**",
    "m per",
    "m since",
    "K @",
    # =========================================================================
    # Leading operators (missing left operand)
    # =========================================================================
    "/m",
    "*m",
    "^m",
    "/ s",
    "* s",
    "^ 2",
    "^2",
    "**2",
    "@ 1",
    # =========================================================================
    # Unmatched / empty parentheses
    # =========================================================================
    "()",
    "( )",
    "( )",
    "(m",
    "((m)",
    # =========================================================================
    # Invalid characters (not part of <alpha>, <digit>, or operators)
    # =========================================================================
    "!m",
    "#kg",
    "$m",
    "&m",
    "~m",
    "m~",
    "?m",
    # Invalid delimiters
    "m[2]",
    "m{2}",
    "m<2>",
    "a=b",
    "a+b",
    "m,s",
    "m;s",
    # =========================================================================
    # Only operators (no operands)
    # =========================================================================
    "*",
    "/",
    "^",
    "**",
    "@",
    # =========================================================================
    # Nonsense strings
    # =========================================================================
    "!!!",
    "---",
    "...",
    "@@@",
    # =========================================================================
    # Invalid number format
    # =========================================================================
    "1e+",
    # =========================================================================
    # Invalid shift expressions
    # =========================================================================
    # Double shift
    "K @ 1 @ 2",
    # Shift with parenthesized offset (not allowed)
    "K @ (1)",
    # =========================================================================
    # Invalid LOGREF expressions
    # =========================================================================
    # Missing closing paren
    "log(re 1 mW",
    # Missing opening paren
    "log mW)",
    # Empty log
    "log()",
    "lg()",
    "ln()",
    "lb()",
]
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
@pytest.mark.parametrize("input_str, expected_str", TEST_CASES_TRANSFORM)
def test_transform(input_str: str, expected_str: str) -> None:
    """Check that a CF unit string converts to the expected pint expression.

    The converted string and the expected string are each wrapped in a dummy
    ``_ = ...`` definition and parsed with ``UnitDefinition``; the parsed
    definitions are compared rather than the raw strings.
    """
    # Disabled cross-check against cfunits:
    # assert Units(input_str).isvalid, f"Input '{input_str}' is not a valid unit"

    converted = cf_string_to_pint(input_str)
    config = ParserConfig()
    # Parse the converter output first, then the expectation, with one config.
    result, expected = (
        UnitDefinition.from_string_and_config(f"_ = {text}", config)
        for text in (converted, expected_str)
    )
    failure_note = f"Expected '{expected}', got '{result}' for input '{input_str}'"
    assert result == expected, failure_note
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
@pytest.mark.parametrize("input_str", TEST_CASES_INVALID)
def test_invalid_raises(input_str: str) -> None:
    """Parsing a string that violates the grammar must raise ``UnexpectedInput``."""
    rejects_input = pytest.raises(UnexpectedInput)
    with rejects_input:
        cf_string_to_pint(input_str)
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
@pytest.mark.parametrize("input_str", TEST_CASES_TIME_SHIFTS)
def test_time_shifts_raise_not_implemented(input_str: str) -> None:
    """Time-reference strings must fail with the "not supported" ``VisitError``."""
    # ``match`` is applied as a regular-expression search on the error text.
    expected_message = "Time-based offsets are not directly supported by pint"
    with pytest.raises(VisitError, match=expected_message):
        cf_string_to_pint(input_str)
|