conic 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- conic-0.1.0/PKG-INFO +54 -0
- conic-0.1.0/README.md +41 -0
- conic-0.1.0/pyproject.toml +25 -0
- conic-0.1.0/src/conic/__init__.py +0 -0
- conic-0.1.0/src/conic/_canonical.py +26 -0
- conic-0.1.0/src/conic/_helpers.py +21 -0
- conic-0.1.0/src/conic/config.py +137 -0
- conic-0.1.0/src/conic/pipeline.py +142 -0
- conic-0.1.0/src/conic/preprocess.py +158 -0
- conic-0.1.0/src/conic/py.typed +0 -0
conic-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: conic
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CPT/SCPTu Data-Driven Evaluator.
|
|
5
|
+
Author: Francesco Franco
|
|
6
|
+
Author-email: Francesco Franco <ffrancoa97@gmail.com>
|
|
7
|
+
License-Expression: AGPL-3.0-or-later
|
|
8
|
+
Requires-Dist: polars>=1.40.1
|
|
9
|
+
Requires-Dist: pydantic>=2.13.4
|
|
10
|
+
Requires-Dist: pytest-benchmark>=5.2.3
|
|
11
|
+
Requires-Python: >=3.12
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
<h1 align="center">
|
|
15
|
+
<a href="https://github.com/ferrosoft/conic">
|
|
16
|
+
<img src="https://raw.githubusercontent.com/ferrosoft/conic/main/assets/conic_banner.png" alt="conic logo" width="80%">
|
|
17
|
+
</a>
|
|
18
|
+
</h1>
|
|
19
|
+
|
|
20
|
+
## A Python library for the exploration of CPT/SCPTu profiles
|
|
21
|
+
|
|
22
|
+
**Conic** (stylized in all lowercase) is a Python library for data-driven analysis of CPT soundings.
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
## Setup
|
|
26
|
+
|
|
27
|
+
Install the latest **`conic`** version with:
|
|
28
|
+
|
|
29
|
+
```sh
|
|
30
|
+
pip install conic
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
If you use `uv` (recommended) you can add **`conic`** to your environment by using:
|
|
34
|
+
|
|
35
|
+
```sh
|
|
36
|
+
uv add conic
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Credits
|
|
40
|
+
|
|
41
|
+
* **`conic`** author and maintainer: Francesco A. Franco ([@ffrancoa]).
|
|
42
|
+
|
|
43
|
+
* Font used in logo: [Lexend]. Open Font License.
|
|
44
|
+
|
|
45
|
+
[@ffrancoa]: https://github.com/ffrancoa
|
|
46
|
+
[Lexend]: https://www.lexend.com/
|
|
47
|
+
|
|
48
|
+
## License
|
|
49
|
+
|
|
50
|
+
Conic is released under the [Affero General Public License, Version 3.0].
|
|
51
|
+
|
|
52
|
+
[Affero General Public License, Version 3.0]: https://opensource.org/license/agpl-3-0
|
|
53
|
+
|
|
54
|
+
---
|
conic-0.1.0/README.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
<h1 align="center">
|
|
2
|
+
<a href="https://github.com/ferrosoft/conic">
|
|
3
|
+
<img src="https://raw.githubusercontent.com/ferrosoft/conic/main/assets/conic_banner.png" alt="conic logo" width="80%">
|
|
4
|
+
</a>
|
|
5
|
+
</h1>
|
|
6
|
+
|
|
7
|
+
## A Python library for the exploration of CPT/SCPTu profiles
|
|
8
|
+
|
|
9
|
+
**Conic** (stylized in all lowercase) is a Python library for data-driven analysis of cone penetration test (CPT) soundings.
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
## Setup
|
|
13
|
+
|
|
14
|
+
Install the latest **`conic`** version with:
|
|
15
|
+
|
|
16
|
+
```sh
|
|
17
|
+
pip install conic
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
If you use `uv` (recommended), you can add **`conic`** to your environment with:
|
|
21
|
+
|
|
22
|
+
```sh
|
|
23
|
+
uv add conic
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Credits
|
|
27
|
+
|
|
28
|
+
* **`conic`** author and maintainer: Francesco A. Franco ([@ffrancoa]).
|
|
29
|
+
|
|
30
|
+
* Font used in logo: [Lexend]. Open Font License.
|
|
31
|
+
|
|
32
|
+
[@ffrancoa]: https://github.com/ffrancoa
|
|
33
|
+
[Lexend]: https://www.lexend.com/
|
|
34
|
+
|
|
35
|
+
## License
|
|
36
|
+
|
|
37
|
+
Conic is released under the [Affero General Public License, Version 3.0].
|
|
38
|
+
|
|
39
|
+
[Affero General Public License, Version 3.0]: https://opensource.org/license/agpl-3-0
|
|
40
|
+
|
|
41
|
+
---
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "conic"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "CPT/SCPTu Data-Driven Evaluator."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "AGPL-3.0-or-later"
|
|
7
|
+
authors = [
|
|
8
|
+
{ name = "Francesco Franco", email = "ffrancoa97@gmail.com" }
|
|
9
|
+
]
|
|
10
|
+
requires-python = ">=3.12"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"polars>=1.40.1",
|
|
13
|
+
"pydantic>=2.13.4",
|
|
14
|
+
"pytest-benchmark>=5.2.3",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[build-system]
|
|
18
|
+
requires = ["uv_build>=0.11.7,<0.12.0"]
|
|
19
|
+
build-backend = "uv_build"
|
|
20
|
+
|
|
21
|
+
[dependency-groups]
|
|
22
|
+
dev = [
|
|
23
|
+
"ipython>=9.13.0",
|
|
24
|
+
"pytest>=9.0.3",
|
|
25
|
+
]
|
|
File without changes
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from typing import Final

# Canonical defaults shared by `conic.config` and `conic.preprocess`.
# Every name is declared `Final` so type checkers flag accidental rebinding.

# Default names for the `InputColumns` fields that `sanitize_dataframe`
# treats as required.
COL_DEPTH: Final[str] = "Depth (m)"
COL_QC: Final[str] = "qc (MPa)"
COL_FS: Final[str] = "fs (kPa)"
COL_U2: Final[str] = "u2 (kPa)"

# Default names for the `InputColumns` fields that `sanitize_dataframe`
# treats as optional.
COL_U0: Final[str] = "u0 (kPa)"
COL_SV_TOT: Final[str] = "σv tot (kPa)"
COL_SV_EFF: Final[str] = "σv eff (kPa)"

# Default names for the `OutputColumns` fields.
COL_QT: Final[str] = "qt (MPa)"
COL_FR: Final[str] = "Fr (%)"
COL_BQ: Final[str] = "Bq (-)"

COL_N: Final[str] = "n (-)"
COL_QTN: Final[str] = "Qtn (-)"
COL_IC: Final[str] = "Ic (-)"
COL_CONVG: Final[str] = "convg. (-)"

COL_CD: Final[str] = "CD (-)"
COL_IB: Final[str] = "IB (-)"

# Default for `Parameters.area_ratio` (validated there as 0 < value <= 1).
AREA_RATIO: Final[float] = 0.80
# Default for `Parameters.gamma_water`; `compute_hydrostatic` multiplies it
# by the depth below the water level.
# NOTE(review): presumably kN/m3, given depth in m and u0 in kPa -- confirm.
GAMMA_WATER: Final[float] = 9.81

# Default for `Cleansing.clean_mode`; must be "replace" or "remove".
CLEAN_MODE: Final[str] = "replace"
|
|
26
|
+
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
from io import BytesIO
|
|
3
|
+
from typing import Iterable, cast
|
|
4
|
+
|
|
5
|
+
import polars as pl
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def get_missing_columns(
    data: pl.DataFrame,
    required_columns: Iterable[str]
) -> set:
    """Return the names in ``required_columns`` that ``data`` does not have.

    Args:
        data: Frame whose ``columns`` are inspected.
        required_columns: Column names that are expected to be present.

    Returns:
        The set of required names absent from ``data.columns``; empty when
        nothing is missing.
    """
    present = set(data.columns)
    return {name for name in required_columns if name not in present}
|
|
14
|
+
|
|
15
|
+
def dataframe_fingerprint(data: pl.DataFrame):
    """Return a short hexadecimal fingerprint of ``data``.

    The frame is serialised in memory with ``write_ipc`` and the resulting
    bytes are hashed with SHA-256; only the first 16 hex characters of the
    digest are returned.
    """
    # `write_ipc(file=None)` hands back an in-memory buffer; the cast only
    # narrows the type for static checkers.
    ipc_buffer = cast(BytesIO, data.write_ipc(file=None))
    digest = hashlib.sha256(ipc_buffer.getvalue())
    return digest.hexdigest()[:16]
|
|
21
|
+
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import tomllib
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Annotated, Literal, Optional, Self, cast
|
|
4
|
+
|
|
5
|
+
from pydantic import (
|
|
6
|
+
BaseModel,
|
|
7
|
+
ConfigDict,
|
|
8
|
+
Field,
|
|
9
|
+
NegativeFloat,
|
|
10
|
+
NonNegativeFloat,
|
|
11
|
+
PositiveFloat,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
from conic._canonical import (
|
|
15
|
+
AREA_RATIO,
|
|
16
|
+
COL_BQ,
|
|
17
|
+
COL_CD,
|
|
18
|
+
COL_CONVG,
|
|
19
|
+
COL_DEPTH,
|
|
20
|
+
COL_FR,
|
|
21
|
+
COL_FS,
|
|
22
|
+
COL_IB,
|
|
23
|
+
COL_IC,
|
|
24
|
+
COL_N,
|
|
25
|
+
COL_QC,
|
|
26
|
+
COL_QT,
|
|
27
|
+
COL_QTN,
|
|
28
|
+
COL_SV_EFF,
|
|
29
|
+
COL_SV_TOT,
|
|
30
|
+
COL_U0,
|
|
31
|
+
COL_U2,
|
|
32
|
+
GAMMA_WATER,
|
|
33
|
+
CLEAN_MODE
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
type ColumnName = Annotated[str, Field(max_length=50)]
|
|
38
|
+
type UnitRatio = Annotated[float, Field(gt=0.0, le=1.0)]
|
|
39
|
+
type Indicators = list[NegativeFloat]
|
|
40
|
+
type CleanMode = Literal["replace", "remove"]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class InputColumns(BaseModel):
    """Names of the columns read from the input table.

    Every field defaults to the matching constant in ``conic._canonical``.
    The model is frozen and rejects unknown keys.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    # Columns that `preprocess.sanitize_dataframe` requires.
    depth: ColumnName = COL_DEPTH
    qc: ColumnName = COL_QC
    fs: ColumnName = COL_FS
    u2: ColumnName = COL_U2

    # Columns that `preprocess.sanitize_dataframe` keeps only when present.
    u0: ColumnName = COL_U0
    sv_tot: ColumnName = COL_SV_TOT
    sv_eff: ColumnName = COL_SV_EFF
|
|
54
|
+
|
|
55
|
+
class OutputColumns(BaseModel):
    """Names of the output columns.

    Every field defaults to the matching constant in ``conic._canonical``.
    The model is frozen and rejects unknown keys.

    NOTE(review): presumably these name derived/result columns; no code in
    this release writes them yet -- confirm against the intended API.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    qt: ColumnName = COL_QT
    fr: ColumnName = COL_FR
    bq: ColumnName = COL_BQ

    n: ColumnName = COL_N
    qtn: ColumnName = COL_QTN
    ic: ColumnName = COL_IC
    convg: ColumnName = COL_CONVG

    cd: ColumnName = COL_CD
    ib: ColumnName = COL_IB
|
|
69
|
+
|
|
70
|
+
class Columns(BaseModel):
    """Container pairing the input and output column-name models.

    Both members default to freshly built models carrying the canonical
    names. The model is frozen and rejects unknown keys.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    input: InputColumns = Field(default_factory=InputColumns)
    output: OutputColumns = Field(default_factory=OutputColumns)
|
|
75
|
+
|
|
76
|
+
class Parameters(BaseModel):
    """Numeric parameters of the evaluation.

    The model is frozen and rejects unknown keys.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    # Must satisfy 0 < area_ratio <= 1.
    area_ratio: UnitRatio = AREA_RATIO
    # Strictly positive; forwarded to `preprocess.compute_hydrostatic`.
    gamma_water: PositiveFloat = GAMMA_WATER
    # Strictly positive when given; `None` means "not provided".
    gamma_soil: Optional[PositiveFloat] = None
    # Non-negative when given; `None` makes `compute_hydrostatic` fill u0
    # with zeros.
    water_level: Optional[NonNegativeFloat] = None
|
|
83
|
+
|
|
84
|
+
class Cleansing(BaseModel):
    """Settings that drive the cleansing steps of the pipeline.

    The model is frozen and rejects unknown keys.
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    # Forwarded to `preprocess.adjust_depth_spacing`; `None` lets that
    # function derive the value from the data.
    start_depth: Optional[NonNegativeFloat] = None
    spacing: Optional[PositiveFloat] = None

    # Strictly negative sentinel values; empty by default.
    indicators: Indicators = Field(default_factory=list)
    # The cast only narrows the plain `str` constant for static checkers.
    clean_mode: CleanMode = cast(CleanMode, CLEAN_MODE)
|
|
92
|
+
|
|
93
|
+
class Configurator(BaseModel):
    """Immutable root configuration: columns, parameters and cleansing.

    The ``with_*`` methods never modify the instance. Each one returns a
    copy in which a single nested field has been replaced, after the owning
    sub-model has been rebuilt (and therefore re-validated).
    """

    model_config = ConfigDict(frozen=True, extra="forbid")

    columns: Columns = Field(default_factory=Columns)
    parameters: Parameters = Field(default_factory=Parameters)
    cleansing: Cleansing = Field(default_factory=Cleansing)

    def _with_field(self, submodel_name: str, field_name: str, value: object) -> Self:
        """Return a copy whose ``submodel_name.field_name`` is ``value``.

        The sub-model is rebuilt through its constructor so that pydantic
        validates the new value and rejects unknown field names.
        """
        current = getattr(self, submodel_name)

        payload = {**current.model_dump(), field_name: value}
        rebuilt = type(current)(**payload)

        return self.model_copy(update={submodel_name: rebuilt})

    def with_area_ratio(self, value: UnitRatio) -> Self:
        """Return a copy with ``parameters.area_ratio`` replaced."""
        return self._with_field("parameters", "area_ratio", value)

    def with_gamma_water(self, value: PositiveFloat) -> Self:
        """Return a copy with ``parameters.gamma_water`` replaced."""
        return self._with_field("parameters", "gamma_water", value)

    def with_gamma_soil(self, value: Optional[PositiveFloat]) -> Self:
        """Return a copy with ``parameters.gamma_soil`` replaced."""
        return self._with_field("parameters", "gamma_soil", value)

    def with_water_level(self, value: Optional[NonNegativeFloat]) -> Self:
        """Return a copy with ``parameters.water_level`` replaced."""
        return self._with_field("parameters", "water_level", value)

    def with_start_depth(self, value: Optional[NonNegativeFloat]) -> Self:
        """Return a copy with ``cleansing.start_depth`` replaced."""
        return self._with_field("cleansing", "start_depth", value)

    def with_spacing(self, value: Optional[PositiveFloat]) -> Self:
        """Return a copy with ``cleansing.spacing`` replaced."""
        return self._with_field("cleansing", "spacing", value)

    def with_indicators(self, values: list[float]) -> Self:
        """Return a copy with ``cleansing.indicators`` replaced."""
        return self._with_field("cleansing", "indicators", values)

    def with_clean_mode(self, value: CleanMode) -> Self:
        """Return a copy with ``cleansing.clean_mode`` replaced."""
        return self._with_field("cleansing", "clean_mode", value)

    @classmethod
    def from_toml(cls, file_path: Path | str) -> Self:
        """Build a configuration from the TOML file at ``file_path``.

        The parsed document is validated against the model, so unknown keys
        or out-of-range values raise a pydantic validation error.
        """
        toml_path = Path(file_path)
        # tomllib only accepts binary streams.
        with toml_path.open("rb") as stream:
            raw_config = tomllib.load(stream)
        return cls.model_validate(raw_config)
|
|
137
|
+
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
from typing import Literal, NamedTuple, Self, overload
|
|
5
|
+
|
|
6
|
+
import polars as pl
|
|
7
|
+
|
|
8
|
+
from conic import preprocess
|
|
9
|
+
from conic.config import Configurator
|
|
10
|
+
from conic._helpers import dataframe_fingerprint
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True, slots=True)
class Step:
    """A named, single-argument transformation of a DataFrame."""

    # Label reported in the pipeline metadata under "steps".
    name: str
    # Callable that receives a frame and returns the transformed frame.
    apply: Callable[[pl.DataFrame], pl.DataFrame]
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True, slots=True)
class StepCatalog:
    """Factory that binds ``preprocess`` functions to one configuration.

    Each method returns a `Step` whose name is the wrapped function's
    ``__name__`` and whose callable forwards the relevant configuration
    values as keyword arguments.
    """

    config: Configurator

    def adjust_depth_spacing(self) -> Step:
        """Build the step wrapping ``preprocess.adjust_depth_spacing``."""
        target = preprocess.adjust_depth_spacing
        settings = self.config.cleansing
        names = self.config.columns

        def run_step(frame: pl.DataFrame) -> pl.DataFrame:
            return target(
                frame,
                start_depth=settings.start_depth,
                spacing=settings.spacing,
                col_depth=names.input.depth,
            )

        return Step(name=target.__name__, apply=run_step)

    def sanitize_dataframe(self) -> Step:
        """Build the step wrapping ``preprocess.sanitize_dataframe``."""
        target = preprocess.sanitize_dataframe
        names = self.config.columns

        def run_step(frame: pl.DataFrame) -> pl.DataFrame:
            source = names.input
            return target(
                frame,
                col_depth=source.depth,
                col_qc=source.qc,
                col_fs=source.fs,
                col_u2=source.u2,
                col_u0=source.u0,
                col_sv_eff=source.sv_eff,
                col_sv_tot=source.sv_tot,
            )

        return Step(name=target.__name__, apply=run_step)

    def compute_hydrostatic(self) -> Step:
        """Build the step wrapping ``preprocess.compute_hydrostatic``.

        The step always passes ``override=True``, so an existing u0 column
        is overwritten rather than rejected.
        """
        target = preprocess.compute_hydrostatic
        physical = self.config.parameters
        names = self.config.columns

        def run_step(frame: pl.DataFrame) -> pl.DataFrame:
            return target(
                frame,
                water_level=physical.water_level,
                gamma_water=physical.gamma_water,
                col_depth=names.input.depth,
                col_u0=names.input.u0,
                override=True,
            )

        return Step(name=target.__name__, apply=run_step)

    def clean_by_indicators(self) -> Step:
        """Build the step wrapping ``preprocess.clean_by_indicators``."""
        target = preprocess.clean_by_indicators
        settings = self.config.cleansing

        def run_step(frame: pl.DataFrame) -> pl.DataFrame:
            return target(
                frame,
                indicators=settings.indicators,
                mode=settings.clean_mode,
            )

        return Step(name=target.__name__, apply=run_step)
|
|
84
|
+
|
|
85
|
+
class PipelineResult(NamedTuple):
    """Pair returned by ``Pipeliner.run`` when metadata is requested."""

    # Frame produced by the last step.
    data: pl.DataFrame
    # Description of the run (input hash, configuration, step names, ...).
    metadata: dict[str, object]
|
|
88
|
+
|
|
89
|
+
@dataclass(frozen=True, slots=True)
class Pipeliner:
    """Ordered sequence of steps applied to a DataFrame.

    Attributes:
        config: Configuration the steps were built from; dumped into the
            run metadata.
        steps: Steps executed in order by `run`.
    """

    config: Configurator
    steps: tuple[Step, ...]

    @classmethod
    def default(cls, config: Configurator) -> Self:
        """Build the standard pipeline for ``config``.

        The steps run in this order: sanitize the frame, adjust the depth
        spacing, clean indicator values, compute the hydrostatic pressure.
        """
        catalog = StepCatalog(config)

        steps = (
            catalog.sanitize_dataframe(),
            catalog.adjust_depth_spacing(),
            catalog.clean_by_indicators(),
            catalog.compute_hydrostatic(),
        )

        return cls(config=config, steps=steps)

    @overload
    def run(
        self,
        inp_data: pl.DataFrame, *,
        metadata: Literal[False] = False,
    ) -> pl.DataFrame: ...

    @overload
    def run(
        self,
        inp_data: pl.DataFrame, *,
        metadata: Literal[True],
    ) -> PipelineResult: ...

    def run(
        self,
        inp_data: pl.DataFrame, *,
        metadata: bool = False
    ) -> pl.DataFrame | PipelineResult:
        """Apply every step to ``inp_data`` in order.

        Args:
            inp_data: Frame fed to the first step.
            metadata: When true, also return a description of the run.

        Returns:
            The processed frame, or a `PipelineResult` holding the frame
            and a metadata dictionary when ``metadata`` is true.
        """
        out_data = inp_data

        for step in self.steps:
            out_data = step.apply(out_data)

        if not metadata:
            return out_data

        meta: dict[str, object] = {
            # Fingerprint of the *input* frame, not of the result.
            "data_hash": dataframe_fingerprint(inp_data),
            # No source path is tracked here; the key is always None.
            "source_path": None,
            "config": self.config.model_dump(),
            "steps": [step.name for step in self.steps],
            # `isoformat` must be called: without the parentheses the bound
            # method itself was stored instead of the ISO-8601 string.
            "timestamp_utc": datetime.now(timezone.utc).isoformat(),
        }

        return PipelineResult(data=out_data, metadata=meta)
|
|
142
|
+
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
from typing import Literal, Optional, cast
|
|
2
|
+
|
|
3
|
+
import polars as pl
|
|
4
|
+
from polars.exceptions import ColumnNotFoundError
|
|
5
|
+
|
|
6
|
+
from conic._canonical import (
|
|
7
|
+
COL_DEPTH,
|
|
8
|
+
COL_FS,
|
|
9
|
+
COL_QC,
|
|
10
|
+
COL_SV_EFF,
|
|
11
|
+
COL_SV_TOT,
|
|
12
|
+
COL_U0,
|
|
13
|
+
COL_U2,
|
|
14
|
+
GAMMA_WATER
|
|
15
|
+
)
|
|
16
|
+
from conic._helpers import get_missing_columns
|
|
17
|
+
|
|
18
|
+
def compute_hydrostatic(
    data: pl.DataFrame,
    water_level: Optional[float] = None, *,
    gamma_water: float = GAMMA_WATER,
    col_depth: str = COL_DEPTH,
    col_u0: str = COL_U0,
    override: bool = False
) -> pl.DataFrame:
    """Add (or overwrite) the hydrostatic pressure column.

    Args:
        data: Frame containing the depth column.
        water_level: Depth of the water level. When ``None`` the pressure
            column is filled with ``0.0``.
        gamma_water: Factor multiplied by the depth below the water level.
        col_depth: Name of the depth column.
        col_u0: Name of the pressure column to write.
        override: Allow replacing an existing ``col_u0`` column.

    Returns:
        A frame with ``col_u0`` set to ``(depth - water_level) * gamma_water``
        for rows at or below the water level and ``0.0`` elsewhere.

    Raises:
        ColumnNotFoundError: If ``col_depth`` is not in ``data``.
        ValueError: If ``col_u0`` already exists and ``override`` is false.
    """
    if col_depth not in data.columns:
        raise ColumnNotFoundError(
            f"Depth column is missing in DataFrame: '{col_depth}'."
        )

    already_present = col_u0 in data.columns
    if already_present and not override:
        raise ValueError(
            f"Hydrostatic pressure ({col_u0}) was already included in "
            "this DataFrame. Set `override=True` to override."
        )

    if water_level is None:
        pressure = pl.lit(0.0)
    else:
        depth = pl.col(col_depth)
        pressure = (
            pl.when(depth >= water_level)
            .then((depth - water_level) * gamma_water)
            .otherwise(0.0)
        )

    return data.with_columns(pressure.alias(col_u0))
|
|
49
|
+
|
|
50
|
+
def adjust_depth_spacing(
    data: pl.DataFrame, *,
    start_depth: Optional[float] = None,
    spacing: Optional[float] = None,
    digits: int = 3,
    col_depth: str = COL_DEPTH,
) -> pl.DataFrame:
    """Replace the depth column with an evenly spaced sequence.

    The new depths are ``start_depth + i * spacing`` for row index ``i``,
    rounded to ``digits`` decimals.

    Args:
        data: Frame containing the depth column; needs at least two rows.
        start_depth: First depth of the sequence. Defaults to the value in
            the first row of ``col_depth``.
        spacing: Distance between consecutive depths. Defaults to the mean
            difference between consecutive depths, rounded to ``digits``.
        digits: Number of decimals kept for the spacing and the depths.
        col_depth: Name of the depth column.

    Returns:
        A frame whose ``col_depth`` column holds the regular sequence.

    Raises:
        ColumnNotFoundError: If ``col_depth`` is not in ``data``.
        ValueError: If ``data`` has fewer than two rows, or if the start
            depth or the spacing has to be inferred but the depth column
            holds no usable values for it.
    """
    if col_depth not in data.columns:
        raise ColumnNotFoundError(
            f"Depth column is missing in DataFrame: '{col_depth}'."
        )

    if (nrows := data.height) < 2:
        raise ValueError(
            "DataFrame must have at least 2 rows to infer depth spacing."
        )

    if start_depth is None:
        start_depth = data.item(0, col_depth)
        # A null first depth would otherwise surface as an opaque TypeError
        # in the arithmetic below.
        if start_depth is None:
            raise ValueError(
                f"Cannot infer start depth: first value of '{col_depth}' "
                "is null."
            )

    if spacing is None:
        mean_spacing = data.get_column(col_depth).diff().mean()
        # `mean()` yields None when no consecutive difference is available
        # (e.g. the column is all null); `round(None)` would raise TypeError.
        if mean_spacing is None:
            raise ValueError(
                f"Cannot infer depth spacing from column '{col_depth}': "
                "no valid consecutive differences were found."
            )
        spacing = round(cast(float, mean_spacing), digits)

    new_depths = start_depth + pl.int_range(nrows, eager=True) * spacing
    new_depths = new_depths.round(digits)

    return data.with_columns(new_depths.alias(col_depth))
|
|
79
|
+
|
|
80
|
+
def sanitize_dataframe(
    data: pl.DataFrame, *,
    col_depth: str = COL_DEPTH,
    col_qc: str = COL_QC,
    col_fs: str = COL_FS,
    col_u2: str = COL_U2,
    col_u0: str = COL_U0,
    col_sv_eff: str = COL_SV_EFF,
    col_sv_tot: str = COL_SV_TOT,
    include_optional: bool = True
) -> pl.DataFrame:
    """Keep only the columns the library works with.

    The result always starts with the required columns (depth, qc, fs, u2),
    followed -- when ``include_optional`` is true -- by whichever of the
    optional columns (u0, effective stress, total stress) exist in ``data``,
    in that fixed order.

    Args:
        data: Frame to trim.
        col_depth, col_qc, col_fs, col_u2: Names of the required columns.
        col_u0, col_sv_eff, col_sv_tot: Names of the optional columns.
        include_optional: Whether optional columns present in ``data`` are
            retained.

    Returns:
        A frame restricted to the selected columns.

    Raises:
        ColumnNotFoundError: If any required column is missing.
    """
    required_columns = [col_depth, col_qc, col_fs, col_u2]
    optional_columns = [col_u0, col_sv_eff, col_sv_tot]

    if missing_columns := get_missing_columns(data, required_columns):
        # Sorted so the message is stable and readable instead of a set repr.
        missing_names = ", ".join(
            f"'{name}'" for name in sorted(missing_columns)
        )
        raise ColumnNotFoundError(
            f"Missing required columns: {missing_names}."
        )

    # Copy so the required list is not mutated through an alias.
    selected_columns = list(required_columns)

    if include_optional:
        # Walk the declared list (not a set) so the output column order is
        # deterministic across runs.
        selected_columns += [
            name for name in optional_columns if name in data.columns
        ]

    return data.select(selected_columns)
|
|
107
|
+
|
|
108
|
+
def filter_by_indicators(
    data: pl.DataFrame,
    indicators: list[float], *,
    action: Literal["remove", "select"] = "remove"
) -> pl.DataFrame:
    """Filter rows by whether any numeric column holds an indicator value.

    Args:
        data: Frame to filter.
        indicators: Values to look for in the numeric columns.
        action: ``"remove"`` drops the rows containing an indicator;
            ``"select"`` keeps only those rows.

    Returns:
        The filtered frame.

    Raises:
        ValueError: If ``action`` is neither ``"remove"`` nor ``"select"``.
    """
    if action not in ("remove", "select"):
        raise ValueError(
            "Invalid `action` argument. Must be 'remove' or 'select'."
        )

    # True for a row as soon as one numeric column matches an indicator.
    row_has_indicator = pl.any_horizontal(
        pl.selectors.numeric().is_in(indicators)
    )

    if action == "remove":
        return data.filter(row_has_indicator.not_())
    return data.filter(row_has_indicator)
|
|
123
|
+
|
|
124
|
+
def split_by_indicators(
    data: pl.DataFrame,
    indicators: list[float], *,
    index_col: str = "_id_"
) -> tuple[pl.DataFrame, pl.DataFrame]:
    """Split ``data`` into rows with and without indicator values.

    A row-index column named ``index_col`` is added before splitting and is
    kept in both halves.

    NOTE(review): the added index column is numeric, so it is scanned for
    indicators as well -- confirm this is harmless for the indicator values
    in use.

    Returns:
        ``(rows_with, rows_without)``: the rows where some numeric column
        holds an indicator, and the remaining rows.
    """
    indexed = data.with_row_index(index_col)

    flagged = filter_by_indicators(indexed, indicators, action="select")
    untouched = filter_by_indicators(indexed, indicators, action="remove")

    return flagged, untouched
|
|
136
|
+
|
|
137
|
+
def clean_by_indicators(
    data: pl.DataFrame,
    indicators: list[float], *,
    mode: Literal["remove", "replace"] = "replace"
) -> pl.DataFrame:
    """Remove or blank out indicator values found in numeric columns.

    Args:
        data: Frame to clean.
        indicators: Values to look for in the numeric columns.
        mode: ``"remove"`` drops every row containing an indicator;
            ``"replace"`` keeps all rows and sets the matching cells to
            null.

    Returns:
        The cleaned frame.

    Raises:
        ValueError: If ``mode`` is neither ``"remove"`` nor ``"replace"``.
    """
    if mode not in ("remove", "replace"):
        # The message names the real parameter (`mode`), not `action`.
        raise ValueError(
            "Invalid `mode` argument. Must be 'remove' or 'replace'."
        )

    if mode == "remove":
        return filter_by_indicators(data, indicators, action="remove")

    # mode == "replace": null out matching cells, keeping column names.
    numeric_columns = pl.selectors.numeric()
    return data.with_columns(
        pl.when(numeric_columns.is_in(indicators))
        .then(None)
        .otherwise(numeric_columns)
        .name.keep()
    )
|
|
158
|
+
|
|
File without changes
|