conic 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
conic-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,54 @@
1
+ Metadata-Version: 2.4
2
+ Name: conic
3
+ Version: 0.1.0
4
+ Summary: CPT/SCPTu Data-Driven Evaluator.
5
+ Author: Francesco Franco
6
+ Author-email: Francesco Franco <ffrancoa97@gmail.com>
7
+ License-Expression: AGPL-3.0-or-later
8
+ Requires-Dist: polars>=1.40.1
9
+ Requires-Dist: pydantic>=2.13.4
10
+ Requires-Dist: pytest-benchmark>=5.2.3
11
+ Requires-Python: >=3.12
12
+ Description-Content-Type: text/markdown
13
+
14
+ <h1 align="center">
15
+ <a href="https://github.com/ferrosoft/conic">
16
+ <img src="https://raw.githubusercontent.com/ferrosoft/conic/main/assets/conic_banner.png" alt="conic logo" width="80%">
17
+ </a>
18
+ </h1>
19
+
20
+ ## A Python library for the exploration of CPT/SCPTu profiles
21
+
22
+ **Conic** (stylized in all lowercase) is a Python library for data-driven analysis of CPT soundings.
23
+
24
+
25
+ ## Setup
26
+
27
+ Install the latest **`conic`** version with:
28
+
29
+ ```sh
30
+ pip install conic
31
+ ```
32
+
33
+ If you use `uv` (recommended) you can add **`conic`** to your environment by using:
34
+
35
+ ```sh
36
+ uv add conic
37
+ ```
38
+
39
+ ## Credits
40
+
41
+ * **`conic`** author and maintainer: Francesco A. Franco ([@ffrancoa]).
42
+
43
+ * Font used in logo: [Lexend]. Open Font License.
44
+
45
+ [@ffrancoa]: https://github.com/ffrancoa
46
+ [Lexend]: https://www.lexend.com/
47
+
48
+ ## License
49
+
50
+ Conic is released under the [Affero General Public License, Version 3.0].
51
+
52
+ [Affero General Public License, Version 3.0]: https://opensource.org/license/agpl-3-0
53
+
54
+ ---
conic-0.1.0/README.md ADDED
@@ -0,0 +1,41 @@
1
+ <h1 align="center">
2
+ <a href="https://github.com/ferrosoft/conic">
3
+ <img src="https://raw.githubusercontent.com/ferrosoft/conic/main/assets/conic_banner.png" alt="conic logo" width="80%">
4
+ </a>
5
+ </h1>
6
+
7
+ ## A Python library for the exploration of CPT/SCPTu profiles
8
+
9
+ **Conic** (stylized in all lowercase) is a Python library for data-driven analysis of CPT soundings.
10
+
11
+
12
+ ## Setup
13
+
14
+ Install the latest **`conic`** version with:
15
+
16
+ ```sh
17
+ pip install conic
18
+ ```
19
+
20
+ If you use `uv` (recommended) you can add **`conic`** to your environment by using:
21
+
22
+ ```sh
23
+ uv add conic
24
+ ```
25
+
26
+ ## Credits
27
+
28
+ * **`conic`** author and maintainer: Francesco A. Franco ([@ffrancoa]).
29
+
30
+ * Font used in logo: [Lexend]. Open Font License.
31
+
32
+ [@ffrancoa]: https://github.com/ffrancoa
33
+ [Lexend]: https://www.lexend.com/
34
+
35
+ ## License
36
+
37
+ Conic is released under the [Affero General Public License, Version 3.0].
38
+
39
+ [Affero General Public License, Version 3.0]: https://opensource.org/license/agpl-3-0
40
+
41
+ ---
@@ -0,0 +1,25 @@
1
[project]
name = "conic"
version = "0.1.0"
description = "CPT/SCPTu Data-Driven Evaluator."
readme = "README.md"
license = "AGPL-3.0-or-later"
authors = [
    { name = "Francesco Franco", email = "ffrancoa97@gmail.com" }
]
requires-python = ">=3.12"
# Runtime dependencies only: everything listed here is installed for every
# user of the library.
dependencies = [
    "polars>=1.40.1",
    "pydantic>=2.13.4",
]

[build-system]
requires = ["uv_build>=0.11.7,<0.12.0"]
build-backend = "uv_build"

[dependency-groups]
# Development-only tooling. `pytest-benchmark` is a pytest plugin and is not
# imported by the package, so it belongs here rather than in
# `project.dependencies`.
dev = [
    "ipython>=9.13.0",
    "pytest>=9.0.3",
    "pytest-benchmark>=5.2.3",
]
File without changes
@@ -0,0 +1,26 @@
1
# Canonical column names of the raw sounding readings. These are the columns
# `sanitize_dataframe` treats as required.
COL_DEPTH: str = "Depth (m)"
COL_QC: str = "qc (MPa)"
COL_FS: str = "fs (kPa)"
COL_U2: str = "u2 (kPa)"

# Canonical names of the optional input columns. `COL_U0` is the hydrostatic
# pressure column written by `compute_hydrostatic`.
COL_U0: str = "u0 (kPa)"
COL_SV_TOT: str = "σv tot (kPa)"
COL_SV_EFF: str = "σv eff (kPa)"

# Canonical names of the output columns (defaults of `OutputColumns`).
COL_QT: str = "qt (MPa)"
COL_FR: str = "Fr (%)"
COL_BQ: str = "Bq (-)"

COL_N: str = "n (-)"
COL_QTN: str = "Qtn (-)"
COL_IC: str = "Ic (-)"
COL_CONVG: str = "convg. (-)"

COL_CD: str = "CD (-)"
COL_IB: str = "IB (-)"

# Default numeric parameters. `GAMMA_WATER` multiplies the depth below the
# water level to obtain `u0`; presumably kN/m³ — TODO confirm units.
AREA_RATIO: float = 0.80
GAMMA_WATER: float = 9.81

# Default cleaning mode; the configuration accepts "replace" or "remove".
CLEAN_MODE: str = "replace"
26
+
@@ -0,0 +1,21 @@
1
+ import hashlib
2
+ from io import BytesIO
3
+ from typing import Iterable, cast
4
+
5
+ import polars as pl
6
+
7
+
8
def get_missing_columns(
    data: pl.DataFrame,
    required_columns: Iterable[str]
) -> set:
    """Return the names in ``required_columns`` that ``data`` lacks.

    Args:
        data: DataFrame whose column names are inspected.
        required_columns: Column names expected to be present.

    Returns:
        The set of required names not found in ``data.columns``; empty
        when nothing is missing.
    """
    present = data.columns
    return {name for name in required_columns if name not in present}
14
+
15
def dataframe_fingerprint(data: pl.DataFrame):
    """Return a short hexadecimal fingerprint of ``data``.

    The DataFrame is serialized to an in-memory Arrow IPC buffer, the
    buffer is hashed with SHA-256, and the first 16 hexadecimal characters
    of the digest are returned.
    """
    # With `file=None`, `write_ipc` hands back the in-memory buffer.
    buffer = cast(BytesIO, data.write_ipc(file=None))
    digest = hashlib.sha256(buffer.getvalue())
    return digest.hexdigest()[:16]
21
+
@@ -0,0 +1,137 @@
1
+ import tomllib
2
+ from pathlib import Path
3
+ from typing import Annotated, Literal, Optional, Self, cast
4
+
5
+ from pydantic import (
6
+ BaseModel,
7
+ ConfigDict,
8
+ Field,
9
+ NegativeFloat,
10
+ NonNegativeFloat,
11
+ PositiveFloat,
12
+ )
13
+
14
+ from conic._canonical import (
15
+ AREA_RATIO,
16
+ COL_BQ,
17
+ COL_CD,
18
+ COL_CONVG,
19
+ COL_DEPTH,
20
+ COL_FR,
21
+ COL_FS,
22
+ COL_IB,
23
+ COL_IC,
24
+ COL_N,
25
+ COL_QC,
26
+ COL_QT,
27
+ COL_QTN,
28
+ COL_SV_EFF,
29
+ COL_SV_TOT,
30
+ COL_U0,
31
+ COL_U2,
32
+ GAMMA_WATER,
33
+ CLEAN_MODE
34
+ )
35
+
36
+
37
# Column name: a string of at most 50 characters.
type ColumnName = Annotated[str, Field(max_length=50)]
# Ratio in the half-open interval (0, 1].
type UnitRatio = Annotated[float, Field(gt=0.0, le=1.0)]
# Indicator values to clean out of the data; each must be strictly negative.
type Indicators = list[NegativeFloat]
# How values matching an indicator are handled by the cleaning step.
type CleanMode = Literal["replace", "remove"]
41
+
42
+
43
class InputColumns(BaseModel):
    # Names of the columns read from the input DataFrame. Every field
    # defaults to the canonical name from `conic._canonical`. The model is
    # immutable and rejects unknown keys.
    model_config = ConfigDict(extra="forbid", frozen=True)

    # Raw sounding readings (required by `sanitize_dataframe`).
    depth: ColumnName = COL_DEPTH
    qc: ColumnName = COL_QC
    fs: ColumnName = COL_FS
    u2: ColumnName = COL_U2

    # Optional columns kept by `sanitize_dataframe` when present.
    u0: ColumnName = COL_U0
    sv_tot: ColumnName = COL_SV_TOT
    sv_eff: ColumnName = COL_SV_EFF
54
+
55
class OutputColumns(BaseModel):
    # Names of the output columns. Every field defaults to the canonical
    # name from `conic._canonical`. The model is immutable and rejects
    # unknown keys.
    model_config = ConfigDict(extra="forbid", frozen=True)

    qt: ColumnName = COL_QT
    fr: ColumnName = COL_FR
    bq: ColumnName = COL_BQ

    n: ColumnName = COL_N
    qtn: ColumnName = COL_QTN
    ic: ColumnName = COL_IC
    convg: ColumnName = COL_CONVG

    cd: ColumnName = COL_CD
    ib: ColumnName = COL_IB
69
+
70
class Columns(BaseModel):
    # Groups the input and output column-name settings. Both sub-models
    # fall back to their own defaults when omitted.
    model_config = ConfigDict(extra="forbid", frozen=True)

    input: InputColumns = Field(default_factory=InputColumns)
    output: OutputColumns = Field(default_factory=OutputColumns)
75
+
76
class Parameters(BaseModel):
    # Numeric parameters of the evaluation. The model is immutable and
    # rejects unknown keys.
    model_config = ConfigDict(extra="forbid", frozen=True)

    # Must lie in (0, 1]; defaults to the canonical AREA_RATIO.
    area_ratio: UnitRatio = AREA_RATIO
    # Strictly positive; defaults to the canonical GAMMA_WATER.
    gamma_water: PositiveFloat = GAMMA_WATER
    # Optional, strictly positive when given.
    gamma_soil: Optional[PositiveFloat] = None
    # Optional, non-negative when given. The hydrostatic step writes an
    # all-zero u0 column when this is None.
    water_level: Optional[NonNegativeFloat] = None
83
+
84
class Cleansing(BaseModel):
    # Settings for the depth re-spacing and indicator-cleaning steps. The
    # model is immutable and rejects unknown keys.
    model_config = ConfigDict(extra="forbid", frozen=True)

    # Depth-axis overrides; when None the step infers them from the data.
    start_depth: Optional[NonNegativeFloat] = None
    spacing: Optional[PositiveFloat] = None

    # Values to clean (empty by default) and how to treat matches.
    indicators: Indicators = Field(default_factory=list)
    clean_mode: CleanMode = cast(CleanMode, CLEAN_MODE)
92
+
93
class Configurator(BaseModel):
    # Root configuration object: column names, numeric parameters and
    # cleansing settings. Instances are immutable; the `with_*` methods
    # return modified copies instead of mutating in place.
    model_config = ConfigDict(extra="forbid", frozen=True)

    columns: Columns = Field(default_factory=Columns)
    parameters: Parameters = Field(default_factory=Parameters)
    cleansing: Cleansing = Field(default_factory=Cleansing)

    def _with_field(self, submodel_name: str, field_name: str, value: object) -> Self:
        """Return a copy with one field of one sub-model replaced.

        The sub-model is rebuilt through its own constructor, so the new
        value is validated against the field's constraints.
        """
        current = getattr(self, submodel_name)

        payload = current.model_dump() | {field_name: value}
        rebuilt = type(current)(**payload)

        return self.model_copy(update={submodel_name: rebuilt})

    # --- `parameters` setters -------------------------------------------

    def with_area_ratio(self, value: UnitRatio) -> Self:
        """Return a copy with ``parameters.area_ratio`` set to ``value``."""
        return self._with_field("parameters", "area_ratio", value)

    def with_gamma_water(self, value: PositiveFloat) -> Self:
        """Return a copy with ``parameters.gamma_water`` set to ``value``."""
        return self._with_field("parameters", "gamma_water", value)

    def with_gamma_soil(self, value: Optional[PositiveFloat]) -> Self:
        """Return a copy with ``parameters.gamma_soil`` set to ``value``."""
        return self._with_field("parameters", "gamma_soil", value)

    def with_water_level(self, value: Optional[NonNegativeFloat]) -> Self:
        """Return a copy with ``parameters.water_level`` set to ``value``."""
        return self._with_field("parameters", "water_level", value)

    # --- `cleansing` setters --------------------------------------------

    def with_start_depth(self, value: Optional[NonNegativeFloat]) -> Self:
        """Return a copy with ``cleansing.start_depth`` set to ``value``."""
        return self._with_field("cleansing", "start_depth", value)

    def with_spacing(self, value: Optional[PositiveFloat]) -> Self:
        """Return a copy with ``cleansing.spacing`` set to ``value``."""
        return self._with_field("cleansing", "spacing", value)

    def with_indicators(self, values: list[float]) -> Self:
        """Return a copy with ``cleansing.indicators`` set to ``values``."""
        return self._with_field("cleansing", "indicators", values)

    def with_clean_mode(self, value: CleanMode) -> Self:
        """Return a copy with ``cleansing.clean_mode`` set to ``value``."""
        return self._with_field("cleansing", "clean_mode", value)

    @classmethod
    def from_toml(cls, file_path: Path | str) -> Self:
        """Load and validate a configuration from a TOML file."""
        toml_path = Path(file_path)
        with toml_path.open("rb") as handle:
            raw_config = tomllib.load(handle)
        return cls.model_validate(raw_config)
137
+
@@ -0,0 +1,142 @@
1
+ from collections.abc import Callable
2
+ from dataclasses import dataclass
3
+ from datetime import datetime, timezone
4
+ from typing import Literal, NamedTuple, Self, overload
5
+
6
+ import polars as pl
7
+
8
+ from conic import preprocess
9
+ from conic.config import Configurator
10
+ from conic._helpers import dataframe_fingerprint
11
+
12
+
13
@dataclass(frozen=True, slots=True)
class Step:
    """A named DataFrame-to-DataFrame transformation used in a pipeline."""

    # Identifier listed under "steps" in the pipeline run metadata.
    name: str
    # Callable applied to the DataFrame produced by the previous step.
    apply: Callable[[pl.DataFrame], pl.DataFrame]
17
+
18
@dataclass(frozen=True, slots=True)
class StepCatalog:
    """Factory of :class:`Step` objects pre-bound to a configuration.

    Each method wraps one function of ``conic.preprocess`` in a closure
    that supplies the keyword arguments taken from ``config``. The step is
    named after the wrapped function.
    """

    config: Configurator

    def adjust_depth_spacing(self) -> Step:
        """Build the step that rewrites the depth column with even spacing."""
        target = preprocess.adjust_depth_spacing
        settings = self.config.cleansing
        inputs = self.config.columns.input

        def run_step(frame: pl.DataFrame) -> pl.DataFrame:
            return target(
                frame,
                start_depth=settings.start_depth,
                spacing=settings.spacing,
                col_depth=inputs.depth,
            )

        return Step(name=target.__name__, apply=run_step)

    def sanitize_dataframe(self) -> Step:
        """Build the step that keeps only the configured input columns."""
        target = preprocess.sanitize_dataframe
        inputs = self.config.columns.input

        def run_step(frame: pl.DataFrame) -> pl.DataFrame:
            return target(
                frame,
                col_depth=inputs.depth,
                col_qc=inputs.qc,
                col_fs=inputs.fs,
                col_u2=inputs.u2,
                col_u0=inputs.u0,
                col_sv_eff=inputs.sv_eff,
                col_sv_tot=inputs.sv_tot,
            )

        return Step(name=target.__name__, apply=run_step)

    def compute_hydrostatic(self) -> Step:
        """Build the step that (re)computes the hydrostatic pressure column.

        The step always passes ``override=True``, so an existing ``u0``
        column is overwritten.
        """
        target = preprocess.compute_hydrostatic
        params = self.config.parameters
        inputs = self.config.columns.input

        def run_step(frame: pl.DataFrame) -> pl.DataFrame:
            return target(
                frame,
                water_level=params.water_level,
                gamma_water=params.gamma_water,
                col_depth=inputs.depth,
                col_u0=inputs.u0,
                override=True,
            )

        return Step(name=target.__name__, apply=run_step)

    def clean_by_indicators(self) -> Step:
        """Build the step that removes or nulls out indicator values."""
        target = preprocess.clean_by_indicators
        settings = self.config.cleansing

        def run_step(frame: pl.DataFrame) -> pl.DataFrame:
            return target(
                frame,
                indicators=settings.indicators,
                mode=settings.clean_mode,
            )

        return Step(name=target.__name__, apply=run_step)
84
+
85
class PipelineResult(NamedTuple):
    """Output of a pipeline run requested with ``metadata=True``."""

    # DataFrame obtained after applying every step.
    data: pl.DataFrame
    # Run description: input hash, source path, config dump, step names
    # and a UTC timestamp.
    metadata: dict[str, object]
88
+
89
@dataclass(frozen=True, slots=True)
class Pipeliner:
    """Ordered sequence of preprocessing steps bound to one configuration.

    Attributes:
        config: Configuration the pipeline was built from; its dump is
            included in the run metadata.
        steps: Steps applied, in order, by :meth:`run`.
    """

    config: Configurator
    steps: tuple[Step, ...]

    @classmethod
    def default(cls, config: Configurator) -> Self:
        """Build the standard pipeline for ``config``.

        Steps run in this order: sanitize the DataFrame, adjust the depth
        spacing, clean by indicators, compute the hydrostatic pressure.
        """
        catalog = StepCatalog(config)

        steps = (
            catalog.sanitize_dataframe(),
            catalog.adjust_depth_spacing(),
            catalog.clean_by_indicators(),
            catalog.compute_hydrostatic(),
        )

        return cls(config=config, steps=steps)

    @overload
    def run(
        self,
        inp_data: pl.DataFrame, *,
        metadata: Literal[False] = False,
    ) -> pl.DataFrame: ...

    @overload
    def run(
        self,
        inp_data: pl.DataFrame, *,
        metadata: Literal[True],
    ) -> PipelineResult: ...

    def run(
        self,
        inp_data: pl.DataFrame, *,
        metadata: bool = False
    ) -> pl.DataFrame | PipelineResult:
        """Apply every step to ``inp_data`` in order.

        Args:
            inp_data: DataFrame fed to the first step.
            metadata: When true, also return a description of the run.

        Returns:
            The processed DataFrame, or a :class:`PipelineResult` holding
            the processed DataFrame and the run metadata when
            ``metadata=True``.
        """
        out_data = inp_data

        for step in self.steps:
            out_data = step.apply(out_data)

        if not metadata:
            return out_data

        meta: dict[str, object] = {
            # The fingerprint is taken from the *input* data, not the result.
            "data_hash": dataframe_fingerprint(inp_data),
            "source_path": None,
            "config": self.config.model_dump(),
            "steps": [step.name for step in self.steps],
            # `isoformat` must be called: storing the bound method would
            # put a method object in the metadata instead of an ISO string.
            "timestamp_utc": datetime.now(timezone.utc).isoformat(),
        }

        return PipelineResult(data=out_data, metadata=meta)
142
+
@@ -0,0 +1,158 @@
1
+ from typing import Literal, Optional, cast
2
+
3
+ import polars as pl
4
+ from polars.exceptions import ColumnNotFoundError
5
+
6
+ from conic._canonical import (
7
+ COL_DEPTH,
8
+ COL_FS,
9
+ COL_QC,
10
+ COL_SV_EFF,
11
+ COL_SV_TOT,
12
+ COL_U0,
13
+ COL_U2,
14
+ GAMMA_WATER
15
+ )
16
+ from conic._helpers import get_missing_columns
17
+
18
def compute_hydrostatic(
    data: pl.DataFrame,
    water_level: Optional[float] = None, *,
    gamma_water: float = GAMMA_WATER,
    col_depth: str = COL_DEPTH,
    col_u0: str = COL_U0,
    override: bool = False
) -> pl.DataFrame:
    """Add (or overwrite) the hydrostatic pressure column.

    Rows at or below ``water_level`` get
    ``(depth - water_level) * gamma_water``; rows above it get ``0.0``.
    When ``water_level`` is ``None`` the whole column is ``0.0``.

    Args:
        data: DataFrame containing the depth column.
        water_level: Depth of the water table, or ``None`` for no water.
        gamma_water: Multiplier applied to the depth below the water level.
        col_depth: Name of the depth column.
        col_u0: Name of the hydrostatic pressure column to write.
        override: Allow replacing an existing ``col_u0`` column.

    Returns:
        A DataFrame with the ``col_u0`` column set.

    Raises:
        ColumnNotFoundError: If ``col_depth`` is not in ``data``.
        ValueError: If ``col_u0`` already exists and ``override`` is false.
    """
    existing = data.columns

    if col_depth not in existing:
        raise ColumnNotFoundError(
            f"Depth column is missing in DataFrame: '{col_depth}'."
        )

    if not override and col_u0 in existing:
        raise ValueError(
            f"Hydrostatic pressure ({col_u0}) was already included in "
            f"this DataFrame. Set `override=True` to override."
        )

    if water_level is None:
        pressure = pl.lit(0.0)
    else:
        depth = pl.col(col_depth)
        pressure = (
            pl.when(depth >= water_level)
            .then((depth - water_level) * gamma_water)
            .otherwise(0.0)
        )

    return data.with_columns(pressure.alias(col_u0))
49
+
50
def adjust_depth_spacing(
    data: pl.DataFrame, *,
    start_depth: Optional[float] = None,
    spacing: Optional[float] = None,
    digits: int = 3,
    col_depth: str = COL_DEPTH,
) -> pl.DataFrame:
    """Replace the depth column with an evenly spaced sequence.

    The new depth of row ``i`` is ``start_depth + i * spacing``, rounded
    to ``digits`` decimals.

    Args:
        data: DataFrame containing the depth column.
        start_depth: First depth; defaults to the depth of the first row.
        spacing: Depth increment; defaults to the mean difference between
            consecutive depths, rounded to ``digits`` decimals.
        digits: Number of decimals used for rounding.
        col_depth: Name of the depth column.

    Returns:
        A DataFrame whose ``col_depth`` column holds the new depths.

    Raises:
        ColumnNotFoundError: If ``col_depth`` is not in ``data``.
        ValueError: If ``data`` has fewer than two rows.
    """
    if col_depth not in data.columns:
        raise ColumnNotFoundError(
            f"Depth column is missing in DataFrame: '{col_depth}'."
        )

    nrows = data.height
    if nrows < 2:
        raise ValueError(
            "DataFrame must have at least 2 rows to infer depth spacing. "
        )

    origin = data.item(0, col_depth) if start_depth is None else start_depth

    if spacing is None:
        mean_step = data.get_column(col_depth).diff().mean()
        step = round(cast(float, mean_step), digits)
    else:
        step = spacing

    offsets = pl.int_range(nrows, eager=True) * step
    regular_depths = (origin + offsets).round(digits)

    return data.with_columns(regular_depths.alias(col_depth))
79
+
80
def sanitize_dataframe(
    data: pl.DataFrame, *,
    col_depth: str = COL_DEPTH,
    col_qc: str = COL_QC,
    col_fs: str = COL_FS,
    col_u2: str = COL_U2,
    col_u0: str = COL_U0,
    col_sv_eff: str = COL_SV_EFF,
    col_sv_tot: str = COL_SV_TOT,
    include_optional: bool = True
) -> pl.DataFrame:
    """Keep only the known input columns, in a deterministic order.

    The required columns (depth, qc, fs, u2) always come first, in that
    order. When ``include_optional`` is true, the optional columns
    (u0, sv_eff, sv_tot) present in ``data`` follow, in that order.

    Args:
        data: DataFrame to sanitize.
        col_depth: Name of the depth column (required).
        col_qc: Name of the qc column (required).
        col_fs: Name of the fs column (required).
        col_u2: Name of the u2 column (required).
        col_u0: Name of the u0 column (optional).
        col_sv_eff: Name of the effective vertical stress column (optional).
        col_sv_tot: Name of the total vertical stress column (optional).
        include_optional: Whether to keep the optional columns found.

    Returns:
        A DataFrame restricted to the selected columns.

    Raises:
        ColumnNotFoundError: If any required column is missing.
    """
    required_columns = [col_depth, col_qc, col_fs, col_u2]
    optional_columns = [col_u0, col_sv_eff, col_sv_tot]

    if missing_columns := get_missing_columns(data, required_columns):
        raise ColumnNotFoundError(
            f"Missing required columns: '{missing_columns}'."
        )

    # Copy so that extending the selection never mutates `required_columns`.
    selected_columns = list(required_columns)

    if include_optional:
        available = set(data.columns)
        # Walk the optional names in declaration order (deduplicated, as a
        # set would) rather than iterating a set: set order of strings
        # changes between interpreter runs, which made the output column
        # order non-reproducible.
        selected_columns += [
            name
            for name in dict.fromkeys(optional_columns)
            if name in available
        ]

    return data.select(selected_columns)
107
+
108
def filter_by_indicators(
    data: pl.DataFrame,
    indicators: list[float], *,
    action: Literal["remove", "select"] = "remove"
) -> pl.DataFrame:
    """Filter rows by whether any numeric column holds an indicator value.

    Args:
        data: DataFrame to filter.
        indicators: Values to look for in the numeric columns.
        action: ``"remove"`` drops the rows containing an indicator;
            ``"select"`` keeps only those rows.

    Returns:
        The filtered DataFrame.

    Raises:
        ValueError: If ``action`` is neither ``"remove"`` nor ``"select"``.
    """
    if action not in ("remove", "select"):
        raise ValueError(
            "Invalid `action` argument. Must be 'remove' or 'select'."
        )

    # True for rows where at least one numeric column matches an indicator.
    has_indicator = pl.any_horizontal(
        pl.selectors.numeric().is_in(indicators)
    )

    if action == "remove":
        return data.filter(has_indicator.not_())
    return data.filter(has_indicator)
123
+
124
def split_by_indicators(
    data: pl.DataFrame,
    indicators: list[float], *,
    index_col: str = "_id_"
) -> tuple[pl.DataFrame, pl.DataFrame]:
    """Split ``data`` into rows with and rows without indicator values.

    A row index column named ``index_col`` is added before splitting, so
    both parts keep the original row positions.

    Returns:
        A ``(rows_with, rows_without)`` pair of DataFrames.
    """
    indexed = data.with_row_index(index_col)

    flagged = filter_by_indicators(indexed, indicators, action="select")
    clean = filter_by_indicators(indexed, indicators, action="remove")

    return flagged, clean
136
+
137
def clean_by_indicators(
    data: pl.DataFrame,
    indicators: list[float], *,
    mode: Literal["remove", "replace"] = "replace"
) -> pl.DataFrame:
    """Clean indicator values out of the numeric columns.

    Args:
        data: DataFrame to clean.
        indicators: Values to look for in the numeric columns.
        mode: ``"remove"`` drops every row containing an indicator;
            ``"replace"`` keeps all rows and turns each matching value
            into a null.

    Returns:
        The cleaned DataFrame.

    Raises:
        ValueError: If ``mode`` is neither ``"remove"`` nor ``"replace"``.
    """
    if mode not in ("remove", "replace"):
        # The message names the actual parameter (`mode`); it previously
        # referred to a non-existent `action` argument.
        raise ValueError(
            "Invalid `mode` argument. Must be 'remove' or 'replace'."
        )

    if mode == "remove":
        return filter_by_indicators(data, indicators, action="remove")

    numeric = pl.selectors.numeric()
    return data.with_columns(
        pl.when(numeric.is_in(indicators))
        .then(None)
        .otherwise(numeric)
        # Keep the original column names so the columns are replaced in place.
        .name.keep()
    )
158
+
File without changes