sibi-dst 2025.1.13__py3-none-any.whl → 2025.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. sibi_dst/__init__.py +7 -1
  2. sibi_dst/df_helper/_artifact_updater_multi_wrapper.py +235 -342
  3. sibi_dst/df_helper/_df_helper.py +417 -117
  4. sibi_dst/df_helper/_parquet_artifact.py +255 -283
  5. sibi_dst/df_helper/backends/parquet/_parquet_options.py +8 -4
  6. sibi_dst/df_helper/backends/sqlalchemy/_db_connection.py +68 -107
  7. sibi_dst/df_helper/backends/sqlalchemy/_db_gatekeeper.py +15 -0
  8. sibi_dst/df_helper/backends/sqlalchemy/_io_dask.py +105 -255
  9. sibi_dst/df_helper/backends/sqlalchemy/_load_from_db.py +90 -42
  10. sibi_dst/df_helper/backends/sqlalchemy/_model_registry.py +192 -0
  11. sibi_dst/df_helper/backends/sqlalchemy/_sql_model_builder.py +122 -72
  12. sibi_dst/osmnx_helper/route_path_builder.py +45 -46
  13. sibi_dst/utils/base.py +302 -96
  14. sibi_dst/utils/clickhouse_writer.py +472 -206
  15. sibi_dst/utils/data_utils.py +139 -186
  16. sibi_dst/utils/data_wrapper.py +317 -73
  17. sibi_dst/utils/date_utils.py +1 -0
  18. sibi_dst/utils/df_utils.py +193 -213
  19. sibi_dst/utils/file_utils.py +3 -2
  20. sibi_dst/utils/filepath_generator.py +314 -152
  21. sibi_dst/utils/log_utils.py +581 -242
  22. sibi_dst/utils/manifest_manager.py +60 -76
  23. sibi_dst/utils/parquet_saver.py +33 -27
  24. sibi_dst/utils/phone_formatter.py +88 -95
  25. sibi_dst/utils/update_planner.py +180 -178
  26. sibi_dst/utils/webdav_client.py +116 -166
  27. {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.1.dist-info}/METADATA +1 -1
  28. {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.1.dist-info}/RECORD +29 -27
  29. {sibi_dst-2025.1.13.dist-info → sibi_dst-2025.8.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,192 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import threading
5
+ from typing import Dict, Optional, Tuple
6
+
7
+ from sqlalchemy import MetaData, Table
8
+ from sqlalchemy.engine import Engine
9
+ from sqlalchemy.orm import DeclarativeBase
10
+
11
+
12
class Base(DeclarativeBase):
    """Declarative base shared by every ORM model generated in this package."""
15
+
16
+
17
+ # Backward-compatible default module label for generated classes
18
+ apps_label = "datacubes.models"
19
+
20
+
21
class ModelRegistry:
    """
    Thread-safe registry of reflected tables and their mapped ORM classes.

    A table is reflected into one ``MetaData`` per ``(engine URL, schema)``
    and one mapped class is cached per ``(engine URL, schema, table)``.
    """

    def __init__(self) -> None:
        # One MetaData per (engine URL, schema); holds the reflected Table objects.
        self._metadata_cache: Dict[Tuple[str, Optional[str]], MetaData] = {}
        # Mapped class per (engine URL, schema, table name).
        self._model_cache: Dict[Tuple[str, Optional[str], str], type] = {}
        # Guards the dictionaries stored on this instance.
        self._lock = threading.RLock()
        # One lock per (engine URL, schema); serialises reflection and class creation.
        self._md_locks: Dict[Tuple[str, Optional[str]], threading.Lock] = {}

    # ---------- key helpers ----------
    @staticmethod
    def _engine_key(engine: Engine) -> str:
        """Return the cache key for *engine*: the string form of its URL."""
        return str(engine.url)

    @staticmethod
    def _qualified_key(schema: Optional[str], table: str) -> str:
        """Return ``schema.table`` when *schema* is truthy, otherwise *table*."""
        return f"{schema}.{table}" if schema else table

    @staticmethod
    def _split_schema_and_table(name: str) -> Tuple[Optional[str], str]:
        """
        Split *name* on its first dot into ``(schema, table)``.

        A name without a dot yields ``(None, name)``; an empty schema part
        (``".users"``) is also reported as ``None``.
        """
        schema, sep, table = name.partition(".")
        if not sep:
            return None, name
        return (schema or None), table

    # ---------- class name helpers ----------
    @staticmethod
    def _normalize_class_name(table_name: str) -> str:
        """Turn a snake_case table name into a CamelCase class name."""
        return "".join(part.capitalize() for part in table_name.split("_"))

    @staticmethod
    def _short_hash(*parts: str, length: int = 8) -> str:
        """Return the first *length* hex digits of SHA-1 over ``"|".join(parts)``."""
        h = hashlib.sha1("|".join(parts).encode("utf-8")).hexdigest()
        return h[:length]

    def _is_class_name_taken(self, class_name: str, module_label: str) -> bool:
        """
        Tell whether a class mapped under ``Base`` already uses this
        ``__name__`` / ``__module__`` pair.
        """
        # Avoid SA private registries; inspect mappers instead (public)
        for mapper in list(Base.registry.mappers):
            try:
                cls = mapper.class_
                if getattr(cls, "__name__", None) == class_name and getattr(cls, "__module__", None) == module_label:
                    return True
            except Exception:
                # Best-effort scan: a mapper that cannot be inspected is skipped.
                continue
        return False

    def _pick_class_name(
        self,
        tname: str,
        ekey: str,
        schema: Optional[str],
        module_label: str,
    ) -> str:
        """
        Choose a class name for table *tname* that is free under *module_label*.

        The CamelCase table name is preferred. If it is taken, a short hash of
        ``(engine key, schema, table)`` is appended; if that is taken as well
        (possible after a refresh of the same table), a numeric suffix is added.
        """
        base_name = self._normalize_class_name(tname)
        if not self._is_class_name_taken(base_name, module_label):
            return base_name
        hashed = f"{base_name}_{self._short_hash(ekey, schema or '', tname)}"
        candidate = hashed
        counter = 1
        while self._is_class_name_taken(candidate, module_label):
            counter += 1
            candidate = f"{hashed}_{counter}"
        return candidate

    def _find_existing_model_for_table(self, tbl: Table) -> Optional[type]:
        """
        Return a class already mapped under ``Base`` for *tbl*, or ``None``.

        A class matches when its ``__table__`` is *tbl* itself, or is a
        ``Table`` with the same schema and name.
        """
        # NOTE(review): the name-based match does not look at the engine, so
        # same-named tables reached through different engines resolve to the
        # same class — confirm that this is intended.
        for mapper in list(Base.registry.mappers):
            try:
                mapped_cls = mapper.class_
                mapped_tbl = getattr(mapped_cls, "__table__", None)
                if mapped_tbl is tbl:
                    return mapped_cls
                if isinstance(mapped_tbl, Table):
                    if (mapped_tbl.schema == tbl.schema) and (mapped_tbl.name == tbl.name):
                        return mapped_cls
            except Exception:
                # Best-effort scan: a mapper that cannot be inspected is skipped.
                continue
        return None

    # ---------- metadata helpers ----------
    def _get_or_create_metadata(self, ekey: str, schema: Optional[str]) -> MetaData:
        """Return the cached ``MetaData`` for ``(ekey, schema)``, creating it on first use."""
        md_key = (ekey, schema)
        with self._lock:
            md = self._metadata_cache.get(md_key)
            if md is None:
                md = MetaData(schema=schema)
                self._metadata_cache[md_key] = md
            return md

    def _get_or_create_md_lock(self, md_key: Tuple[str, Optional[str]]) -> threading.Lock:
        """Return the lock associated with *md_key*, creating it on first use."""
        with self._lock:
            lock = self._md_locks.get(md_key)
            if lock is None:
                lock = threading.Lock()
                self._md_locks[md_key] = lock
            return lock

    # ---------- public API ----------
    def get_model(
        self,
        engine: Engine,
        table_name: str,
        *,
        refresh: bool = False,
        schema: Optional[str] = None,
        module_label: Optional[str] = None,
        prefer_stable_names: bool = True,
    ) -> type:
        """
        Return the mapped class for *table_name*, reflecting it if necessary.

        Args:
            engine: Engine used for reflection; its URL string is part of the cache key.
            table_name: Table name, optionally written as ``schema.table``.
            refresh: Drop the cached model and the cached metadata for this
                ``(engine, schema)``, reflect again and build a new class.
            schema: Explicit schema; when not ``None`` it overrides a schema
                parsed from *table_name*.
            module_label: ``__module__`` given to a newly built class; falls
                back to ``apps_label``.
            prefer_stable_names: Accepted for interface compatibility. Both
                settings currently resolve a taken class name the same way
                (hash suffix).  NOTE(review): confirm whether ``False`` was
                meant to reuse the plain CamelCase name.

        Returns:
            The mapped class for the table.

        Raises:
            ValueError: If the table is absent from the metadata after
                reflection. (Presumably SQLAlchemy's own reflection error
                surfaces first for a table the database does not have —
                verify against the SQLAlchemy version in use.)
        """
        parsed_schema, tname = self._split_schema_and_table(table_name)
        schema = schema if schema is not None else parsed_schema
        ekey = self._engine_key(engine)
        model_key = (ekey, schema, tname)
        md_key = (ekey, schema)
        module_label = module_label or apps_label
        qname = self._qualified_key(schema, tname)

        if not refresh:
            # fast path: already cached model
            with self._lock:
                cached = self._model_cache.get(model_key)
            if cached is not None:
                return cached

        # Cache re-check, reflection and class creation all happen under the
        # per-(engine, schema) lock so concurrent callers cannot each build a
        # class for the same table.
        md_lock = self._get_or_create_md_lock(md_key)
        with md_lock:
            with self._lock:
                if refresh:
                    self._model_cache.pop(model_key, None)
                    self._metadata_cache.pop(md_key, None)
                    cached = None
                else:
                    cached = self._model_cache.get(model_key)
            if cached is not None:
                return cached

            md = self._get_or_create_metadata(ekey, schema)
            tbl = md.tables.get(qname)
            if tbl is None:
                # `only` takes names relative to the MetaData's schema, so the
                # bare table name is passed; the resulting key in `md.tables`
                # is still the schema-qualified one.
                md.reflect(bind=engine, only=[tname])
                tbl = md.tables.get(qname)

            if tbl is None:
                raise ValueError(f"Table '{qname}' does not exist in the database.")

            # Reuse a class already mapped for this table, except on refresh:
            # there the previously mapped class is exactly what must be replaced.
            model_cls = None if refresh else self._find_existing_model_for_table(tbl)
            if model_cls is None:
                final_name = self._pick_class_name(tname, ekey, schema, module_label)
                attrs = {
                    "__tablename__": tbl.name,
                    "__table__": tbl,
                    "__module__": module_label,
                }
                model_cls = type(final_name, (Base,), attrs)

            with self._lock:
                self._model_cache[model_key] = model_cls
            return model_cls

    def clear(self) -> None:
        """Empty the metadata, model and lock caches of this registry."""
        with self._lock:
            self._metadata_cache.clear()
            self._model_cache.clear()
            self._md_locks.clear()
186
+
187
+
188
# Process-wide registry & helper
_global_registry = ModelRegistry()


def get_global_registry() -> ModelRegistry:
    """Return the module-level ``ModelRegistry`` instance created at import time."""
    return _global_registry
@@ -1,104 +1,154 @@
1
- import re
2
1
  import keyword
2
+ import re
3
3
  import threading
4
- from sqlalchemy import MetaData, Engine
5
- from sqlalchemy.orm import DeclarativeBase
6
-
4
+ from sqlalchemy.engine import Engine
7
5
 
8
- class Base(DeclarativeBase):
9
- """Shared declarative base for all ORM models."""
10
- pass
6
+ from ._model_registry import ModelRegistry, apps_label
11
7
 
12
8
 
13
- apps_label = "datacubes.models"
9
+ # Global process-wide registry for backward compatibility
10
+ _global_model_registry = ModelRegistry()
14
11
 
15
12
 
16
13
  class SqlAlchemyModelBuilder:
17
14
  """
18
15
  Builds a single SQLAlchemy ORM model from a specific database table.
19
- This class is thread-safe and caches reflected table metadata to
20
- improve performance across multiple instantiations.
16
+ Thread-safe and uses a process-wide registry for reuse.
17
+
18
+ Backward compatibility:
19
+ - Keeps CamelCase(table) as preferred class name
20
+ - Publishes classes under `apps_label` unless overridden
21
+ - Public API unchanged
21
22
  """
23
+
22
24
  _lock = threading.Lock()
23
- _metadata_cache: dict[str, MetaData] = {}
24
25
 
25
26
  def __init__(self, engine: Engine, table_name: str):
26
- """
27
- Initializes the model builder for a specific table.
28
-
29
- Args:
30
- engine: The SQLAlchemy engine connected to the database.
31
- table_name: The name of the table to generate the model for.
32
- """
33
27
  self.engine = engine
34
28
  self.table_name = table_name
35
- self.class_name = self._normalize_class_name(self.table_name)
36
-
37
- engine_key = str(engine.url)
38
-
39
- # ✅ REFACTOR: Acquire lock to make cache access and creation atomic,
40
- # preventing a race condition between multiple threads.
41
- with self._lock:
42
- if engine_key not in self._metadata_cache:
43
- self._metadata_cache[engine_key] = MetaData()
44
- self.metadata = self._metadata_cache[engine_key]
45
29
 
46
30
  def build_model(self) -> type:
47
- """
48
- Builds and returns a database model class for the specified table.
49
- This process is atomic and thread-safe.
50
-
51
- Raises:
52
- ValueError: If the specified table does not exist in the database.
53
- Returns:
54
- The dynamically created ORM model class.
55
- """
56
31
  with self._lock:
57
- # NOTE: Using a private SQLAlchemy API. This is a performance
58
- # optimization but may break in future versions of the library.
59
- registered_model = Base.registry._class_registry.get(self.class_name)
60
- if registered_model:
61
- return registered_model
62
-
63
- # Check if the table's schema is in our metadata cache
64
- table = self.metadata.tables.get(self.table_name)
65
-
66
- # If not cached, reflect it from the database
67
- if table is None:
68
- self.metadata.reflect(bind=self.engine, only=[self.table_name])
69
- table = self.metadata.tables.get(self.table_name)
70
-
71
- if table is None:
72
- raise ValueError(
73
- f"Table '{self.table_name}' does not exist in the database."
74
- )
75
-
76
- # Create the model class dynamically.
77
- attrs = {
78
- "__tablename__": table.name,
79
- "__table__": table,
80
- "__module__": apps_label,
81
- }
82
- model = type(self.class_name, (Base,), attrs)
83
-
84
- return model
32
+ return _global_model_registry.get_model(
33
+ engine=self.engine,
34
+ table_name=self.table_name,
35
+ module_label=apps_label,
36
+ prefer_stable_names=True,
37
+ )
85
38
 
86
39
  @staticmethod
87
40
  def _normalize_class_name(table_name: str) -> str:
88
- """Converts a snake_case table_name to a CamelCase class name."""
89
41
  return "".join(word.capitalize() for word in table_name.split("_"))
90
42
 
91
43
  @staticmethod
92
44
  def _normalize_column_name(column_name: str) -> str:
93
- """
94
- Sanitizes a column name to be a valid Python identifier.
95
- (Kept for utility, though not used in the final model creation).
96
- """
97
45
  sane_name = re.sub(r"\W", "_", column_name)
98
46
  sane_name = re.sub(r"^\d", r"_\g<0>", sane_name)
99
-
100
47
  if keyword.iskeyword(sane_name):
101
48
  return f"{sane_name}_field"
102
49
  return sane_name
103
50
 
104
-
51
+ # import re
52
+ # import keyword
53
+ # import threading
54
+ # from sqlalchemy import MetaData, Engine
55
+ # from sqlalchemy.orm import DeclarativeBase
56
+ #
57
+ #
58
+ # class Base(DeclarativeBase):
59
+ # """Shared declarative base for all ORM models."""
60
+ # pass
61
+ #
62
+ #
63
+ # apps_label = "datacubes.models"
64
+ #
65
+ #
66
+ # class SqlAlchemyModelBuilder:
67
+ # """
68
+ # Builds a single SQLAlchemy ORM model from a specific database table.
69
+ # This class is thread-safe and caches reflected table metadata to
70
+ # improve performance across multiple instantiations.
71
+ # """
72
+ # _lock = threading.Lock()
73
+ # _metadata_cache: dict[str, MetaData] = {}
74
+ #
75
+ # def __init__(self, engine: Engine, table_name: str):
76
+ # """
77
+ # Initializes the model builder for a specific table.
78
+ #
79
+ # Args:
80
+ # engine: The SQLAlchemy engine connected to the database.
81
+ # table_name: The name of the table to generate the model for.
82
+ # """
83
+ # self.engine = engine
84
+ # self.table_name = table_name
85
+ # self.class_name = self._normalize_class_name(self.table_name)
86
+ #
87
+ # engine_key = str(engine.url)
88
+ #
89
+ # # ✅ REFACTOR: Acquire lock to make cache access and creation atomic,
90
+ # # preventing a race condition between multiple threads.
91
+ # with self._lock:
92
+ # if engine_key not in self._metadata_cache:
93
+ # self._metadata_cache[engine_key] = MetaData()
94
+ # self.metadata = self._metadata_cache[engine_key]
95
+ #
96
+ # def build_model(self) -> type:
97
+ # """
98
+ # Builds and returns a database model class for the specified table.
99
+ # This process is atomic and thread-safe.
100
+ #
101
+ # Raises:
102
+ # ValueError: If the specified table does not exist in the database.
103
+ # Returns:
104
+ # The dynamically created ORM model class.
105
+ # """
106
+ # with self._lock:
107
+ # # NOTE: Using a private SQLAlchemy API. This is a performance
108
+ # # optimization but may break in future versions of the library.
109
+ # registered_model = Base.registry._class_registry.get(self.class_name)
110
+ # if registered_model:
111
+ # return registered_model
112
+ #
113
+ # # Check if the table's schema is in our metadata cache
114
+ # table = self.metadata.tables.get(self.table_name)
115
+ #
116
+ # # If not cached, reflect it from the database
117
+ # if table is None:
118
+ # self.metadata.reflect(bind=self.engine, only=[self.table_name])
119
+ # table = self.metadata.tables.get(self.table_name)
120
+ #
121
+ # if table is None:
122
+ # raise ValueError(
123
+ # f"Table '{self.table_name}' does not exist in the database."
124
+ # )
125
+ #
126
+ # # Create the model class dynamically.
127
+ # attrs = {
128
+ # "__tablename__": table.name,
129
+ # "__table__": table,
130
+ # "__module__": apps_label,
131
+ # }
132
+ # model = type(self.class_name, (Base,), attrs)
133
+ #
134
+ # return model
135
+ #
136
+ # @staticmethod
137
+ # def _normalize_class_name(table_name: str) -> str:
138
+ # """Converts a snake_case table_name to a CamelCase class name."""
139
+ # return "".join(word.capitalize() for word in table_name.split("_"))
140
+ #
141
+ # @staticmethod
142
+ # def _normalize_column_name(column_name: str) -> str:
143
+ # """
144
+ # Sanitizes a column name to be a valid Python identifier.
145
+ # (Kept for utility, though not used in the final model creation).
146
+ # """
147
+ # sane_name = re.sub(r"\W", "_", column_name)
148
+ # sane_name = re.sub(r"^\d", r"_\g<0>", sane_name)
149
+ #
150
+ # if keyword.iskeyword(sane_name):
151
+ # return f"{sane_name}_field"
152
+ # return sane_name
153
+ #
154
+ #
@@ -2,48 +2,44 @@ import numpy as np
2
2
  import pandas as pd
3
3
  import networkx as nx
4
4
  import osmnx as ox
5
- from typing import List
5
+ from typing import List, Optional
6
+ from pydantic import BaseModel
7
+
8
+ class RoutePathBuilderConfig(BaseModel):
9
+ """
10
+ A Pydantic model to validate the configuration for the RoutePathBuilder.
11
+ """
12
+ graph: nx.MultiDiGraph
13
+ sort_key: List[str] # Made mandatory
14
+ grouping_col: Optional[str] = None
15
+ lat_col: str = "latitude"
16
+ lon_col: str = "longitude"
17
+
18
+ class Config:
19
+ arbitrary_types_allowed = True
6
20
 
7
21
  class RoutePathBuilder:
8
22
  """
9
- Builds shortest paths for consecutive GPS points (origins & destinations) within each associate's track.
23
+ Builds shortest paths (Dijkstra Algorithm) for consecutive GPS points.
24
+ This version requires an explicit sort_key for correctness.
10
25
  """
11
26
 
12
- def __init__(
13
- self,
14
- graph: nx.MultiDiGraph,
15
- lat_col: str = "latitude",
16
- lon_col: str = "longitude",
17
- grouping_col: str = "associate_id",
18
- sort_key=None # Default sort key for DataFrame
19
- ):
27
+ def __init__(self, config: RoutePathBuilderConfig):
20
28
  """
21
- :param graph: The OSMnx MultiDiGraph.
22
- :param lat_col: Column name for latitude.
23
- :param lon_col: Column name for longitude.
24
- :param associate_col: Column name for associate/grouping key.
29
+ Initializes the builder with a validated configuration object.
25
30
  """
26
- if sort_key is None:
27
- sort_key = ["associate_id", "date_time"]
28
- self.graph = graph
29
- self.lat_col = lat_col
30
- self.lon_col = lon_col
31
- self.grouping_col = grouping_col
32
- self.sort_key = sort_key
33
- if self.sort_key is None:
34
- self.sort_key = [self.grouping_col, "date_time"]
31
+ self.config = config
35
32
 
33
+ # Static methods _get_shortest_path and _path_length_from_nodes remain unchanged...
36
34
  @staticmethod
37
35
  def _get_shortest_path(u: int, v: int, graph: nx.MultiDiGraph) -> List[int]:
38
- """Return the node sequence for the shortest path from u to v, or [] if none."""
39
36
  try:
40
- return nx.shortest_path(graph, u, v, weight="length")
37
+ return nx.shortest_path(graph, u, v, weight="length", method="dijkstra")
41
38
  except nx.NetworkXNoPath:
42
39
  return []
43
40
 
44
41
  @staticmethod
45
42
  def _path_length_from_nodes(node_list: List[int], graph: nx.MultiDiGraph) -> float:
46
- """Sum up the 'length' attribute along consecutive node pairs."""
47
43
  if len(node_list) < 2:
48
44
  return np.nan
49
45
  total = 0.0
@@ -53,46 +49,49 @@ class RoutePathBuilder:
53
49
  total += min(lengths) if lengths else 0
54
50
  return total
55
51
 
52
+
56
53
  def build_routes(self, df: pd.DataFrame) -> pd.DataFrame:
57
54
  """
58
- Generate destination coordinates, snap to graph nodes, and compute shortest paths.
59
-
60
- :param df: Input DataFrame containing grouping_col, latitude, and longitude columns.
61
- :return: DataFrame with added columns:
62
- ['dest_lat', 'dest_lon', 'origin_node', 'dest_node', 'path_nodes', 'path_coords', 'distance_m']
55
+ Generates routes from a DataFrame of GPS points.
63
56
  """
64
- # 1) Build destination coordinates by shifting per grouping column
65
57
  df = df.copy()
66
- df["dest_lat"] = df.groupby(self.grouping_col)[self.lat_col].shift(-1)
67
- df["dest_lon"] = df.groupby(self.grouping_col)[self.lon_col].shift(-1)
68
58
 
69
- # Drop tail rows without next point
59
+ df = df.sort_values(by=self.config.sort_key).reset_index(drop=True)
60
+
61
+ # 2. Create destination columns by shifting within each group or across the df
62
+ if self.config.grouping_col:
63
+ df["dest_lat"] = df.groupby(by=self.config.grouping_col)[self.config.lat_col].shift(-1)
64
+ df["dest_lon"] = df.groupby(by=self.config.grouping_col)[self.config.lon_col].shift(-1)
65
+ else:
66
+ df["dest_lat"] = df[self.config.lat_col].shift(-1)
67
+ df["dest_lon"] = df[self.config.lon_col].shift(-1)
68
+
70
69
  df = df.dropna(subset=["dest_lat", "dest_lon"]).reset_index(drop=True)
71
70
 
72
- # 2) Snap origin & destination points to graph nodes
71
+ # 3. Snap origin & destination coordinates to the nearest graph nodes
73
72
  df["origin_node"] = ox.nearest_nodes(
74
- self.graph, X=df[self.lon_col].values, Y=df[self.lat_col].values
73
+ self.config.graph, X=df[self.config.lon_col].values, Y=df[self.config.lat_col].values
75
74
  )
76
75
  df["dest_node"] = ox.nearest_nodes(
77
- self.graph, X=df["dest_lon"].values, Y=df["dest_lat"].values
76
+ self.config.graph, X=df["dest_lon"].values, Y=df["dest_lat"].values
78
77
  )
79
78
 
80
- # 3) Compute paths, coordinates, and distances
79
+ # 4. Calculate paths, coordinates, and distances
81
80
  df["path_nodes"] = [
82
- self._get_shortest_path(u, v, self.graph)
81
+ self._get_shortest_path(u, v, self.config.graph)
83
82
  for u, v in zip(df["origin_node"], df["dest_node"])
84
83
  ]
85
84
 
85
+ df = df[df["path_nodes"].str.len() > 0].reset_index(drop=True)
86
+
86
87
  df["path_coords"] = df["path_nodes"].apply(
87
- lambda nl: [(self.graph.nodes[n]["y"], self.graph.nodes[n]["x"]) for n in nl]
88
+ lambda nl: [(self.config.graph.nodes[n]["y"], self.config.graph.nodes[n]["x"]) for n in nl]
88
89
  )
89
90
 
90
91
  df["distance_m"] = df["path_nodes"].apply(
91
- lambda nl: self._path_length_from_nodes(nl, self.graph)
92
+ lambda nl: self._path_length_from_nodes(nl, self.config.graph)
92
93
  )
93
- # Ensure NaN distances become 0
94
94
  df["distance_m"] = df["distance_m"].fillna(0)
95
- # Remove any legs with no path
96
- df = df[df["path_nodes"].str.len() > 0].reset_index(drop=True)
97
95
 
98
- return df.sort_values(self.sort_key).reset_index(drop=True)
96
+ # The final sort is no longer needed, as it was done at the beginning
97
+ return df