geo-activity-playground 0.24.1__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. geo_activity_playground/__main__.py +0 -2
  2. geo_activity_playground/core/activities.py +71 -149
  3. geo_activity_playground/core/enrichment.py +164 -0
  4. geo_activity_playground/core/paths.py +34 -15
  5. geo_activity_playground/core/tasks.py +26 -3
  6. geo_activity_playground/explorer/tile_visits.py +78 -42
  7. geo_activity_playground/{core → importers}/activity_parsers.py +7 -14
  8. geo_activity_playground/importers/directory.py +36 -27
  9. geo_activity_playground/importers/strava_api.py +45 -38
  10. geo_activity_playground/importers/strava_checkout.py +24 -16
  11. geo_activity_playground/webui/activity/controller.py +2 -2
  12. geo_activity_playground/webui/activity/templates/activity/show.html.j2 +2 -0
  13. geo_activity_playground/webui/app.py +11 -31
  14. geo_activity_playground/webui/entry_controller.py +5 -5
  15. geo_activity_playground/webui/equipment/controller.py +80 -39
  16. geo_activity_playground/webui/equipment/templates/equipment/index.html.j2 +14 -3
  17. geo_activity_playground/webui/heatmap/heatmap_controller.py +6 -0
  18. geo_activity_playground/webui/strava/__init__.py +0 -0
  19. geo_activity_playground/webui/strava/blueprint.py +33 -0
  20. geo_activity_playground/webui/strava/controller.py +47 -0
  21. geo_activity_playground/webui/{templates/strava-connect.html.j2 → strava/templates/strava/client-id.html.j2} +3 -7
  22. geo_activity_playground/webui/strava/templates/strava/connected.html.j2 +14 -0
  23. geo_activity_playground/webui/summary/controller.py +11 -8
  24. geo_activity_playground/webui/templates/home.html.j2 +5 -0
  25. geo_activity_playground/webui/templates/page.html.j2 +3 -0
  26. geo_activity_playground/webui/templates/settings.html.j2 +15 -0
  27. geo_activity_playground/webui/upload/controller.py +12 -16
  28. {geo_activity_playground-0.24.1.dist-info → geo_activity_playground-0.25.0.dist-info}/METADATA +1 -1
  29. {geo_activity_playground-0.24.1.dist-info → geo_activity_playground-0.25.0.dist-info}/RECORD +32 -28
  30. geo_activity_playground/core/cache_migrations.py +0 -133
  31. geo_activity_playground/webui/strava_controller.py +0 -27
  32. {geo_activity_playground-0.24.1.dist-info → geo_activity_playground-0.25.0.dist-info}/LICENSE +0 -0
  33. {geo_activity_playground-0.24.1.dist-info → geo_activity_playground-0.25.0.dist-info}/WHEEL +0 -0
  34. {geo_activity_playground-0.24.1.dist-info → geo_activity_playground-0.25.0.dist-info}/entry_points.txt +0 -0
@@ -8,7 +8,6 @@ import coloredlogs
8
8
 
9
9
  from .importers.strava_checkout import convert_strava_checkout
10
10
  from geo_activity_playground.core.activities import ActivityRepository
11
- from geo_activity_playground.core.cache_migrations import apply_cache_migrations
12
11
  from geo_activity_playground.core.config import get_config
13
12
  from geo_activity_playground.explorer.tile_visits import TileVisitAccessor
14
13
  from geo_activity_playground.explorer.video import explorer_video_main
@@ -97,7 +96,6 @@ def make_activity_repository(
97
96
  basedir: pathlib.Path, skip_strava: bool
98
97
  ) -> tuple[ActivityRepository, TileVisitAccessor, dict]:
99
98
  os.chdir(basedir)
100
- apply_cache_migrations()
101
99
  config = get_config()
102
100
 
103
101
  if not config.get("prefer_metadata_from_file", True):
@@ -1,7 +1,7 @@
1
1
  import datetime
2
2
  import functools
3
3
  import logging
4
- import pathlib
4
+ import pickle
5
5
  from typing import Iterator
6
6
  from typing import Optional
7
7
  from typing import TypedDict
@@ -13,12 +13,9 @@ import pandas as pd
13
13
  from tqdm import tqdm
14
14
 
15
15
  from geo_activity_playground.core.config import get_config
16
- from geo_activity_playground.core.coordinates import get_distance
17
- from geo_activity_playground.core.paths import activities_path
18
- from geo_activity_playground.core.paths import activity_timeseries_path
19
- from geo_activity_playground.core.tasks import WorkTracker
20
- from geo_activity_playground.core.tiles import compute_tile_float
21
- from geo_activity_playground.core.time_conversion import convert_to_datetime_ns
16
+ from geo_activity_playground.core.paths import activities_file
17
+ from geo_activity_playground.core.paths import activity_enriched_meta_dir
18
+ from geo_activity_playground.core.paths import activity_enriched_time_series_dir
22
19
 
23
20
  logger = logging.getLogger(__name__)
24
21
 
@@ -34,6 +31,7 @@ class ActivityMeta(TypedDict):
34
31
  equipment: str
35
32
  id: int
36
33
  kind: str
34
+ moving_time: datetime.timedelta
37
35
  name: str
38
36
  path: str
39
37
  start_latitude: float
@@ -42,69 +40,77 @@ class ActivityMeta(TypedDict):
42
40
  steps: int
43
41
 
44
42
 
43
+ def make_activity_meta() -> ActivityMeta:
44
+ return ActivityMeta(
45
+ calories=None,
46
+ commute=False,
47
+ consider_for_achievements=True,
48
+ equipment="Unknown",
49
+ kind="Unknown",
50
+ steps=None,
51
+ )
52
+
53
+
54
+ def build_activity_meta() -> None:
55
+ if activities_file().exists():
56
+ meta = pd.read_parquet(activities_file())
57
+ present_ids = set(meta["id"])
58
+ else:
59
+ meta = pd.DataFrame(columns=["id"])
60
+ present_ids = set()
61
+
62
+ available_ids = {
63
+ int(path.stem) for path in activity_enriched_meta_dir().glob("*.pickle")
64
+ }
65
+ new_ids = available_ids - present_ids
66
+ deleted_ids = present_ids - available_ids
67
+
68
+ # Remove updated activities and read these again.
69
+ if activities_file().exists():
70
+ meta_mtime = activities_file().stat().st_mtime
71
+ updated_ids = {
72
+ int(path.stem)
73
+ for path in activity_enriched_meta_dir().glob("*.pickle")
74
+ if path.stat().st_mtime > meta_mtime
75
+ }
76
+ new_ids.update(updated_ids)
77
+ deleted_ids.update(updated_ids & present_ids)
78
+
79
+ if deleted_ids:
80
+ logger.debug(f"Removing activities {deleted_ids} from repository.")
81
+ meta.drop(sorted(deleted_ids), axis="index", inplace=True)
82
+
83
+ rows = []
84
+ for new_id in tqdm(new_ids, desc="Register new activities"):
85
+ with open(activity_enriched_meta_dir() / f"{new_id}.pickle", "rb") as f:
86
+ rows.append(pickle.load(f))
87
+
88
+ if rows:
89
+ new_shard = pd.DataFrame(rows)
90
+ new_shard.index = new_shard["id"]
91
+ new_shard.index.name = "index"
92
+ meta = pd.concat([meta, new_shard])
93
+
94
+ if len(meta):
95
+ assert pd.api.types.is_dtype_equal(meta["start"].dtype, "datetime64[ns]"), (
96
+ meta["start"].dtype,
97
+ meta["start"].iloc[0],
98
+ )
99
+
100
+ meta.sort_values("start", inplace=True)
101
+
102
+ meta.to_parquet(activities_file())
103
+
104
+
45
105
  class ActivityRepository:
46
106
  def __init__(self) -> None:
47
- if activities_path().exists():
48
- self.meta = pd.read_parquet(activities_path())
49
- self.meta.index = self.meta["id"]
50
- self.meta.index.name = "index"
51
- if not pd.api.types.is_dtype_equal(
52
- self.meta["start"].dtype, "datetime64[ns]"
53
- ):
54
- self.meta["start"] = convert_to_datetime_ns(self.meta["start"])
55
- else:
56
- self.meta = pd.DataFrame()
57
-
58
- self._loose_activities: list[ActivityMeta] = []
59
- self._loose_activity_ids: set[int] = set()
107
+ self.meta = None
60
108
 
61
109
  def __len__(self) -> int:
62
110
  return len(self.meta)
63
111
 
64
- def add_activity(self, activity_meta: ActivityMeta) -> None:
65
- _extend_metadata_from_timeseries(activity_meta)
66
- if activity_meta["id"] in self._loose_activity_ids:
67
- logger.error(f"Activity with the same file already exists. New activity:")
68
- print(activity_meta)
69
- print("Existing activity:")
70
- print(
71
- [
72
- activity
73
- for activity in self._loose_activities
74
- if activity["id"] == activity_meta["id"]
75
- ]
76
- )
77
- raise ValueError("Activity with the same file already exists.")
78
- self._loose_activities.append(activity_meta)
79
- self._loose_activity_ids.add(activity_meta["id"])
80
-
81
- def commit(self) -> None:
82
- if self._loose_activities:
83
- logger.debug(
84
- f"Adding {len(self._loose_activities)} activities to the repository …"
85
- )
86
- new_df = pd.DataFrame(self._loose_activities)
87
- if len(self.meta):
88
- new_ids_set = set(new_df["id"])
89
- is_kept = [
90
- activity_id not in new_ids_set for activity_id in self.meta["id"]
91
- ]
92
- old_df = self.meta.loc[is_kept]
93
- else:
94
- old_df = self.meta
95
-
96
- self.meta = pd.concat([old_df, new_df])
97
- assert pd.api.types.is_dtype_equal(
98
- self.meta["start"].dtype, "datetime64[ns]"
99
- ), (self.meta["start"].dtype, self.meta["start"].iloc[0])
100
- self.save()
101
- self._loose_activities = []
102
-
103
- def save(self) -> None:
104
- self.meta.index = self.meta["id"]
105
- self.meta.index.name = "index"
106
- self.meta.sort_values("start", inplace=True)
107
- self.meta.to_parquet(activities_path())
112
+ def reload(self) -> None:
113
+ self.meta = pd.read_parquet(activities_file())
108
114
 
109
115
  def has_activity(self, activity_id: int) -> bool:
110
116
  if len(self.meta):
@@ -143,7 +149,7 @@ class ActivityRepository:
143
149
 
144
150
  @functools.lru_cache(maxsize=3000)
145
151
  def get_time_series(self, id: int) -> pd.DataFrame:
146
- path = activity_timeseries_path(id)
152
+ path = activity_enriched_time_series_dir() / f"{id}.parquet"
147
153
  try:
148
154
  df = pd.read_parquet(path)
149
155
  except OSError as e:
@@ -154,79 +160,6 @@ class ActivityRepository:
154
160
  return df
155
161
 
156
162
 
157
- def embellish_time_series(repository: ActivityRepository) -> None:
158
- work_tracker = WorkTracker("embellish-time-series")
159
- activities_to_process = work_tracker.filter(repository.get_activity_ids())
160
- for activity_id in tqdm(activities_to_process, desc="Embellish time series data"):
161
- path = activity_timeseries_path(activity_id)
162
- df = pd.read_parquet(path)
163
- df.name = id
164
- df, changed = embellish_single_time_series(
165
- df, repository.get_activity_by_id(activity_id)["start"]
166
- )
167
- if changed:
168
- df.to_parquet(path)
169
- work_tracker.mark_done(activity_id)
170
- work_tracker.close()
171
-
172
-
173
- def embellish_single_time_series(
174
- timeseries: pd.DataFrame, start: Optional[datetime.datetime] = None
175
- ) -> bool:
176
- changed = False
177
-
178
- if start is not None and pd.api.types.is_dtype_equal(
179
- timeseries["time"].dtype, "int64"
180
- ):
181
- time = timeseries["time"]
182
- del timeseries["time"]
183
- timeseries["time"] = [
184
- convert_to_datetime_ns(start + datetime.timedelta(seconds=t)) for t in time
185
- ]
186
- changed = True
187
- assert pd.api.types.is_dtype_equal(timeseries["time"].dtype, "datetime64[ns]")
188
-
189
- distances = get_distance(
190
- timeseries["latitude"].shift(1),
191
- timeseries["longitude"].shift(1),
192
- timeseries["latitude"],
193
- timeseries["longitude"],
194
- ).fillna(0.0)
195
- time_diff_threshold_seconds = 30
196
- time_diff = (timeseries["time"] - timeseries["time"].shift(1)).dt.total_seconds()
197
- jump_indices = (time_diff >= time_diff_threshold_seconds) & (distances > 100)
198
- distances.loc[jump_indices] = 0.0
199
-
200
- if not "distance_km" in timeseries.columns:
201
- timeseries["distance_km"] = pd.Series(np.cumsum(distances)) / 1000
202
- changed = True
203
-
204
- if "speed" not in timeseries.columns:
205
- timeseries["speed"] = (
206
- timeseries["distance_km"].diff()
207
- / (timeseries["time"].diff().dt.total_seconds() + 1e-3)
208
- * 3600
209
- )
210
- changed = True
211
-
212
- potential_jumps = (timeseries["speed"] > 40) & (timeseries["speed"].diff() > 10)
213
- if np.any(potential_jumps):
214
- timeseries = timeseries.loc[~potential_jumps].copy()
215
- changed = True
216
-
217
- if "segment_id" not in timeseries.columns:
218
- timeseries["segment_id"] = np.cumsum(jump_indices)
219
- changed = True
220
-
221
- if "x" not in timeseries.columns:
222
- x, y = compute_tile_float(timeseries["latitude"], timeseries["longitude"], 0)
223
- timeseries["x"] = x
224
- timeseries["y"] = y
225
- changed = True
226
-
227
- return timeseries, changed
228
-
229
-
230
163
  def make_geojson_from_time_series(time_series: pd.DataFrame) -> str:
231
164
  fc = geojson.FeatureCollection(
232
165
  features=[
@@ -319,14 +252,3 @@ def extract_heart_rate_zones(time_series: pd.DataFrame) -> Optional[pd.DataFrame
319
252
  duration_per_zone.loc[i] = 0.0
320
253
  result = duration_per_zone.reset_index()
321
254
  return result
322
-
323
-
324
- def _extend_metadata_from_timeseries(metadata: ActivityMeta) -> None:
325
- timeseries = pd.read_parquet(
326
- pathlib.Path("Cache/Activity Timeseries") / f"{metadata['id']}.parquet"
327
- )
328
-
329
- metadata["start_latitude"] = timeseries["latitude"].iloc[0]
330
- metadata["end_latitude"] = timeseries["latitude"].iloc[-1]
331
- metadata["start_longitude"] = timeseries["longitude"].iloc[0]
332
- metadata["end_longitude"] = timeseries["longitude"].iloc[-1]
@@ -0,0 +1,164 @@
1
+ import datetime
2
+ import logging
3
+ import pickle
4
+ from typing import Any
5
+ from typing import Optional
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ from tqdm import tqdm
10
+
11
+ from geo_activity_playground.core.activities import ActivityMeta
12
+ from geo_activity_playground.core.activities import make_activity_meta
13
+ from geo_activity_playground.core.coordinates import get_distance
14
+ from geo_activity_playground.core.paths import activity_enriched_meta_dir
15
+ from geo_activity_playground.core.paths import activity_enriched_time_series_dir
16
+ from geo_activity_playground.core.paths import activity_extracted_meta_dir
17
+ from geo_activity_playground.core.paths import activity_extracted_time_series_dir
18
+ from geo_activity_playground.core.tiles import compute_tile_float
19
+ from geo_activity_playground.core.time_conversion import convert_to_datetime_ns
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
+ def enrich_activities(kind_defaults: dict[dict[str, Any]]) -> None:
25
+ # Delete removed activities.
26
+ for enriched_metadata_path in activity_enriched_meta_dir().glob("*.pickle"):
27
+ if not (activity_extracted_meta_dir() / enriched_metadata_path.name).exists():
28
+ logger.warning(f"Deleting {enriched_metadata_path}")
29
+ enriched_metadata_path.unlink()
30
+ for enriched_time_series_path in activity_enriched_time_series_dir().glob(
31
+ "*.parquet"
32
+ ):
33
+ if not (
34
+ activity_extracted_time_series_dir() / enriched_time_series_path.name
35
+ ).exists():
36
+ logger.warning(f"Deleting {enriched_time_series_path}")
37
+ enriched_time_series_path.unlink()
38
+
39
+ # Get new metadata paths.
40
+ new_extracted_metadata_paths = []
41
+ for extracted_metadata_path in activity_extracted_meta_dir().glob("*.pickle"):
42
+ enriched_metadata_path = (
43
+ activity_enriched_meta_dir() / extracted_metadata_path.name
44
+ )
45
+ if (
46
+ not enriched_metadata_path.exists()
47
+ or enriched_metadata_path.stat().st_mtime
48
+ < extracted_metadata_path.stat().st_mtime
49
+ ):
50
+ new_extracted_metadata_paths.append(extracted_metadata_path)
51
+
52
+ for extracted_metadata_path in tqdm(
53
+ new_extracted_metadata_paths, desc="Enrich new activity data"
54
+ ):
55
+ # Read extracted data.
56
+ activity_id = extracted_metadata_path.stem
57
+ extracted_time_series_path = (
58
+ activity_extracted_time_series_dir() / f"{activity_id}.parquet"
59
+ )
60
+ time_series = pd.read_parquet(extracted_time_series_path)
61
+ with open(extracted_metadata_path, "rb") as f:
62
+ extracted_metadata = pickle.load(f)
63
+
64
+ metadata = make_activity_meta()
65
+ metadata.update(extracted_metadata)
66
+
67
+ # Enrich time series.
68
+ metadata.update(kind_defaults.get(metadata["kind"], {}))
69
+ time_series = _embellish_single_time_series(
70
+ time_series, metadata.get("start", None)
71
+ )
72
+ metadata.update(_get_metadata_from_timeseries(time_series))
73
+
74
+ # Write enriched data.
75
+ enriched_metadata_path = activity_enriched_meta_dir() / f"{activity_id}.pickle"
76
+ enriched_time_series_path = (
77
+ activity_enriched_time_series_dir() / f"{activity_id}.parquet"
78
+ )
79
+ with open(enriched_metadata_path, "wb") as f:
80
+ pickle.dump(metadata, f)
81
+ time_series.to_parquet(enriched_time_series_path)
82
+
83
+
84
+ def _get_metadata_from_timeseries(timeseries: pd.DataFrame) -> ActivityMeta:
85
+ metadata = ActivityMeta()
86
+
87
+ # Extract some meta data from the time series.
88
+ metadata["start"] = timeseries["time"].iloc[0]
89
+ metadata["elapsed_time"] = timeseries["time"].iloc[-1] - timeseries["time"].iloc[0]
90
+ metadata["distance_km"] = timeseries["distance_km"].iloc[-1]
91
+ if "calories" in timeseries.columns:
92
+ metadata["calories"] = timeseries["calories"].iloc[-1]
93
+ metadata["moving_time"] = _compute_moving_time(timeseries)
94
+
95
+ metadata["start_latitude"] = timeseries["latitude"].iloc[0]
96
+ metadata["end_latitude"] = timeseries["latitude"].iloc[-1]
97
+ metadata["start_longitude"] = timeseries["longitude"].iloc[0]
98
+ metadata["end_longitude"] = timeseries["longitude"].iloc[-1]
99
+
100
+ return metadata
101
+
102
+
103
+ def _compute_moving_time(time_series: pd.DataFrame) -> datetime.timedelta:
104
+ def moving_time(group) -> datetime.timedelta:
105
+ selection = group["speed"] > 1.0
106
+ time_diff = group["time"].diff().loc[selection]
107
+ return time_diff.sum()
108
+
109
+ return (
110
+ time_series.groupby("segment_id").apply(moving_time, include_groups=False).sum()
111
+ )
112
+
113
+
114
+ def _embellish_single_time_series(
115
+ timeseries: pd.DataFrame, start: Optional[datetime.datetime] = None
116
+ ) -> pd.DataFrame:
117
+ if start is not None and pd.api.types.is_dtype_equal(
118
+ timeseries["time"].dtype, "int64"
119
+ ):
120
+ time = timeseries["time"]
121
+ del timeseries["time"]
122
+ timeseries["time"] = [
123
+ convert_to_datetime_ns(start + datetime.timedelta(seconds=t)) for t in time
124
+ ]
125
+ timeseries["time"] = convert_to_datetime_ns(timeseries["time"])
126
+ assert pd.api.types.is_dtype_equal(timeseries["time"].dtype, "datetime64[ns]"), (
127
+ timeseries["time"].dtype,
128
+ timeseries["time"].iloc[0],
129
+ )
130
+
131
+ distances = get_distance(
132
+ timeseries["latitude"].shift(1),
133
+ timeseries["longitude"].shift(1),
134
+ timeseries["latitude"],
135
+ timeseries["longitude"],
136
+ ).fillna(0.0)
137
+ time_diff_threshold_seconds = 30
138
+ time_diff = (timeseries["time"] - timeseries["time"].shift(1)).dt.total_seconds()
139
+ jump_indices = time_diff >= time_diff_threshold_seconds
140
+ distances.loc[jump_indices] = 0.0
141
+
142
+ if "distance_km" not in timeseries.columns:
143
+ timeseries["distance_km"] = pd.Series(np.cumsum(distances)) / 1000
144
+
145
+ if "speed" not in timeseries.columns:
146
+ timeseries["speed"] = (
147
+ timeseries["distance_km"].diff()
148
+ / (timeseries["time"].diff().dt.total_seconds() + 1e-3)
149
+ * 3600
150
+ )
151
+
152
+ potential_jumps = (timeseries["speed"] > 40) & (timeseries["speed"].diff() > 10)
153
+ if np.any(potential_jumps):
154
+ timeseries = timeseries.loc[~potential_jumps].copy()
155
+
156
+ if "segment_id" not in timeseries.columns:
157
+ timeseries["segment_id"] = np.cumsum(jump_indices)
158
+
159
+ if "x" not in timeseries.columns:
160
+ x, y = compute_tile_float(timeseries["latitude"], timeseries["longitude"], 0)
161
+ timeseries["x"] = x
162
+ timeseries["y"] = y
163
+
164
+ return timeseries
@@ -6,32 +6,51 @@ import pathlib
6
6
  import typing
7
7
 
8
8
 
9
- def dir_wrapper(
10
- dir_func: typing.Callable[[], pathlib.Path]
11
- ) -> typing.Callable[[], pathlib.Path]:
12
- @functools.wraps(dir_func)
9
+ def dir_wrapper(path: pathlib.Path) -> typing.Callable[[], pathlib.Path]:
13
10
  @functools.cache
14
11
  def wrapper() -> pathlib.Path:
15
- path = dir_func()
16
12
  path.mkdir(exist_ok=True, parents=True)
17
13
  return path
18
14
 
19
15
  return wrapper
20
16
 
21
17
 
22
- @dir_wrapper
23
- def cache_dir() -> pathlib.Path:
24
- return pathlib.Path("Cache")
18
+ def file_wrapper(path: pathlib.Path) -> typing.Callable[[], pathlib.Path]:
19
+ @functools.cache
20
+ def wrapper() -> pathlib.Path:
21
+ path.parent.mkdir(exist_ok=True, parents=True)
22
+ return path
23
+
24
+ return wrapper
25
+
26
+
27
+ _cache_dir = pathlib.Path("Cache")
28
+
29
+ _activity_dir = _cache_dir / "Activity"
30
+ _activity_extracted_dir = _activity_dir / "Extracted"
31
+ _activity_extracted_meta_dir = _activity_extracted_dir / "Meta"
32
+ _activity_extracted_time_series_dir = _activity_extracted_dir / "Time Series"
33
+
34
+ _activity_enriched_dir = _activity_dir / "Enriched"
35
+ _activity_enriched_meta_dir = _activity_enriched_dir / "Meta"
36
+ _activity_enriched_time_series_dir = _activity_enriched_dir / "Time Series"
37
+ _activities_file = _activity_dir / "activities.parquet"
25
38
 
39
+ _tiles_per_time_series = _cache_dir / "Tiles" / "Tiles Per Time Series"
26
40
 
27
- @dir_wrapper
28
- def activity_timeseries_dir() -> pathlib.Path:
29
- return cache_dir() / "Activity Timeseries"
41
+ _strava_api_dir = pathlib.Path("Strava API")
42
+ _strava_dynamic_config_path = _strava_api_dir / "strava-client-id.json"
30
43
 
31
44
 
32
- def activities_path() -> pathlib.Path:
33
- return cache_dir() / "activities.parquet"
45
+ cache_dir = dir_wrapper(_cache_dir)
34
46
 
47
+ activity_extracted_dir = dir_wrapper(_activity_extracted_dir)
48
+ activity_extracted_meta_dir = dir_wrapper(_activity_extracted_meta_dir)
49
+ activity_extracted_time_series_dir = dir_wrapper(_activity_extracted_time_series_dir)
50
+ activity_enriched_meta_dir = dir_wrapper(_activity_enriched_meta_dir)
51
+ activity_enriched_time_series_dir = dir_wrapper(_activity_enriched_time_series_dir)
52
+ tiles_per_time_series = dir_wrapper(_tiles_per_time_series)
53
+ strava_api_dir = dir_wrapper(_strava_api_dir)
35
54
 
36
- def activity_timeseries_path(activity_id: int) -> pathlib.Path:
37
- return activity_timeseries_dir() / f"{activity_id}.parquet"
55
+ activities_file = file_wrapper(_activities_file)
56
+ strava_dynamic_config_path = file_wrapper(_strava_dynamic_config_path)
@@ -50,8 +50,8 @@ def work_tracker(path: pathlib.Path):
50
50
 
51
51
 
52
52
  class WorkTracker:
53
- def __init__(self, name: str) -> None:
54
- self._path = work_tracker_path(name)
53
+ def __init__(self, path: pathlib.Path) -> None:
54
+ self._path = path
55
55
 
56
56
  if self._path.exists():
57
57
  with open(self._path, "rb") as f:
@@ -59,12 +59,15 @@ class WorkTracker:
59
59
  else:
60
60
  self._done = set()
61
61
 
62
- def filter(self, ids: Iterable[int]) -> set[int]:
62
+ def filter(self, ids: Iterable) -> set:
63
63
  return set(ids) - self._done
64
64
 
65
65
  def mark_done(self, id: int) -> None:
66
66
  self._done.add(id)
67
67
 
68
+ def discard(self, id) -> None:
69
+ self._done.discard(id)
70
+
68
71
  def close(self) -> None:
69
72
  with open(self._path, "wb") as f:
70
73
  pickle.dump(self._done, f)
@@ -77,3 +80,23 @@ def try_load_pickle(path: pathlib.Path) -> Any:
77
80
  return pickle.load(f)
78
81
  except ModuleNotFoundError:
79
82
  pass
83
+
84
+
85
+ class TransformVersion:
86
+ def __init__(self, path: pathlib.Path, code_version: int) -> None:
87
+ self._path = path
88
+ self._code_version = code_version
89
+
90
+ with open(path) as f:
91
+ self._actual_version = json.load(f)
92
+
93
+ assert (
94
+ self._actual_version <= self._code_version
95
+ ), "You attempt to use a more modern playground with an older code version, that is not supported."
96
+
97
+ def outdated(self) -> bool:
98
+ return self._actual_version < self._code_version
99
+
100
+ def write(self) -> None:
101
+ with open(self._path, "w") as f:
102
+ json.dump(self._code_version, f)