geo-activity-playground 0.24.1-py3-none-any.whl → 0.25.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geo_activity_playground/__main__.py +0 -2
- geo_activity_playground/core/activities.py +71 -149
- geo_activity_playground/core/enrichment.py +164 -0
- geo_activity_playground/core/paths.py +34 -15
- geo_activity_playground/core/tasks.py +26 -3
- geo_activity_playground/explorer/tile_visits.py +78 -42
- geo_activity_playground/{core → importers}/activity_parsers.py +7 -14
- geo_activity_playground/importers/directory.py +36 -27
- geo_activity_playground/importers/strava_api.py +45 -38
- geo_activity_playground/importers/strava_checkout.py +24 -16
- geo_activity_playground/webui/activity/controller.py +2 -2
- geo_activity_playground/webui/activity/templates/activity/show.html.j2 +2 -0
- geo_activity_playground/webui/app.py +11 -31
- geo_activity_playground/webui/entry_controller.py +5 -5
- geo_activity_playground/webui/equipment/controller.py +80 -39
- geo_activity_playground/webui/equipment/templates/equipment/index.html.j2 +14 -3
- geo_activity_playground/webui/heatmap/heatmap_controller.py +6 -0
- geo_activity_playground/webui/strava/__init__.py +0 -0
- geo_activity_playground/webui/strava/blueprint.py +33 -0
- geo_activity_playground/webui/strava/controller.py +47 -0
- geo_activity_playground/webui/{templates/strava-connect.html.j2 → strava/templates/strava/client-id.html.j2} +3 -7
- geo_activity_playground/webui/strava/templates/strava/connected.html.j2 +14 -0
- geo_activity_playground/webui/summary/controller.py +11 -8
- geo_activity_playground/webui/templates/home.html.j2 +5 -0
- geo_activity_playground/webui/templates/page.html.j2 +3 -0
- geo_activity_playground/webui/templates/settings.html.j2 +15 -0
- geo_activity_playground/webui/upload/controller.py +12 -16
- {geo_activity_playground-0.24.1.dist-info → geo_activity_playground-0.25.0.dist-info}/METADATA +1 -1
- {geo_activity_playground-0.24.1.dist-info → geo_activity_playground-0.25.0.dist-info}/RECORD +32 -28
- geo_activity_playground/core/cache_migrations.py +0 -133
- geo_activity_playground/webui/strava_controller.py +0 -27
- {geo_activity_playground-0.24.1.dist-info → geo_activity_playground-0.25.0.dist-info}/LICENSE +0 -0
- {geo_activity_playground-0.24.1.dist-info → geo_activity_playground-0.25.0.dist-info}/WHEEL +0 -0
- {geo_activity_playground-0.24.1.dist-info → geo_activity_playground-0.25.0.dist-info}/entry_points.txt +0 -0
geo_activity_playground/__main__.py

@@ -8,7 +8,6 @@ import coloredlogs
 
 from .importers.strava_checkout import convert_strava_checkout
 from geo_activity_playground.core.activities import ActivityRepository
-from geo_activity_playground.core.cache_migrations import apply_cache_migrations
 from geo_activity_playground.core.config import get_config
 from geo_activity_playground.explorer.tile_visits import TileVisitAccessor
 from geo_activity_playground.explorer.video import explorer_video_main
@@ -97,7 +96,6 @@ def make_activity_repository(
     basedir: pathlib.Path, skip_strava: bool
 ) -> tuple[ActivityRepository, TileVisitAccessor, dict]:
     os.chdir(basedir)
-    apply_cache_migrations()
     config = get_config()
 
     if not config.get("prefer_metadata_from_file", True):
geo_activity_playground/core/activities.py

@@ -1,7 +1,7 @@
 import datetime
 import functools
 import logging
-import pathlib
+import pickle
 from typing import Iterator
 from typing import Optional
 from typing import TypedDict
@@ -13,12 +13,9 @@ import pandas as pd
 from tqdm import tqdm
 
 from geo_activity_playground.core.config import get_config
-from geo_activity_playground.core.coordinates import get_distance
-from geo_activity_playground.core.paths import activities_path
-from geo_activity_playground.core.paths import activity_timeseries_path
-from geo_activity_playground.core.tasks import WorkTracker
-from geo_activity_playground.core.tiles import compute_tile_float
-from geo_activity_playground.core.time_conversion import convert_to_datetime_ns
+from geo_activity_playground.core.paths import activities_file
+from geo_activity_playground.core.paths import activity_enriched_meta_dir
+from geo_activity_playground.core.paths import activity_enriched_time_series_dir
 
 logger = logging.getLogger(__name__)
 
@@ -34,6 +31,7 @@ class ActivityMeta(TypedDict):
     equipment: str
     id: int
     kind: str
+    moving_time: datetime.timedelta
     name: str
     path: str
     start_latitude: float
@@ -42,69 +40,77 @@ class ActivityMeta(TypedDict):
     steps: int
 
 
+def make_activity_meta() -> ActivityMeta:
+    return ActivityMeta(
+        calories=None,
+        commute=False,
+        consider_for_achievements=True,
+        equipment="Unknown",
+        kind="Unknown",
+        steps=None,
+    )
+
+
+def build_activity_meta() -> None:
+    if activities_file().exists():
+        meta = pd.read_parquet(activities_file())
+        present_ids = set(meta["id"])
+    else:
+        meta = pd.DataFrame(columns=["id"])
+        present_ids = set()
+
+    available_ids = {
+        int(path.stem) for path in activity_enriched_meta_dir().glob("*.pickle")
+    }
+    new_ids = available_ids - present_ids
+    deleted_ids = present_ids - available_ids
+
+    # Remove updated activities and read these again.
+    if activities_file().exists():
+        meta_mtime = activities_file().stat().st_mtime
+        updated_ids = {
+            int(path.stem)
+            for path in activity_enriched_meta_dir().glob("*.pickle")
+            if path.stat().st_mtime > meta_mtime
+        }
+        new_ids.update(updated_ids)
+        deleted_ids.update(updated_ids & present_ids)
+
+    if deleted_ids:
+        logger.debug(f"Removing activities {deleted_ids} from repository.")
+        meta.drop(sorted(deleted_ids), axis="index", inplace=True)
+
+    rows = []
+    for new_id in tqdm(new_ids, desc="Register new activities"):
+        with open(activity_enriched_meta_dir() / f"{new_id}.pickle", "rb") as f:
+            rows.append(pickle.load(f))
+
+    if rows:
+        new_shard = pd.DataFrame(rows)
+        new_shard.index = new_shard["id"]
+        new_shard.index.name = "index"
+        meta = pd.concat([meta, new_shard])
+
+    if len(meta):
+        assert pd.api.types.is_dtype_equal(meta["start"].dtype, "datetime64[ns]"), (
+            meta["start"].dtype,
+            meta["start"].iloc[0],
+        )
+
+        meta.sort_values("start", inplace=True)
+
+    meta.to_parquet(activities_file())
+
+
 class ActivityRepository:
     def __init__(self) -> None:
-        if activities_path().exists():
-            self.meta = pd.read_parquet(activities_path())
-            self.meta.index = self.meta["id"]
-            self.meta.index.name = "index"
-            if not pd.api.types.is_dtype_equal(
-                self.meta["start"].dtype, "datetime64[ns]"
-            ):
-                self.meta["start"] = convert_to_datetime_ns(self.meta["start"])
-        else:
-            self.meta = pd.DataFrame()
-
-        self._loose_activities: list[ActivityMeta] = []
-        self._loose_activity_ids: set[int] = set()
+        self.meta = None
 
     def __len__(self) -> int:
         return len(self.meta)
 
-    def add_activity(self, activity_meta: ActivityMeta) -> None:
-
-        if activity_meta["id"] in self._loose_activity_ids:
-            logger.error(f"Activity with the same file already exists. New activity:")
-            print(activity_meta)
-            print("Existing activity:")
-            print(
-                [
-                    activity
-                    for activity in self._loose_activities
-                    if activity["id"] == activity_meta["id"]
-                ]
-            )
-            raise ValueError("Activity with the same file already exists.")
-        self._loose_activities.append(activity_meta)
-        self._loose_activity_ids.add(activity_meta["id"])
-
-    def commit(self) -> None:
-        if self._loose_activities:
-            logger.debug(
-                f"Adding {len(self._loose_activities)} activities to the repository …"
-            )
-            new_df = pd.DataFrame(self._loose_activities)
-            if len(self.meta):
-                new_ids_set = set(new_df["id"])
-                is_kept = [
-                    activity_id not in new_ids_set for activity_id in self.meta["id"]
-                ]
-                old_df = self.meta.loc[is_kept]
-            else:
-                old_df = self.meta
-
-            self.meta = pd.concat([old_df, new_df])
-            assert pd.api.types.is_dtype_equal(
-                self.meta["start"].dtype, "datetime64[ns]"
-            ), (self.meta["start"].dtype, self.meta["start"].iloc[0])
-            self.save()
-            self._loose_activities = []
-
-    def save(self) -> None:
-        self.meta.index = self.meta["id"]
-        self.meta.index.name = "index"
-        self.meta.sort_values("start", inplace=True)
-        self.meta.to_parquet(activities_path())
+    def reload(self) -> None:
+        self.meta = pd.read_parquet(activities_file())
 
     def has_activity(self, activity_id: int) -> bool:
         if len(self.meta):
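
The new build_activity_meta() replaces the old add_activity()/commit() bookkeeping: instead of accumulating loose activities in memory, it reconciles activities.parquet with the enriched pickles on disk, dropping stale rows and appending new ones. A minimal sketch of that reconciliation with invented ids (not the real cache layout):

    # Toy version of the set arithmetic in build_activity_meta(); ids are invented.
    import pandas as pd

    present_ids = {1, 2, 3}    # ids already stored in activities.parquet
    available_ids = {2, 3, 4}  # ids that have an enriched pickle on disk

    new_ids = available_ids - present_ids      # {4}: load its pickle, append a row
    deleted_ids = present_ids - available_ids  # {1}: drop it from the DataFrame

    meta = pd.DataFrame({"id": [1, 2, 3]}, index=pd.Index([1, 2, 3], name="index"))
    meta.drop(sorted(deleted_ids), axis="index", inplace=True)

    new_shard = pd.DataFrame({"id": sorted(new_ids)})
    new_shard.index = new_shard["id"]
    new_shard.index.name = "index"
    print(pd.concat([meta, new_shard]))  # rows for ids 2, 3 and 4 remain
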
@@ -143,7 +149,7 @@ class ActivityRepository:
 
     @functools.lru_cache(maxsize=3000)
     def get_time_series(self, id: int) -> pd.DataFrame:
-        path = activity_timeseries_path(id)
+        path = activity_enriched_time_series_dir() / f"{id}.parquet"
         try:
             df = pd.read_parquet(path)
         except OSError as e:
@@ -154,79 +160,6 @@ class ActivityRepository:
         return df
 
 
-def embellish_time_series(repository: ActivityRepository) -> None:
-    work_tracker = WorkTracker("embellish-time-series")
-    activities_to_process = work_tracker.filter(repository.get_activity_ids())
-    for activity_id in tqdm(activities_to_process, desc="Embellish time series data"):
-        path = activity_timeseries_path(activity_id)
-        df = pd.read_parquet(path)
-        df.name = id
-        df, changed = embellish_single_time_series(
-            df, repository.get_activity_by_id(activity_id)["start"]
-        )
-        if changed:
-            df.to_parquet(path)
-        work_tracker.mark_done(activity_id)
-    work_tracker.close()
-
-
-def embellish_single_time_series(
-    timeseries: pd.DataFrame, start: Optional[datetime.datetime] = None
-) -> bool:
-    changed = False
-
-    if start is not None and pd.api.types.is_dtype_equal(
-        timeseries["time"].dtype, "int64"
-    ):
-        time = timeseries["time"]
-        del timeseries["time"]
-        timeseries["time"] = [
-            convert_to_datetime_ns(start + datetime.timedelta(seconds=t)) for t in time
-        ]
-        changed = True
-    assert pd.api.types.is_dtype_equal(timeseries["time"].dtype, "datetime64[ns]")
-
-    distances = get_distance(
-        timeseries["latitude"].shift(1),
-        timeseries["longitude"].shift(1),
-        timeseries["latitude"],
-        timeseries["longitude"],
-    ).fillna(0.0)
-    time_diff_threshold_seconds = 30
-    time_diff = (timeseries["time"] - timeseries["time"].shift(1)).dt.total_seconds()
-    jump_indices = (time_diff >= time_diff_threshold_seconds) & (distances > 100)
-    distances.loc[jump_indices] = 0.0
-
-    if not "distance_km" in timeseries.columns:
-        timeseries["distance_km"] = pd.Series(np.cumsum(distances)) / 1000
-        changed = True
-
-    if "speed" not in timeseries.columns:
-        timeseries["speed"] = (
-            timeseries["distance_km"].diff()
-            / (timeseries["time"].diff().dt.total_seconds() + 1e-3)
-            * 3600
-        )
-        changed = True
-
-    potential_jumps = (timeseries["speed"] > 40) & (timeseries["speed"].diff() > 10)
-    if np.any(potential_jumps):
-        timeseries = timeseries.loc[~potential_jumps].copy()
-        changed = True
-
-    if "segment_id" not in timeseries.columns:
-        timeseries["segment_id"] = np.cumsum(jump_indices)
-        changed = True
-
-    if "x" not in timeseries.columns:
-        x, y = compute_tile_float(timeseries["latitude"], timeseries["longitude"], 0)
-        timeseries["x"] = x
-        timeseries["y"] = y
-        changed = True
-
-    return timeseries, changed
-
-
 def make_geojson_from_time_series(time_series: pd.DataFrame) -> str:
     fc = geojson.FeatureCollection(
         features=[
@@ -319,14 +252,3 @@ def extract_heart_rate_zones(time_series: pd.DataFrame) -> Optional[pd.DataFrame]:
         duration_per_zone.loc[i] = 0.0
     result = duration_per_zone.reset_index()
     return result
-
-
-def _extend_metadata_from_timeseries(metadata: ActivityMeta) -> None:
-    timeseries = pd.read_parquet(
-        pathlib.Path("Cache/Activity Timeseries") / f"{metadata['id']}.parquet"
-    )
-
-    metadata["start_latitude"] = timeseries["latitude"].iloc[0]
-    metadata["end_latitude"] = timeseries["latitude"].iloc[-1]
-    metadata["start_longitude"] = timeseries["longitude"].iloc[0]
-    metadata["end_longitude"] = timeseries["longitude"].iloc[-1]
geo_activity_playground/core/enrichment.py

@@ -0,0 +1,164 @@
+import datetime
+import logging
+import pickle
+from typing import Any
+from typing import Optional
+
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
+
+from geo_activity_playground.core.activities import ActivityMeta
+from geo_activity_playground.core.activities import make_activity_meta
+from geo_activity_playground.core.coordinates import get_distance
+from geo_activity_playground.core.paths import activity_enriched_meta_dir
+from geo_activity_playground.core.paths import activity_enriched_time_series_dir
+from geo_activity_playground.core.paths import activity_extracted_meta_dir
+from geo_activity_playground.core.paths import activity_extracted_time_series_dir
+from geo_activity_playground.core.tiles import compute_tile_float
+from geo_activity_playground.core.time_conversion import convert_to_datetime_ns
+
+logger = logging.getLogger(__name__)
+
+
+def enrich_activities(kind_defaults: dict[dict[str, Any]]) -> None:
+    # Delete removed activities.
+    for enriched_metadata_path in activity_enriched_meta_dir().glob("*.pickle"):
+        if not (activity_extracted_meta_dir() / enriched_metadata_path.name).exists():
+            logger.warning(f"Deleting {enriched_metadata_path}")
+            enriched_metadata_path.unlink()
+    for enriched_time_series_path in activity_enriched_time_series_dir().glob(
+        "*.parquet"
+    ):
+        if not (
+            activity_extracted_time_series_dir() / enriched_time_series_path.name
+        ).exists():
+            logger.warning(f"Deleting {enriched_time_series_path}")
+            enriched_time_series_path.unlink()
+
+    # Get new metadata paths.
+    new_extracted_metadata_paths = []
+    for extracted_metadata_path in activity_extracted_meta_dir().glob("*.pickle"):
+        enriched_metadata_path = (
+            activity_enriched_meta_dir() / extracted_metadata_path.name
+        )
+        if (
+            not enriched_metadata_path.exists()
+            or enriched_metadata_path.stat().st_mtime
+            < extracted_metadata_path.stat().st_mtime
+        ):
+            new_extracted_metadata_paths.append(extracted_metadata_path)
+
+    for extracted_metadata_path in tqdm(
+        new_extracted_metadata_paths, desc="Enrich new activity data"
+    ):
+        # Read extracted data.
+        activity_id = extracted_metadata_path.stem
+        extracted_time_series_path = (
+            activity_extracted_time_series_dir() / f"{activity_id}.parquet"
+        )
+        time_series = pd.read_parquet(extracted_time_series_path)
+        with open(extracted_metadata_path, "rb") as f:
+            extracted_metadata = pickle.load(f)
+
+        metadata = make_activity_meta()
+        metadata.update(extracted_metadata)
+
+        # Enrich time series.
+        metadata.update(kind_defaults.get(metadata["kind"], {}))
+        time_series = _embellish_single_time_series(
+            time_series, metadata.get("start", None)
+        )
+        metadata.update(_get_metadata_from_timeseries(time_series))
+
+        # Write enriched data.
+        enriched_metadata_path = activity_enriched_meta_dir() / f"{activity_id}.pickle"
+        enriched_time_series_path = (
+            activity_enriched_time_series_dir() / f"{activity_id}.parquet"
+        )
+        with open(enriched_metadata_path, "wb") as f:
+            pickle.dump(metadata, f)
+        time_series.to_parquet(enriched_time_series_path)
+
+
+def _get_metadata_from_timeseries(timeseries: pd.DataFrame) -> ActivityMeta:
+    metadata = ActivityMeta()
+
+    # Extract some meta data from the time series.
+    metadata["start"] = timeseries["time"].iloc[0]
+    metadata["elapsed_time"] = timeseries["time"].iloc[-1] - timeseries["time"].iloc[0]
+    metadata["distance_km"] = timeseries["distance_km"].iloc[-1]
+    if "calories" in timeseries.columns:
+        metadata["calories"] = timeseries["calories"].iloc[-1]
+    metadata["moving_time"] = _compute_moving_time(timeseries)
+
+    metadata["start_latitude"] = timeseries["latitude"].iloc[0]
+    metadata["end_latitude"] = timeseries["latitude"].iloc[-1]
+    metadata["start_longitude"] = timeseries["longitude"].iloc[0]
+    metadata["end_longitude"] = timeseries["longitude"].iloc[-1]
+
+    return metadata
+
+
+def _compute_moving_time(time_series: pd.DataFrame) -> datetime.timedelta:
+    def moving_time(group) -> datetime.timedelta:
+        selection = group["speed"] > 1.0
+        time_diff = group["time"].diff().loc[selection]
+        return time_diff.sum()
+
+    return (
+        time_series.groupby("segment_id").apply(moving_time, include_groups=False).sum()
+    )
+
+
+def _embellish_single_time_series(
+    timeseries: pd.DataFrame, start: Optional[datetime.datetime] = None
+) -> pd.DataFrame:
+    if start is not None and pd.api.types.is_dtype_equal(
+        timeseries["time"].dtype, "int64"
+    ):
+        time = timeseries["time"]
+        del timeseries["time"]
+        timeseries["time"] = [
+            convert_to_datetime_ns(start + datetime.timedelta(seconds=t)) for t in time
+        ]
+    timeseries["time"] = convert_to_datetime_ns(timeseries["time"])
+    assert pd.api.types.is_dtype_equal(timeseries["time"].dtype, "datetime64[ns]"), (
+        timeseries["time"].dtype,
+        timeseries["time"].iloc[0],
+    )
+
+    distances = get_distance(
+        timeseries["latitude"].shift(1),
+        timeseries["longitude"].shift(1),
+        timeseries["latitude"],
+        timeseries["longitude"],
+    ).fillna(0.0)
+    time_diff_threshold_seconds = 30
+    time_diff = (timeseries["time"] - timeseries["time"].shift(1)).dt.total_seconds()
+    jump_indices = time_diff >= time_diff_threshold_seconds
+    distances.loc[jump_indices] = 0.0
+
+    if "distance_km" not in timeseries.columns:
+        timeseries["distance_km"] = pd.Series(np.cumsum(distances)) / 1000
+
+    if "speed" not in timeseries.columns:
+        timeseries["speed"] = (
+            timeseries["distance_km"].diff()
+            / (timeseries["time"].diff().dt.total_seconds() + 1e-3)
+            * 3600
+        )
+
+    potential_jumps = (timeseries["speed"] > 40) & (timeseries["speed"].diff() > 10)
+    if np.any(potential_jumps):
+        timeseries = timeseries.loc[~potential_jumps].copy()
+
+    if "segment_id" not in timeseries.columns:
+        timeseries["segment_id"] = np.cumsum(jump_indices)
+
+    if "x" not in timeseries.columns:
+        x, y = compute_tile_float(timeseries["latitude"], timeseries["longitude"], 0)
+        timeseries["x"] = x
+        timeseries["y"] = y
+
+    return timeseries
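
The moving-time logic added here counts, per GPS segment, only the time deltas of samples faster than 1 km/h. A self-contained sketch of that rule on invented timestamps (include_groups=False requires pandas 2.2 or newer):

    # Toy illustration of the rule in _compute_moving_time(); data is invented.
    import datetime
    import pandas as pd

    ts = pd.DataFrame(
        {
            "time": pd.to_datetime(
                ["2024-01-01 10:00:00", "2024-01-01 10:00:10", "2024-01-01 10:00:20"]
            ),
            "speed": [5.0, 0.5, 6.0],  # km/h; the middle sample counts as stopped
            "segment_id": [0, 0, 0],
        }
    )

    def moving_time(group) -> datetime.timedelta:
        selection = group["speed"] > 1.0
        time_diff = group["time"].diff().loc[selection]
        return time_diff.sum()

    total = ts.groupby("segment_id").apply(moving_time, include_groups=False).sum()
    print(total)  # 0 days 00:00:10, since the slow sample's delta is excluded
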
geo_activity_playground/core/paths.py

@@ -6,32 +6,51 @@ import pathlib
 import typing
 
 
-def dir_wrapper(
-    dir_func: typing.Callable[[], pathlib.Path]
-) -> typing.Callable[[], pathlib.Path]:
-    @functools.wraps(dir_func)
+def dir_wrapper(path: pathlib.Path) -> typing.Callable[[], pathlib.Path]:
     @functools.cache
     def wrapper() -> pathlib.Path:
-        path = dir_func()
         path.mkdir(exist_ok=True, parents=True)
         return path
 
     return wrapper
 
 
-@dir_wrapper
-def cache_dir() -> pathlib.Path:
-    return pathlib.Path("Cache")
+def file_wrapper(path: pathlib.Path) -> typing.Callable[[], pathlib.Path]:
+    @functools.cache
+    def wrapper() -> pathlib.Path:
+        path.parent.mkdir(exist_ok=True, parents=True)
+        return path
+
+    return wrapper
+
+
+_cache_dir = pathlib.Path("Cache")
+
+_activity_dir = _cache_dir / "Activity"
+_activity_extracted_dir = _activity_dir / "Extracted"
+_activity_extracted_meta_dir = _activity_extracted_dir / "Meta"
+_activity_extracted_time_series_dir = _activity_extracted_dir / "Time Series"
+
+_activity_enriched_dir = _activity_dir / "Enriched"
+_activity_enriched_meta_dir = _activity_enriched_dir / "Meta"
+_activity_enriched_time_series_dir = _activity_enriched_dir / "Time Series"
+_activities_file = _activity_dir / "activities.parquet"
 
+_tiles_per_time_series = _cache_dir / "Tiles" / "Tiles Per Time Series"
 
-
-
-    return cache_dir() / "Activity Timeseries"
+_strava_api_dir = pathlib.Path("Strava API")
+_strava_dynamic_config_path = _strava_api_dir / "strava-client-id.json"
 
 
-def activities_path() -> pathlib.Path:
-    return cache_dir() / "activities.parquet"
+cache_dir = dir_wrapper(_cache_dir)
 
+activity_extracted_dir = dir_wrapper(_activity_extracted_dir)
+activity_extracted_meta_dir = dir_wrapper(_activity_extracted_meta_dir)
+activity_extracted_time_series_dir = dir_wrapper(_activity_extracted_time_series_dir)
+activity_enriched_meta_dir = dir_wrapper(_activity_enriched_meta_dir)
+activity_enriched_time_series_dir = dir_wrapper(_activity_enriched_time_series_dir)
+tiles_per_time_series = dir_wrapper(_tiles_per_time_series)
+strava_api_dir = dir_wrapper(_strava_api_dir)
 
-
-
+activities_file = file_wrapper(_activities_file)
+strava_dynamic_config_path = file_wrapper(_strava_dynamic_config_path)
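
paths.py thus moves from decorated functions to plain data: each location is a module-level pathlib constant, and dir_wrapper()/file_wrapper() turn it into a callable that creates the directory on first use and then returns the cached path. A standalone sketch of the pattern, reduced to a single directory:

    # Standalone sketch of the dir_wrapper() pattern from the new paths.py.
    import functools
    import pathlib
    import typing

    def dir_wrapper(path: pathlib.Path) -> typing.Callable[[], pathlib.Path]:
        @functools.cache
        def wrapper() -> pathlib.Path:
            path.mkdir(exist_ok=True, parents=True)  # runs only on the first call
            return path

        return wrapper

    cache_dir = dir_wrapper(pathlib.Path("Cache"))
    print(cache_dir())  # creates "Cache" once; later calls return the cached Path
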
geo_activity_playground/core/tasks.py

@@ -50,8 +50,8 @@ def work_tracker(path: pathlib.Path):
 
 
 class WorkTracker:
-    def __init__(self,
-        self._path =
+    def __init__(self, path: pathlib.Path) -> None:
+        self._path = path
 
         if self._path.exists():
             with open(self._path, "rb") as f:
@@ -59,12 +59,15 @@ class WorkTracker:
         else:
             self._done = set()
 
-    def filter(self, ids: Iterable
+    def filter(self, ids: Iterable) -> set:
         return set(ids) - self._done
 
     def mark_done(self, id: int) -> None:
         self._done.add(id)
 
+    def discard(self, id) -> None:
+        self._done.discard(id)
+
     def close(self) -> None:
         with open(self._path, "wb") as f:
             pickle.dump(self._done, f)
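
WorkTracker now takes the pickle path directly instead of deriving it from a name string, and the new discard() lets a caller re-queue a single id. A hedged usage sketch; the tracker path and the process() stub are invented:

    # Hypothetical WorkTracker usage; the path and process() are examples only.
    import pathlib

    from geo_activity_playground.core.tasks import WorkTracker

    def process(activity_id: int) -> None:
        ...  # stand-in for real per-activity work

    tracker = WorkTracker(pathlib.Path("work-tracker-example.pickle"))
    for activity_id in tracker.filter([1, 2, 3]):  # skips ids already marked done
        process(activity_id)
        tracker.mark_done(activity_id)
    tracker.discard(2)  # forget id 2 so the next run processes it again
    tracker.close()     # persists the done-set back to the pickle
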
@@ -77,3 +80,23 @@ def try_load_pickle(path: pathlib.Path) -> Any:
                 return pickle.load(f)
         except ModuleNotFoundError:
             pass
+
+
+class TransformVersion:
+    def __init__(self, path: pathlib.Path, code_version: int) -> None:
+        self._path = path
+        self._code_version = code_version
+
+        with open(path) as f:
+            self._actual_version = json.load(f)
+
+        assert (
+            self._actual_version <= self._code_version
+        ), "You attempt to use a more modern playground with an older code version, that is not supported."
+
+    def outdated(self) -> bool:
+        return self._actual_version < self._code_version
+
+    def write(self) -> None:
+        with open(self._path, "w") as f:
+            json.dump(self._code_version, f)
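
TransformVersion looks like the successor to the deleted cache_migrations module: a JSON file records which version of the code produced a cached artifact, outdated() signals that a recompute is due, and write() stores the new version. A sketch under that assumption; the version-file path and the version number are invented:

    # Hypothetical TransformVersion usage; path and version are examples only.
    import pathlib

    from geo_activity_playground.core.tasks import TransformVersion

    version_path = pathlib.Path("Cache") / "tile-visits-version.json"
    if not version_path.exists():  # __init__ reads the file, so seed it first
        version_path.parent.mkdir(exist_ok=True, parents=True)
        version_path.write_text("0")

    transform = TransformVersion(version_path, code_version=1)
    if transform.outdated():
        # Recompute the cached artifact here, then record the matching version.
        transform.write()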