geo-activity-playground 0.19.0__tar.gz → 0.20.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/PKG-INFO +3 -2
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/__main__.py +2 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/activities.py +111 -47
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/activity_parsers.py +9 -15
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/cache_migrations.py +3 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/config.py +0 -3
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/heatmap.py +0 -30
- geo_activity_playground-0.20.0/geo_activity_playground/core/similarity.py +93 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/tasks.py +25 -1
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/explorer/grid_file.py +3 -4
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/explorer/tile_visits.py +4 -3
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/importers/directory.py +41 -22
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/importers/strava_api.py +1 -1
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/importers/strava_checkout.py +6 -2
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/activity_controller.py +39 -17
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/app.py +11 -0
- geo_activity_playground-0.20.0/geo_activity_playground/webui/locations_controller.py +28 -0
- geo_activity_playground-0.20.0/geo_activity_playground/webui/static/android-chrome-192x192.png +0 -0
- geo_activity_playground-0.20.0/geo_activity_playground/webui/static/android-chrome-512x512.png +0 -0
- geo_activity_playground-0.20.0/geo_activity_playground/webui/static/apple-touch-icon.png +0 -0
- geo_activity_playground-0.20.0/geo_activity_playground/webui/static/favicon-16x16.png +0 -0
- geo_activity_playground-0.20.0/geo_activity_playground/webui/static/favicon-32x32.png +0 -0
- geo_activity_playground-0.20.0/geo_activity_playground/webui/static/mstile-150x150.png +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/activity.html.j2 +31 -0
- geo_activity_playground-0.20.0/geo_activity_playground/webui/templates/locations.html.j2 +38 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/page.html.j2 +9 -1
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/square-planner.html.j2 +16 -4
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/pyproject.toml +10 -2
- geo_activity_playground-0.19.0/geo_activity_playground/webui/static/android-chrome-192x192.png +0 -0
- geo_activity_playground-0.19.0/geo_activity_playground/webui/static/apple-touch-icon.png +0 -0
- geo_activity_playground-0.19.0/geo_activity_playground/webui/static/favicon-16x16.png +0 -0
- geo_activity_playground-0.19.0/geo_activity_playground/webui/static/favicon-32x32.png +0 -0
- geo_activity_playground-0.19.0/geo_activity_playground/webui/static/mstile-150x150.png +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/LICENSE +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/__init__.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/__init__.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/coordinates.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/paths.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/test_tiles.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/tiles.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/explorer/__init__.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/explorer/video.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/importers/test_strava_api.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/calendar_controller.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/config_controller.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/eddington_controller.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/entry_controller.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/equipment_controller.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/explorer_controller.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/heatmap_controller.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/search_controller.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/square_planner_controller.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/static/android-chrome-384x384.png +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/static/browserconfig.xml +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/static/favicon.ico +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/static/safari-pinned-tab.svg +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/static/site.webmanifest +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/strava_controller.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/summary_controller.py +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/calendar-month.html.j2 +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/calendar.html.j2 +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/config.html.j2 +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/eddington.html.j2 +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/equipment.html.j2 +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/explorer.html.j2 +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/heatmap.html.j2 +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/index.html.j2 +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/search.html.j2 +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/strava-connect.html.j2 +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/summary.html.j2 +0 -0
- {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/tile_controller.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: geo-activity-playground
|
3
|
-
Version: 0.19.0
|
3
|
+
Version: 0.20.0
|
4
4
|
Summary: Analysis of geo data activities like rides, runs or hikes.
|
5
5
|
License: MIT
|
6
6
|
Author: Martin Ueding
|
@@ -20,6 +20,7 @@ Requires-Dist: fitdecode (>=0.10.0,<0.11.0)
|
|
20
20
|
Requires-Dist: flask (>=3.0.0,<4.0.0)
|
21
21
|
Requires-Dist: geojson (>=3.0.1,<4.0.0)
|
22
22
|
Requires-Dist: gpxpy (>=1.5.0,<2.0.0)
|
23
|
+
Requires-Dist: imagehash (>=4.3.1,<5.0.0)
|
23
24
|
Requires-Dist: jinja2 (>=3.1.2,<4.0.0)
|
24
25
|
Requires-Dist: matplotlib (>=3.6.3,<4.0.0)
|
25
26
|
Requires-Dist: numpy (>=1.22.4,<2.0.0)
|
@@ -32,7 +33,7 @@ Requires-Dist: stravalib (>=1.3.3,<2.0.0)
|
|
32
33
|
Requires-Dist: tcxreader (>=0.4.5,<0.5.0)
|
33
34
|
Requires-Dist: tomli (>=2.0.1,<3.0.0) ; python_version < "3.11"
|
34
35
|
Requires-Dist: tqdm (>=4.64.0,<5.0.0)
|
35
|
-
Requires-Dist: vegafusion (>=1.4.3,<2.0.0)
|
36
36
|
Requires-Dist: vegafusion-python-embed (>=1.4.3,<2.0.0)
|
37
|
+
Requires-Dist: vegafusion[embed] (>=1.4.3,<2.0.0)
|
37
38
|
Requires-Dist: vl-convert-python (>=1.0.1,<2.0.0)
|
38
39
|
Requires-Dist: xmltodict (>=0.13.0,<0.14.0)
|
@@ -6,6 +6,7 @@ import sys
|
|
6
6
|
|
7
7
|
import coloredlogs
|
8
8
|
|
9
|
+
from .core.similarity import precompute_activity_distances
|
9
10
|
from .importers.strava_checkout import convert_strava_checkout
|
10
11
|
from .importers.strava_checkout import import_from_strava_checkout
|
11
12
|
from geo_activity_playground.core.activities import ActivityRepository
|
@@ -114,6 +115,7 @@ def make_activity_repository(
|
|
114
115
|
import_from_strava_api(repository)
|
115
116
|
|
116
117
|
embellish_time_series(repository)
|
118
|
+
precompute_activity_distances(repository)
|
117
119
|
compute_tile_visits(repository)
|
118
120
|
compute_tile_evolution()
|
119
121
|
return repository
|
@@ -1,6 +1,7 @@
|
|
1
1
|
import datetime
|
2
2
|
import functools
|
3
3
|
import logging
|
4
|
+
import pathlib
|
4
5
|
from typing import Iterator
|
5
6
|
from typing import Optional
|
6
7
|
from typing import TypedDict
|
@@ -12,6 +13,7 @@ import pandas as pd
|
|
12
13
|
from tqdm import tqdm
|
13
14
|
|
14
15
|
from geo_activity_playground.core.config import get_config
|
16
|
+
from geo_activity_playground.core.coordinates import get_distance
|
15
17
|
from geo_activity_playground.core.paths import activities_path
|
16
18
|
from geo_activity_playground.core.paths import activity_timeseries_path
|
17
19
|
from geo_activity_playground.core.tasks import WorkTracker
|
@@ -25,11 +27,15 @@ class ActivityMeta(TypedDict):
|
|
25
27
|
commute: bool
|
26
28
|
distance_km: float
|
27
29
|
elapsed_time: datetime.timedelta
|
30
|
+
end_latitude: float
|
31
|
+
end_longitude: float
|
28
32
|
equipment: str
|
29
33
|
id: int
|
30
34
|
kind: str
|
31
35
|
name: str
|
32
36
|
path: str
|
37
|
+
start_latitude: float
|
38
|
+
start_longitude: float
|
33
39
|
start: datetime.datetime
|
34
40
|
|
35
41
|
|
@@ -43,15 +49,27 @@ class ActivityRepository:
|
|
43
49
|
self.meta = pd.DataFrame()
|
44
50
|
|
45
51
|
self._loose_activities: list[ActivityMeta] = []
|
52
|
+
self._loose_activity_ids: set[int] = set()
|
46
53
|
|
47
54
|
def __len__(self) -> int:
|
48
55
|
return len(self.meta)
|
49
56
|
|
50
57
|
def add_activity(self, activity_meta: ActivityMeta) -> None:
|
51
|
-
|
52
|
-
|
53
|
-
|
58
|
+
_extend_metadata_from_timeseries(activity_meta)
|
59
|
+
if activity_meta["id"] in self._loose_activity_ids:
|
60
|
+
logger.error(f"Activity with the same file already exists. New activity:")
|
61
|
+
print(activity_meta)
|
62
|
+
print("Existing activity:")
|
63
|
+
print(
|
64
|
+
[
|
65
|
+
activity
|
66
|
+
for activity in self._loose_activities
|
67
|
+
if activity["id"] == activity_meta["id"]
|
68
|
+
]
|
69
|
+
)
|
70
|
+
raise ValueError("Activity with the same file already exists.")
|
54
71
|
self._loose_activities.append(activity_meta)
|
72
|
+
self._loose_activity_ids.add(activity_meta["id"])
|
55
73
|
|
56
74
|
def commit(self) -> None:
|
57
75
|
if self._loose_activities:
|
@@ -59,7 +77,15 @@ class ActivityRepository:
|
|
59
77
|
f"Adding {len(self._loose_activities)} activities to the repository …"
|
60
78
|
)
|
61
79
|
new_df = pd.DataFrame(self._loose_activities)
|
62
|
-
|
80
|
+
if len(self.meta):
|
81
|
+
new_ids_set = set(new_df["id"])
|
82
|
+
is_kept = [
|
83
|
+
activity_id not in new_ids_set for activity_id in self.meta["id"]
|
84
|
+
]
|
85
|
+
old_df = self.meta.loc[is_kept]
|
86
|
+
else:
|
87
|
+
old_df = self.meta
|
88
|
+
self.meta = pd.concat([old_df, new_df])
|
63
89
|
assert pd.api.types.is_dtype_equal(
|
64
90
|
self.meta["start"].dtype, "datetime64[ns, UTC]"
|
65
91
|
), self.meta["start"].dtype
|
@@ -86,6 +112,8 @@ class ActivityRepository:
|
|
86
112
|
def last_activity_date(self) -> Optional[datetime.datetime]:
|
87
113
|
if len(self.meta):
|
88
114
|
return self.meta.iloc[-1]["start"]
|
115
|
+
else:
|
116
|
+
return None
|
89
117
|
|
90
118
|
@property
|
91
119
|
def activity_ids(self) -> set[int]:
|
@@ -122,55 +150,81 @@ def embellish_time_series(repository: ActivityRepository) -> None:
|
|
122
150
|
path = activity_timeseries_path(activity_id)
|
123
151
|
df = pd.read_parquet(path)
|
124
152
|
df.name = id
|
125
|
-
changed =
|
126
|
-
|
127
|
-
|
128
|
-
time = df["time"]
|
129
|
-
del df["time"]
|
130
|
-
df["time"] = [start + datetime.timedelta(seconds=t) for t in time]
|
131
|
-
changed = True
|
132
|
-
assert pd.api.types.is_dtype_equal(df["time"].dtype, "datetime64[ns, UTC]")
|
133
|
-
|
134
|
-
if "distance_km" in df.columns:
|
135
|
-
if "speed" not in df.columns:
|
136
|
-
df["speed"] = (
|
137
|
-
df["distance_km"].diff()
|
138
|
-
/ (df["time"].diff().dt.total_seconds() + 1e-3)
|
139
|
-
* 3600
|
140
|
-
)
|
141
|
-
changed = True
|
142
|
-
|
143
|
-
potential_jumps = (df["speed"] > 40) & (df["speed"].diff() > 10)
|
144
|
-
if np.any(potential_jumps):
|
145
|
-
df = df.loc[~potential_jumps]
|
146
|
-
changed = True
|
147
|
-
|
148
|
-
if "x" not in df.columns:
|
149
|
-
x, y = compute_tile_float(df["latitude"], df["longitude"], 0)
|
150
|
-
df["x"] = x
|
151
|
-
df["y"] = y
|
152
|
-
changed = True
|
153
|
-
|
154
|
-
if "segment_id" not in df.columns:
|
155
|
-
time_diff = (df["time"] - df["time"].shift(1)).dt.total_seconds()
|
156
|
-
jump_indices = time_diff >= 30
|
157
|
-
df["segment_id"] = np.cumsum(jump_indices)
|
158
|
-
changed = True
|
159
|
-
|
153
|
+
df, changed = embellish_single_time_series(
|
154
|
+
df, repository.get_activity_by_id(activity_id)["start"]
|
155
|
+
)
|
160
156
|
if changed:
|
161
157
|
df.to_parquet(path)
|
162
158
|
work_tracker.mark_done(activity_id)
|
163
159
|
work_tracker.close()
|
164
160
|
|
165
161
|
|
162
|
+
def embellish_single_time_series(
|
163
|
+
timeseries: pd.DataFrame, start: Optional[datetime.datetime] = None
|
164
|
+
) -> bool:
|
165
|
+
changed = False
|
166
|
+
time_diff_threshold_seconds = 30
|
167
|
+
time_diff = (timeseries["time"] - timeseries["time"].shift(1)).dt.total_seconds()
|
168
|
+
jump_indices = time_diff >= time_diff_threshold_seconds
|
169
|
+
|
170
|
+
if start is not None and pd.api.types.is_dtype_equal(
|
171
|
+
timeseries["time"].dtype, "int64"
|
172
|
+
):
|
173
|
+
time = timeseries["time"]
|
174
|
+
del timeseries["time"]
|
175
|
+
timeseries["time"] = [start + datetime.timedelta(seconds=t) for t in time]
|
176
|
+
changed = True
|
177
|
+
assert pd.api.types.is_dtype_equal(timeseries["time"].dtype, "datetime64[ns, UTC]")
|
178
|
+
|
179
|
+
# Add distance column if missing.
|
180
|
+
if "distance_km" not in timeseries.columns:
|
181
|
+
distances = get_distance(
|
182
|
+
timeseries["latitude"].shift(1),
|
183
|
+
timeseries["longitude"].shift(1),
|
184
|
+
timeseries["latitude"],
|
185
|
+
timeseries["longitude"],
|
186
|
+
).fillna(0.0)
|
187
|
+
distances.loc[jump_indices] = 0.0
|
188
|
+
timeseries["distance_km"] = pd.Series(np.cumsum(distances)) / 1000
|
189
|
+
changed = True
|
190
|
+
|
191
|
+
if "distance_km" in timeseries.columns:
|
192
|
+
if "speed" not in timeseries.columns:
|
193
|
+
timeseries["speed"] = (
|
194
|
+
timeseries["distance_km"].diff()
|
195
|
+
/ (timeseries["time"].diff().dt.total_seconds() + 1e-3)
|
196
|
+
* 3600
|
197
|
+
)
|
198
|
+
changed = True
|
199
|
+
|
200
|
+
potential_jumps = (timeseries["speed"] > 40) & (timeseries["speed"].diff() > 10)
|
201
|
+
if np.any(potential_jumps):
|
202
|
+
timeseries = timeseries.loc[~potential_jumps]
|
203
|
+
changed = True
|
204
|
+
|
205
|
+
if "x" not in timeseries.columns:
|
206
|
+
x, y = compute_tile_float(timeseries["latitude"], timeseries["longitude"], 0)
|
207
|
+
timeseries["x"] = x
|
208
|
+
timeseries["y"] = y
|
209
|
+
changed = True
|
210
|
+
|
211
|
+
if "segment_id" not in timeseries.columns:
|
212
|
+
timeseries["segment_id"] = np.cumsum(jump_indices)
|
213
|
+
changed = True
|
214
|
+
|
215
|
+
return timeseries, changed
|
216
|
+
|
217
|
+
|
166
218
|
def make_geojson_from_time_series(time_series: pd.DataFrame) -> str:
|
167
|
-
|
168
|
-
[
|
169
|
-
(
|
170
|
-
|
219
|
+
fc = geojson.FeatureCollection(
|
220
|
+
features=[
|
221
|
+
geojson.LineString(
|
222
|
+
[(lon, lat) for lat, lon in zip(group["latitude"], group["longitude"])]
|
223
|
+
)
|
224
|
+
for _, group in time_series.groupby("segment_id")
|
171
225
|
]
|
172
226
|
)
|
173
|
-
return geojson.dumps(
|
227
|
+
return geojson.dumps(fc)
|
174
228
|
|
175
229
|
|
176
230
|
def make_geojson_color_line(time_series: pd.DataFrame) -> str:
|
@@ -188,9 +242,8 @@ def make_geojson_color_line(time_series: pd.DataFrame) -> str:
|
|
188
242
|
"color": matplotlib.colors.to_hex(cmap(min(next["speed"] / 35, 1.0))),
|
189
243
|
},
|
190
244
|
)
|
191
|
-
for
|
192
|
-
|
193
|
-
)
|
245
|
+
for _, group in time_series.groupby("segment_id")
|
246
|
+
for (_, row), (_, next) in zip(group.iterrows(), group.iloc[1:].iterrows())
|
194
247
|
]
|
195
248
|
feature_collection = geojson.FeatureCollection(features)
|
196
249
|
return geojson.dumps(feature_collection)
|
@@ -236,3 +289,14 @@ def extract_heart_rate_zones(time_series: pd.DataFrame) -> Optional[pd.DataFrame
|
|
236
289
|
duration_per_zone.loc[i] = 0.0
|
237
290
|
result = duration_per_zone.reset_index()
|
238
291
|
return result
|
292
|
+
|
293
|
+
|
294
|
+
def _extend_metadata_from_timeseries(metadata: ActivityMeta) -> None:
|
295
|
+
timeseries = pd.read_parquet(
|
296
|
+
pathlib.Path("Cache/Activity Timeseries") / f"{metadata['id']}.parquet"
|
297
|
+
)
|
298
|
+
|
299
|
+
metadata["start_latitude"] = timeseries["latitude"].iloc[0]
|
300
|
+
metadata["end_latitude"] = timeseries["latitude"].iloc[-1]
|
301
|
+
metadata["start_longitude"] = timeseries["longitude"].iloc[0]
|
302
|
+
metadata["end_longitude"] = timeseries["longitude"].iloc[-1]
|
@@ -14,6 +14,7 @@ import tcxreader.tcxreader
|
|
14
14
|
import xmltodict
|
15
15
|
|
16
16
|
from geo_activity_playground.core.activities import ActivityMeta
|
17
|
+
from geo_activity_playground.core.activities import embellish_single_time_series
|
17
18
|
from geo_activity_playground.core.coordinates import get_distance
|
18
19
|
|
19
20
|
logger = logging.getLogger(__name__)
|
@@ -51,7 +52,7 @@ def read_activity(path: pathlib.Path) -> tuple[ActivityMeta, pd.DataFrame]:
|
|
51
52
|
elif file_type in [".kml", ".kmz"]:
|
52
53
|
timeseries = read_kml_activity(path, opener)
|
53
54
|
elif file_type == ".csv": # Simra csv export
|
54
|
-
timeseries = read_simra_activity(path)
|
55
|
+
timeseries = read_simra_activity(path, opener)
|
55
56
|
else:
|
56
57
|
raise ActivityParseError(f"Unsupported file format: {file_type}")
|
57
58
|
|
@@ -74,18 +75,7 @@ def read_activity(path: pathlib.Path) -> tuple[ActivityMeta, pd.DataFrame]:
|
|
74
75
|
"It looks like the date parsing has gone wrong."
|
75
76
|
) from e
|
76
77
|
|
77
|
-
|
78
|
-
if "distance_km" not in timeseries.columns:
|
79
|
-
distances = [0] + [
|
80
|
-
get_distance(lat_1, lon_1, lat_2, lon_2)
|
81
|
-
for lat_1, lon_1, lat_2, lon_2 in zip(
|
82
|
-
timeseries["latitude"],
|
83
|
-
timeseries["longitude"],
|
84
|
-
timeseries["latitude"].iloc[1:],
|
85
|
-
timeseries["longitude"].iloc[1:],
|
86
|
-
)
|
87
|
-
]
|
88
|
-
timeseries["distance_km"] = pd.Series(np.cumsum(distances)) / 1000
|
78
|
+
timeseries, changed = embellish_single_time_series(timeseries)
|
89
79
|
|
90
80
|
# Extract some meta data from the time series.
|
91
81
|
metadata["start"] = timeseries["time"].iloc[0]
|
@@ -212,9 +202,13 @@ def read_gpx_activity(path: pathlib.Path, open) -> pd.DataFrame:
|
|
212
202
|
time = dateutil.parser.parse(str(point.time))
|
213
203
|
assert isinstance(time, datetime.datetime)
|
214
204
|
time = time.astimezone(datetime.timezone.utc)
|
215
|
-
points.append((time, point.latitude, point.longitude))
|
205
|
+
points.append((time, point.latitude, point.longitude, point.elevation))
|
216
206
|
|
217
|
-
|
207
|
+
df = pd.DataFrame(points, columns=["time", "latitude", "longitude", "altitude"])
|
208
|
+
# Some files don't have altitude information. In these cases we remove the column.
|
209
|
+
if not df["altitude"].any():
|
210
|
+
del df["altitude"]
|
211
|
+
return df
|
218
212
|
|
219
213
|
|
220
214
|
def read_tcx_activity(path: pathlib.Path, opener) -> pd.DataFrame:
|
@@ -95,6 +95,9 @@ def apply_cache_migrations() -> None:
|
|
95
95
|
delete_activity_metadata,
|
96
96
|
delete_activity_metadata,
|
97
97
|
convert_distances_to_km,
|
98
|
+
delete_activity_metadata,
|
99
|
+
delete_tile_visits,
|
100
|
+
delete_heatmap_cache,
|
98
101
|
]
|
99
102
|
|
100
103
|
for migration in migrations[cache_status["num_applied_migrations"] :]:
|
@@ -4,7 +4,6 @@ This code is based on https://github.com/remisalmon/Strava-local-heatmap.
|
|
4
4
|
import dataclasses
|
5
5
|
import logging
|
6
6
|
|
7
|
-
import matplotlib.pyplot as pl
|
8
7
|
import numpy as np
|
9
8
|
|
10
9
|
from geo_activity_playground.core.tiles import compute_tile_float
|
@@ -147,32 +146,3 @@ def crop_image_to_bounds(
|
|
147
146
|
max_y = int((max_y - tile_bounds.y_tile_min) * OSM_TILE_SIZE)
|
148
147
|
image = image[min_y:max_y, min_x:max_x, :]
|
149
148
|
return image
|
150
|
-
|
151
|
-
|
152
|
-
def gaussian_filter(image, sigma):
|
153
|
-
# returns image filtered with a gaussian function of variance sigma**2
|
154
|
-
#
|
155
|
-
# input: image = numpy.ndarray
|
156
|
-
# sigma = float
|
157
|
-
# output: image = numpy.ndarray
|
158
|
-
|
159
|
-
i, j = np.meshgrid(
|
160
|
-
np.arange(image.shape[0]), np.arange(image.shape[1]), indexing="ij"
|
161
|
-
)
|
162
|
-
|
163
|
-
mu = (int(image.shape[0] / 2.0), int(image.shape[1] / 2.0))
|
164
|
-
|
165
|
-
gaussian = (
|
166
|
-
1.0
|
167
|
-
/ (2.0 * np.pi * sigma * sigma)
|
168
|
-
* np.exp(-0.5 * (((i - mu[0]) / sigma) ** 2 + ((j - mu[1]) / sigma) ** 2))
|
169
|
-
)
|
170
|
-
|
171
|
-
gaussian = np.roll(gaussian, (-mu[0], -mu[1]), axis=(0, 1))
|
172
|
-
|
173
|
-
image_fft = np.fft.rfft2(image)
|
174
|
-
gaussian_fft = np.fft.rfft2(gaussian)
|
175
|
-
|
176
|
-
image = np.fft.irfft2(image_fft * gaussian_fft)
|
177
|
-
|
178
|
-
return image
|
@@ -0,0 +1,93 @@
|
|
1
|
+
import pathlib
|
2
|
+
import pickle
|
3
|
+
|
4
|
+
import imagehash
|
5
|
+
import numpy as np
|
6
|
+
import pandas as pd
|
7
|
+
from PIL import Image
|
8
|
+
from PIL import ImageDraw
|
9
|
+
from tqdm import tqdm
|
10
|
+
|
11
|
+
from .activities import ActivityRepository
|
12
|
+
from .coordinates import get_distance
|
13
|
+
from geo_activity_playground.core.tasks import stored_object
|
14
|
+
|
15
|
+
|
16
|
+
fingerprint_path = pathlib.Path("Cache/activity_fingerprints.pickle")
|
17
|
+
distances_path = pathlib.Path("Cache/activity_distances.pickle")
|
18
|
+
|
19
|
+
|
20
|
+
def add_distance(distances, this, other, distance) -> None:
|
21
|
+
if this not in distances:
|
22
|
+
distances[this] = {}
|
23
|
+
if distance not in distances[this]:
|
24
|
+
distances[this][distance] = set()
|
25
|
+
distances[this][distance].add(other)
|
26
|
+
|
27
|
+
|
28
|
+
def precompute_activity_distances(repository: ActivityRepository) -> None:
|
29
|
+
with stored_object(fingerprint_path, {}) as fingerprints, stored_object(
|
30
|
+
distances_path, {}
|
31
|
+
) as distances:
|
32
|
+
activity_ids = repository.activity_ids
|
33
|
+
|
34
|
+
activity_ids_without_fingerprint = [
|
35
|
+
activity_id
|
36
|
+
for activity_id in activity_ids
|
37
|
+
if activity_id not in fingerprints
|
38
|
+
]
|
39
|
+
for activity_id in tqdm(
|
40
|
+
activity_ids_without_fingerprint, desc="Compute activity fingerprints"
|
41
|
+
):
|
42
|
+
ts = repository.get_time_series(activity_id)
|
43
|
+
ts_hash = _compute_image_hash(ts)
|
44
|
+
fingerprints[activity_id] = ts_hash
|
45
|
+
|
46
|
+
for this in tqdm(
|
47
|
+
activity_ids_without_fingerprint, desc="Compute activity distances"
|
48
|
+
):
|
49
|
+
for other in activity_ids:
|
50
|
+
distance = _hamming_distance(fingerprints[this], fingerprints[other])
|
51
|
+
add_distance(distances, this, other, distance)
|
52
|
+
add_distance(distances, other, this, distance)
|
53
|
+
|
54
|
+
|
55
|
+
def asymmetric_activity_overlap(
|
56
|
+
activity: pd.DataFrame, reference: pd.DataFrame
|
57
|
+
) -> float:
|
58
|
+
sample = activity.iloc[np.linspace(0, len(activity) - 1, 50, dtype=np.int64)]
|
59
|
+
min_distances = [
|
60
|
+
_get_min_distance(latitude, longitude, reference)
|
61
|
+
for (latitude, longitude) in zip(sample["latitude"], sample["longitude"])
|
62
|
+
]
|
63
|
+
return sum(distance < 25 for distance in min_distances) / len(min_distances)
|
64
|
+
|
65
|
+
|
66
|
+
def _get_min_distance(latitude: float, longitude: float, other: pd.DataFrame) -> float:
|
67
|
+
distances = get_distance(latitude, longitude, other["latitude"], other["longitude"])
|
68
|
+
return np.min(distances)
|
69
|
+
|
70
|
+
|
71
|
+
def _compute_image_hash(time_series) -> int:
|
72
|
+
z = 12 + 8
|
73
|
+
x = time_series["x"] * 2**z
|
74
|
+
y = time_series["y"] * 2**z
|
75
|
+
xy_pixels = np.array([x - x.min(), y - y.min()]).T
|
76
|
+
dim = xy_pixels.max(axis=0)
|
77
|
+
# Some activities have bogus data in them which makes them require a huge image. We just skip those outright and return a dummy hash value.
|
78
|
+
if max(dim) > 6000:
|
79
|
+
return 0
|
80
|
+
im = Image.new("L", tuple(map(int, dim)))
|
81
|
+
draw = ImageDraw.Draw(im)
|
82
|
+
pixels = list(map(int, xy_pixels.flatten()))
|
83
|
+
draw.line(pixels, fill=255, width=5)
|
84
|
+
return int(str(imagehash.dhash(im)), 16)
|
85
|
+
|
86
|
+
|
87
|
+
def _hamming_distance(a: int, b: int) -> int:
|
88
|
+
diff = a ^ b
|
89
|
+
result = 0
|
90
|
+
while diff:
|
91
|
+
result += diff % 2
|
92
|
+
diff //= 2
|
93
|
+
return result
|
@@ -2,11 +2,35 @@ import contextlib
|
|
2
2
|
import json
|
3
3
|
import pathlib
|
4
4
|
import pickle
|
5
|
+
from collections.abc import Iterable
|
5
6
|
from typing import Any
|
7
|
+
from typing import Generic
|
8
|
+
from typing import Sequence
|
9
|
+
from typing import TypeVar
|
6
10
|
|
7
11
|
from geo_activity_playground.core.paths import cache_dir
|
8
12
|
|
9
13
|
|
14
|
+
T = TypeVar("T")
|
15
|
+
|
16
|
+
|
17
|
+
@contextlib.contextmanager
|
18
|
+
def stored_object(path: pathlib.Path, default):
|
19
|
+
if path.exists():
|
20
|
+
with open(path, "rb") as f:
|
21
|
+
payload = pickle.load(f)
|
22
|
+
else:
|
23
|
+
payload = default
|
24
|
+
|
25
|
+
yield payload
|
26
|
+
|
27
|
+
temp_location = path.with_suffix(".tmp")
|
28
|
+
with open(temp_location, "wb") as f:
|
29
|
+
pickle.dump(payload, f)
|
30
|
+
path.unlink(missing_ok=True)
|
31
|
+
temp_location.rename(path)
|
32
|
+
|
33
|
+
|
10
34
|
def work_tracker_path(name: str) -> pathlib.Path:
|
11
35
|
return cache_dir() / f"work-tracker-{name}.pickle"
|
12
36
|
|
@@ -35,7 +59,7 @@ class WorkTracker:
|
|
35
59
|
else:
|
36
60
|
self._done = set()
|
37
61
|
|
38
|
-
def filter(self, ids:
|
62
|
+
def filter(self, ids: Iterable[int]) -> set[int]:
|
39
63
|
return set(ids) - self._done
|
40
64
|
|
41
65
|
def mark_done(self, id: int) -> None:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import json
|
2
2
|
import logging
|
3
|
-
from
|
3
|
+
from collections.abc import Iterable
|
4
4
|
from typing import Optional
|
5
5
|
|
6
6
|
import geojson
|
@@ -8,7 +8,6 @@ import gpxpy
|
|
8
8
|
import pandas as pd
|
9
9
|
|
10
10
|
from geo_activity_playground.core.coordinates import Bounds
|
11
|
-
from geo_activity_playground.core.tiles import adjacent_to
|
12
11
|
from geo_activity_playground.core.tiles import get_tile_upper_left_lat_lon
|
13
12
|
|
14
13
|
|
@@ -66,10 +65,10 @@ def make_explorer_rectangle(
|
|
66
65
|
|
67
66
|
|
68
67
|
def make_grid_points(
|
69
|
-
|
68
|
+
tiles: Iterable[tuple[int, int]], zoom: int
|
70
69
|
) -> list[list[list[float]]]:
|
71
70
|
result = []
|
72
|
-
for tile_x, tile_y in
|
71
|
+
for tile_x, tile_y in tiles:
|
73
72
|
tile = [
|
74
73
|
get_tile_upper_left_lat_lon(tile_x, tile_y, zoom),
|
75
74
|
get_tile_upper_left_lat_lon(tile_x + 1, tile_y, zoom),
|
@@ -6,6 +6,7 @@ import pathlib
|
|
6
6
|
import pickle
|
7
7
|
from typing import Any
|
8
8
|
from typing import Iterator
|
9
|
+
from typing import Optional
|
9
10
|
|
10
11
|
import pandas as pd
|
11
12
|
from tqdm import tqdm
|
@@ -116,10 +117,10 @@ class TileEvolutionState:
|
|
116
117
|
self.square_start = 0
|
117
118
|
self.cluster_start = 0
|
118
119
|
self.max_square_size = 0
|
119
|
-
self.visited_tiles = set()
|
120
|
+
self.visited_tiles: set[tuple[int, int]] = set()
|
120
121
|
self.square_evolution = pd.DataFrame()
|
121
|
-
self.square_x = None
|
122
|
-
self.square_y = None
|
122
|
+
self.square_x: Optional[int] = None
|
123
|
+
self.square_y: Optional[int] = None
|
123
124
|
|
124
125
|
|
125
126
|
def compute_tile_evolution() -> None:
|
@@ -1,6 +1,7 @@
|
|
1
1
|
import hashlib
|
2
2
|
import logging
|
3
3
|
import pathlib
|
4
|
+
import pickle
|
4
5
|
import sys
|
5
6
|
import traceback
|
6
7
|
|
@@ -22,35 +23,45 @@ def import_from_directory(
|
|
22
23
|
paths_with_errors = []
|
23
24
|
work_tracker = WorkTracker("parse-activity-files")
|
24
25
|
|
25
|
-
activity_paths =
|
26
|
-
|
26
|
+
activity_paths = [
|
27
|
+
path
|
27
28
|
for path in pathlib.Path("Activities").rglob("*.*")
|
28
29
|
if path.is_file() and path.suffixes and not path.stem.startswith(".")
|
29
|
-
|
30
|
-
|
30
|
+
]
|
31
|
+
new_activity_paths = work_tracker.filter(activity_paths)
|
31
32
|
|
32
33
|
activity_stream_dir = pathlib.Path("Cache/Activity Timeseries")
|
33
34
|
activity_stream_dir.mkdir(exist_ok=True, parents=True)
|
34
|
-
|
35
|
-
|
36
|
-
try:
|
37
|
-
activity_meta_from_file, timeseries = read_activity(path)
|
38
|
-
except ActivityParseError as e:
|
39
|
-
logger.error(f"Error while parsing file {path}:")
|
40
|
-
traceback.print_exc()
|
41
|
-
paths_with_errors.append((path, str(e)))
|
42
|
-
continue
|
43
|
-
except:
|
44
|
-
logger.error(f"Encountered a problem with {path=}, see details below.")
|
45
|
-
raise
|
46
|
-
|
47
|
-
work_tracker.mark_done(activity_id)
|
48
|
-
|
49
|
-
if len(timeseries) == 0:
|
50
|
-
continue
|
35
|
+
file_metadata_dir = pathlib.Path("Cache/Activity Metadata")
|
36
|
+
file_metadata_dir.mkdir(exist_ok=True, parents=True)
|
51
37
|
|
38
|
+
for path in tqdm(new_activity_paths, desc="Parse activity files"):
|
39
|
+
activity_id = _get_file_hash(path)
|
52
40
|
timeseries_path = activity_stream_dir / f"{activity_id}.parquet"
|
53
|
-
|
41
|
+
file_metadata_path = file_metadata_dir / f"{activity_id}.pickle"
|
42
|
+
work_tracker.mark_done(path)
|
43
|
+
|
44
|
+
if not timeseries_path.exists():
|
45
|
+
try:
|
46
|
+
activity_meta_from_file, timeseries = read_activity(path)
|
47
|
+
except ActivityParseError as e:
|
48
|
+
logger.error(f"Error while parsing file {path}:")
|
49
|
+
traceback.print_exc()
|
50
|
+
paths_with_errors.append((path, str(e)))
|
51
|
+
continue
|
52
|
+
except:
|
53
|
+
logger.error(f"Encountered a problem with {path=}, see details below.")
|
54
|
+
raise
|
55
|
+
|
56
|
+
if len(timeseries) == 0:
|
57
|
+
continue
|
58
|
+
|
59
|
+
timeseries.to_parquet(timeseries_path)
|
60
|
+
with open(file_metadata_path, "wb") as f:
|
61
|
+
pickle.dump(activity_meta_from_file, f)
|
62
|
+
else:
|
63
|
+
with open(file_metadata_path, "rb") as f:
|
64
|
+
activity_meta_from_file = pickle.load(f)
|
54
65
|
|
55
66
|
activity_meta = ActivityMeta(
|
56
67
|
commute=path.parts[-2] == "Commute",
|
@@ -82,3 +93,11 @@ def import_from_directory(
|
|
82
93
|
repository.commit()
|
83
94
|
|
84
95
|
work_tracker.close()
|
96
|
+
|
97
|
+
|
98
|
+
def _get_file_hash(path: pathlib.Path) -> int:
|
99
|
+
file_hash = hashlib.blake2s()
|
100
|
+
with open(path, "rb") as f:
|
101
|
+
while chunk := f.read(8192):
|
102
|
+
file_hash.update(chunk)
|
103
|
+
return int(file_hash.hexdigest(), 16) % 2**62
|
@@ -191,7 +191,7 @@ def download_strava_time_series(activity_id: int, client: Client) -> pd.DataFram
|
|
191
191
|
if name in streams:
|
192
192
|
columns[name] = streams[name].data
|
193
193
|
if "distance" in streams:
|
194
|
-
columns["distance_km"] = streams["distance"].data / 1000
|
194
|
+
columns["distance_km"] = pd.Series(streams["distance"].data) / 1000
|
195
195
|
|
196
196
|
df = pd.DataFrame(columns)
|
197
197
|
return df
|