geo-activity-playground 0.19.0__tar.gz → 0.20.0__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/PKG-INFO +3 -2
  2. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/__main__.py +2 -0
  3. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/activities.py +111 -47
  4. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/activity_parsers.py +9 -15
  5. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/cache_migrations.py +3 -0
  6. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/config.py +0 -3
  7. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/heatmap.py +0 -30
  8. geo_activity_playground-0.20.0/geo_activity_playground/core/similarity.py +93 -0
  9. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/tasks.py +25 -1
  10. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/explorer/grid_file.py +3 -4
  11. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/explorer/tile_visits.py +4 -3
  12. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/importers/directory.py +41 -22
  13. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/importers/strava_api.py +1 -1
  14. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/importers/strava_checkout.py +6 -2
  15. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/activity_controller.py +39 -17
  16. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/app.py +11 -0
  17. geo_activity_playground-0.20.0/geo_activity_playground/webui/locations_controller.py +28 -0
  18. geo_activity_playground-0.20.0/geo_activity_playground/webui/static/android-chrome-192x192.png +0 -0
  19. geo_activity_playground-0.20.0/geo_activity_playground/webui/static/android-chrome-512x512.png +0 -0
  20. geo_activity_playground-0.20.0/geo_activity_playground/webui/static/apple-touch-icon.png +0 -0
  21. geo_activity_playground-0.20.0/geo_activity_playground/webui/static/favicon-16x16.png +0 -0
  22. geo_activity_playground-0.20.0/geo_activity_playground/webui/static/favicon-32x32.png +0 -0
  23. geo_activity_playground-0.20.0/geo_activity_playground/webui/static/mstile-150x150.png +0 -0
  24. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/activity.html.j2 +31 -0
  25. geo_activity_playground-0.20.0/geo_activity_playground/webui/templates/locations.html.j2 +38 -0
  26. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/page.html.j2 +9 -1
  27. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/square-planner.html.j2 +16 -4
  28. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/pyproject.toml +10 -2
  29. geo_activity_playground-0.19.0/geo_activity_playground/webui/static/android-chrome-192x192.png +0 -0
  30. geo_activity_playground-0.19.0/geo_activity_playground/webui/static/apple-touch-icon.png +0 -0
  31. geo_activity_playground-0.19.0/geo_activity_playground/webui/static/favicon-16x16.png +0 -0
  32. geo_activity_playground-0.19.0/geo_activity_playground/webui/static/favicon-32x32.png +0 -0
  33. geo_activity_playground-0.19.0/geo_activity_playground/webui/static/mstile-150x150.png +0 -0
  34. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/LICENSE +0 -0
  35. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/__init__.py +0 -0
  36. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/__init__.py +0 -0
  37. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/coordinates.py +0 -0
  38. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/paths.py +0 -0
  39. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/test_tiles.py +0 -0
  40. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/core/tiles.py +0 -0
  41. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/explorer/__init__.py +0 -0
  42. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/explorer/video.py +0 -0
  43. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/importers/test_strava_api.py +0 -0
  44. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/calendar_controller.py +0 -0
  45. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/config_controller.py +0 -0
  46. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/eddington_controller.py +0 -0
  47. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/entry_controller.py +0 -0
  48. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/equipment_controller.py +0 -0
  49. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/explorer_controller.py +0 -0
  50. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/heatmap_controller.py +0 -0
  51. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/search_controller.py +0 -0
  52. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/square_planner_controller.py +0 -0
  53. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/static/android-chrome-384x384.png +0 -0
  54. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/static/browserconfig.xml +0 -0
  55. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/static/favicon.ico +0 -0
  56. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/static/safari-pinned-tab.svg +0 -0
  57. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/static/site.webmanifest +0 -0
  58. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/strava_controller.py +0 -0
  59. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/summary_controller.py +0 -0
  60. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/calendar-month.html.j2 +0 -0
  61. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/calendar.html.j2 +0 -0
  62. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/config.html.j2 +0 -0
  63. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/eddington.html.j2 +0 -0
  64. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/equipment.html.j2 +0 -0
  65. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/explorer.html.j2 +0 -0
  66. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/heatmap.html.j2 +0 -0
  67. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/index.html.j2 +0 -0
  68. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/search.html.j2 +0 -0
  69. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/strava-connect.html.j2 +0 -0
  70. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/templates/summary.html.j2 +0 -0
  71. {geo_activity_playground-0.19.0 → geo_activity_playground-0.20.0}/geo_activity_playground/webui/tile_controller.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geo-activity-playground
3
- Version: 0.19.0
3
+ Version: 0.20.0
4
4
  Summary: Analysis of geo data activities like rides, runs or hikes.
5
5
  License: MIT
6
6
  Author: Martin Ueding
@@ -20,6 +20,7 @@ Requires-Dist: fitdecode (>=0.10.0,<0.11.0)
20
20
  Requires-Dist: flask (>=3.0.0,<4.0.0)
21
21
  Requires-Dist: geojson (>=3.0.1,<4.0.0)
22
22
  Requires-Dist: gpxpy (>=1.5.0,<2.0.0)
23
+ Requires-Dist: imagehash (>=4.3.1,<5.0.0)
23
24
  Requires-Dist: jinja2 (>=3.1.2,<4.0.0)
24
25
  Requires-Dist: matplotlib (>=3.6.3,<4.0.0)
25
26
  Requires-Dist: numpy (>=1.22.4,<2.0.0)
@@ -32,7 +33,7 @@ Requires-Dist: stravalib (>=1.3.3,<2.0.0)
32
33
  Requires-Dist: tcxreader (>=0.4.5,<0.5.0)
33
34
  Requires-Dist: tomli (>=2.0.1,<3.0.0) ; python_version < "3.11"
34
35
  Requires-Dist: tqdm (>=4.64.0,<5.0.0)
35
- Requires-Dist: vegafusion (>=1.4.3,<2.0.0)
36
36
  Requires-Dist: vegafusion-python-embed (>=1.4.3,<2.0.0)
37
+ Requires-Dist: vegafusion[embed] (>=1.4.3,<2.0.0)
37
38
  Requires-Dist: vl-convert-python (>=1.0.1,<2.0.0)
38
39
  Requires-Dist: xmltodict (>=0.13.0,<0.14.0)
@@ -6,6 +6,7 @@ import sys
6
6
 
7
7
  import coloredlogs
8
8
 
9
+ from .core.similarity import precompute_activity_distances
9
10
  from .importers.strava_checkout import convert_strava_checkout
10
11
  from .importers.strava_checkout import import_from_strava_checkout
11
12
  from geo_activity_playground.core.activities import ActivityRepository
@@ -114,6 +115,7 @@ def make_activity_repository(
114
115
  import_from_strava_api(repository)
115
116
 
116
117
  embellish_time_series(repository)
118
+ precompute_activity_distances(repository)
117
119
  compute_tile_visits(repository)
118
120
  compute_tile_evolution()
119
121
  return repository
@@ -1,6 +1,7 @@
1
1
  import datetime
2
2
  import functools
3
3
  import logging
4
+ import pathlib
4
5
  from typing import Iterator
5
6
  from typing import Optional
6
7
  from typing import TypedDict
@@ -12,6 +13,7 @@ import pandas as pd
12
13
  from tqdm import tqdm
13
14
 
14
15
  from geo_activity_playground.core.config import get_config
16
+ from geo_activity_playground.core.coordinates import get_distance
15
17
  from geo_activity_playground.core.paths import activities_path
16
18
  from geo_activity_playground.core.paths import activity_timeseries_path
17
19
  from geo_activity_playground.core.tasks import WorkTracker
@@ -25,11 +27,15 @@ class ActivityMeta(TypedDict):
25
27
  commute: bool
26
28
  distance_km: float
27
29
  elapsed_time: datetime.timedelta
30
+ end_latitude: float
31
+ end_longitude: float
28
32
  equipment: str
29
33
  id: int
30
34
  kind: str
31
35
  name: str
32
36
  path: str
37
+ start_latitude: float
38
+ start_longitude: float
33
39
  start: datetime.datetime
34
40
 
35
41
 
@@ -43,15 +49,27 @@ class ActivityRepository:
43
49
  self.meta = pd.DataFrame()
44
50
 
45
51
  self._loose_activities: list[ActivityMeta] = []
52
+ self._loose_activity_ids: set[int] = set()
46
53
 
47
54
  def __len__(self) -> int:
48
55
  return len(self.meta)
49
56
 
50
57
  def add_activity(self, activity_meta: ActivityMeta) -> None:
51
- assert not self.has_activity(
52
- activity_meta["id"]
53
- ), f"Trying to add the following activity which already exists: {activity_meta}"
58
+ _extend_metadata_from_timeseries(activity_meta)
59
+ if activity_meta["id"] in self._loose_activity_ids:
60
+ logger.error(f"Activity with the same file already exists. New activity:")
61
+ print(activity_meta)
62
+ print("Existing activity:")
63
+ print(
64
+ [
65
+ activity
66
+ for activity in self._loose_activities
67
+ if activity["id"] == activity_meta["id"]
68
+ ]
69
+ )
70
+ raise ValueError("Activity with the same file already exists.")
54
71
  self._loose_activities.append(activity_meta)
72
+ self._loose_activity_ids.add(activity_meta["id"])
55
73
 
56
74
  def commit(self) -> None:
57
75
  if self._loose_activities:
@@ -59,7 +77,15 @@ class ActivityRepository:
59
77
  f"Adding {len(self._loose_activities)} activities to the repository …"
60
78
  )
61
79
  new_df = pd.DataFrame(self._loose_activities)
62
- self.meta = pd.concat([self.meta, new_df])
80
+ if len(self.meta):
81
+ new_ids_set = set(new_df["id"])
82
+ is_kept = [
83
+ activity_id not in new_ids_set for activity_id in self.meta["id"]
84
+ ]
85
+ old_df = self.meta.loc[is_kept]
86
+ else:
87
+ old_df = self.meta
88
+ self.meta = pd.concat([old_df, new_df])
63
89
  assert pd.api.types.is_dtype_equal(
64
90
  self.meta["start"].dtype, "datetime64[ns, UTC]"
65
91
  ), self.meta["start"].dtype
@@ -86,6 +112,8 @@ class ActivityRepository:
86
112
  def last_activity_date(self) -> Optional[datetime.datetime]:
87
113
  if len(self.meta):
88
114
  return self.meta.iloc[-1]["start"]
115
+ else:
116
+ return None
89
117
 
90
118
  @property
91
119
  def activity_ids(self) -> set[int]:
@@ -122,55 +150,81 @@ def embellish_time_series(repository: ActivityRepository) -> None:
122
150
  path = activity_timeseries_path(activity_id)
123
151
  df = pd.read_parquet(path)
124
152
  df.name = id
125
- changed = False
126
- if pd.api.types.is_dtype_equal(df["time"].dtype, "int64"):
127
- start = repository.get_activity_by_id(activity_id)["start"]
128
- time = df["time"]
129
- del df["time"]
130
- df["time"] = [start + datetime.timedelta(seconds=t) for t in time]
131
- changed = True
132
- assert pd.api.types.is_dtype_equal(df["time"].dtype, "datetime64[ns, UTC]")
133
-
134
- if "distance_km" in df.columns:
135
- if "speed" not in df.columns:
136
- df["speed"] = (
137
- df["distance_km"].diff()
138
- / (df["time"].diff().dt.total_seconds() + 1e-3)
139
- * 3600
140
- )
141
- changed = True
142
-
143
- potential_jumps = (df["speed"] > 40) & (df["speed"].diff() > 10)
144
- if np.any(potential_jumps):
145
- df = df.loc[~potential_jumps]
146
- changed = True
147
-
148
- if "x" not in df.columns:
149
- x, y = compute_tile_float(df["latitude"], df["longitude"], 0)
150
- df["x"] = x
151
- df["y"] = y
152
- changed = True
153
-
154
- if "segment_id" not in df.columns:
155
- time_diff = (df["time"] - df["time"].shift(1)).dt.total_seconds()
156
- jump_indices = time_diff >= 30
157
- df["segment_id"] = np.cumsum(jump_indices)
158
- changed = True
159
-
153
+ df, changed = embellish_single_time_series(
154
+ df, repository.get_activity_by_id(activity_id)["start"]
155
+ )
160
156
  if changed:
161
157
  df.to_parquet(path)
162
158
  work_tracker.mark_done(activity_id)
163
159
  work_tracker.close()
164
160
 
165
161
 
162
+ def embellish_single_time_series(
163
+ timeseries: pd.DataFrame, start: Optional[datetime.datetime] = None
164
+ ) -> bool:
165
+ changed = False
166
+ time_diff_threshold_seconds = 30
167
+ time_diff = (timeseries["time"] - timeseries["time"].shift(1)).dt.total_seconds()
168
+ jump_indices = time_diff >= time_diff_threshold_seconds
169
+
170
+ if start is not None and pd.api.types.is_dtype_equal(
171
+ timeseries["time"].dtype, "int64"
172
+ ):
173
+ time = timeseries["time"]
174
+ del timeseries["time"]
175
+ timeseries["time"] = [start + datetime.timedelta(seconds=t) for t in time]
176
+ changed = True
177
+ assert pd.api.types.is_dtype_equal(timeseries["time"].dtype, "datetime64[ns, UTC]")
178
+
179
+ # Add distance column if missing.
180
+ if "distance_km" not in timeseries.columns:
181
+ distances = get_distance(
182
+ timeseries["latitude"].shift(1),
183
+ timeseries["longitude"].shift(1),
184
+ timeseries["latitude"],
185
+ timeseries["longitude"],
186
+ ).fillna(0.0)
187
+ distances.loc[jump_indices] = 0.0
188
+ timeseries["distance_km"] = pd.Series(np.cumsum(distances)) / 1000
189
+ changed = True
190
+
191
+ if "distance_km" in timeseries.columns:
192
+ if "speed" not in timeseries.columns:
193
+ timeseries["speed"] = (
194
+ timeseries["distance_km"].diff()
195
+ / (timeseries["time"].diff().dt.total_seconds() + 1e-3)
196
+ * 3600
197
+ )
198
+ changed = True
199
+
200
+ potential_jumps = (timeseries["speed"] > 40) & (timeseries["speed"].diff() > 10)
201
+ if np.any(potential_jumps):
202
+ timeseries = timeseries.loc[~potential_jumps]
203
+ changed = True
204
+
205
+ if "x" not in timeseries.columns:
206
+ x, y = compute_tile_float(timeseries["latitude"], timeseries["longitude"], 0)
207
+ timeseries["x"] = x
208
+ timeseries["y"] = y
209
+ changed = True
210
+
211
+ if "segment_id" not in timeseries.columns:
212
+ timeseries["segment_id"] = np.cumsum(jump_indices)
213
+ changed = True
214
+
215
+ return timeseries, changed
216
+
217
+
166
218
  def make_geojson_from_time_series(time_series: pd.DataFrame) -> str:
167
- line = geojson.LineString(
168
- [
169
- (lon, lat)
170
- for lat, lon in zip(time_series["latitude"], time_series["longitude"])
219
+ fc = geojson.FeatureCollection(
220
+ features=[
221
+ geojson.LineString(
222
+ [(lon, lat) for lat, lon in zip(group["latitude"], group["longitude"])]
223
+ )
224
+ for _, group in time_series.groupby("segment_id")
171
225
  ]
172
226
  )
173
- return geojson.dumps(line)
227
+ return geojson.dumps(fc)
174
228
 
175
229
 
176
230
  def make_geojson_color_line(time_series: pd.DataFrame) -> str:
@@ -188,9 +242,8 @@ def make_geojson_color_line(time_series: pd.DataFrame) -> str:
188
242
  "color": matplotlib.colors.to_hex(cmap(min(next["speed"] / 35, 1.0))),
189
243
  },
190
244
  )
191
- for (_, row), (_, next) in zip(
192
- time_series.iterrows(), time_series.iloc[1:].iterrows()
193
- )
245
+ for _, group in time_series.groupby("segment_id")
246
+ for (_, row), (_, next) in zip(group.iterrows(), group.iloc[1:].iterrows())
194
247
  ]
195
248
  feature_collection = geojson.FeatureCollection(features)
196
249
  return geojson.dumps(feature_collection)
@@ -236,3 +289,14 @@ def extract_heart_rate_zones(time_series: pd.DataFrame) -> Optional[pd.DataFrame
236
289
  duration_per_zone.loc[i] = 0.0
237
290
  result = duration_per_zone.reset_index()
238
291
  return result
292
+
293
+
294
+ def _extend_metadata_from_timeseries(metadata: ActivityMeta) -> None:
295
+ timeseries = pd.read_parquet(
296
+ pathlib.Path("Cache/Activity Timeseries") / f"{metadata['id']}.parquet"
297
+ )
298
+
299
+ metadata["start_latitude"] = timeseries["latitude"].iloc[0]
300
+ metadata["end_latitude"] = timeseries["latitude"].iloc[-1]
301
+ metadata["start_longitude"] = timeseries["longitude"].iloc[0]
302
+ metadata["end_longitude"] = timeseries["longitude"].iloc[-1]
@@ -14,6 +14,7 @@ import tcxreader.tcxreader
14
14
  import xmltodict
15
15
 
16
16
  from geo_activity_playground.core.activities import ActivityMeta
17
+ from geo_activity_playground.core.activities import embellish_single_time_series
17
18
  from geo_activity_playground.core.coordinates import get_distance
18
19
 
19
20
  logger = logging.getLogger(__name__)
@@ -51,7 +52,7 @@ def read_activity(path: pathlib.Path) -> tuple[ActivityMeta, pd.DataFrame]:
51
52
  elif file_type in [".kml", ".kmz"]:
52
53
  timeseries = read_kml_activity(path, opener)
53
54
  elif file_type == ".csv": # Simra csv export
54
- timeseries = read_simra_activity(path)
55
+ timeseries = read_simra_activity(path, opener)
55
56
  else:
56
57
  raise ActivityParseError(f"Unsupported file format: {file_type}")
57
58
 
@@ -74,18 +75,7 @@ def read_activity(path: pathlib.Path) -> tuple[ActivityMeta, pd.DataFrame]:
74
75
  "It looks like the date parsing has gone wrong."
75
76
  ) from e
76
77
 
77
- # Add distance column if missing.
78
- if "distance_km" not in timeseries.columns:
79
- distances = [0] + [
80
- get_distance(lat_1, lon_1, lat_2, lon_2)
81
- for lat_1, lon_1, lat_2, lon_2 in zip(
82
- timeseries["latitude"],
83
- timeseries["longitude"],
84
- timeseries["latitude"].iloc[1:],
85
- timeseries["longitude"].iloc[1:],
86
- )
87
- ]
88
- timeseries["distance_km"] = pd.Series(np.cumsum(distances)) / 1000
78
+ timeseries, changed = embellish_single_time_series(timeseries)
89
79
 
90
80
  # Extract some meta data from the time series.
91
81
  metadata["start"] = timeseries["time"].iloc[0]
@@ -212,9 +202,13 @@ def read_gpx_activity(path: pathlib.Path, open) -> pd.DataFrame:
212
202
  time = dateutil.parser.parse(str(point.time))
213
203
  assert isinstance(time, datetime.datetime)
214
204
  time = time.astimezone(datetime.timezone.utc)
215
- points.append((time, point.latitude, point.longitude))
205
+ points.append((time, point.latitude, point.longitude, point.elevation))
216
206
 
217
- return pd.DataFrame(points, columns=["time", "latitude", "longitude"])
207
+ df = pd.DataFrame(points, columns=["time", "latitude", "longitude", "altitude"])
208
+ # Some files don't have altitude information. In these cases we remove the column.
209
+ if not df["altitude"].any():
210
+ del df["altitude"]
211
+ return df
218
212
 
219
213
 
220
214
  def read_tcx_activity(path: pathlib.Path, opener) -> pd.DataFrame:
@@ -95,6 +95,9 @@ def apply_cache_migrations() -> None:
95
95
  delete_activity_metadata,
96
96
  delete_activity_metadata,
97
97
  convert_distances_to_km,
98
+ delete_activity_metadata,
99
+ delete_tile_visits,
100
+ delete_heatmap_cache,
98
101
  ]
99
102
 
100
103
  for migration in migrations[cache_status["num_applied_migrations"] :]:
@@ -21,7 +21,4 @@ def get_config() -> dict:
21
21
  with open(config_path, "rb") as f:
22
22
  config = tomllib.load(f)
23
23
 
24
- # Filter out empty config groups.
25
- config = {key: value for key, value in config.items() if value}
26
-
27
24
  return config
@@ -4,7 +4,6 @@ This code is based on https://github.com/remisalmon/Strava-local-heatmap.
4
4
  import dataclasses
5
5
  import logging
6
6
 
7
- import matplotlib.pyplot as pl
8
7
  import numpy as np
9
8
 
10
9
  from geo_activity_playground.core.tiles import compute_tile_float
@@ -147,32 +146,3 @@ def crop_image_to_bounds(
147
146
  max_y = int((max_y - tile_bounds.y_tile_min) * OSM_TILE_SIZE)
148
147
  image = image[min_y:max_y, min_x:max_x, :]
149
148
  return image
150
-
151
-
152
- def gaussian_filter(image, sigma):
153
- # returns image filtered with a gaussian function of variance sigma**2
154
- #
155
- # input: image = numpy.ndarray
156
- # sigma = float
157
- # output: image = numpy.ndarray
158
-
159
- i, j = np.meshgrid(
160
- np.arange(image.shape[0]), np.arange(image.shape[1]), indexing="ij"
161
- )
162
-
163
- mu = (int(image.shape[0] / 2.0), int(image.shape[1] / 2.0))
164
-
165
- gaussian = (
166
- 1.0
167
- / (2.0 * np.pi * sigma * sigma)
168
- * np.exp(-0.5 * (((i - mu[0]) / sigma) ** 2 + ((j - mu[1]) / sigma) ** 2))
169
- )
170
-
171
- gaussian = np.roll(gaussian, (-mu[0], -mu[1]), axis=(0, 1))
172
-
173
- image_fft = np.fft.rfft2(image)
174
- gaussian_fft = np.fft.rfft2(gaussian)
175
-
176
- image = np.fft.irfft2(image_fft * gaussian_fft)
177
-
178
- return image
@@ -0,0 +1,93 @@
1
+ import pathlib
2
+ import pickle
3
+
4
+ import imagehash
5
+ import numpy as np
6
+ import pandas as pd
7
+ from PIL import Image
8
+ from PIL import ImageDraw
9
+ from tqdm import tqdm
10
+
11
+ from .activities import ActivityRepository
12
+ from .coordinates import get_distance
13
+ from geo_activity_playground.core.tasks import stored_object
14
+
15
+
16
+ fingerprint_path = pathlib.Path("Cache/activity_fingerprints.pickle")
17
+ distances_path = pathlib.Path("Cache/activity_distances.pickle")
18
+
19
+
20
+ def add_distance(distances, this, other, distance) -> None:
21
+ if this not in distances:
22
+ distances[this] = {}
23
+ if distance not in distances[this]:
24
+ distances[this][distance] = set()
25
+ distances[this][distance].add(other)
26
+
27
+
28
+ def precompute_activity_distances(repository: ActivityRepository) -> None:
29
+ with stored_object(fingerprint_path, {}) as fingerprints, stored_object(
30
+ distances_path, {}
31
+ ) as distances:
32
+ activity_ids = repository.activity_ids
33
+
34
+ activity_ids_without_fingerprint = [
35
+ activity_id
36
+ for activity_id in activity_ids
37
+ if activity_id not in fingerprints
38
+ ]
39
+ for activity_id in tqdm(
40
+ activity_ids_without_fingerprint, desc="Compute activity fingerprints"
41
+ ):
42
+ ts = repository.get_time_series(activity_id)
43
+ ts_hash = _compute_image_hash(ts)
44
+ fingerprints[activity_id] = ts_hash
45
+
46
+ for this in tqdm(
47
+ activity_ids_without_fingerprint, desc="Compute activity distances"
48
+ ):
49
+ for other in activity_ids:
50
+ distance = _hamming_distance(fingerprints[this], fingerprints[other])
51
+ add_distance(distances, this, other, distance)
52
+ add_distance(distances, other, this, distance)
53
+
54
+
55
+ def asymmetric_activity_overlap(
56
+ activity: pd.DataFrame, reference: pd.DataFrame
57
+ ) -> float:
58
+ sample = activity.iloc[np.linspace(0, len(activity) - 1, 50, dtype=np.int64)]
59
+ min_distances = [
60
+ _get_min_distance(latitude, longitude, reference)
61
+ for (latitude, longitude) in zip(sample["latitude"], sample["longitude"])
62
+ ]
63
+ return sum(distance < 25 for distance in min_distances) / len(min_distances)
64
+
65
+
66
+ def _get_min_distance(latitude: float, longitude: float, other: pd.DataFrame) -> float:
67
+ distances = get_distance(latitude, longitude, other["latitude"], other["longitude"])
68
+ return np.min(distances)
69
+
70
+
71
+ def _compute_image_hash(time_series) -> int:
72
+ z = 12 + 8
73
+ x = time_series["x"] * 2**z
74
+ y = time_series["y"] * 2**z
75
+ xy_pixels = np.array([x - x.min(), y - y.min()]).T
76
+ dim = xy_pixels.max(axis=0)
77
+ # Some activities have bogus data in them which makes them require a huge image. We just skip those outright and return a dummy hash value.
78
+ if max(dim) > 6000:
79
+ return 0
80
+ im = Image.new("L", tuple(map(int, dim)))
81
+ draw = ImageDraw.Draw(im)
82
+ pixels = list(map(int, xy_pixels.flatten()))
83
+ draw.line(pixels, fill=255, width=5)
84
+ return int(str(imagehash.dhash(im)), 16)
85
+
86
+
87
+ def _hamming_distance(a: int, b: int) -> int:
88
+ diff = a ^ b
89
+ result = 0
90
+ while diff:
91
+ result += diff % 2
92
+ diff //= 2
93
+ return result
@@ -2,11 +2,35 @@ import contextlib
2
2
  import json
3
3
  import pathlib
4
4
  import pickle
5
+ from collections.abc import Iterable
5
6
  from typing import Any
7
+ from typing import Generic
8
+ from typing import Sequence
9
+ from typing import TypeVar
6
10
 
7
11
  from geo_activity_playground.core.paths import cache_dir
8
12
 
9
13
 
14
+ T = TypeVar("T")
15
+
16
+
17
+ @contextlib.contextmanager
18
+ def stored_object(path: pathlib.Path, default):
19
+ if path.exists():
20
+ with open(path, "rb") as f:
21
+ payload = pickle.load(f)
22
+ else:
23
+ payload = default
24
+
25
+ yield payload
26
+
27
+ temp_location = path.with_suffix(".tmp")
28
+ with open(temp_location, "wb") as f:
29
+ pickle.dump(payload, f)
30
+ path.unlink(missing_ok=True)
31
+ temp_location.rename(path)
32
+
33
+
10
34
  def work_tracker_path(name: str) -> pathlib.Path:
11
35
  return cache_dir() / f"work-tracker-{name}.pickle"
12
36
 
@@ -35,7 +59,7 @@ class WorkTracker:
35
59
  else:
36
60
  self._done = set()
37
61
 
38
- def filter(self, ids: list[int]) -> set[int]:
62
+ def filter(self, ids: Iterable[int]) -> set[int]:
39
63
  return set(ids) - self._done
40
64
 
41
65
  def mark_done(self, id: int) -> None:
@@ -1,6 +1,6 @@
1
1
  import json
2
2
  import logging
3
- from typing import Iterator
3
+ from collections.abc import Iterable
4
4
  from typing import Optional
5
5
 
6
6
  import geojson
@@ -8,7 +8,6 @@ import gpxpy
8
8
  import pandas as pd
9
9
 
10
10
  from geo_activity_playground.core.coordinates import Bounds
11
- from geo_activity_playground.core.tiles import adjacent_to
12
11
  from geo_activity_playground.core.tiles import get_tile_upper_left_lat_lon
13
12
 
14
13
 
@@ -66,10 +65,10 @@ def make_explorer_rectangle(
66
65
 
67
66
 
68
67
  def make_grid_points(
69
- tile_iterator: Iterator[tuple[int, int]], zoom: int
68
+ tiles: Iterable[tuple[int, int]], zoom: int
70
69
  ) -> list[list[list[float]]]:
71
70
  result = []
72
- for tile_x, tile_y in tile_iterator:
71
+ for tile_x, tile_y in tiles:
73
72
  tile = [
74
73
  get_tile_upper_left_lat_lon(tile_x, tile_y, zoom),
75
74
  get_tile_upper_left_lat_lon(tile_x + 1, tile_y, zoom),
@@ -6,6 +6,7 @@ import pathlib
6
6
  import pickle
7
7
  from typing import Any
8
8
  from typing import Iterator
9
+ from typing import Optional
9
10
 
10
11
  import pandas as pd
11
12
  from tqdm import tqdm
@@ -116,10 +117,10 @@ class TileEvolutionState:
116
117
  self.square_start = 0
117
118
  self.cluster_start = 0
118
119
  self.max_square_size = 0
119
- self.visited_tiles = set()
120
+ self.visited_tiles: set[tuple[int, int]] = set()
120
121
  self.square_evolution = pd.DataFrame()
121
- self.square_x = None
122
- self.square_y = None
122
+ self.square_x: Optional[int] = None
123
+ self.square_y: Optional[int] = None
123
124
 
124
125
 
125
126
  def compute_tile_evolution() -> None:
@@ -1,6 +1,7 @@
1
1
  import hashlib
2
2
  import logging
3
3
  import pathlib
4
+ import pickle
4
5
  import sys
5
6
  import traceback
6
7
 
@@ -22,35 +23,45 @@ def import_from_directory(
22
23
  paths_with_errors = []
23
24
  work_tracker = WorkTracker("parse-activity-files")
24
25
 
25
- activity_paths = {
26
- int(hashlib.sha3_224(str(path).encode()).hexdigest(), 16) % 2**62: path
26
+ activity_paths = [
27
+ path
27
28
  for path in pathlib.Path("Activities").rglob("*.*")
28
29
  if path.is_file() and path.suffixes and not path.stem.startswith(".")
29
- }
30
- activities_ids_to_parse = work_tracker.filter(activity_paths.keys())
30
+ ]
31
+ new_activity_paths = work_tracker.filter(activity_paths)
31
32
 
32
33
  activity_stream_dir = pathlib.Path("Cache/Activity Timeseries")
33
34
  activity_stream_dir.mkdir(exist_ok=True, parents=True)
34
- for activity_id in tqdm(activities_ids_to_parse, desc="Parse activity files"):
35
- path = activity_paths[activity_id]
36
- try:
37
- activity_meta_from_file, timeseries = read_activity(path)
38
- except ActivityParseError as e:
39
- logger.error(f"Error while parsing file {path}:")
40
- traceback.print_exc()
41
- paths_with_errors.append((path, str(e)))
42
- continue
43
- except:
44
- logger.error(f"Encountered a problem with {path=}, see details below.")
45
- raise
46
-
47
- work_tracker.mark_done(activity_id)
48
-
49
- if len(timeseries) == 0:
50
- continue
35
+ file_metadata_dir = pathlib.Path("Cache/Activity Metadata")
36
+ file_metadata_dir.mkdir(exist_ok=True, parents=True)
51
37
 
38
+ for path in tqdm(new_activity_paths, desc="Parse activity files"):
39
+ activity_id = _get_file_hash(path)
52
40
  timeseries_path = activity_stream_dir / f"{activity_id}.parquet"
53
- timeseries.to_parquet(timeseries_path)
41
+ file_metadata_path = file_metadata_dir / f"{activity_id}.pickle"
42
+ work_tracker.mark_done(path)
43
+
44
+ if not timeseries_path.exists():
45
+ try:
46
+ activity_meta_from_file, timeseries = read_activity(path)
47
+ except ActivityParseError as e:
48
+ logger.error(f"Error while parsing file {path}:")
49
+ traceback.print_exc()
50
+ paths_with_errors.append((path, str(e)))
51
+ continue
52
+ except:
53
+ logger.error(f"Encountered a problem with {path=}, see details below.")
54
+ raise
55
+
56
+ if len(timeseries) == 0:
57
+ continue
58
+
59
+ timeseries.to_parquet(timeseries_path)
60
+ with open(file_metadata_path, "wb") as f:
61
+ pickle.dump(activity_meta_from_file, f)
62
+ else:
63
+ with open(file_metadata_path, "rb") as f:
64
+ activity_meta_from_file = pickle.load(f)
54
65
 
55
66
  activity_meta = ActivityMeta(
56
67
  commute=path.parts[-2] == "Commute",
@@ -82,3 +93,11 @@ def import_from_directory(
82
93
  repository.commit()
83
94
 
84
95
  work_tracker.close()
96
+
97
+
98
+ def _get_file_hash(path: pathlib.Path) -> int:
99
+ file_hash = hashlib.blake2s()
100
+ with open(path, "rb") as f:
101
+ while chunk := f.read(8192):
102
+ file_hash.update(chunk)
103
+ return int(file_hash.hexdigest(), 16) % 2**62
@@ -191,7 +191,7 @@ def download_strava_time_series(activity_id: int, client: Client) -> pd.DataFram
191
191
  if name in streams:
192
192
  columns[name] = streams[name].data
193
193
  if "distance" in streams:
194
- columns["distance_km"] = streams["distance"].data / 1000
194
+ columns["distance_km"] = pd.Series(streams["distance"].data) / 1000
195
195
 
196
196
  df = pd.DataFrame(columns)
197
197
  return df