geo-activity-playground 1.2.0__py3-none-any.whl → 1.3.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (39)
  1. geo_activity_playground/alembic/versions/85fe0348e8a2_add_time_series_uuid_field.py +28 -0
  2. geo_activity_playground/alembic/versions/f2f50843be2d_make_all_fields_in_activity_nullable.py +34 -0
  3. geo_activity_playground/core/coordinates.py +12 -1
  4. geo_activity_playground/core/copernicus_dem.py +95 -0
  5. geo_activity_playground/core/datamodel.py +43 -16
  6. geo_activity_playground/core/enrichment.py +229 -164
  7. geo_activity_playground/core/paths.py +8 -0
  8. geo_activity_playground/core/test_pandas_timezone.py +36 -0
  9. geo_activity_playground/core/test_time_zone_from_location.py +7 -0
  10. geo_activity_playground/core/test_time_zone_import.py +93 -0
  11. geo_activity_playground/core/test_timezone_sqlalchemy.py +44 -0
  12. geo_activity_playground/core/tiles.py +4 -1
  13. geo_activity_playground/core/time_conversion.py +42 -14
  14. geo_activity_playground/explorer/tile_visits.py +7 -4
  15. geo_activity_playground/importers/activity_parsers.py +31 -23
  16. geo_activity_playground/importers/directory.py +69 -108
  17. geo_activity_playground/importers/strava_api.py +55 -36
  18. geo_activity_playground/importers/strava_checkout.py +32 -57
  19. geo_activity_playground/webui/app.py +46 -2
  20. geo_activity_playground/webui/blueprints/activity_blueprint.py +13 -11
  21. geo_activity_playground/webui/blueprints/entry_views.py +1 -1
  22. geo_activity_playground/webui/blueprints/explorer_blueprint.py +1 -7
  23. geo_activity_playground/webui/blueprints/heatmap_blueprint.py +2 -2
  24. geo_activity_playground/webui/blueprints/settings_blueprint.py +3 -14
  25. geo_activity_playground/webui/blueprints/summary_blueprint.py +6 -6
  26. geo_activity_playground/webui/blueprints/time_zone_fixer_blueprint.py +69 -0
  27. geo_activity_playground/webui/blueprints/upload_blueprint.py +3 -16
  28. geo_activity_playground/webui/columns.py +9 -1
  29. geo_activity_playground/webui/templates/activity/show.html.j2 +5 -1
  30. geo_activity_playground/webui/templates/hall_of_fame/index.html.j2 +1 -1
  31. geo_activity_playground/webui/templates/home.html.j2 +3 -2
  32. geo_activity_playground/webui/templates/page.html.j2 +2 -0
  33. geo_activity_playground/webui/templates/time_zone_fixer/index.html.j2 +31 -0
  34. {geo_activity_playground-1.2.0.dist-info → geo_activity_playground-1.3.1.dist-info}/METADATA +8 -3
  35. {geo_activity_playground-1.2.0.dist-info → geo_activity_playground-1.3.1.dist-info}/RECORD +38 -30
  36. geo_activity_playground/core/test_time_conversion.py +0 -37
  37. {geo_activity_playground-1.2.0.dist-info → geo_activity_playground-1.3.1.dist-info}/LICENSE +0 -0
  38. {geo_activity_playground-1.2.0.dist-info → geo_activity_playground-1.3.1.dist-info}/WHEEL +0 -0
  39. {geo_activity_playground-1.2.0.dist-info → geo_activity_playground-1.3.1.dist-info}/entry_points.txt +0 -0
geo_activity_playground/core/time_conversion.py
@@ -1,14 +1,42 @@
- import numpy as np
- import pandas as pd
- 
- 
- def convert_to_datetime_ns(date) -> np.datetime64 | pd.Series:
-     if isinstance(date, pd.Series):
-         ts = pd.to_datetime(date)
-         ts = ts.dt.tz_localize(None)
-         return ts
-     else:
-         ts = pd.to_datetime(date)
-         if ts.tzinfo is not None:
-             ts = ts.tz_localize(None)
-         return ts.to_datetime64()
+ import datetime
+ import json
+ import logging
+ import zoneinfo
+ 
+ import requests
+ 
+ from .paths import USER_CACHE_DIR
+ 
+ logger = logging.getLogger(__name__)
+ 
+ 
+ def sanitize_datetime(
+     dt: datetime.datetime, fallback_from: str, fallback_to: str
+ ) -> datetime.datetime:
+     if dt.tzinfo is None:
+         dt = dt.replace(tzinfo=zoneinfo.ZoneInfo(fallback_from))
+     return dt.astimezone(zoneinfo.ZoneInfo(fallback_to))
+ 
+ 
+ def get_country_timezone(latitude: float, longitude: float) -> tuple[str, str]:
+     cache_file = USER_CACHE_DIR / "geotimezone" / f"{latitude:.5f}-{longitude:.5f}.json"
+     data = {}
+     if cache_file.exists():
+         try:
+             with open(cache_file) as f:
+                 data = json.load(f)
+         except json.decoder.JSONDecodeError as e:
+             logger.warning(
+                 f"'{cache_file}' could not be parsed ('{e}'). Deleting and trying again."
+             )
+             cache_file.unlink()
+ 
+     if not cache_file.exists():
+         url = f"https://api.geotimezone.com/public/timezone?latitude={latitude}&longitude={longitude}"
+         r = requests.get(url)
+         r.raise_for_status()
+         data = r.json()
+         cache_file.parent.mkdir(exist_ok=True, parents=True)
+         with open(cache_file, "w") as f:
+             json.dump(data, f)
+     return data["location"], data["iana_timezone"]
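
The replacement for `convert_to_datetime_ns` no longer strips timezone information; `sanitize_datetime` interprets naive datetimes in a fallback zone and converts everything into a target zone. A minimal usage sketch (zone names chosen for illustration):

    import datetime
    import zoneinfo

    def sanitize_datetime(dt, fallback_from, fallback_to):
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=zoneinfo.ZoneInfo(fallback_from))
        return dt.astimezone(zoneinfo.ZoneInfo(fallback_to))

    # Naive timestamps are assumed to be in the fallback zone …
    naive = datetime.datetime(2023, 11, 11, 16, 29, 49)
    print(sanitize_datetime(naive, "UTC", "Europe/Berlin"))
    # 2023-11-11 17:29:49+01:00

    # … while aware timestamps keep their instant and only change zone.
    aware = naive.replace(tzinfo=zoneinfo.ZoneInfo("America/New_York"))
    print(sanitize_datetime(aware, "UTC", "Europe/Berlin"))
    # 2023-11-11 22:29:49+01:00

`get_country_timezone` caches one JSON file per coordinate pair, so repeated lookups for the same point never hit the geotimezone API twice.
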
geo_activity_playground/explorer/tile_visits.py
@@ -143,7 +143,7 @@ def compute_tile_visits_new(
      work_tracker.reset()
  
      for activity_id in tqdm(
-         work_tracker.filter(repository.get_activity_ids()), desc="Tile visits"
+         work_tracker.filter(repository.get_activity_ids()), desc="Tile visits", delay=2
      ):
          _process_activity(repository, tile_visit_accessor.tile_state, activity_id)
          work_tracker.mark_done(activity_id)
@@ -213,7 +213,6 @@ def _process_activity(
  def _tiles_from_points(
      time_series: pd.DataFrame, zoom: int
  ) -> Iterator[tuple[datetime.datetime, int, int]]:
-     assert pd.api.types.is_dtype_equal(time_series["time"].dtype, "datetime64[ns]")
      xf = time_series["x"] * 2**zoom
      yf = time_series["y"] * 2**zoom
      for t1, x1, y1, x2, y2, s1, s2 in zip(
@@ -257,7 +256,9 @@ def _compute_cluster_evolution(
  
      rows = []
      for index, row in tqdm(
-         tiles.iloc[s.cluster_start :].iterrows(), desc=f"Cluster evolution for {zoom=}"
+         tiles.iloc[s.cluster_start :].iterrows(),
+         desc=f"Cluster evolution for {zoom=}",
+         delay=2,
      ):
          new_clusters = False
          # Current tile.
@@ -334,7 +335,9 @@ def _compute_square_history(
  ) -> None:
      rows = []
      for index, row in tqdm(
-         tiles.iloc[s.square_start :].iterrows(), desc=f"Square evolution for {zoom=}"
+         tiles.iloc[s.square_start :].iterrows(),
+         desc=f"Square evolution for {zoom=}",
+         delay=2,
      ):
          tile = (row["tile_x"], row["tile_y"])
          x, y = tile
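
All three progress bars above gain tqdm's `delay` keyword (supported by recent tqdm releases), which keeps the bar hidden unless the loop is still running after the given number of seconds, so quick incremental updates stay quiet. A standalone sketch of the effect:

    import time
    from tqdm import tqdm

    # With delay=2 the bar is only drawn if the loop takes longer than
    # two seconds; fast runs therefore produce no output at all.
    for _ in tqdm(range(10), desc="Tile visits", delay=2):
        time.sleep(0.05)  # finishes in ~0.5 s, so the bar never appears
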
geo_activity_playground/importers/activity_parsers.py
@@ -13,8 +13,8 @@ import pandas as pd
  import tcxreader.tcxreader
  import xmltodict
  
- from ..core.datamodel import ActivityMeta
- from ..core.time_conversion import convert_to_datetime_ns
+ from ..core.datamodel import Activity
+ from ..core.datamodel import get_or_make_kind
  
  logger = logging.getLogger(__name__)
  
@@ -23,9 +23,12 @@ class ActivityParseError(BaseException):
      pass
  
  
- def read_activity(path: pathlib.Path) -> tuple[ActivityMeta, pd.DataFrame]:
+ def read_activity(path: pathlib.Path) -> tuple[Activity, pd.DataFrame]:
      suffixes = [s.lower() for s in path.suffixes]
-     metadata = ActivityMeta()
+     activity = Activity()
+ 
+     if len(suffixes) == 0:
+         raise ActivityParseError(f"File has no suffix, ignoring")
  
      if suffixes[-1] == ".gz":
          opener = gzip.open
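
The new guard turns an empty `path.suffixes` into an explicit `ActivityParseError` instead of letting `suffixes[-1]` fail with an `IndexError`. The dispatch that follows works on the last one or two suffixes, so compressed files such as `ride.fit.gz` are transparently decompressed; a condensed sketch of that pattern (the helper name is illustrative, not from the package):

    import gzip
    import pathlib

    def pick_opener_and_type(path: pathlib.Path):
        suffixes = [s.lower() for s in path.suffixes]
        if not suffixes:
            raise ValueError(f"{path} has no suffix")
        if suffixes[-1] == ".gz":
            # The real file type sits one suffix further in.
            return gzip.open, suffixes[-2]
        return open, suffixes[-1]

    print(pick_opener_and_type(pathlib.Path("ride.fit.gz")))  # gzip.open, '.fit'
    print(pick_opener_and_type(pathlib.Path("hike.gpx")))     # open, '.gpx'
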
@@ -43,7 +46,7 @@ def read_activity(path: pathlib.Path) -> tuple[ActivityMeta, pd.DataFrame]:
          raise ActivityParseError(f"Encoding issue") from e
      elif file_type == ".fit":
          try:
-             metadata, timeseries = read_fit_activity(path, opener)
+             activity, timeseries = read_fit_activity(path, opener)
          except fitdecode.exceptions.FitError as e:
              raise ActivityParseError(f"Error in FIT file") from e
          except KeyError as e:
@@ -60,10 +63,10 @@ def read_activity(path: pathlib.Path) -> tuple[ActivityMeta, pd.DataFrame]:
      else:
          raise ActivityParseError(f"Unsupported file format: {file_type}")
  
-     return metadata, timeseries
+     return activity, timeseries
  
  
- def read_fit_activity(path: pathlib.Path, open) -> tuple[ActivityMeta, pd.DataFrame]:
+ def read_fit_activity(path: pathlib.Path, open) -> tuple[Activity, pd.DataFrame]:
      """
      {'timestamp': datetime.datetime(2023, 11, 11, 16, 29, 49, tzinfo=datetime.timezone.utc),
       'position_lat': <int>,
@@ -82,7 +85,7 @@ def read_fit_activity(path: pathlib.Path, open) -> tuple[ActivityMeta, pd.DataFrame]:
       'ascent': 35,
       'descent': 11}
      """
-     metadata = ActivityMeta()
+     activity = Activity()
      rows = []
      with open(path, "rb") as f:
          with fitdecode.FitReader(f) as fit:
@@ -103,10 +106,9 @@ def read_fit_activity(path: pathlib.Path, open) -> tuple[ActivityMeta, pd.DataFrame]:
                      if isinstance(time, datetime.datetime):
                          pass
                      elif time is None or isinstance(time, int):
-                         time = pd.NaT
+                         time = None
                      else:
                          raise RuntimeError(f"Cannot parse time: {time} in {path}.")
-                     time = convert_to_datetime_ns(time)
                      row = {
                          "time": time,
                          "latitude": values["position_lat"] / ((2**32) / 360),
@@ -114,7 +116,9 @@ def read_fit_activity(path: pathlib.Path, open) -> tuple[ActivityMeta, pd.DataFrame]:
                      }
                      if "heart_rate" in fields:
                          row["heartrate"] = values["heart_rate"]
-                     if "calories" in fields:
+                     if "calories" in fields and isinstance(
+                         values["calories"], float
+                     ):
                          row["calories"] = values["calories"]
                      if "cadence" in fields:
                          row["cadence"] = values["cadence"]
@@ -131,7 +135,13 @@ def read_fit_activity(path: pathlib.Path, open) -> tuple[ActivityMeta, pd.DataFrame]:
                          factor = _fit_speed_unit_factor(
                              fields["enhanced_speed"].units
                          )
-                         row["speed"] = values["enhanced_speed"] * factor
+                         try:
+                             row["speed"] = values["enhanced_speed"] * factor
+                         except TypeError as e:
+                             # https://github.com/martin-ueding/geo-activity-playground/issues/301
+                             raise ActivityParseError(
+                                 f'Cannot work with {values["enhanced_speed"]!r}, {factor!r}'
+                             ) from e
                      if "grade" in fields:
                          row["grade"] = values["grade"]
                      if "temperature" in fields:
@@ -142,17 +152,18 @@ def read_fit_activity(path: pathlib.Path, open) -> tuple[ActivityMeta, pd.DataFrame]:
  
                  # Additional meta data fields as documented in https://developer.garmin.com/fit/file-types/workout/.
                  if "wkt_name" in fields:
-                     metadata["name"] = values["wkt_name"]
+                     activity.name = values["wkt_name"]
                  if "sport" in fields:
-                     metadata["kind"] = str(values["sport"])
+                     kind_name = str(values["sport"])
                      if "sub_sport" in values:
-                         metadata["kind"] += " " + str(values["sub_sport"])
+                         kind_name += " " + str(values["sub_sport"])
+                     activity.kind = get_or_make_kind(kind_name)
                  if "total_calories" in fields:
-                     metadata["calories"] = values["total_calories"]
+                     activity.calories = values["total_calories"]
                  if "total_strides" in fields:
-                     metadata["steps"] = 2 * int(values["total_strides"])
+                     activity.steps = 2 * int(values["total_strides"])
  
-     return metadata, pd.DataFrame(rows)
+     return activity, pd.DataFrame(rows)
  
  
  def _fit_speed_unit_factor(unit: str) -> float:
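
`get_or_make_kind` replaces the former string-valued `metadata["kind"]` with a database-backed kind row. Its implementation is not part of this diff; judging by the name, it follows the usual SQLAlchemy get-or-create pattern, roughly like this generic sketch (model and session are placeholders, not the package's actual code):

    import sqlalchemy
    from sqlalchemy.orm import Session

    def get_or_make(session: Session, model, name: str):
        # Fetch the row by name, creating and staging it on a miss.
        instance = session.scalar(
            sqlalchemy.select(model).filter(model.name == name)
        )
        if instance is None:
            instance = model(name=name)
            session.add(instance)
        return instance
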
@@ -184,8 +195,7 @@ def read_gpx_activity(path: pathlib.Path, open) -> pd.DataFrame:
              elif isinstance(point.time, str):
                  time = dateutil.parser.parse(str(point.time))
              else:
-                 time = pd.NaT
-             time = convert_to_datetime_ns(time)
+                 time = None
              points.append((time, point.latitude, point.longitude, point.elevation))
  
      df = pd.DataFrame(points, columns=["time", "latitude", "longitude", "elevation"])
@@ -223,7 +233,6 @@ def read_tcx_activity(path: pathlib.Path, opener) -> pd.DataFrame:
          if trackpoint.latitude and trackpoint.longitude:
              time = trackpoint.time
              assert isinstance(time, datetime.datetime)
-             time = convert_to_datetime_ns(time)
              row = {
                  "time": time,
                  "latitude": trackpoint.latitude,
@@ -252,7 +261,6 @@ def read_kml_activity(path: pathlib.Path, opener) -> pd.DataFrame:
          for track in _list_or_scalar(placemark.get("gx:Track", [])):
              for when, where in zip(track["when"], track["gx:coord"]):
                  time = dateutil.parser.parse(when)
-                 time = convert_to_datetime_ns(time)
                  parts = where.split(" ")
                  if len(parts) == 2:
                      lon, lat = parts
@@ -282,7 +290,7 @@ def read_simra_activity(path: pathlib.Path, opener) -> pd.DataFrame:
      data["time"] = data["timeStamp"].apply(
          lambda d: datetime.datetime.fromtimestamp(d / 1000)
      )
-     data["time"] = convert_to_datetime_ns(data["time"])
+     data["time"] = data["time"]
      data = data.rename(columns={"lat": "latitude", "lon": "longitude"})
      return data.dropna(subset=["latitude"], ignore_index=True)[
          ["time", "latitude", "longitude"]
geo_activity_playground/importers/directory.py
@@ -1,21 +1,21 @@
- import hashlib
  import logging
  import pathlib
- import pickle
  import re
  import traceback
- from typing import Optional
  
- from tqdm import tqdm
+ import sqlalchemy
  
+ from ..core.activities import ActivityRepository
  from ..core.config import Config
- from ..core.datamodel import ActivityMeta
+ from ..core.datamodel import Activity
+ from ..core.datamodel import DB
  from ..core.datamodel import DEFAULT_UNKNOWN_NAME
- from ..core.paths import activity_extracted_dir
- from ..core.paths import activity_extracted_meta_dir
- from ..core.paths import activity_extracted_time_series_dir
- from ..core.tasks import stored_object
- from ..core.tasks import WorkTracker
+ from ..core.datamodel import get_or_make_equipment
+ from ..core.datamodel import get_or_make_kind
+ from ..core.enrichment import update_and_commit
+ from ..explorer.tile_visits import compute_tile_evolution
+ from ..explorer.tile_visits import compute_tile_visits_new
+ from ..explorer.tile_visits import TileVisitAccessor
  from .activity_parsers import ActivityParseError
  from .activity_parsers import read_activity
  
@@ -25,9 +25,10 @@ ACTIVITY_DIR = pathlib.Path("Activities")
  
  
  def import_from_directory(
-     metadata_extraction_regexes: list[str], config: Config
+     repository: ActivityRepository,
+     tile_visit_accessor: TileVisitAccessor,
+     config: Config,
  ) -> None:
- 
      activity_paths = [
          path
          for path in ACTIVITY_DIR.rglob("*.*")
@@ -36,105 +37,65 @@ def import_from_directory(
          and not path.stem.startswith(".")
          and not path.suffix in config.ignore_suffixes
      ]
-     work_tracker = WorkTracker(activity_extracted_dir() / "work-tracker-extract.pickle")
-     new_activity_paths = work_tracker.filter(activity_paths)
- 
-     with stored_object(
-         activity_extracted_dir() / "file-hashes.pickle", {}
-     ) as file_hashes:
-         for path in tqdm(new_activity_paths, desc="Detect deleted activities"):
-             file_hashes[path] = get_file_hash(path)
- 
-         deleted_files = set(file_hashes.keys()) - set(activity_paths)
-         deleted_hashes = [file_hashes[path] for path in deleted_files]
-         for deleted_hash in deleted_hashes:
-             activity_extracted_meta_path = (
-                 activity_extracted_meta_dir() / f"{deleted_hash}.pickle"
-             )
-             activity_extracted_time_series_path = (
-                 activity_extracted_time_series_dir() / f"{deleted_hash}.parquet"
+ 
+     for i, activity_path in enumerate(activity_paths):
+         with DB.session.no_autoflush:
+             activity = DB.session.scalar(
+                 sqlalchemy.select(Activity).filter(Activity.path == str(activity_path))
              )
-             logger.warning(f"Deleting {activity_extracted_meta_path}")
-             logger.warning(f"Deleting {activity_extracted_time_series_path}")
-             activity_extracted_meta_path.unlink(missing_ok=True)
-             activity_extracted_time_series_path.unlink(missing_ok=True)
-         for deleted_file in deleted_files:
-             logger.warning(f"Deleting {deleted_file}")
-             del file_hashes[deleted_file]
-             work_tracker.discard(deleted_file)
- 
-     paths_with_errors = []
-     for path in tqdm(new_activity_paths, desc="Parse activity metadata (serially)"):
-         errors = _cache_single_file(path)
-         if errors:
-             paths_with_errors.append(errors)
- 
-     for path in tqdm(new_activity_paths, desc="Collate activity metadata"):
-         activity_id = get_file_hash(path)
-         file_metadata_path = activity_extracted_meta_dir() / f"{activity_id}.pickle"
-         work_tracker.mark_done(path)
- 
-         if not file_metadata_path.exists():
-             continue
- 
-         with open(file_metadata_path, "rb") as f:
-             activity_meta_from_file = pickle.load(f)
- 
-         activity_meta = ActivityMeta(
-             id=activity_id,
-             # https://stackoverflow.com/a/74718395/653152
-             name=path.name.removesuffix("".join(path.suffixes)),
-             path=str(path),
-             kind=DEFAULT_UNKNOWN_NAME,
-             equipment=DEFAULT_UNKNOWN_NAME,
-             consider_for_achievements=True,
+         if activity is None:
+             import_from_file(
+                 activity_path, repository, tile_visit_accessor, config, i
+             )
+ 
+ 
+ def import_from_file(
+     path: pathlib.Path,
+     repository: ActivityRepository,
+     tile_visit_accessor: TileVisitAccessor,
+     config: Config,
+     i: int,
+ ) -> None:
+     logger.info(f"Importing {path} …")
+     try:
+         activity, time_series = read_activity(path)
+     except ActivityParseError as e:
+         logger.error(f"Error while parsing file {path}:")
+         traceback.print_exc()
+         return
+     except:
+         logger.error(f"Encountered a problem with {path=}, see details below.")
+         raise
+ 
+     if len(time_series) == 0:
+         logger.warning(f"Activity with {path=} has no time series data, skipping.")
+         return
+ 
+     activity.path = str(path)
+     if activity.name is None:
+         activity.name = path.name.removesuffix("".join(path.suffixes))
+ 
+     meta_from_path = _get_metadata_from_path(path, config.metadata_extraction_regexes)
+     activity.name = meta_from_path.get("name", activity.name)
+     if "equipment" in meta_from_path:
+         activity.equipment = get_or_make_equipment(meta_from_path["equipment"], config)
+     if "kind" in meta_from_path:
+         activity.kind = get_or_make_kind(meta_from_path["kind"])
+     if activity.equipment is None:
+         activity.equipment = get_or_make_equipment(
+             meta_from_path.get("equipment", DEFAULT_UNKNOWN_NAME), config
          )
-         activity_meta.update(activity_meta_from_file)
-         activity_meta.update(_get_metadata_from_path(path, metadata_extraction_regexes))
-         with open(file_metadata_path, "wb") as f:
-             pickle.dump(activity_meta, f)
- 
-     if paths_with_errors:
-         logger.warning(
-             "There were errors while parsing some of the files. These were skipped and tried again next time."
+     if activity.kind is None:
+         activity.kind = get_or_make_kind(
+             meta_from_path.get("kind", DEFAULT_UNKNOWN_NAME)
          )
-         for path, error in paths_with_errors:
-             logger.error(f"{path}: {error}")
- 
-     work_tracker.close()
- 
- 
- def _cache_single_file(path: pathlib.Path) -> Optional[tuple[pathlib.Path, str]]:
-     activity_id = get_file_hash(path)
-     timeseries_path = activity_extracted_time_series_dir() / f"{activity_id}.parquet"
-     file_metadata_path = activity_extracted_meta_dir() / f"{activity_id}.pickle"
- 
-     if not timeseries_path.exists():
-         try:
-             activity_meta_from_file, timeseries = read_activity(path)
-         except ActivityParseError as e:
-             logger.error(f"Error while parsing file {path}:")
-             traceback.print_exc()
-             return path, str(e)
-         except:
-             logger.error(f"Encountered a problem with {path=}, see details below.")
-             raise
- 
-         if len(timeseries) == 0:
-             return None
- 
-         timeseries.to_parquet(timeseries_path)
-         with open(file_metadata_path, "wb") as f:
-             pickle.dump(activity_meta_from_file, f)
-     return None
- 
- 
- def get_file_hash(path: pathlib.Path) -> int:
-     file_hash = hashlib.blake2s()
-     with open(path, "rb") as f:
-         while chunk := f.read(8192):
-             file_hash.update(chunk)
-     return int(file_hash.hexdigest(), 16) % 2**62
+ 
+     update_and_commit(activity, time_series, config)
+ 
+     if len(repository) > 0 and i % 50 == 0:
+         compute_tile_visits_new(repository, tile_visit_accessor)
+         compute_tile_evolution(tile_visit_accessor.tile_state, config)
+         tile_visit_accessor.save()
  
  
  def _get_metadata_from_path(
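
The directory importer's deduplication now rests on a single query per file: a path that already has an `Activity` row is skipped, replacing the previous blake2s file-hash bookkeeping. Isolated, the guard looks like this (schematic, with session and model passed in):

    import sqlalchemy

    def is_already_imported(session, Activity, activity_path) -> bool:
        # One SELECT per candidate file.
        existing = session.scalar(
            sqlalchemy.select(Activity).filter(Activity.path == str(activity_path))
        )
        return existing is not None

The `DB.session.no_autoflush` block in `import_from_directory` keeps half-populated pending activities from being flushed to the database by exactly this kind of lookup.
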
geo_activity_playground/importers/strava_api.py
@@ -3,6 +3,7 @@ import logging
  import pathlib
  import pickle
  import time
+ import zoneinfo
  
  import pandas as pd
  from stravalib import Client
@@ -11,15 +12,22 @@ from stravalib.exc import ObjectNotFound
  from stravalib.exc import RateLimitExceeded
  from tqdm import tqdm
  
+ from ..core.activities import ActivityRepository
  from ..core.config import Config
- from ..core.datamodel import ActivityMeta
- from ..core.paths import activity_extracted_meta_dir
+ from ..core.datamodel import Activity
+ from ..core.datamodel import DB
+ from ..core.datamodel import get_or_make_equipment
+ from ..core.datamodel import get_or_make_kind
+ from ..core.enrichment import apply_enrichments
+ from ..core.enrichment import update_and_commit
  from ..core.paths import activity_extracted_time_series_dir
  from ..core.paths import strava_api_dir
  from ..core.paths import strava_last_activity_date_path
  from ..core.tasks import get_state
  from ..core.tasks import set_state
- from ..core.time_conversion import convert_to_datetime_ns
+ from ..explorer.tile_visits import compute_tile_evolution
+ from ..explorer.tile_visits import compute_tile_visits_new
+ from ..explorer.tile_visits import TileVisitAccessor
  
  
  logger = logging.getLogger(__name__)
@@ -68,8 +76,12 @@ def round_to_next_quarter_hour(date: datetime.datetime) -> datetime.datetime:
      return next_quarter
  
  
- def import_from_strava_api(config: Config) -> None:
-     while try_import_strava(config):
+ def import_from_strava_api(
+     config: Config,
+     repository: ActivityRepository,
+     tile_visit_accessor: TileVisitAccessor,
+ ) -> None:
+     while try_import_strava(config, repository, tile_visit_accessor):
          now = datetime.datetime.now()
          next_quarter = round_to_next_quarter_hour(now)
          seconds_to_wait = (next_quarter - now).total_seconds() + 10
@@ -79,7 +91,11 @@ def import_from_strava_api(config: Config) -> None:
          time.sleep(seconds_to_wait)
  
  
- def try_import_strava(config: Config) -> bool:
+ def try_import_strava(
+     config: Config,
+     repository: ActivityRepository,
+     tile_visit_accessor: TileVisitAccessor,
+ ) -> bool:
      get_after = get_state(strava_last_activity_date_path(), "2000-01-01T00:00:00Z")
  
      gear_names = {None: "None"}
@@ -87,74 +103,77 @@ def try_import_strava(config: Config) -> bool:
      client = Client(access_token=get_current_access_token(config))
  
      try:
-         for activity in tqdm(
+         for strava_activity in tqdm(
              client.get_activities(after=get_after), desc="Downloading Strava activities"
          ):
              cache_file = (
                  pathlib.Path("Cache")
                  / "Strava Activity Metadata"
-                 / f"{activity.id}.pickle"
+                 / f"{strava_activity.id}.pickle"
              )
              # Sometimes we still get an activity here although it has already been imported from the Strava checkout.
              if cache_file.exists():
                  continue
              cache_file.parent.mkdir(exist_ok=True, parents=True)
              with open(cache_file, "wb") as f:
-                 pickle.dump(activity, f)
-             if activity.gear_id not in gear_names:
-                 gear = client.get_gear(activity.gear_id)
-                 gear_names[activity.gear_id] = (
+                 pickle.dump(strava_activity, f)
+             if strava_activity.gear_id not in gear_names:
+                 gear = client.get_gear(strava_activity.gear_id)
+                 gear_names[strava_activity.gear_id] = (
                      f"{gear.name}" or f"{gear.brand_name} {gear.model_name}"
                  )
  
              time_series_path = (
-                 activity_extracted_time_series_dir() / f"{activity.id}.parquet"
+                 activity_extracted_time_series_dir() / f"{strava_activity.id}.parquet"
              )
              if time_series_path.exists():
                  time_series = pd.read_parquet(time_series_path)
              else:
                  try:
-                     time_series = download_strava_time_series(activity.id, client)
+                     time_series = download_strava_time_series(
+                         strava_activity.id, client
+                     )
                  except ObjectNotFound as e:
                      logger.error(
-                         f"The activity {activity.id} with name “{activity.name}” cannot be found."
+                         f"The activity {strava_activity.id} with name “{strava_activity.name}” cannot be found."
                          f"Perhaps it is a manual activity without a time series. Ignoring. {e=}"
                      )
                      continue
-                 time_series.name = activity.id
+                 time_series.name = strava_activity.id
                  new_time = [
-                     activity.start_date + datetime.timedelta(seconds=time)
+                     strava_activity.start_date + datetime.timedelta(seconds=time)
                      for time in time_series["time"]
                  ]
                  del time_series["time"]
                  time_series["time"] = new_time
                  time_series.to_parquet(time_series_path)
  
-             detailed_activity = get_detailed_activity(activity.id, client)
+             detailed_activity = get_detailed_activity(strava_activity.id, client)
  
              if len(time_series) > 0 and "latitude" in time_series.columns:
-                 activity_meta = ActivityMeta(
-                     **{
-                         "id": activity.id,
-                         "commute": activity.commute,
-                         "distance_km": activity.distance / 1000,
-                         "name": activity.name,
-                         "kind": str(activity.type.root),
-                         "start": convert_to_datetime_ns(activity.start_date),
-                         "elapsed_time": activity.elapsed_time,
-                         "equipment": gear_names[activity.gear_id],
-                         "calories": detailed_activity.calories,
-                         "moving_time": activity.moving_time,
-                     }
+                 activity = Activity()
+                 activity.upstream_id = str(strava_activity.id)
+                 activity.distance_km = strava_activity.distance / 1000
+                 activity.name = strava_activity.name
+                 activity.kind = get_or_make_kind(str(strava_activity.type.root))
+                 activity.start = strava_activity.start_date.astimezone(
+                     zoneinfo.ZoneInfo("UTC")
+                 )
+                 activity.elapsed_time = strava_activity.elapsed_time
+                 activity.equipment = get_or_make_equipment(
+                     gear_names[strava_activity.gear_id], config
                  )
-                 with open(
-                     activity_extracted_meta_dir() / f"{activity.id}.pickle", "wb"
-                 ) as f:
-                     pickle.dump(activity_meta, f)
+                 activity.calories = detailed_activity.calories
+                 activity.moving_time = detailed_activity.moving_time
+ 
+                 update_and_commit(activity, time_series, config)
+                 compute_tile_visits_new(repository, tile_visit_accessor)
+                 compute_tile_evolution(tile_visit_accessor.tile_state, config)
+                 tile_visit_accessor.save()
  
              set_state(
                  strava_last_activity_date_path(),
-                 activity.start_date.isoformat().replace("+00:00", "Z"),
+                 strava_activity.start_date.isoformat().replace("+00:00", "Z"),
              )
  
      limit_exceeded = False
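
The import loop sleeps until the next quarter hour (plus ten seconds, per `seconds_to_wait` above) before retrying, matching Strava's rate-limit windows which reset on quarter-hour boundaries. Only the tail of `round_to_next_quarter_hour` is visible in this diff; one way such a rounding can be written, as an illustration rather than the package's exact code:

    import datetime

    def round_to_next_quarter_hour(date: datetime.datetime) -> datetime.datetime:
        # Truncate to the current quarter hour, then step one quarter forward.
        truncated = date.replace(
            minute=(date.minute // 15) * 15, second=0, microsecond=0
        )
        return truncated + datetime.timedelta(minutes=15)

    print(round_to_next_quarter_hour(datetime.datetime(2024, 5, 1, 12, 7, 30)))
    # 2024-05-01 12:15:00
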