geo-activity-playground 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. geo_activity_playground/alembic/versions/85fe0348e8a2_add_time_series_uuid_field.py +28 -0
  2. geo_activity_playground/alembic/versions/f2f50843be2d_make_all_fields_in_activity_nullable.py +34 -0
  3. geo_activity_playground/core/coordinates.py +12 -1
  4. geo_activity_playground/core/copernicus_dem.py +95 -0
  5. geo_activity_playground/core/datamodel.py +43 -16
  6. geo_activity_playground/core/enrichment.py +226 -164
  7. geo_activity_playground/core/paths.py +8 -0
  8. geo_activity_playground/core/test_pandas_timezone.py +36 -0
  9. geo_activity_playground/core/test_time_zone_from_location.py +7 -0
  10. geo_activity_playground/core/test_time_zone_import.py +93 -0
  11. geo_activity_playground/core/test_timezone_sqlalchemy.py +44 -0
  12. geo_activity_playground/core/tiles.py +4 -1
  13. geo_activity_playground/core/time_conversion.py +42 -14
  14. geo_activity_playground/explorer/tile_visits.py +7 -4
  15. geo_activity_playground/importers/activity_parsers.py +21 -22
  16. geo_activity_playground/importers/directory.py +62 -108
  17. geo_activity_playground/importers/strava_api.py +53 -36
  18. geo_activity_playground/importers/strava_checkout.py +30 -56
  19. geo_activity_playground/webui/app.py +40 -2
  20. geo_activity_playground/webui/blueprints/activity_blueprint.py +13 -11
  21. geo_activity_playground/webui/blueprints/entry_views.py +1 -1
  22. geo_activity_playground/webui/blueprints/explorer_blueprint.py +1 -7
  23. geo_activity_playground/webui/blueprints/heatmap_blueprint.py +2 -2
  24. geo_activity_playground/webui/blueprints/settings_blueprint.py +3 -14
  25. geo_activity_playground/webui/blueprints/summary_blueprint.py +6 -6
  26. geo_activity_playground/webui/blueprints/time_zone_fixer_blueprint.py +69 -0
  27. geo_activity_playground/webui/blueprints/upload_blueprint.py +3 -16
  28. geo_activity_playground/webui/columns.py +9 -1
  29. geo_activity_playground/webui/templates/activity/show.html.j2 +3 -1
  30. geo_activity_playground/webui/templates/hall_of_fame/index.html.j2 +1 -1
  31. geo_activity_playground/webui/templates/home.html.j2 +3 -2
  32. geo_activity_playground/webui/templates/page.html.j2 +2 -0
  33. geo_activity_playground/webui/templates/time_zone_fixer/index.html.j2 +31 -0
  34. {geo_activity_playground-1.2.0.dist-info → geo_activity_playground-1.3.0.dist-info}/METADATA +7 -3
  35. {geo_activity_playground-1.2.0.dist-info → geo_activity_playground-1.3.0.dist-info}/RECORD +38 -30
  36. geo_activity_playground/core/test_time_conversion.py +0 -37
  37. {geo_activity_playground-1.2.0.dist-info → geo_activity_playground-1.3.0.dist-info}/LICENSE +0 -0
  38. {geo_activity_playground-1.2.0.dist-info → geo_activity_playground-1.3.0.dist-info}/WHEEL +0 -0
  39. {geo_activity_playground-1.2.0.dist-info → geo_activity_playground-1.3.0.dist-info}/entry_points.txt +0 -0
geo_activity_playground/core/time_conversion.py
@@ -1,14 +1,42 @@
-import numpy as np
-import pandas as pd
-
-
-def convert_to_datetime_ns(date) -> np.datetime64 | pd.Series:
-    if isinstance(date, pd.Series):
-        ts = pd.to_datetime(date)
-        ts = ts.dt.tz_localize(None)
-        return ts
-    else:
-        ts = pd.to_datetime(date)
-        if ts.tzinfo is not None:
-            ts = ts.tz_localize(None)
-        return ts.to_datetime64()
+import datetime
+import json
+import logging
+import zoneinfo
+
+import requests
+
+from .paths import USER_CACHE_DIR
+
+logger = logging.getLogger(__name__)
+
+
+def sanitize_datetime(
+    dt: datetime.datetime, fallback_from: str, fallback_to: str
+) -> datetime.datetime:
+    if dt.tzinfo is None:
+        dt = dt.replace(tzinfo=zoneinfo.ZoneInfo(fallback_from))
+    return dt.astimezone(zoneinfo.ZoneInfo(fallback_to))
+
+
+def get_country_timezone(latitude: float, longitude: float) -> tuple[str, str]:
+    cache_file = USER_CACHE_DIR / "geotimezone" / f"{latitude:.5f}-{longitude:.5f}.json"
+    data = {}
+    if cache_file.exists():
+        try:
+            with open(cache_file) as f:
+                data = json.load(f)
+        except json.decoder.JSONDecodeError as e:
+            logger.warning(
+                f"'{cache_file}' could not be parsed ('{e}'). Deleting and trying again."
+            )
+            cache_file.unlink()
+
+    if not cache_file.exists():
+        url = f"https://api.geotimezone.com/public/timezone?latitude={latitude}&longitude={longitude}"
+        r = requests.get(url)
+        r.raise_for_status()
+        data = r.json()
+        cache_file.parent.mkdir(exist_ok=True, parents=True)
+        with open(cache_file, "w") as f:
+            json.dump(data, f)
+    return data["location"], data["iana_timezone"]
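This rewrite replaces the old strip-the-timezone normalization with explicit time zone handling: sanitize_datetime reinterprets naive datetimes in a fallback zone and converts them to a target zone, while get_country_timezone resolves a coordinate to a location name and IANA time zone via the geotimezone.com API, caching each answer on disk. A minimal usage sketch (the coordinates and zone names are illustrative):

    import datetime

    from geo_activity_playground.core.time_conversion import get_country_timezone
    from geo_activity_playground.core.time_conversion import sanitize_datetime

    # A naive device timestamp: assume it was recorded in UTC, render it in local time.
    naive = datetime.datetime(2024, 7, 1, 12, 0)
    local = sanitize_datetime(naive, fallback_from="UTC", fallback_to="Europe/Berlin")
    print(local)  # 2024-07-01 14:00:00+02:00

    # Resolve the IANA time zone for a point in Berlin; the response is cached
    # under USER_CACHE_DIR, so repeated calls avoid the network.
    location, tz_name = get_country_timezone(52.52000, 13.40500)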
geo_activity_playground/explorer/tile_visits.py
@@ -143,7 +143,7 @@ def compute_tile_visits_new(
     work_tracker.reset()

     for activity_id in tqdm(
-        work_tracker.filter(repository.get_activity_ids()), desc="Tile visits"
+        work_tracker.filter(repository.get_activity_ids()), desc="Tile visits", delay=2
     ):
         _process_activity(repository, tile_visit_accessor.tile_state, activity_id)
         work_tracker.mark_done(activity_id)
@@ -213,7 +213,6 @@ def _process_activity(
 def _tiles_from_points(
     time_series: pd.DataFrame, zoom: int
 ) -> Iterator[tuple[datetime.datetime, int, int]]:
-    assert pd.api.types.is_dtype_equal(time_series["time"].dtype, "datetime64[ns]")
     xf = time_series["x"] * 2**zoom
     yf = time_series["y"] * 2**zoom
     for t1, x1, y1, x2, y2, s1, s2 in zip(
@@ -257,7 +256,9 @@ def _compute_cluster_evolution(

     rows = []
     for index, row in tqdm(
-        tiles.iloc[s.cluster_start :].iterrows(), desc=f"Cluster evolution for {zoom=}"
+        tiles.iloc[s.cluster_start :].iterrows(),
+        desc=f"Cluster evolution for {zoom=}",
+        delay=2,
     ):
         new_clusters = False
         # Current tile.
@@ -334,7 +335,9 @@ def _compute_square_history(
 ) -> None:
     rows = []
     for index, row in tqdm(
-        tiles.iloc[s.square_start :].iterrows(), desc=f"Square evolution for {zoom=}"
+        tiles.iloc[s.square_start :].iterrows(),
+        desc=f"Square evolution for {zoom=}",
+        delay=2,
     ):
         tile = (row["tile_x"], row["tile_y"])
         x, y = tile
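All three progress bars gain delay=2, a stock tqdm keyword that keeps the bar hidden unless the loop is still running after that many seconds. Now that tile state is recomputed after each individual import, this keeps short incremental updates quiet while long recomputations still show progress. A small sketch of the behaviour:

    import time

    from tqdm import tqdm

    # Finishes in well under two seconds, so no progress bar is drawn at all.
    for _ in tqdm(range(10), desc="Tile visits", delay=2):
        time.sleep(0.05)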
geo_activity_playground/importers/activity_parsers.py
@@ -13,8 +13,8 @@ import pandas as pd
 import tcxreader.tcxreader
 import xmltodict

-from ..core.datamodel import ActivityMeta
-from ..core.time_conversion import convert_to_datetime_ns
+from ..core.datamodel import Activity
+from ..core.datamodel import get_or_make_kind

 logger = logging.getLogger(__name__)

@@ -23,9 +23,9 @@ class ActivityParseError(BaseException):
     pass


-def read_activity(path: pathlib.Path) -> tuple[ActivityMeta, pd.DataFrame]:
+def read_activity(path: pathlib.Path) -> tuple[Activity, pd.DataFrame]:
     suffixes = [s.lower() for s in path.suffixes]
-    metadata = ActivityMeta()
+    activity = Activity()

     if suffixes[-1] == ".gz":
         opener = gzip.open
@@ -43,7 +43,7 @@ def read_activity(path: pathlib.Path) -> tuple[ActivityMeta, pd.DataFrame]:
         raise ActivityParseError(f"Encoding issue") from e
     elif file_type == ".fit":
         try:
-            metadata, timeseries = read_fit_activity(path, opener)
+            activity, timeseries = read_fit_activity(path, opener)
         except fitdecode.exceptions.FitError as e:
             raise ActivityParseError(f"Error in FIT file") from e
         except KeyError as e:
@@ -60,10 +60,10 @@ def read_activity(path: pathlib.Path) -> tuple[ActivityMeta, pd.DataFrame]:
     else:
         raise ActivityParseError(f"Unsupported file format: {file_type}")

-    return metadata, timeseries
+    return activity, timeseries


-def read_fit_activity(path: pathlib.Path, open) -> tuple[ActivityMeta, pd.DataFrame]:
+def read_fit_activity(path: pathlib.Path, open) -> tuple[Activity, pd.DataFrame]:
     """
     {'timestamp': datetime.datetime(2023, 11, 11, 16, 29, 49, tzinfo=datetime.timezone.utc),
      'position_lat': <int>,
@@ -82,7 +82,7 @@ def read_fit_activity(path: pathlib.Path, open) -> tuple[ActivityMeta, pd.DataFrame]:
      'ascent': 35,
      'descent': 11}
     """
-    metadata = ActivityMeta()
+    activity = Activity()
     rows = []
     with open(path, "rb") as f:
         with fitdecode.FitReader(f) as fit:
@@ -103,10 +103,9 @@ def read_fit_activity(path: pathlib.Path, open) -> tuple[ActivityMeta, pd.DataFrame]:
                     if isinstance(time, datetime.datetime):
                         pass
                     elif time is None or isinstance(time, int):
-                        time = pd.NaT
+                        time = None
                     else:
                         raise RuntimeError(f"Cannot parse time: {time} in {path}.")
-                    time = convert_to_datetime_ns(time)
                     row = {
                         "time": time,
                         "latitude": values["position_lat"] / ((2**32) / 360),
@@ -114,7 +113,9 @@ def read_fit_activity(path: pathlib.Path, open) -> tuple[ActivityMeta, pd.DataFrame]:
                     }
                     if "heart_rate" in fields:
                         row["heartrate"] = values["heart_rate"]
-                    if "calories" in fields:
+                    if "calories" in fields and isinstance(
+                        values["calories"], float
+                    ):
                         row["calories"] = values["calories"]
                     if "cadence" in fields:
                         row["cadence"] = values["cadence"]
@@ -142,17 +143,18 @@ def read_fit_activity(path: pathlib.Path, open) -> tuple[ActivityMeta, pd.DataFrame]:

                # Additional meta data fields as documented in https://developer.garmin.com/fit/file-types/workout/.
                if "wkt_name" in fields:
-                    metadata["name"] = values["wkt_name"]
+                    activity.name = values["wkt_name"]
                if "sport" in fields:
-                    metadata["kind"] = str(values["sport"])
+                    kind_name = str(values["sport"])
                    if "sub_sport" in values:
-                        metadata["kind"] += " " + str(values["sub_sport"])
+                        kind_name += " " + str(values["sub_sport"])
+                    activity.kind = get_or_make_kind(kind_name)
                if "total_calories" in fields:
-                    metadata["calories"] = values["total_calories"]
+                    activity.calories = values["total_calories"]
                if "total_strides" in fields:
-                    metadata["steps"] = 2 * int(values["total_strides"])
+                    activity.steps = 2 * int(values["total_strides"])

-    return metadata, pd.DataFrame(rows)
+    return activity, pd.DataFrame(rows)


 def _fit_speed_unit_factor(unit: str) -> float:
@@ -184,8 +186,7 @@ def read_gpx_activity(path: pathlib.Path, open) -> pd.DataFrame:
                elif isinstance(point.time, str):
                    time = dateutil.parser.parse(str(point.time))
                else:
-                    time = pd.NaT
-                time = convert_to_datetime_ns(time)
+                    time = None
                points.append((time, point.latitude, point.longitude, point.elevation))

    df = pd.DataFrame(points, columns=["time", "latitude", "longitude", "elevation"])
@@ -223,7 +224,6 @@ def read_tcx_activity(path: pathlib.Path, opener) -> pd.DataFrame:
        if trackpoint.latitude and trackpoint.longitude:
            time = trackpoint.time
            assert isinstance(time, datetime.datetime)
-            time = convert_to_datetime_ns(time)
            row = {
                "time": time,
                "latitude": trackpoint.latitude,
@@ -252,7 +252,6 @@ def read_kml_activity(path: pathlib.Path, opener) -> pd.DataFrame:
        for track in _list_or_scalar(placemark.get("gx:Track", [])):
            for when, where in zip(track["when"], track["gx:coord"]):
                time = dateutil.parser.parse(when)
-                time = convert_to_datetime_ns(time)
                parts = where.split(" ")
                if len(parts) == 2:
                    lon, lat = parts
@@ -282,7 +281,7 @@ def read_simra_activity(path: pathlib.Path, opener) -> pd.DataFrame:
    data["time"] = data["timeStamp"].apply(
        lambda d: datetime.datetime.fromtimestamp(d / 1000)
    )
-    data["time"] = convert_to_datetime_ns(data["time"])
+    data["time"] = data["time"]
    data = data.rename(columns={"lat": "latitude", "lon": "longitude"})
    return data.dropna(subset=["latitude"], ignore_index=True)[
        ["time", "latitude", "longitude"]
geo_activity_playground/importers/directory.py
@@ -1,21 +1,21 @@
-import hashlib
 import logging
 import pathlib
-import pickle
 import re
 import traceback
-from typing import Optional

-from tqdm import tqdm
+import sqlalchemy

+from ..core.activities import ActivityRepository
 from ..core.config import Config
-from ..core.datamodel import ActivityMeta
+from ..core.datamodel import Activity
+from ..core.datamodel import DB
 from ..core.datamodel import DEFAULT_UNKNOWN_NAME
-from ..core.paths import activity_extracted_dir
-from ..core.paths import activity_extracted_meta_dir
-from ..core.paths import activity_extracted_time_series_dir
-from ..core.tasks import stored_object
-from ..core.tasks import WorkTracker
+from ..core.datamodel import get_or_make_equipment
+from ..core.datamodel import get_or_make_kind
+from ..core.enrichment import update_and_commit
+from ..explorer.tile_visits import compute_tile_evolution
+from ..explorer.tile_visits import compute_tile_visits_new
+from ..explorer.tile_visits import TileVisitAccessor
 from .activity_parsers import ActivityParseError
 from .activity_parsers import read_activity

@@ -25,9 +25,10 @@ ACTIVITY_DIR = pathlib.Path("Activities")


 def import_from_directory(
-    metadata_extraction_regexes: list[str], config: Config
+    repository: ActivityRepository,
+    tile_visit_accessor: TileVisitAccessor,
+    config: Config,
 ) -> None:
-
     activity_paths = [
         path
         for path in ACTIVITY_DIR.rglob("*.*")
@@ -36,105 +37,58 @@ def import_from_directory(
         and not path.stem.startswith(".")
         and not path.suffix in config.ignore_suffixes
     ]
-    work_tracker = WorkTracker(activity_extracted_dir() / "work-tracker-extract.pickle")
-    new_activity_paths = work_tracker.filter(activity_paths)
-
-    with stored_object(
-        activity_extracted_dir() / "file-hashes.pickle", {}
-    ) as file_hashes:
-        for path in tqdm(new_activity_paths, desc="Detect deleted activities"):
-            file_hashes[path] = get_file_hash(path)
-
-        deleted_files = set(file_hashes.keys()) - set(activity_paths)
-        deleted_hashes = [file_hashes[path] for path in deleted_files]
-        for deleted_hash in deleted_hashes:
-            activity_extracted_meta_path = (
-                activity_extracted_meta_dir() / f"{deleted_hash}.pickle"
-            )
-            activity_extracted_time_series_path = (
-                activity_extracted_time_series_dir() / f"{deleted_hash}.parquet"
+
+    for activity_path in activity_paths:
+        with DB.session.no_autoflush:
+            activity = DB.session.scalar(
+                sqlalchemy.select(Activity).filter(Activity.path == str(activity_path))
             )
-            logger.warning(f"Deleting {activity_extracted_meta_path}")
-            logger.warning(f"Deleting {activity_extracted_time_series_path}")
-            activity_extracted_meta_path.unlink(missing_ok=True)
-            activity_extracted_time_series_path.unlink(missing_ok=True)
-        for deleted_file in deleted_files:
-            logger.warning(f"Deleting {deleted_file}")
-            del file_hashes[deleted_file]
-            work_tracker.discard(deleted_file)
-
-    paths_with_errors = []
-    for path in tqdm(new_activity_paths, desc="Parse activity metadata (serially)"):
-        errors = _cache_single_file(path)
-        if errors:
-            paths_with_errors.append(errors)
-
-    for path in tqdm(new_activity_paths, desc="Collate activity metadata"):
-        activity_id = get_file_hash(path)
-        file_metadata_path = activity_extracted_meta_dir() / f"{activity_id}.pickle"
-        work_tracker.mark_done(path)
-
-        if not file_metadata_path.exists():
-            continue
-
-        with open(file_metadata_path, "rb") as f:
-            activity_meta_from_file = pickle.load(f)
-
-        activity_meta = ActivityMeta(
-            id=activity_id,
-            # https://stackoverflow.com/a/74718395/653152
-            name=path.name.removesuffix("".join(path.suffixes)),
-            path=str(path),
-            kind=DEFAULT_UNKNOWN_NAME,
-            equipment=DEFAULT_UNKNOWN_NAME,
-            consider_for_achievements=True,
+        if activity is None:
+            import_from_file(activity_path, repository, tile_visit_accessor, config)
+
+
+def import_from_file(
+    path: pathlib.Path,
+    repository: ActivityRepository,
+    tile_visit_accessor: TileVisitAccessor,
+    config: Config,
+) -> None:
+    logger.info(f"Importing {path} ")
+    try:
+        activity, time_series = read_activity(path)
+    except ActivityParseError as e:
+        logger.error(f"Error while parsing file {path}:")
+        traceback.print_exc()
+        return
+    except:
+        logger.error(f"Encountered a problem with {path=}, see details below.")
+        raise
+
+    if len(time_series) == 0:
+        logger.warning(f"Activity with {path=} has no time series data, skipping.")
+        return
+
+    activity.path = str(path)
+    if activity.name is None:
+        activity.name = path.name.removesuffix("".join(path.suffixes))
+
+    meta_from_path = _get_metadata_from_path(path, config.metadata_extraction_regexes)
+    activity.name = meta_from_path.get("name", activity.name)
+    if activity.equipment is None:
+        activity.equipment = get_or_make_equipment(
+            meta_from_path.get("equipment", DEFAULT_UNKNOWN_NAME), config
        )
-        activity_meta.update(activity_meta_from_file)
-        activity_meta.update(_get_metadata_from_path(path, metadata_extraction_regexes))
-        with open(file_metadata_path, "wb") as f:
-            pickle.dump(activity_meta, f)
-
-    if paths_with_errors:
-        logger.warning(
-            "There were errors while parsing some of the files. These were skipped and tried again next time."
+    if activity.kind is None:
+        activity.kind = get_or_make_kind(
+            meta_from_path.get("kind", DEFAULT_UNKNOWN_NAME)
        )
-        for path, error in paths_with_errors:
-            logger.error(f"{path}: {error}")
-
-    work_tracker.close()
-
-
-def _cache_single_file(path: pathlib.Path) -> Optional[tuple[pathlib.Path, str]]:
-    activity_id = get_file_hash(path)
-    timeseries_path = activity_extracted_time_series_dir() / f"{activity_id}.parquet"
-    file_metadata_path = activity_extracted_meta_dir() / f"{activity_id}.pickle"
-
-    if not timeseries_path.exists():
-        try:
-            activity_meta_from_file, timeseries = read_activity(path)
-        except ActivityParseError as e:
-            logger.error(f"Error while parsing file {path}:")
-            traceback.print_exc()
-            return path, str(e)
-        except:
-            logger.error(f"Encountered a problem with {path=}, see details below.")
-            raise
-
-        if len(timeseries) == 0:
-            return None
-
-        timeseries.to_parquet(timeseries_path)
-        with open(file_metadata_path, "wb") as f:
-            pickle.dump(activity_meta_from_file, f)
-    return None
-
-
-def get_file_hash(path: pathlib.Path) -> int:
-    file_hash = hashlib.blake2s()
-    with open(path, "rb") as f:
-        while chunk := f.read(8192):
-            file_hash.update(chunk)
-    return int(file_hash.hexdigest(), 16) % 2**62
+
+    update_and_commit(activity, time_series, config)
+
+    if len(repository) > 0:
+        compute_tile_visits_new(repository, tile_visit_accessor)
+        compute_tile_evolution(tile_visit_accessor.tile_state, config)
+        tile_visit_accessor.save()


 def _get_metadata_from_path(
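The directory importer now treats the database as the single source of truth for what has been imported: instead of hashing file contents and tracking progress in pickle files, it looks each path up in the Activity table and only parses files without a matching row. A hedged standalone sketch of that lookup (the session wiring and helper name are illustrative; in the package the session comes from core.datamodel.DB):

    import pathlib

    import sqlalchemy
    from sqlalchemy.orm import Session

    # Hypothetical helper mirroring the query in import_from_directory; "Activity"
    # is the ORM class from geo_activity_playground.core.datamodel.
    def is_already_imported(session: Session, Activity, path: pathlib.Path) -> bool:
        activity = session.scalar(
            sqlalchemy.select(Activity).filter(Activity.path == str(path))
        )
        return activity is not None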
geo_activity_playground/importers/strava_api.py
@@ -3,6 +3,7 @@ import logging
 import pathlib
 import pickle
 import time
+import zoneinfo

 import pandas as pd
 from stravalib import Client
@@ -11,15 +12,20 @@ from stravalib.exc import ObjectNotFound
 from stravalib.exc import RateLimitExceeded
 from tqdm import tqdm

+from ..core.activities import ActivityRepository
 from ..core.config import Config
-from ..core.datamodel import ActivityMeta
-from ..core.paths import activity_extracted_meta_dir
+from ..core.datamodel import Activity
+from ..core.datamodel import DB
+from ..core.datamodel import get_or_make_equipment
+from ..core.datamodel import get_or_make_kind
+from ..core.enrichment import apply_enrichments
+from ..core.enrichment import update_and_commit
 from ..core.paths import activity_extracted_time_series_dir
 from ..core.paths import strava_api_dir
 from ..core.paths import strava_last_activity_date_path
 from ..core.tasks import get_state
 from ..core.tasks import set_state
-from ..core.time_conversion import convert_to_datetime_ns
+from ..explorer.tile_visits import TileVisitAccessor


 logger = logging.getLogger(__name__)
@@ -68,8 +74,12 @@ def round_to_next_quarter_hour(date: datetime.datetime) -> datetime.datetime:
     return next_quarter


-def import_from_strava_api(config: Config) -> None:
-    while try_import_strava(config):
+def import_from_strava_api(
+    config: Config,
+    repository: ActivityRepository,
+    tile_visit_accessor: TileVisitAccessor,
+) -> None:
+    while try_import_strava(config, repository, tile_visit_accessor):
         now = datetime.datetime.now()
         next_quarter = round_to_next_quarter_hour(now)
         seconds_to_wait = (next_quarter - now).total_seconds() + 10
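The loop sleeps until shortly past the next quarter hour because Strava's API rate-limit windows reset on quarter hours. round_to_next_quarter_hour itself is unchanged and its body is not part of this diff; a plausible implementation consistent with its signature and the call sites (an assumption, not the packaged code):

    import datetime

    def round_to_next_quarter_hour(date: datetime.datetime) -> datetime.datetime:
        # Round strictly up to the next 15-minute boundary.
        minutes_past = (date.minute // 15 + 1) * 15
        return date.replace(minute=0, second=0, microsecond=0) + datetime.timedelta(
            minutes=minutes_past
        )

    now = datetime.datetime(2024, 7, 1, 12, 7, 30)
    print(round_to_next_quarter_hour(now))  # 2024-07-01 12:15:00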
@@ -79,7 +89,11 @@ def import_from_strava_api(config: Config) -> None:
         time.sleep(seconds_to_wait)


-def try_import_strava(config: Config) -> bool:
+def try_import_strava(
+    config: Config,
+    repository: ActivityRepository,
+    tile_visit_accessor: TileVisitAccessor,
+) -> bool:
     get_after = get_state(strava_last_activity_date_path(), "2000-01-01T00:00:00Z")

     gear_names = {None: "None"}
@@ -87,74 +101,77 @@ def try_import_strava(config: Config) -> bool:
     client = Client(access_token=get_current_access_token(config))

     try:
-        for activity in tqdm(
+        for strava_activity in tqdm(
             client.get_activities(after=get_after), desc="Downloading Strava activities"
         ):
             cache_file = (
                 pathlib.Path("Cache")
                 / "Strava Activity Metadata"
-                / f"{activity.id}.pickle"
+                / f"{strava_activity.id}.pickle"
             )
             # Sometimes we still get an activity here although it has already been imported from the Strava checkout.
             if cache_file.exists():
                 continue
             cache_file.parent.mkdir(exist_ok=True, parents=True)
             with open(cache_file, "wb") as f:
-                pickle.dump(activity, f)
-            if activity.gear_id not in gear_names:
-                gear = client.get_gear(activity.gear_id)
-                gear_names[activity.gear_id] = (
+                pickle.dump(strava_activity, f)
+            if strava_activity.gear_id not in gear_names:
+                gear = client.get_gear(strava_activity.gear_id)
+                gear_names[strava_activity.gear_id] = (
                     f"{gear.name}" or f"{gear.brand_name} {gear.model_name}"
                 )

             time_series_path = (
-                activity_extracted_time_series_dir() / f"{activity.id}.parquet"
+                activity_extracted_time_series_dir() / f"{strava_activity.id}.parquet"
             )
             if time_series_path.exists():
                 time_series = pd.read_parquet(time_series_path)
             else:
                 try:
-                    time_series = download_strava_time_series(activity.id, client)
+                    time_series = download_strava_time_series(
+                        strava_activity.id, client
+                    )
                 except ObjectNotFound as e:
                     logger.error(
-                        f"The activity {activity.id} with name “{activity.name}” cannot be found."
+                        f"The activity {strava_activity.id} with name “{strava_activity.name}” cannot be found."
                         f"Perhaps it is a manual activity without a time series. Ignoring. {e=}"
                     )
                     continue
-            time_series.name = activity.id
+            time_series.name = strava_activity.id
             new_time = [
-                activity.start_date + datetime.timedelta(seconds=time)
+                strava_activity.start_date + datetime.timedelta(seconds=time)
                 for time in time_series["time"]
             ]
             del time_series["time"]
             time_series["time"] = new_time
             time_series.to_parquet(time_series_path)

-            detailed_activity = get_detailed_activity(activity.id, client)
+            detailed_activity = get_detailed_activity(strava_activity.id, client)

             if len(time_series) > 0 and "latitude" in time_series.columns:
-                activity_meta = ActivityMeta(
-                    **{
-                        "id": activity.id,
-                        "commute": activity.commute,
-                        "distance_km": activity.distance / 1000,
-                        "name": activity.name,
-                        "kind": str(activity.type.root),
-                        "start": convert_to_datetime_ns(activity.start_date),
-                        "elapsed_time": activity.elapsed_time,
-                        "equipment": gear_names[activity.gear_id],
-                        "calories": detailed_activity.calories,
-                        "moving_time": activity.moving_time,
-                    }
+                activity = Activity()
+                activity.upstream_id = str(strava_activity.id)
+                activity.distance_km = strava_activity.distance / 1000
+                activity.name = strava_activity.name
+                activity.kind = get_or_make_kind(str(strava_activity.type.root))
+                activity.start = strava_activity.start_date.astimezone(
+                    zoneinfo.ZoneInfo("UTC")
+                )
+                activity.elapsed_time = strava_activity.elapsed_time
+                activity.equipment = get_or_make_equipment(
+                    gear_names[strava_activity.gear_id], config
                 )
-                with open(
-                    activity_extracted_meta_dir() / f"{activity.id}.pickle", "wb"
-                ) as f:
-                    pickle.dump(activity_meta, f)
+                activity.calories = detailed_activity.calories
+                activity.moving_time = detailed_activity.moving_time
+
+                update_and_commit(activity, time_series, config)
+                compute_tile_visits_new(repository, tile_visit_accessor)
+                compute_tile_evolution(tile_visit_accessor.tile_state, config)
+                tile_visit_accessor.save()

             set_state(
                 strava_last_activity_date_path(),
-                activity.start_date.isoformat().replace("+00:00", "Z"),
+                strava_activity.start_date.isoformat().replace("+00:00", "Z"),
             )

         limit_exceeded = False
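The metadata mapping also changes how start times are stored: 1.2.0 converted activity.start_date to a naive datetime via convert_to_datetime_ns, while 1.3.0 keeps a tz-aware datetime normalized to UTC. A small sketch of the new behaviour using only the standard library (values illustrative):

    import datetime
    import zoneinfo

    start = datetime.datetime(
        2024, 7, 1, 16, 30, tzinfo=zoneinfo.ZoneInfo("America/New_York")
    )

    # Same instant, zone normalized to UTC.
    start_utc = start.astimezone(zoneinfo.ZoneInfo("UTC"))
    print(start_utc)  # 2024-07-01 20:30:00+00:00

    # The state marker format written via set_state above:
    print(start_utc.isoformat().replace("+00:00", "Z"))  # 2024-07-01T20:30:00Z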