geo-activity-playground 0.26.3__py3-none-any.whl → 0.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. geo_activity_playground/__main__.py +23 -20
  2. geo_activity_playground/core/activities.py +1 -44
  3. geo_activity_playground/core/config.py +111 -0
  4. geo_activity_playground/core/enrichment.py +11 -2
  5. geo_activity_playground/core/heart_rate.py +49 -0
  6. geo_activity_playground/core/paths.py +6 -0
  7. geo_activity_playground/core/tasks.py +14 -0
  8. geo_activity_playground/core/tiles.py +1 -1
  9. geo_activity_playground/explorer/tile_visits.py +23 -11
  10. geo_activity_playground/importers/csv_parser.py +73 -0
  11. geo_activity_playground/importers/directory.py +17 -8
  12. geo_activity_playground/importers/strava_api.py +20 -44
  13. geo_activity_playground/importers/strava_checkout.py +57 -32
  14. geo_activity_playground/importers/test_csv_parser.py +49 -0
  15. geo_activity_playground/webui/activity/blueprint.py +3 -4
  16. geo_activity_playground/webui/activity/controller.py +40 -14
  17. geo_activity_playground/webui/activity/templates/activity/show.html.j2 +6 -2
  18. geo_activity_playground/webui/app.py +26 -26
  19. geo_activity_playground/webui/eddington/controller.py +1 -1
  20. geo_activity_playground/webui/equipment/blueprint.py +5 -2
  21. geo_activity_playground/webui/equipment/controller.py +5 -6
  22. geo_activity_playground/webui/explorer/blueprint.py +14 -2
  23. geo_activity_playground/webui/explorer/controller.py +21 -1
  24. geo_activity_playground/webui/explorer/templates/explorer/index.html.j2 +12 -1
  25. geo_activity_playground/webui/settings/blueprint.py +106 -0
  26. geo_activity_playground/webui/settings/controller.py +228 -0
  27. geo_activity_playground/webui/settings/templates/settings/equipment-offsets.html.j2 +44 -0
  28. geo_activity_playground/webui/settings/templates/settings/heart-rate.html.j2 +102 -0
  29. geo_activity_playground/webui/settings/templates/settings/index.html.j2 +74 -0
  30. geo_activity_playground/webui/settings/templates/settings/kinds-without-achievements.html.j2 +30 -0
  31. geo_activity_playground/webui/settings/templates/settings/metadata-extraction.html.j2 +55 -0
  32. geo_activity_playground/webui/settings/templates/settings/privacy-zones.html.j2 +81 -0
  33. geo_activity_playground/webui/{strava/templates/strava/client-id.html.j2 → settings/templates/settings/strava.html.j2} +17 -7
  34. geo_activity_playground/webui/templates/page.html.j2 +5 -1
  35. geo_activity_playground/webui/upload/blueprint.py +10 -1
  36. geo_activity_playground/webui/upload/controller.py +24 -11
  37. geo_activity_playground/webui/upload/templates/upload/reload.html.j2 +16 -0
  38. {geo_activity_playground-0.26.3.dist-info → geo_activity_playground-0.27.0.dist-info}/METADATA +1 -1
  39. {geo_activity_playground-0.26.3.dist-info → geo_activity_playground-0.27.0.dist-info}/RECORD +42 -35
  40. geo_activity_playground/webui/strava/__init__.py +0 -0
  41. geo_activity_playground/webui/strava/blueprint.py +0 -33
  42. geo_activity_playground/webui/strava/controller.py +0 -49
  43. geo_activity_playground/webui/strava/templates/strava/connected.html.j2 +0 -14
  44. geo_activity_playground/webui/templates/settings.html.j2 +0 -24
  45. {geo_activity_playground-0.26.3.dist-info → geo_activity_playground-0.27.0.dist-info}/LICENSE +0 -0
  46. {geo_activity_playground-0.26.3.dist-info → geo_activity_playground-0.27.0.dist-info}/WHEEL +0 -0
  47. {geo_activity_playground-0.26.3.dist-info → geo_activity_playground-0.27.0.dist-info}/entry_points.txt +0 -0
@@ -2,16 +2,17 @@ import argparse
2
2
  import logging
3
3
  import os
4
4
  import pathlib
5
- import sys
6
5
 
7
6
  import coloredlogs
8
7
 
9
8
  from .importers.strava_checkout import convert_strava_checkout
10
9
  from geo_activity_playground.core.activities import ActivityRepository
11
- from geo_activity_playground.core.config import get_config
10
+ from geo_activity_playground.core.config import ConfigAccessor
11
+ from geo_activity_playground.core.config import import_old_config
12
+ from geo_activity_playground.core.config import import_old_strava_config
12
13
  from geo_activity_playground.explorer.tile_visits import TileVisitAccessor
13
14
  from geo_activity_playground.explorer.video import explorer_video_main
14
- from geo_activity_playground.webui.app import webui_main
15
+ from geo_activity_playground.webui.app import web_ui_main
15
16
  from geo_activity_playground.webui.upload.controller import scan_for_activities
16
17
 
17
18
  logger = logging.getLogger(__name__)
@@ -62,8 +63,8 @@ def main() -> None:
62
63
 
63
64
  subparser = subparsers.add_parser("serve", help="Launch webserver")
64
65
  subparser.set_defaults(
65
- func=lambda options: webui_main(
66
- *make_activity_repository(options.basedir, options.skip_strava),
66
+ func=lambda options: web_ui_main(
67
+ *make_activity_repository(options.basedir, options.skip_reload),
67
68
  host=options.host,
68
69
  port=options.port,
69
70
  )
@@ -74,12 +75,10 @@ def main() -> None:
74
75
  subparser.add_argument(
75
76
  "--port", default=5000, type=int, help="the port to run listen on"
76
77
  )
77
- subparser.add_argument("--skip-strava", action=argparse.BooleanOptionalAction)
78
+ subparser.add_argument("--skip-reload", action=argparse.BooleanOptionalAction)
78
79
 
79
80
  subparser = subparsers.add_parser("cache", help="Cache stuff")
80
- subparser.set_defaults(
81
- func=lambda options: make_activity_repository(options.basedir, False)
82
- )
81
+ subparser.set_defaults(func=lambda options: main_cache(options.basedir))
83
82
 
84
83
  options = parser.parse_args()
85
84
  coloredlogs.install(
@@ -93,23 +92,27 @@ def main() -> None:
93
92
 
94
93
 
95
94
  def make_activity_repository(
96
- basedir: pathlib.Path, skip_strava: bool
97
- ) -> tuple[ActivityRepository, TileVisitAccessor, dict]:
95
+ basedir: pathlib.Path, skip_reload: bool
96
+ ) -> tuple[ActivityRepository, TileVisitAccessor, ConfigAccessor]:
98
97
  os.chdir(basedir)
99
- config = get_config()
100
-
101
- if not config.get("prefer_metadata_from_file", True):
102
- logger.error(
103
- "The config option `prefer_metadata_from_file` is deprecated. If you want to prefer extract metadata from the activity file paths, please use the new `metadata_extraction_regexes` as explained at https://martin-ueding.github.io/geo-activity-playground/getting-started/using-activity-files/#directory-structure."
104
- )
105
- sys.exit(1)
106
98
 
107
99
  repository = ActivityRepository()
108
100
  tile_visit_accessor = TileVisitAccessor()
101
+ config_accessor = ConfigAccessor()
102
+ import_old_config(config_accessor)
103
+ import_old_strava_config(config_accessor)
109
104
 
110
- scan_for_activities(repository, tile_visit_accessor, config, skip_strava)
105
+ if not skip_reload:
106
+ scan_for_activities(repository, tile_visit_accessor, config_accessor())
111
107
 
112
- return repository, tile_visit_accessor, config
108
+ return repository, tile_visit_accessor, config_accessor
109
+
110
+
111
+ def main_cache(basedir: pathlib.Path) -> None:
112
+ repository, tile_visit_accessor, config_accessor = make_activity_repository(
113
+ basedir, False
114
+ )
115
+ scan_for_activities(repository, tile_visit_accessor, config_accessor())
113
116
 
114
117
 
115
118
  if __name__ == "__main__":
@@ -12,7 +12,6 @@ import numpy as np
12
12
  import pandas as pd
13
13
  from tqdm import tqdm
14
14
 
15
- from geo_activity_playground.core.config import get_config
16
15
  from geo_activity_playground.core.paths import activities_file
17
16
  from geo_activity_playground.core.paths import activity_enriched_meta_dir
18
17
  from geo_activity_playground.core.paths import activity_enriched_time_series_dir
@@ -36,7 +35,7 @@ class ActivityMeta(TypedDict):
36
35
  path: str
37
36
  start_latitude: float
38
37
  start_longitude: float
39
- start: datetime.datetime
38
+ start: np.datetime64
40
39
  steps: int
41
40
 
42
41
 
@@ -210,45 +209,3 @@ def make_speed_color_bar(time_series: pd.DataFrame) -> dict[str, str]:
210
209
  for speed in np.linspace(low, high, 10)
211
210
  ]
212
211
  return {"low": low, "high": high, "colors": colors}
213
-
214
-
215
- def extract_heart_rate_zones(time_series: pd.DataFrame) -> Optional[pd.DataFrame]:
216
- if "heartrate" not in time_series:
217
- return None
218
- config = get_config()
219
- try:
220
- heart_config = config["heart"]
221
- except KeyError:
222
- logger.warning(
223
- "Missing config entry `heart`, cannot determine heart rate zones."
224
- )
225
- return None
226
-
227
- birthyear = heart_config.get("birthyear", None)
228
- maximum = heart_config.get("maximum", None)
229
- resting = heart_config.get("resting", None)
230
-
231
- if not maximum and birthyear:
232
- age = time_series["time"].iloc[0].year - birthyear
233
- maximum = 220 - age
234
- if not resting:
235
- resting = 0
236
- if not maximum:
237
- logger.warning(
238
- "Missing config entry `heart.maximum` or `heart.birthyear`, cannot determine heart rate zones."
239
- )
240
- return None
241
-
242
- zones: pd.Series = (time_series["heartrate"] - resting) * 10 // (
243
- maximum - resting
244
- ) - 4
245
- zones.loc[zones < 0] = 0
246
- zones.loc[zones > 5] = 5
247
- df = pd.DataFrame({"heartzone": zones, "step": time_series["time"].diff()}).dropna()
248
- duration_per_zone = df.groupby("heartzone").sum()["step"].dt.total_seconds() / 60
249
- duration_per_zone.name = "minutes"
250
- for i in range(6):
251
- if i not in duration_per_zone:
252
- duration_per_zone.loc[i] = 0.0
253
- result = duration_per_zone.reset_index()
254
- return result
@@ -1,6 +1,12 @@
1
+ import dataclasses
1
2
  import functools
3
+ import json
2
4
  import logging
3
5
  import pathlib
6
+ from typing import Optional
7
+
8
+ from geo_activity_playground.core.paths import new_config_file
9
+ from geo_activity_playground.core.paths import strava_dynamic_config_path
4
10
 
5
11
 
6
12
  try:
@@ -12,6 +18,49 @@ except ModuleNotFoundError:
12
18
  logger = logging.getLogger(__name__)
13
19
 
14
20
 
21
+ @dataclasses.dataclass
22
+ class Config:
23
+ birth_year: Optional[int] = None
24
+ equipment_offsets: dict[str, float] = dataclasses.field(default_factory=dict)
25
+ explorer_zoom_levels: list[int] = dataclasses.field(
26
+ default_factory=lambda: [14, 17]
27
+ )
28
+ heart_rate_resting: int = 0
29
+ heart_rate_maximum: Optional[int] = None
30
+ kinds_without_achievements: list[str] = dataclasses.field(default_factory=list)
31
+ metadata_extraction_regexes: list[str] = dataclasses.field(default_factory=list)
32
+ num_processes: Optional[int] = None
33
+ privacy_zones: dict[str, list[list[float]]] = dataclasses.field(
34
+ default_factory=dict
35
+ )
36
+ strava_client_id: int = 131693
37
+ strava_client_secret: str = "0ccc0100a2c218512a7ef0cea3b0e322fb4b4365"
38
+ strava_client_code: Optional[str] = None
39
+ upload_password: Optional[str] = None
40
+
41
+
42
+ class ConfigAccessor:
43
+ def __init__(self) -> None:
44
+ if new_config_file().exists():
45
+ with open(new_config_file()) as f:
46
+ self._config = Config(**json.load(f))
47
+ else:
48
+ self._config = Config()
49
+
50
+ def __call__(self) -> Config:
51
+ return self._config
52
+
53
+ def save(self) -> None:
54
+ with open(new_config_file(), "w") as f:
55
+ json.dump(
56
+ dataclasses.asdict(self._config),
57
+ f,
58
+ ensure_ascii=False,
59
+ indent=2,
60
+ sort_keys=True,
61
+ )
62
+
63
+
15
64
  @functools.cache
16
65
  def get_config() -> dict:
17
66
  config_path = pathlib.Path("config.toml")
@@ -22,3 +71,65 @@ def get_config() -> dict:
22
71
  config = tomllib.load(f)
23
72
 
24
73
  return config
74
+
75
+
76
+ def import_old_config(config_accessor: ConfigAccessor) -> None:
77
+ old_config_path = pathlib.Path("config.toml")
78
+ if not old_config_path.exists():
79
+ return
80
+
81
+ if new_config_file().exists():
82
+ logger.warning(
83
+ "You have an old 'config.toml' which is now superseded by the 'config.json'. You can check the contents of the new 'config.json' and then delete the old 'config.toml'."
84
+ )
85
+ return
86
+
87
+ old_config = get_config()
88
+ config = config_accessor()
89
+
90
+ if "metadata_extraction_regexes" in old_config:
91
+ config.metadata_extraction_regexes = old_config["metadata_extraction_regexes"]
92
+
93
+ if "heart" in old_config:
94
+ if "birthyear" in old_config["heart"]:
95
+ config.birth_year = old_config["heart"]["birthyear"]
96
+ if "resting" in old_config["heart"]:
97
+ config.heart_rate_resting = old_config["heart"]["resting"]
98
+ if "maximum" in old_config["heart"]:
99
+ config.heart_rate_maximum = old_config["heart"]["maximum"]
100
+
101
+ if "strava" in old_config:
102
+ if "client_id" in old_config["strava"]:
103
+ config.strava_client_id = old_config["strava"]["client_id"]
104
+ if "client_secret" in old_config["strava"]:
105
+ config.strava_client_secret = old_config["strava"]["client_secret"]
106
+ if "code" in old_config["strava"]:
107
+ config.strava_client_code = old_config["strava"]["code"]
108
+
109
+ if "offsets" in old_config:
110
+ config.equipment_offsets = old_config["offsets"]
111
+
112
+ if "upload" in old_config:
113
+ if "password" in old_config["upload"]:
114
+ config.upload_password = old_config["upload"]["password"]
115
+
116
+ if "privacy_zones" in old_config:
117
+ config.privacy_zones = old_config["privacy_zones"]
118
+
119
+ config_accessor.save()
120
+
121
+
122
+ def import_old_strava_config(config_accessor: ConfigAccessor) -> None:
123
+ if not strava_dynamic_config_path().exists():
124
+ return
125
+
126
+ with open(strava_dynamic_config_path()) as f:
127
+ strava_dynamic_config = json.load(f)
128
+
129
+ config = config_accessor()
130
+ config.strava_client_id = strava_dynamic_config["client_id"]
131
+ config.strava_client_secret = strava_dynamic_config["client_secret"]
132
+ config.strava_client_code = strava_dynamic_config["code"]
133
+
134
+ config_accessor.save()
135
+ strava_dynamic_config_path().unlink()
@@ -10,6 +10,7 @@ from tqdm import tqdm
10
10
 
11
11
  from geo_activity_playground.core.activities import ActivityMeta
12
12
  from geo_activity_playground.core.activities import make_activity_meta
13
+ from geo_activity_playground.core.config import Config
13
14
  from geo_activity_playground.core.coordinates import get_distance
14
15
  from geo_activity_playground.core.paths import activity_enriched_meta_dir
15
16
  from geo_activity_playground.core.paths import activity_enriched_time_series_dir
@@ -21,7 +22,7 @@ from geo_activity_playground.core.time_conversion import convert_to_datetime_ns
21
22
  logger = logging.getLogger(__name__)
22
23
 
23
24
 
24
- def enrich_activities(kind_defaults: dict[dict[str, Any]]) -> None:
25
+ def enrich_activities(config: Config) -> None:
25
26
  # Delete removed activities.
26
27
  for enriched_metadata_path in activity_enriched_meta_dir().glob("*.pickle"):
27
28
  if not (activity_extracted_meta_dir() / enriched_metadata_path.name).exists():
@@ -74,8 +75,16 @@ def enrich_activities(kind_defaults: dict[dict[str, Any]]) -> None:
74
75
  metadata = make_activity_meta()
75
76
  metadata.update(extracted_metadata)
76
77
 
78
+ # Skip activities that don't have geo information attached to them. This shouldn't happen, though.
79
+ if "latitude" not in time_series.columns:
80
+ logger.warning(
81
+ f"Activity {metadata} doesn't have latitude/longitude information. Ignoring this one."
82
+ )
83
+ continue
84
+
77
85
  # Enrich time series.
78
- metadata.update(kind_defaults.get(metadata["kind"], {}))
86
+ if metadata["kind"] in config.kinds_without_achievements:
87
+ metadata["consider_for_achievements"] = False
79
88
  time_series = _embellish_single_time_series(
80
89
  time_series, metadata.get("start", None)
81
90
  )
@@ -0,0 +1,49 @@
1
+ import datetime
2
+ import math
3
+ from typing import Optional
4
+
5
+ import pandas as pd
6
+
7
+ from geo_activity_playground.core.config import Config
8
+
9
+
10
+ class HeartRateZoneComputer:
11
+ def __init__(
12
+ self,
13
+ config: Config,
14
+ ) -> None:
15
+ self._config = config
16
+
17
+ def compute_zones(self, frequencies: pd.Series, year: int) -> pd.Series:
18
+ maximum = self._get_maximum(year)
19
+ zones: pd.Series = (frequencies - self._config.heart_rate_resting) * 10 // (
20
+ maximum - self._config.heart_rate_resting
21
+ ) - 4
22
+ zones.loc[zones < 0] = 0
23
+ zones.loc[zones > 5] = 5
24
+ return zones
25
+
26
+ def zone_boundaries(self) -> list[tuple[int, int]]:
27
+ maximum = self._get_maximum(datetime.date.today().year)
28
+ result = []
29
+ for zone in [1, 2, 3, 4, 5]:
30
+ lower = math.ceil(
31
+ (zone + 4) / 10 * (maximum - self._config.heart_rate_resting)
32
+ + self._config.heart_rate_resting
33
+ )
34
+ upper = math.floor(
35
+ (zone + 5) / 10 * (maximum - self._config.heart_rate_resting)
36
+ + self._config.heart_rate_resting
37
+ )
38
+ result.append((lower, upper))
39
+ return result
40
+
41
+ def _get_maximum(self, year: int) -> int:
42
+ if self._config.heart_rate_maximum:
43
+ return self._config.heart_rate_maximum
44
+ elif self._config.birth_year:
45
+ return 220 - year + self._config.birth_year
46
+ else:
47
+ raise RuntimeError(
48
+ "Cannot compute heart rate maximum from the given configuration items."
49
+ )
@@ -41,6 +41,10 @@ _tiles_per_time_series = _cache_dir / "Tiles" / "Tiles Per Time Series"
41
41
  _strava_api_dir = pathlib.Path("Strava API")
42
42
  _strava_dynamic_config_path = _strava_api_dir / "strava-client-id.json"
43
43
 
44
+ _strava_last_activity_date_path = _cache_dir / "strava-last-activity-date.json"
45
+
46
+ _new_config_file = pathlib.Path("config.json")
47
+
44
48
 
45
49
  cache_dir = dir_wrapper(_cache_dir)
46
50
 
@@ -54,3 +58,5 @@ strava_api_dir = dir_wrapper(_strava_api_dir)
54
58
 
55
59
  activities_file = file_wrapper(_activities_file)
56
60
  strava_dynamic_config_path = file_wrapper(_strava_dynamic_config_path)
61
+ strava_last_activity_date_path = file_wrapper(_strava_last_activity_date_path)
62
+ new_config_file = file_wrapper(_new_config_file)
@@ -100,3 +100,17 @@ class TransformVersion:
100
100
  def write(self) -> None:
101
101
  with open(self._path, "w") as f:
102
102
  json.dump(self._code_version, f)
103
+
104
+
105
+ def get_state(path: pathlib.Path, default: Any) -> Any:
106
+ if path.exists():
107
+ with open(path) as f:
108
+ return json.load(f)
109
+ else:
110
+ return default
111
+
112
+
113
+ def set_state(path: pathlib.Path, state: Any) -> None:
114
+ path.parent.mkdir(exist_ok=True, parents=True)
115
+ with open(path, "w") as f:
116
+ json.dump(state, f, indent=2, sort_keys=True, ensure_ascii=False)
@@ -95,7 +95,7 @@ def interpolate_missing_tile(
95
95
  return None
96
96
 
97
97
  # Some people have large jumps in their tracks. We don't want to interpolate when there is more than tile in between.
98
- if abs(x1 - x2) > 1 or abs(y1 - y2) > 1:
98
+ if abs(int(x1) - int(x2)) > 1 or abs(int(y1) - int(y2)) > 1:
99
99
  return None
100
100
 
101
101
  x_hat = int(max(x1, x2))
@@ -13,6 +13,7 @@ import pandas as pd
13
13
  from tqdm import tqdm
14
14
 
15
15
  from geo_activity_playground.core.activities import ActivityRepository
16
+ from geo_activity_playground.core.config import Config
16
17
  from geo_activity_playground.core.paths import tiles_per_time_series
17
18
  from geo_activity_playground.core.tasks import try_load_pickle
18
19
  from geo_activity_playground.core.tasks import work_tracker_path
@@ -195,29 +196,36 @@ class TileEvolutionState:
195
196
  self.square_y: Optional[int] = None
196
197
 
197
198
 
198
- def compute_tile_evolution(tile_visits_accessor: TileVisitAccessor) -> None:
199
- zoom_levels = list(reversed(list(range(20))))
200
-
201
- for zoom in tqdm(zoom_levels, desc="Compute explorer cluster evolution"):
199
+ def compute_tile_evolution(
200
+ tile_visits_accessor: TileVisitAccessor, config: Config
201
+ ) -> None:
202
+ for zoom in config.explorer_zoom_levels:
202
203
  _compute_cluster_evolution(
203
- tile_visits_accessor.histories[zoom], tile_visits_accessor.states[zoom]
204
+ tile_visits_accessor.histories[zoom],
205
+ tile_visits_accessor.states[zoom],
206
+ zoom,
204
207
  )
205
- for zoom in tqdm(zoom_levels, desc="Compute explorer square evolution"):
206
208
  _compute_square_history(
207
- tile_visits_accessor.histories[zoom], tile_visits_accessor.states[zoom]
209
+ tile_visits_accessor.histories[zoom],
210
+ tile_visits_accessor.states[zoom],
211
+ zoom,
208
212
  )
209
213
 
210
214
  tile_visits_accessor.save()
211
215
 
212
216
 
213
- def _compute_cluster_evolution(tiles: pd.DataFrame, s: TileEvolutionState) -> None:
217
+ def _compute_cluster_evolution(
218
+ tiles: pd.DataFrame, s: TileEvolutionState, zoom: int
219
+ ) -> None:
214
220
  if len(s.cluster_evolution) > 0:
215
221
  max_cluster_so_far = s.cluster_evolution["max_cluster_size"].iloc[-1]
216
222
  else:
217
223
  max_cluster_so_far = 0
218
224
 
219
225
  rows = []
220
- for index, row in tiles.iloc[s.cluster_start :].iterrows():
226
+ for index, row in tqdm(
227
+ tiles.iloc[s.cluster_start :].iterrows(), desc=f"Cluster evolution for {zoom=}"
228
+ ):
221
229
  new_clusters = False
222
230
  # Current tile.
223
231
  tile = (row["tile_x"], row["tile_y"])
@@ -288,9 +296,13 @@ def _compute_cluster_evolution(tiles: pd.DataFrame, s: TileEvolutionState) -> No
288
296
  s.cluster_start = len(tiles)
289
297
 
290
298
 
291
- def _compute_square_history(tiles: pd.DataFrame, s: TileEvolutionState) -> None:
299
+ def _compute_square_history(
300
+ tiles: pd.DataFrame, s: TileEvolutionState, zoom: int
301
+ ) -> None:
292
302
  rows = []
293
- for index, row in tiles.iloc[s.square_start :].iterrows():
303
+ for index, row in tqdm(
304
+ tiles.iloc[s.square_start :].iterrows(), desc=f"Square evolution for {zoom=}"
305
+ ):
294
306
  tile = (row["tile_x"], row["tile_y"])
295
307
  x, y = tile
296
308
  s.visited_tiles.add(tile)
@@ -0,0 +1,73 @@
1
+ """
2
+ CSV parser that can handle newlines in cells.
3
+
4
+ In the Strava export there is a file `activities.csv`. With CSV being a horrible format, there are of course issues with it. One is that the activity description can have newlines in it, they are in the CSV file in verbatim. Therefore we need to have a CSV parser that can handle it. `pandas.read_csv` cannot do it.
5
+
6
+ The grammar that we have looks like this:
7
+
8
+ document ::= line [line ...]
9
+
10
+ line ::= cell [ "," cell ...] "\n"
11
+
12
+ cell ::= '"' text_with_comma '"' | text_without_comma
13
+
14
+ text_with_comma ::= (token | '\\n' | ',') ...
15
+ text_without_comma ::= token ...
16
+
17
+ This module implements a "recursive descent parser" that parses this grammar.
18
+ """
19
+
20
+
21
+ def parse_csv(text: str) -> list[list]:
22
+ text = text.strip() + "\n"
23
+ result = {}
24
+ index = 0
25
+ result = []
26
+ while index < len(text):
27
+ line, index = _parse_line(text, index)
28
+ result.append(line)
29
+ assert len(line) == len(
30
+ result[0]
31
+ ), f"Expected {len(result[0])} columns at {index=}, got {len(line)} columns"
32
+
33
+ return result
34
+
35
+
36
+ def _parse_line(text: str, start: int) -> tuple[list, int]:
37
+ index = start
38
+ result = []
39
+ while index < len(text) and text[index] != "\n":
40
+ cell, index = _parse_cell(text, index)
41
+ result.append(cell)
42
+ if text[index] == "\n":
43
+ return result, index + 1
44
+ else:
45
+ assert text[index] == ",", f"Expected ',' at {index=}, got {text[index]}"
46
+ index += 1
47
+ return result, index
48
+
49
+
50
+ def _parse_cell(text: str, start: int) -> tuple[str, int]:
51
+ characters = []
52
+ escape = False
53
+ within_quotes = False
54
+ i = start
55
+ for i in range(start, len(text) + 1):
56
+ if i == len(text):
57
+ break
58
+
59
+ c = text[i]
60
+
61
+ if c == '"' and not escape:
62
+ within_quotes = not within_quotes
63
+ continue
64
+ elif c == "\\":
65
+ escape = True
66
+ continue
67
+ elif (c == "," or c == "\n") and not within_quotes:
68
+ break
69
+ else:
70
+ characters.append(c)
71
+ escape = False
72
+
73
+ return "".join(characters), i
@@ -23,7 +23,9 @@ logger = logging.getLogger(__name__)
23
23
  ACTIVITY_DIR = pathlib.Path("Activities")
24
24
 
25
25
 
26
- def import_from_directory(metadata_extraction_regexes: list[str] = []) -> None:
26
+ def import_from_directory(
27
+ metadata_extraction_regexes: list[str], num_processes: Optional[int]
28
+ ) -> None:
27
29
 
28
30
  activity_paths = [
29
31
  path
@@ -57,13 +59,20 @@ def import_from_directory(metadata_extraction_regexes: list[str] = []) -> None:
57
59
  del file_hashes[deleted_file]
58
60
  work_tracker.discard(deleted_file)
59
61
 
60
- with multiprocessing.Pool() as pool:
61
- paths_with_errors = tqdm(
62
- pool.imap(_cache_single_file, new_activity_paths),
63
- desc="Parse activity metadata",
64
- total=len(new_activity_paths),
65
- )
66
- paths_with_errors = [error for error in paths_with_errors if error]
62
+ if num_processes == 1:
63
+ paths_with_errors = []
64
+ for path in tqdm(new_activity_paths, desc="Parse activity metadata"):
65
+ errors = _cache_single_file(path)
66
+ if errors:
67
+ paths_with_errors.append(errors)
68
+ else:
69
+ with multiprocessing.Pool(num_processes) as pool:
70
+ paths_with_errors = tqdm(
71
+ pool.imap(_cache_single_file, new_activity_paths),
72
+ desc="Parse activity metadata (concurrently)",
73
+ total=len(new_activity_paths),
74
+ )
75
+ paths_with_errors = [error for error in paths_with_errors if error]
67
76
 
68
77
  for path in tqdm(new_activity_paths, desc="Collate activity metadata"):
69
78
  activity_id = get_file_hash(path)