geo-activity-playground 0.26.3__py3-none-any.whl → 0.27.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geo_activity_playground/__main__.py +23 -20
- geo_activity_playground/core/activities.py +1 -44
- geo_activity_playground/core/config.py +111 -0
- geo_activity_playground/core/enrichment.py +11 -2
- geo_activity_playground/core/heart_rate.py +49 -0
- geo_activity_playground/core/paths.py +6 -0
- geo_activity_playground/core/tasks.py +14 -0
- geo_activity_playground/core/tiles.py +1 -1
- geo_activity_playground/explorer/tile_visits.py +23 -11
- geo_activity_playground/importers/csv_parser.py +73 -0
- geo_activity_playground/importers/directory.py +17 -8
- geo_activity_playground/importers/strava_api.py +20 -44
- geo_activity_playground/importers/strava_checkout.py +57 -32
- geo_activity_playground/importers/test_csv_parser.py +49 -0
- geo_activity_playground/webui/activity/blueprint.py +3 -4
- geo_activity_playground/webui/activity/controller.py +40 -14
- geo_activity_playground/webui/activity/templates/activity/show.html.j2 +6 -2
- geo_activity_playground/webui/app.py +26 -26
- geo_activity_playground/webui/eddington/controller.py +1 -1
- geo_activity_playground/webui/equipment/blueprint.py +5 -2
- geo_activity_playground/webui/equipment/controller.py +5 -6
- geo_activity_playground/webui/explorer/blueprint.py +14 -2
- geo_activity_playground/webui/explorer/controller.py +21 -1
- geo_activity_playground/webui/explorer/templates/explorer/index.html.j2 +12 -1
- geo_activity_playground/webui/settings/blueprint.py +106 -0
- geo_activity_playground/webui/settings/controller.py +228 -0
- geo_activity_playground/webui/settings/templates/settings/equipment-offsets.html.j2 +44 -0
- geo_activity_playground/webui/settings/templates/settings/heart-rate.html.j2 +102 -0
- geo_activity_playground/webui/settings/templates/settings/index.html.j2 +74 -0
- geo_activity_playground/webui/settings/templates/settings/kinds-without-achievements.html.j2 +30 -0
- geo_activity_playground/webui/settings/templates/settings/metadata-extraction.html.j2 +55 -0
- geo_activity_playground/webui/settings/templates/settings/privacy-zones.html.j2 +81 -0
- geo_activity_playground/webui/{strava/templates/strava/client-id.html.j2 → settings/templates/settings/strava.html.j2} +17 -7
- geo_activity_playground/webui/templates/home.html.j2 +1 -1
- geo_activity_playground/webui/templates/page.html.j2 +5 -1
- geo_activity_playground/webui/upload/blueprint.py +10 -1
- geo_activity_playground/webui/upload/controller.py +24 -11
- geo_activity_playground/webui/upload/templates/upload/reload.html.j2 +16 -0
- {geo_activity_playground-0.26.3.dist-info → geo_activity_playground-0.27.1.dist-info}/METADATA +1 -1
- {geo_activity_playground-0.26.3.dist-info → geo_activity_playground-0.27.1.dist-info}/RECORD +43 -36
- geo_activity_playground/webui/strava/__init__.py +0 -0
- geo_activity_playground/webui/strava/blueprint.py +0 -33
- geo_activity_playground/webui/strava/controller.py +0 -49
- geo_activity_playground/webui/strava/templates/strava/connected.html.j2 +0 -14
- geo_activity_playground/webui/templates/settings.html.j2 +0 -24
- {geo_activity_playground-0.26.3.dist-info → geo_activity_playground-0.27.1.dist-info}/LICENSE +0 -0
- {geo_activity_playground-0.26.3.dist-info → geo_activity_playground-0.27.1.dist-info}/WHEEL +0 -0
- {geo_activity_playground-0.26.3.dist-info → geo_activity_playground-0.27.1.dist-info}/entry_points.txt +0 -0
@@ -2,16 +2,17 @@ import argparse
|
|
2
2
|
import logging
|
3
3
|
import os
|
4
4
|
import pathlib
|
5
|
-
import sys
|
6
5
|
|
7
6
|
import coloredlogs
|
8
7
|
|
9
8
|
from .importers.strava_checkout import convert_strava_checkout
|
10
9
|
from geo_activity_playground.core.activities import ActivityRepository
|
11
|
-
from geo_activity_playground.core.config import
|
10
|
+
from geo_activity_playground.core.config import ConfigAccessor
|
11
|
+
from geo_activity_playground.core.config import import_old_config
|
12
|
+
from geo_activity_playground.core.config import import_old_strava_config
|
12
13
|
from geo_activity_playground.explorer.tile_visits import TileVisitAccessor
|
13
14
|
from geo_activity_playground.explorer.video import explorer_video_main
|
14
|
-
from geo_activity_playground.webui.app import
|
15
|
+
from geo_activity_playground.webui.app import web_ui_main
|
15
16
|
from geo_activity_playground.webui.upload.controller import scan_for_activities
|
16
17
|
|
17
18
|
logger = logging.getLogger(__name__)
|
@@ -62,8 +63,8 @@ def main() -> None:
|
|
62
63
|
|
63
64
|
subparser = subparsers.add_parser("serve", help="Launch webserver")
|
64
65
|
subparser.set_defaults(
|
65
|
-
func=lambda options:
|
66
|
-
*make_activity_repository(options.basedir, options.
|
66
|
+
func=lambda options: web_ui_main(
|
67
|
+
*make_activity_repository(options.basedir, options.skip_reload),
|
67
68
|
host=options.host,
|
68
69
|
port=options.port,
|
69
70
|
)
|
@@ -74,12 +75,10 @@ def main() -> None:
|
|
74
75
|
subparser.add_argument(
|
75
76
|
"--port", default=5000, type=int, help="the port to run listen on"
|
76
77
|
)
|
77
|
-
subparser.add_argument("--skip-
|
78
|
+
subparser.add_argument("--skip-reload", action=argparse.BooleanOptionalAction)
|
78
79
|
|
79
80
|
subparser = subparsers.add_parser("cache", help="Cache stuff")
|
80
|
-
subparser.set_defaults(
|
81
|
-
func=lambda options: make_activity_repository(options.basedir, False)
|
82
|
-
)
|
81
|
+
subparser.set_defaults(func=lambda options: main_cache(options.basedir))
|
83
82
|
|
84
83
|
options = parser.parse_args()
|
85
84
|
coloredlogs.install(
|
@@ -93,23 +92,27 @@ def main() -> None:
|
|
93
92
|
|
94
93
|
|
95
94
|
def make_activity_repository(
|
96
|
-
basedir: pathlib.Path,
|
97
|
-
) -> tuple[ActivityRepository, TileVisitAccessor,
|
95
|
+
basedir: pathlib.Path, skip_reload: bool
|
96
|
+
) -> tuple[ActivityRepository, TileVisitAccessor, ConfigAccessor]:
|
98
97
|
os.chdir(basedir)
|
99
|
-
config = get_config()
|
100
|
-
|
101
|
-
if not config.get("prefer_metadata_from_file", True):
|
102
|
-
logger.error(
|
103
|
-
"The config option `prefer_metadata_from_file` is deprecated. If you want to prefer extract metadata from the activity file paths, please use the new `metadata_extraction_regexes` as explained at https://martin-ueding.github.io/geo-activity-playground/getting-started/using-activity-files/#directory-structure."
|
104
|
-
)
|
105
|
-
sys.exit(1)
|
106
98
|
|
107
99
|
repository = ActivityRepository()
|
108
100
|
tile_visit_accessor = TileVisitAccessor()
|
101
|
+
config_accessor = ConfigAccessor()
|
102
|
+
import_old_config(config_accessor)
|
103
|
+
import_old_strava_config(config_accessor)
|
109
104
|
|
110
|
-
|
105
|
+
if not skip_reload:
|
106
|
+
scan_for_activities(repository, tile_visit_accessor, config_accessor())
|
111
107
|
|
112
|
-
return repository, tile_visit_accessor,
|
108
|
+
return repository, tile_visit_accessor, config_accessor
|
109
|
+
|
110
|
+
|
111
|
+
def main_cache(basedir: pathlib.Path) -> None:
    """Populate the caches for the playground in ``basedir`` without serving the web UI.

    Args:
        basedir: Directory that ``make_activity_repository`` changes into and
            that holds the activities and the configuration.
    """
    # Pass skip_reload=True: with False, make_activity_repository already runs
    # scan_for_activities itself, and the explicit call below would then scan
    # everything a second time.
    repository, tile_visit_accessor, config_accessor = make_activity_repository(
        basedir, True
    )
    scan_for_activities(repository, tile_visit_accessor, config_accessor())
|
113
116
|
|
114
117
|
|
115
118
|
if __name__ == "__main__":
|
@@ -12,7 +12,6 @@ import numpy as np
|
|
12
12
|
import pandas as pd
|
13
13
|
from tqdm import tqdm
|
14
14
|
|
15
|
-
from geo_activity_playground.core.config import get_config
|
16
15
|
from geo_activity_playground.core.paths import activities_file
|
17
16
|
from geo_activity_playground.core.paths import activity_enriched_meta_dir
|
18
17
|
from geo_activity_playground.core.paths import activity_enriched_time_series_dir
|
@@ -36,7 +35,7 @@ class ActivityMeta(TypedDict):
|
|
36
35
|
path: str
|
37
36
|
start_latitude: float
|
38
37
|
start_longitude: float
|
39
|
-
start:
|
38
|
+
start: np.datetime64
|
40
39
|
steps: int
|
41
40
|
|
42
41
|
|
@@ -210,45 +209,3 @@ def make_speed_color_bar(time_series: pd.DataFrame) -> dict[str, str]:
|
|
210
209
|
for speed in np.linspace(low, high, 10)
|
211
210
|
]
|
212
211
|
return {"low": low, "high": high, "colors": colors}
|
213
|
-
|
214
|
-
|
215
|
-
def extract_heart_rate_zones(time_series: pd.DataFrame) -> Optional[pd.DataFrame]:
|
216
|
-
if "heartrate" not in time_series:
|
217
|
-
return None
|
218
|
-
config = get_config()
|
219
|
-
try:
|
220
|
-
heart_config = config["heart"]
|
221
|
-
except KeyError:
|
222
|
-
logger.warning(
|
223
|
-
"Missing config entry `heart`, cannot determine heart rate zones."
|
224
|
-
)
|
225
|
-
return None
|
226
|
-
|
227
|
-
birthyear = heart_config.get("birthyear", None)
|
228
|
-
maximum = heart_config.get("maximum", None)
|
229
|
-
resting = heart_config.get("resting", None)
|
230
|
-
|
231
|
-
if not maximum and birthyear:
|
232
|
-
age = time_series["time"].iloc[0].year - birthyear
|
233
|
-
maximum = 220 - age
|
234
|
-
if not resting:
|
235
|
-
resting = 0
|
236
|
-
if not maximum:
|
237
|
-
logger.warning(
|
238
|
-
"Missing config entry `heart.maximum` or `heart.birthyear`, cannot determine heart rate zones."
|
239
|
-
)
|
240
|
-
return None
|
241
|
-
|
242
|
-
zones: pd.Series = (time_series["heartrate"] - resting) * 10 // (
|
243
|
-
maximum - resting
|
244
|
-
) - 4
|
245
|
-
zones.loc[zones < 0] = 0
|
246
|
-
zones.loc[zones > 5] = 5
|
247
|
-
df = pd.DataFrame({"heartzone": zones, "step": time_series["time"].diff()}).dropna()
|
248
|
-
duration_per_zone = df.groupby("heartzone").sum()["step"].dt.total_seconds() / 60
|
249
|
-
duration_per_zone.name = "minutes"
|
250
|
-
for i in range(6):
|
251
|
-
if i not in duration_per_zone:
|
252
|
-
duration_per_zone.loc[i] = 0.0
|
253
|
-
result = duration_per_zone.reset_index()
|
254
|
-
return result
|
@@ -1,6 +1,12 @@
|
|
1
|
+
import dataclasses
|
1
2
|
import functools
|
3
|
+
import json
|
2
4
|
import logging
|
3
5
|
import pathlib
|
6
|
+
from typing import Optional
|
7
|
+
|
8
|
+
from geo_activity_playground.core.paths import new_config_file
|
9
|
+
from geo_activity_playground.core.paths import strava_dynamic_config_path
|
4
10
|
|
5
11
|
|
6
12
|
try:
|
@@ -12,6 +18,49 @@ except ModuleNotFoundError:
|
|
12
18
|
logger = logging.getLogger(__name__)
|
13
19
|
|
14
20
|
|
21
|
+
@dataclasses.dataclass
class Config:
    """All user settings of the application, one attribute per option.

    ``ConfigAccessor`` creates an instance from the keys of the JSON config
    file (``Config(**json.load(f))``) and writes it back with
    ``dataclasses.asdict``, so each field name is also a key in that file.
    """

    # Year of birth; `import_old_config` fills it from the old `heart.birthyear`.
    birth_year: Optional[int] = None
    # Filled from the old top-level `offsets` table.
    # Presumably maps an equipment name to a distance offset; confirm with the consumers.
    equipment_offsets: dict[str, float] = dataclasses.field(default_factory=dict)
    # Zoom levels handled by the explorer code; defaults to 14 and 17.
    explorer_zoom_levels: list[int] = dataclasses.field(
        default_factory=lambda: [14, 17]
    )
    # Resting heart rate; filled from the old `heart.resting`, 0 when not set.
    heart_rate_resting: int = 0
    # Maximum heart rate; filled from the old `heart.maximum`, None when not set.
    heart_rate_maximum: Optional[int] = None
    # Activity kinds that are excluded from achievements.
    kinds_without_achievements: list[str] = dataclasses.field(default_factory=list)
    # Taken over unchanged from the old `metadata_extraction_regexes` key.
    metadata_extraction_regexes: list[str] = dataclasses.field(default_factory=list)
    # Number of worker processes; defaults to 1.
    # NOTE(review): the meaning of None is decided by the consumer, not visible here.
    num_processes: Optional[int] = 1
    # Taken over unchanged from the old `privacy_zones` table.
    privacy_zones: dict[str, list[list[float]]] = dataclasses.field(
        default_factory=dict
    )
    # Strava API application credentials. The defaults below are used unless
    # overridden by the old `strava.client_id` / `strava.client_secret` or by
    # the old dynamic Strava config file.
    # NOTE(review): a client secret is hard-coded as a default here.
    strava_client_id: int = 131693
    strava_client_secret: str = "0ccc0100a2c218512a7ef0cea3b0e322fb4b4365"
    # Filled from the old `strava.code`; None when not set.
    strava_client_code: Optional[str] = None
    # Filled from the old `upload.password`; None when not set.
    upload_password: Optional[str] = None
|
40
|
+
|
41
|
+
|
42
|
+
class ConfigAccessor:
    """Load the JSON config file into a ``Config`` and write it back on demand.

    Calling the instance returns the single ``Config`` object that it holds;
    callers mutate that object and then call ``save()``.
    """

    def __init__(self) -> None:
        """Read the config file if it exists, otherwise start from defaults.

        Raises:
            TypeError: If the file contains a key that is not a ``Config`` field.
        """
        config_path = new_config_file()
        if config_path.exists():
            # Read as UTF-8 explicitly: `save` writes non-ASCII characters
            # verbatim (ensure_ascii=False), so relying on the platform default
            # encoding would break on non-UTF-8 locales.
            with open(config_path, encoding="utf-8") as f:
                self._config = Config(**json.load(f))
        else:
            self._config = Config()

    def __call__(self) -> Config:
        """Return the held ``Config`` instance (not a copy)."""
        return self._config

    def save(self) -> None:
        """Serialize the held ``Config`` to the config file as sorted, indented JSON."""
        with open(new_config_file(), "w", encoding="utf-8") as f:
            json.dump(
                dataclasses.asdict(self._config),
                f,
                ensure_ascii=False,
                indent=2,
                sort_keys=True,
            )
|
62
|
+
|
63
|
+
|
15
64
|
@functools.cache
|
16
65
|
def get_config() -> dict:
|
17
66
|
config_path = pathlib.Path("config.toml")
|
@@ -22,3 +71,65 @@ def get_config() -> dict:
|
|
22
71
|
config = tomllib.load(f)
|
23
72
|
|
24
73
|
return config
|
74
|
+
|
75
|
+
|
76
|
+
def import_old_config(config_accessor: ConfigAccessor) -> None:
    """Migrate settings from a legacy ``config.toml`` into the new config and save it.

    Does nothing when there is no ``config.toml``. When the new config file
    already exists, only a warning is logged and nothing is imported.

    Args:
        config_accessor: Accessor whose ``Config`` receives the old values and
            which is saved afterwards.
    """
    if not pathlib.Path("config.toml").exists():
        return

    if new_config_file().exists():
        logger.warning(
            "You have an old 'config.toml' which is now superseded by the 'config.json'. You can check the contents of the new 'config.json' and then delete the old 'config.toml'."
        )
        return

    legacy = get_config()
    target = config_accessor()

    # Old top-level key -> new attribute name.
    top_level_fields = (
        ("metadata_extraction_regexes", "metadata_extraction_regexes"),
        ("offsets", "equipment_offsets"),
        ("privacy_zones", "privacy_zones"),
    )
    # Old section -> (old key inside the section, new attribute name).
    section_fields = {
        "heart": (
            ("birthyear", "birth_year"),
            ("resting", "heart_rate_resting"),
            ("maximum", "heart_rate_maximum"),
        ),
        "strava": (
            ("client_id", "strava_client_id"),
            ("client_secret", "strava_client_secret"),
            ("code", "strava_client_code"),
        ),
        "upload": (("password", "upload_password"),),
    }

    for old_key, attribute in top_level_fields:
        if old_key in legacy:
            setattr(target, attribute, legacy[old_key])

    for section_name, fields in section_fields.items():
        if section_name not in legacy:
            continue
        section = legacy[section_name]
        for old_key, attribute in fields:
            if old_key in section:
                setattr(target, attribute, section[old_key])

    config_accessor.save()
|
120
|
+
|
121
|
+
|
122
|
+
def import_old_strava_config(config_accessor: ConfigAccessor) -> None:
    """Move the Strava credentials from the old dynamic config file into the new config.

    Does nothing when the old file is absent. Otherwise its ``client_id``,
    ``client_secret`` and ``code`` entries are copied, the config is saved and
    the old file is deleted.

    Args:
        config_accessor: Accessor whose ``Config`` receives the credentials.

    Raises:
        KeyError: If the old file lacks one of the three entries.
    """
    legacy_path = strava_dynamic_config_path()
    if not legacy_path.exists():
        return

    with open(legacy_path) as f:
        legacy = json.load(f)

    target = config_accessor()
    target.strava_client_id = legacy["client_id"]
    target.strava_client_secret = legacy["client_secret"]
    target.strava_client_code = legacy["code"]

    # Persist first, so the old file is only removed once its content is saved.
    config_accessor.save()
    legacy_path.unlink()
|
@@ -10,6 +10,7 @@ from tqdm import tqdm
|
|
10
10
|
|
11
11
|
from geo_activity_playground.core.activities import ActivityMeta
|
12
12
|
from geo_activity_playground.core.activities import make_activity_meta
|
13
|
+
from geo_activity_playground.core.config import Config
|
13
14
|
from geo_activity_playground.core.coordinates import get_distance
|
14
15
|
from geo_activity_playground.core.paths import activity_enriched_meta_dir
|
15
16
|
from geo_activity_playground.core.paths import activity_enriched_time_series_dir
|
@@ -21,7 +22,7 @@ from geo_activity_playground.core.time_conversion import convert_to_datetime_ns
|
|
21
22
|
logger = logging.getLogger(__name__)
|
22
23
|
|
23
24
|
|
24
|
-
def enrich_activities(
|
25
|
+
def enrich_activities(config: Config) -> None:
|
25
26
|
# Delete removed activities.
|
26
27
|
for enriched_metadata_path in activity_enriched_meta_dir().glob("*.pickle"):
|
27
28
|
if not (activity_extracted_meta_dir() / enriched_metadata_path.name).exists():
|
@@ -74,8 +75,16 @@ def enrich_activities(kind_defaults: dict[dict[str, Any]]) -> None:
|
|
74
75
|
metadata = make_activity_meta()
|
75
76
|
metadata.update(extracted_metadata)
|
76
77
|
|
78
|
+
# Skip activities that don't have geo information attached to them. This shouldn't happen, though.
|
79
|
+
if "latitude" not in time_series.columns:
|
80
|
+
logger.warning(
|
81
|
+
f"Activity {metadata} doesn't have latitude/longitude information. Ignoring this one."
|
82
|
+
)
|
83
|
+
continue
|
84
|
+
|
77
85
|
# Enrich time series.
|
78
|
-
metadata
|
86
|
+
if metadata["kind"] in config.kinds_without_achievements:
|
87
|
+
metadata["consider_for_achievements"] = False
|
79
88
|
time_series = _embellish_single_time_series(
|
80
89
|
time_series, metadata.get("start", None)
|
81
90
|
)
|
@@ -0,0 +1,49 @@
|
|
1
|
+
import datetime
|
2
|
+
import math
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
import pandas as pd
|
6
|
+
|
7
|
+
from geo_activity_playground.core.config import Config
|
8
|
+
|
9
|
+
|
10
|
+
class HeartRateZoneComputer:
    """Map heart rates to zones 0-5 from the configured resting and maximum rates.

    With ``span = maximum - resting``, zone ``z`` (1-5) starts at
    ``resting + (z + 4) / 10 * span``; zone 1 therefore starts at 50 % of the
    span above the resting rate. Everything below zone 1 is zone 0.
    """

    def __init__(
        self,
        config: "Config",
    ) -> None:
        """Keep a reference to the configuration; it is read on every call."""
        self._config = config

    def compute_zones(self, frequencies: pd.Series, year: int) -> pd.Series:
        """Return the zone (0-5) for each heart rate in ``frequencies``.

        Args:
            frequencies: Heart rate samples.
            year: Year of the activity, used to derive the maximum heart rate
                from the birth year when no explicit maximum is configured.

        Raises:
            RuntimeError: If neither a maximum heart rate nor a birth year is
                configured.
        """
        resting = self._config.heart_rate_resting
        span = self._get_maximum(year) - resting
        zones: pd.Series = (frequencies - resting) * 10 // span - 4
        # Everything below zone 1 counts as zone 0, everything above as zone 5.
        return zones.clip(lower=0, upper=5)

    def zone_boundaries(self) -> list[tuple[int, int]]:
        """Return ``(lower, upper)`` heart rates for zones 1 to 5, for the current year.

        Raises:
            RuntimeError: If neither a maximum heart rate nor a birth year is
                configured.
        """
        resting = self._config.heart_rate_resting
        span = self._get_maximum(datetime.date.today().year) - resting
        # Multiply before dividing: `(zone + 5) / 10` is not exactly
        # representable as a float, e.g. `0.7 * 180 == 125.99999999999999`,
        # which made `floor` return a bound that is one too low.
        # NOTE(review): when a boundary is a whole number, `upper` of one zone
        # equals `lower` of the next, while `compute_zones` assigns that heart
        # rate to the higher zone.
        return [
            (
                math.ceil((zone + 4) * span / 10 + resting),
                math.floor((zone + 5) * span / 10 + resting),
            )
            for zone in range(1, 6)
        ]

    def _get_maximum(self, year: int) -> int:
        """Return the configured maximum heart rate, or ``220 - age`` in ``year``.

        Raises:
            RuntimeError: If neither a maximum heart rate nor a birth year is
                configured.
        """
        if self._config.heart_rate_maximum:
            return self._config.heart_rate_maximum
        elif self._config.birth_year:
            # 220 - age, with age = year - birth_year.
            return 220 - year + self._config.birth_year
        else:
            raise RuntimeError(
                "Cannot compute heart rate maximum from the given configuration items."
            )
|
@@ -41,6 +41,10 @@ _tiles_per_time_series = _cache_dir / "Tiles" / "Tiles Per Time Series"
|
|
41
41
|
_strava_api_dir = pathlib.Path("Strava API")
|
42
42
|
_strava_dynamic_config_path = _strava_api_dir / "strava-client-id.json"
|
43
43
|
|
44
|
+
_strava_last_activity_date_path = _cache_dir / "strava-last-activity-date.json"
|
45
|
+
|
46
|
+
_new_config_file = pathlib.Path("config.json")
|
47
|
+
|
44
48
|
|
45
49
|
cache_dir = dir_wrapper(_cache_dir)
|
46
50
|
|
@@ -54,3 +58,5 @@ strava_api_dir = dir_wrapper(_strava_api_dir)
|
|
54
58
|
|
55
59
|
activities_file = file_wrapper(_activities_file)
|
56
60
|
strava_dynamic_config_path = file_wrapper(_strava_dynamic_config_path)
|
61
|
+
strava_last_activity_date_path = file_wrapper(_strava_last_activity_date_path)
|
62
|
+
new_config_file = file_wrapper(_new_config_file)
|
@@ -100,3 +100,17 @@ class TransformVersion:
|
|
100
100
|
def write(self) -> None:
|
101
101
|
with open(self._path, "w") as f:
|
102
102
|
json.dump(self._code_version, f)
|
103
|
+
|
104
|
+
|
105
|
+
def get_state(path: pathlib.Path, default: Any) -> Any:
    """Return the JSON content of ``path``, or ``default`` if the file does not exist.

    Args:
        path: JSON file to read.
        default: Value returned unchanged when ``path`` is missing.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    # Open directly instead of checking `exists()` first, so a file removed in
    # between cannot cause an error. Read as UTF-8 because `set_state` writes
    # non-ASCII characters verbatim.
    try:
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return default
|
111
|
+
|
112
|
+
|
113
|
+
def set_state(path: pathlib.Path, state: Any) -> None:
    """Write ``state`` to ``path`` as indented JSON with sorted keys.

    Missing parent directories are created.

    Args:
        path: Target file; overwritten if it exists.
        state: JSON-serializable value.
    """
    path.parent.mkdir(exist_ok=True, parents=True)
    # `ensure_ascii=False` emits non-ASCII characters verbatim, so the file
    # encoding must be fixed to UTF-8 rather than left to the platform default.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(state, f, indent=2, sort_keys=True, ensure_ascii=False)
|
@@ -95,7 +95,7 @@ def interpolate_missing_tile(
|
|
95
95
|
return None
|
96
96
|
|
97
97
|
# Some people have large jumps in their tracks. We don't want to interpolate when there is more than one tile in between.
|
98
|
-
if abs(x1 - x2) > 1 or abs(y1 - y2) > 1:
|
98
|
+
if abs(int(x1) - int(x2)) > 1 or abs(int(y1) - int(y2)) > 1:
|
99
99
|
return None
|
100
100
|
|
101
101
|
x_hat = int(max(x1, x2))
|
@@ -13,6 +13,7 @@ import pandas as pd
|
|
13
13
|
from tqdm import tqdm
|
14
14
|
|
15
15
|
from geo_activity_playground.core.activities import ActivityRepository
|
16
|
+
from geo_activity_playground.core.config import Config
|
16
17
|
from geo_activity_playground.core.paths import tiles_per_time_series
|
17
18
|
from geo_activity_playground.core.tasks import try_load_pickle
|
18
19
|
from geo_activity_playground.core.tasks import work_tracker_path
|
@@ -195,29 +196,36 @@ class TileEvolutionState:
|
|
195
196
|
self.square_y: Optional[int] = None
|
196
197
|
|
197
198
|
|
198
|
-
def compute_tile_evolution(
|
199
|
-
|
200
|
-
|
201
|
-
for zoom in
|
199
|
+
def compute_tile_evolution(
|
200
|
+
tile_visits_accessor: TileVisitAccessor, config: Config
|
201
|
+
) -> None:
|
202
|
+
for zoom in config.explorer_zoom_levels:
|
202
203
|
_compute_cluster_evolution(
|
203
|
-
tile_visits_accessor.histories[zoom],
|
204
|
+
tile_visits_accessor.histories[zoom],
|
205
|
+
tile_visits_accessor.states[zoom],
|
206
|
+
zoom,
|
204
207
|
)
|
205
|
-
for zoom in tqdm(zoom_levels, desc="Compute explorer square evolution"):
|
206
208
|
_compute_square_history(
|
207
|
-
tile_visits_accessor.histories[zoom],
|
209
|
+
tile_visits_accessor.histories[zoom],
|
210
|
+
tile_visits_accessor.states[zoom],
|
211
|
+
zoom,
|
208
212
|
)
|
209
213
|
|
210
214
|
tile_visits_accessor.save()
|
211
215
|
|
212
216
|
|
213
|
-
def _compute_cluster_evolution(
|
217
|
+
def _compute_cluster_evolution(
|
218
|
+
tiles: pd.DataFrame, s: TileEvolutionState, zoom: int
|
219
|
+
) -> None:
|
214
220
|
if len(s.cluster_evolution) > 0:
|
215
221
|
max_cluster_so_far = s.cluster_evolution["max_cluster_size"].iloc[-1]
|
216
222
|
else:
|
217
223
|
max_cluster_so_far = 0
|
218
224
|
|
219
225
|
rows = []
|
220
|
-
for index, row in
|
226
|
+
for index, row in tqdm(
|
227
|
+
tiles.iloc[s.cluster_start :].iterrows(), desc=f"Cluster evolution for {zoom=}"
|
228
|
+
):
|
221
229
|
new_clusters = False
|
222
230
|
# Current tile.
|
223
231
|
tile = (row["tile_x"], row["tile_y"])
|
@@ -288,9 +296,13 @@ def _compute_cluster_evolution(tiles: pd.DataFrame, s: TileEvolutionState) -> No
|
|
288
296
|
s.cluster_start = len(tiles)
|
289
297
|
|
290
298
|
|
291
|
-
def _compute_square_history(
|
299
|
+
def _compute_square_history(
|
300
|
+
tiles: pd.DataFrame, s: TileEvolutionState, zoom: int
|
301
|
+
) -> None:
|
292
302
|
rows = []
|
293
|
-
for index, row in
|
303
|
+
for index, row in tqdm(
|
304
|
+
tiles.iloc[s.square_start :].iterrows(), desc=f"Square evolution for {zoom=}"
|
305
|
+
):
|
294
306
|
tile = (row["tile_x"], row["tile_y"])
|
295
307
|
x, y = tile
|
296
308
|
s.visited_tiles.add(tile)
|
@@ -0,0 +1,73 @@
|
|
1
|
+
"""
|
2
|
+
CSV parser that can handle newlines in cells.
|
3
|
+
|
4
|
+
In the Strava export there is a file `activities.csv`. With CSV being a horrible format, there are of course issues with it. One is that the activity description can contain newlines, which appear verbatim in the CSV file. Therefore we need a CSV parser that can handle them; `pandas.read_csv` cannot do it.
|
5
|
+
|
6
|
+
The grammar that we have looks like this:
|
7
|
+
|
8
|
+
document ::= line [line ...]
|
9
|
+
|
10
|
+
line ::= cell [ "," cell ...] "\n"
|
11
|
+
|
12
|
+
cell ::= '"' text_with_comma '"' | text_without_comma
|
13
|
+
|
14
|
+
text_with_comma ::= (token | '\\n' | ',') ...
|
15
|
+
text_without_comma ::= token ...
|
16
|
+
|
17
|
+
This module implements a "recursive descent parser" that parses this grammar.
|
18
|
+
"""
|
19
|
+
|
20
|
+
|
21
|
+
def parse_csv(text: str) -> list[list]:
|
22
|
+
text = text.strip() + "\n"
|
23
|
+
result = {}
|
24
|
+
index = 0
|
25
|
+
result = []
|
26
|
+
while index < len(text):
|
27
|
+
line, index = _parse_line(text, index)
|
28
|
+
result.append(line)
|
29
|
+
assert len(line) == len(
|
30
|
+
result[0]
|
31
|
+
), f"Expected {len(result[0])} columns at {index=}, got {len(line)} columns"
|
32
|
+
|
33
|
+
return result
|
34
|
+
|
35
|
+
|
36
|
+
def _parse_line(text: str, start: int) -> tuple[list, int]:
|
37
|
+
index = start
|
38
|
+
result = []
|
39
|
+
while index < len(text) and text[index] != "\n":
|
40
|
+
cell, index = _parse_cell(text, index)
|
41
|
+
result.append(cell)
|
42
|
+
if text[index] == "\n":
|
43
|
+
return result, index + 1
|
44
|
+
else:
|
45
|
+
assert text[index] == ",", f"Expected ',' at {index=}, got {text[index]}"
|
46
|
+
index += 1
|
47
|
+
return result, index
|
48
|
+
|
49
|
+
|
50
|
+
def _parse_cell(text: str, start: int) -> tuple[str, int]:
|
51
|
+
characters = []
|
52
|
+
escape = False
|
53
|
+
within_quotes = False
|
54
|
+
i = start
|
55
|
+
for i in range(start, len(text) + 1):
|
56
|
+
if i == len(text):
|
57
|
+
break
|
58
|
+
|
59
|
+
c = text[i]
|
60
|
+
|
61
|
+
if c == '"' and not escape:
|
62
|
+
within_quotes = not within_quotes
|
63
|
+
continue
|
64
|
+
elif c == "\\":
|
65
|
+
escape = True
|
66
|
+
continue
|
67
|
+
elif (c == "," or c == "\n") and not within_quotes:
|
68
|
+
break
|
69
|
+
else:
|
70
|
+
characters.append(c)
|
71
|
+
escape = False
|
72
|
+
|
73
|
+
return "".join(characters), i
|
@@ -23,7 +23,9 @@ logger = logging.getLogger(__name__)
|
|
23
23
|
ACTIVITY_DIR = pathlib.Path("Activities")
|
24
24
|
|
25
25
|
|
26
|
-
def import_from_directory(
|
26
|
+
def import_from_directory(
|
27
|
+
metadata_extraction_regexes: list[str], num_processes: Optional[int]
|
28
|
+
) -> None:
|
27
29
|
|
28
30
|
activity_paths = [
|
29
31
|
path
|
@@ -57,13 +59,20 @@ def import_from_directory(metadata_extraction_regexes: list[str] = []) -> None:
|
|
57
59
|
del file_hashes[deleted_file]
|
58
60
|
work_tracker.discard(deleted_file)
|
59
61
|
|
60
|
-
|
61
|
-
paths_with_errors =
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
62
|
+
if num_processes == 1:
|
63
|
+
paths_with_errors = []
|
64
|
+
for path in tqdm(new_activity_paths, desc="Parse activity metadata (serially)"):
|
65
|
+
errors = _cache_single_file(path)
|
66
|
+
if errors:
|
67
|
+
paths_with_errors.append(errors)
|
68
|
+
else:
|
69
|
+
with multiprocessing.Pool(num_processes) as pool:
|
70
|
+
paths_with_errors = tqdm(
|
71
|
+
pool.imap(_cache_single_file, new_activity_paths),
|
72
|
+
desc="Parse activity metadata (concurrently)",
|
73
|
+
total=len(new_activity_paths),
|
74
|
+
)
|
75
|
+
paths_with_errors = [error for error in paths_with_errors if error]
|
67
76
|
|
68
77
|
for path in tqdm(new_activity_paths, desc="Collate activity metadata"):
|
69
78
|
activity_id = get_file_hash(path)
|