gpxtractor 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gpxtractor/__init__.py ADDED
@@ -0,0 +1,12 @@
1
+ """
2
+ GPX, TCX and FIT data extraction for Python
3
+ ======================================
4
+
5
+ gpxtractor is a python package to extract data from
6
+ gpx, tcx and fit files and present it in a dataframe.
7
+ """
8
+
9
+ __version__ = "0.1.0"
10
+
11
+
12
+ from gpxtractor._core import Activity, extract_data
gpxtractor/_core.py ADDED
@@ -0,0 +1,265 @@
1
+ import gzip
2
+ import pathlib
3
+ from typing import Optional
4
+ from dataclasses import dataclass, field
5
+ import pyarrow as pa
6
+ import pandas as pd
7
+
8
+ import gpxtractor._xml_extraction as xml_ext
9
+ import gpxtractor._fit_extraction as fit_ext
10
+ import gpxtractor._transformation as tr
11
+ import gpxtractor._utils as ut
12
+
13
+
14
@dataclass
class Activity:
    """Stores and manages records and metadata parsed from a gpx, tcx or
    fit file.

    This class is designed to hold structured data and associated metadata
    extracted from a gpx, tcx or fit file, providing methods for accessing
    and transforming the records.

    Parameters
    ----------
    file_type : str
        Can be any of the following: 'GPX', 'TCX' or 'FIT'.
        Corresponds to the type of the file for which the instance of the
        class holds data.

    sport : str
        Is the type of sport as extracted from the file in lower case.

    records : pandas.DataFrame
        DataFrame holding the records extracted from the gpx, tcx or fit file.

    Attributes
    ----------
    is_transformed : bool
        Initially False, becomes True once either the method
        `transform_records` or `full_transform` is used.

    file_type : str
        Can be any of the following: 'GPX', 'TCX' or 'FIT'.
        Corresponds to the type of the file for which the instance of the
        class holds data.

    sport : None or str
        Is the type of sport as extracted from the file in lower case.

    start_time : None or pandas.Timestamp
        Is None before a transformation method has been called.
        A pandas Timestamp with timezone information indicating the start
        time of the activity.

    elapsed_time : None or int
        Is None before a transformation method has been called.
        An integer indicating the total elapsed time of the activity in
        seconds.

    distance : None or float
        Is None before a transformation method has been called.
        A float indicating the total distance covered during the activity
        in kilometres.

    avg_speed : None or float
        Is None before a transformation method has been called.
        A float indicating the average speed over the activity in kph.

    max_speed : None or float
        Is None before a transformation method has been called.
        A float indicating the maximum speed recorded during the activity.

    avg_pace : None or str
        Is None before a transformation method has been called.
        A string indicating the average pace over the activity in min per km.

    elevation_gain : None or int
        Is None before a transformation method has been called.
        An integer indicating the total elevation gained during the activity
        in meters.

    elevation_loss : None or int
        Is None before a transformation method has been called.
        An integer indicating the total elevation lost during the activity
        in meters.

    avg_heart_rate : None or int
        Is None before a transformation method has been called.
        An integer indicating the average heart rate of the activity in bpm.

    max_heart_rate : None or int
        Is None before a transformation method has been called.
        An integer indicating the maximum heart rate of the activity in bpm.

    avg_cadence : None or int
        Is None before a transformation method has been called.
        An integer indicating the average cadence of the activity in either
        rpm or, in the case of a running activity, spm.

    max_cadence : None or int
        Is None before a transformation method has been called.
        An integer indicating the maximum cadence of the activity in either
        rpm or, in the case of a running activity, spm.

    records : pandas.DataFrame
        DataFrame holding the records extracted from the gpx, tcx or fit file.
        Records can be transformed with the methods `transform_records` or
        `full_transform`.

    km_splits : None or pandas.DataFrame
        Initially None. DataFrame holding the transformed and aggregated data
        grouped by kilometre splits once the `compute_km_splits` or
        `full_transform` method has been used.

    lap_splits : None or pandas.DataFrame
        Initially None. DataFrame holding the transformed and aggregated data
        grouped by lap splits once the `compute_lap_splits` or
        `full_transform` method has been used. Can only hold data if the file
        has lap data which is not the case for gpx files.

    Notes
    -----
    Any overall statistic that the statistics query returns as missing is
    stored as None rather than raising during conversion.
    """

    file_type: str
    sport: str
    records: pd.DataFrame
    is_transformed: bool = field(default=False, init=False)
    start_time: Optional[pd.Timestamp] = field(default=None, init=False)
    elapsed_time: Optional[int] = field(default=None, init=False)
    distance: Optional[float] = field(default=None, init=False)
    avg_speed: Optional[float] = field(default=None, init=False)
    max_speed: Optional[float] = field(default=None, init=False)
    avg_pace: Optional[str] = field(default=None, init=False)
    elevation_gain: Optional[int] = field(default=None, init=False)
    elevation_loss: Optional[int] = field(default=None, init=False)
    avg_heart_rate: Optional[int] = field(default=None, init=False)
    max_heart_rate: Optional[int] = field(default=None, init=False)
    avg_cadence: Optional[int] = field(default=None, init=False)
    max_cadence: Optional[int] = field(default=None, init=False)
    km_splits: Optional[pd.DataFrame] = field(default=None, init=False)
    lap_splits: Optional[pd.DataFrame] = field(default=None, init=False)

    def __str__(self):
        """Return a multi-line summary with the first rows of each table."""
        records_str = str(self.records.head())
        km_splits_str = (
            str(self.km_splits.head()) if self.km_splits is not None else None
        )
        lap_splits_str = (
            str(self.lap_splits.head()) if self.lap_splits is not None else None
        )
        return (
            "Activity(\n"
            f" is_transformed: {self.is_transformed}\n"
            f" file_type: {self.file_type}\n"
            f" sport: {self.sport}\n"
            f" start_time: {self.start_time}\n"
            f" elapsed_time: {self.elapsed_time}\n"
            f" distance: {self.distance}\n"
            f" avg_speed: {self.avg_speed}\n"
            f" max_speed: {self.max_speed}\n"
            f" avg_pace: {self.avg_pace}\n"
            f" elevation_gain: {self.elevation_gain}\n"
            f" elevation_loss: {self.elevation_loss}\n"
            f" avg_heart_rate: {self.avg_heart_rate}\n"
            f" max_heart_rate: {self.max_heart_rate}\n"
            f" avg_cadence: {self.avg_cadence}\n"
            f" max_cadence: {self.max_cadence}\n"
            f" records:\n{records_str}\n"
            f" km_splits:\n{km_splits_str}\n"
            f" lap_splits:\n{lap_splits_str}\n"
            ")"
        )

    @staticmethod
    def _stat(stats, column, cast=None):
        """Return the first value of ``stats[column]``, or None if missing.

        ``cast`` (e.g. ``int`` or ``float``) is applied to non-missing values
        only, so a NULL statistic no longer raises during conversion.
        """
        value = stats[column].at[0]
        if pd.isna(value):
            return None
        return value if cast is None else cast(value)

    def _transform_records_to_pyarrow(self):
        """Transform the records and fill in the overall statistics.

        Leaves ``records`` as a pyarrow Table; callers convert it back to
        pandas. Does nothing when the activity is already transformed.
        """
        if self.is_transformed:
            return
        # Work on locals and assign only once everything succeeded, so a
        # failure cannot leave ``records`` half-converted.
        table = tr.transform_data(pa.Table.from_pandas(self.records), self.sport)
        stats = tr.compute_overall_stats(table)
        self.records = table
        self.start_time = self._stat(stats, "start_time")
        self.elapsed_time = self._stat(stats, "elapsed_time", int)
        self.distance = self._stat(stats, "distance", float)
        self.avg_speed = self._stat(stats, "avg_speed", float)
        self.max_speed = self._stat(stats, "max_speed", float)
        self.avg_pace = self._stat(stats, "avg_pace")
        self.elevation_gain = self._stat(stats, "elevation_gain", int)
        self.elevation_loss = self._stat(stats, "elevation_loss", int)
        self.avg_heart_rate = self._stat(stats, "avg_heart_rate", int)
        self.max_heart_rate = self._stat(stats, "max_heart_rate", int)
        self.avg_cadence = self._stat(stats, "avg_cadence", int)
        self.max_cadence = self._stat(stats, "max_cadence", int)

    def transform_records(self):
        """Transforms the data in the records attributes to calculate distance,
        speed if absent and elevation difference, gradient and, in the case of
        running activities, pace.
        """
        if not self.is_transformed:
            self._transform_records_to_pyarrow()
            self.records = self.records.to_pandas(types_mapper=pd.ArrowDtype)
            self.is_transformed = True

    def compute_lap_splits(self):
        """If there is lap data in the records, updates the lap_splits to a
        DataFrame holding the transformed and aggregated data grouped by lap
        splits. Note: there is no lap data in gpx files.

        Does nothing until the records have been transformed.
        """
        if self.file_type != "GPX" and self.is_transformed:
            table = pa.Table.from_pandas(self.records)
            self.lap_splits = tr.compute_lap_data(table)

    def compute_km_splits(self):
        """Updates km_splits attribute to a DataFrame holding the transformed
        and aggregated data grouped by kilometre splits.

        Does nothing until the records have been transformed.
        """
        if self.is_transformed:
            table = pa.Table.from_pandas(self.records)
            self.km_splits = tr.compute_km_data(table)

    def full_transform(self):
        """Transforms data in records, computes km and lap splits.

        Also works when `transform_records` was called earlier: only the
        splits that are still missing are computed.
        """
        self.transform_records()
        if self.km_splits is None:
            self.compute_km_splits()
        if self.lap_splits is None:
            self.compute_lap_splits()
224
+
225
+
226
def extract_data(file_path: pathlib.Path) -> Activity:
    """Read a gpx, tcx or fit file into a new Activity.

    The extracted records are stored as a pandas.DataFrame in the
    ``records`` attribute and the sport as a string in ``sport``.

    Parameters
    ----------
    file_path : pathlib.Path
        Path to a file of type .gpx, .tcx or .fit. Can be gzipped.

    Returns
    -------
    gpxtractor.Activity

    Raises
    ------
    ValueError
        if the file type is not gpx, tcx or fit or their gzipped
        equivalent.
    """
    extensions = ut._get_file_extensions(file_path)

    if extensions in (".gpx", ".gpx.gz"):
        sport, records = ut._handle_gzipped_xml_files(
            file_path, extensions, xml_ext.get_sport_from_gpx, xml_ext.extract_gpx
        )
    elif extensions in (".tcx", ".tcx.gz"):
        sport, records = ut._handle_gzipped_xml_files(
            file_path, extensions, xml_ext.get_sport_from_tcx, xml_ext.extract_tcx
        )
    elif extensions == ".fit":
        sport, records = fit_ext.extract_fit(file_path)
    elif extensions == ".fit.gz":
        # The FIT extractor is handed the decompressed binary stream.
        with gzip.open(file_path, "rb") as gz:
            sport, records = fit_ext.extract_fit(gz)
    else:
        raise ValueError("Not a valid file type: Try a GPX, TCX or FIT file")

    return Activity(
        file_type=ut._get_file_type_from_extensions(extensions),
        sport=sport,
        records=records,
    )
@@ -0,0 +1,90 @@
1
+ import pathlib
2
+ import numpy as np
3
+ import pandas as pd
4
+ import fitdecode
5
+
6
+
7
+ def _convert_fit_coords_to_deg(coord):
8
+ """Convert semicircle 32-bit integer coordinate to degrees"""
9
+ return coord * (180 / 2**31)
10
+
11
+
12
def _generate_frame_from_fit(fit_file: pathlib.Path, selected_frames: list):
    """Yield the data frames of ``fit_file`` whose name is in ``selected_frames``.

    The file is read with CRC checking disabled; frames that are not data
    frames are skipped.
    """
    with fitdecode.FitReader(fit_file, check_crc=False) as reader:
        for frame in reader:
            if frame.frame_type != fitdecode.FIT_FRAME_DATA:
                continue
            if frame.name in selected_frames:
                yield frame
20
+
21
+
22
+ def _extract_str(frame, field_name: str):
23
+ if frame.has_field(field_name) and frame.get_value(field_name) is not None:
24
+ return frame.get_value(field_name)
25
+ return None
26
+
27
+
28
+ def _extract_value(frame, field_name: str, datatype):
29
+ if frame.has_field(field_name) and frame.get_value(field_name) is not None:
30
+
31
+ return datatype(frame.get_value(field_name))
32
+ return 0 if datatype is int else np.nan
33
+
34
+
35
def get_sport_from_fit(fit_content) -> str:
    """Return the ``sport`` field of the first session frame, or None.

    None is returned when the content holds no session frame.
    """
    first_session = next(_generate_frame_from_fit(fit_content, ["session"]), None)
    if first_session is None:
        return None
    return _extract_str(first_session, "sport")
39
+
40
+
41
def extract_fit(file_path: pathlib.Path) -> "tuple[str | None, pd.DataFrame]":
    """Extract the sport and the per-record data from a FIT file.

    Parameters
    ----------
    file_path : pathlib.Path or binary file object
        FIT source handed to the FIT reader.

    Returns
    -------
    tuple
        ``(sport, records)`` where ``sport`` is the value of the last session
        frame's ``sport`` field (None when the file has no session frame) and
        ``records`` is a DataFrame with the columns lap, timestamp, latitude,
        longitude, distance, speed, altitude, heart_rate and cadence.
    """
    # Stays None when no session frame exists; previously this raised
    # UnboundLocalError at the return statement.
    sport = None
    lap_number = 1
    laps = []
    times = []
    lats = []
    lons = []
    eles = []
    dists = []
    speeds = []
    hrs = []
    cads = []

    for frame in _generate_frame_from_fit(file_path, ["lap", "record", "session"]):
        if frame.name == "record":
            laps.append(lap_number)
            times.append(_extract_str(frame, "timestamp"))
            lats.append(_extract_value(frame, "position_lat", float))
            lons.append(_extract_value(frame, "position_long", float))
            eles.append(_extract_value(frame, "altitude", float))
            dists.append(_extract_value(frame, "distance", float))
            speeds.append(_extract_value(frame, "speed", float))
            hrs.append(_extract_value(frame, "heart_rate", int))
            cads.append(_extract_value(frame, "cadence", int))
        elif frame.name == "lap":
            # Every lap frame bumps the lap number given to later records.
            lap_number += 1
        elif frame.name == "session":
            sport = _extract_str(frame, "sport")

    # Semicircle values need up to 31 bits, more than float32 can hold
    # exactly, so convert in float64 and only then narrow to float32.
    latitudes = _convert_fit_coords_to_deg(np.array(lats, dtype=np.float64))
    longitudes = _convert_fit_coords_to_deg(np.array(lons, dtype=np.float64))

    return sport, pd.DataFrame(
        {
            "lap": np.array(laps, dtype=np.uint16),
            "timestamp": pd.to_datetime(times),
            "latitude": latitudes.astype(np.float32),
            "longitude": longitudes.astype(np.float32),
            "distance": np.array(dists, dtype=np.float32),
            "speed": np.array(speeds, dtype=np.float32),
            "altitude": np.array(eles, dtype=np.float32),
            "heart_rate": np.array(hrs, dtype=np.uint8),
            "cadence": np.array(cads, dtype=np.uint8),
        }
    )
@@ -0,0 +1,99 @@
1
+ import inspect
2
+ from importlib_resources import files
3
+ import pandas as pd
4
+ import pyarrow as pa
5
+ import duckdb
6
+
7
+
8
def get_var_name(var):
    """Return the name that ``var`` is bound to in the caller's local scope.

    The caller's locals are searched by identity and the first match is
    returned; IndexError is raised when no local refers to ``var``.
    """
    caller_locals = inspect.currentframe().f_back.f_locals
    matching_names = []
    for name, value in caller_locals.items():
        if value is var:
            matching_names.append(name)
    return matching_names[0]
11
+
12
+
13
def is_col_all_null(table: pa.Table, col: str) -> bool:
    """Tell whether every value of column ``col`` in ``table`` is null."""
    # Import the submodule explicitly: ``pa.compute`` is only reachable as an
    # attribute when something else has already imported pyarrow.compute.
    import pyarrow.compute as pc

    null_mask = pc.is_null(table.column(col))
    return pc.all(null_mask).as_py()
16
+
17
+
18
def add_empty_col_if_absent(arrow_table: pa.Table, col: str, datatype) -> pa.Table:
    """Return ``arrow_table`` with an all-null column ``col`` if it lacks one.

    When the column already exists the table is returned unchanged;
    otherwise a null column of type ``datatype`` is appended.
    """
    if col in arrow_table.schema.names:
        return arrow_table
    null_column = pa.nulls(len(arrow_table), type=datatype)
    return arrow_table.append_column(col, null_column)
24
+
25
+
26
def query_table(arrow_table: pa.Table, sql_file: str) -> pa.Table:
    """Run the packaged SQL template ``sql_file`` against ``arrow_table``.

    The template's ``table_name`` placeholder is filled with the name of the
    local variable holding the table, and the query result is returned as a
    pyarrow Table.
    """
    template = files("gpxtractor.sql").joinpath(sql_file).read_text()
    # The query refers to the table by the name of this function's local.
    sql_query = template.format(table_name=get_var_name(arrow_table))
    return duckdb.sql(sql_query).arrow().read_all()
31
+
32
+
33
def compute_distance_and_speed(arrow_table: pa.Table) -> pa.Table:
    """Compute distance and speed for ``arrow_table`` via the packaged SQL.

    The haversine SQL definition is executed first, then the
    ``compute_distance_and_speed.sql`` query is run on the table.
    """
    sql_dir = files("gpxtractor.sql")
    duckdb.sql(sql_dir.joinpath("haversine_formula.sql").read_text())
    return query_table(arrow_table, "compute_distance_and_speed.sql")
39
+
40
+
41
def compute_speed(arrow_table: pa.Table) -> pa.Table:
    """Run the ``compute_speed.sql`` query on ``arrow_table``."""
    return query_table(arrow_table, "compute_speed.sql")
44
+
45
+
46
def preprocess_data(arrow_table: pa.Table) -> pa.Table:
    """Run the ``preprocess_data.sql`` query on ``arrow_table``."""
    return query_table(arrow_table, "preprocess_data.sql")
49
+
50
+
51
def preprocess_running_data(arrow_table: pa.Table) -> pa.Table:
    """Run the ``preprocess_running_data.sql`` query on ``arrow_table``."""
    return query_table(arrow_table, "preprocess_running_data.sql")
54
+
55
+
56
def transform_data(arrow_table: pa.Table, sport: str) -> pa.Table:
    """Complete and preprocess the raw records table.

    Steps, in order:
    1. add an all-null column for every required column that is absent;
    2. compute distance and speed when distance is absent or entirely null,
       otherwise compute only speed when speed is absent or entirely null;
    3. apply the running preprocessing when ``sport`` is "running", else the
       generic preprocessing.
    """
    required_columns = (
        ("timestamp", pa.timestamp("us")),
        ("latitude", pa.float32()),
        ("longitude", pa.float32()),
        ("altitude", pa.float32()),
        ("heart_rate", pa.uint8()),
        ("cadence", pa.uint8()),
        ("lap", pa.uint16()),
    )
    for name, dtype in required_columns:
        arrow_table = add_empty_col_if_absent(arrow_table, name, dtype)

    def _has_no_data(column):
        # True when the column is missing or holds nothing but nulls.
        return column not in arrow_table.schema.names or is_col_all_null(
            arrow_table, column
        )

    if _has_no_data("distance"):
        arrow_table = compute_distance_and_speed(arrow_table)
    elif _has_no_data("speed"):
        arrow_table = compute_speed(arrow_table)

    preprocess = preprocess_running_data if sport == "running" else preprocess_data
    return preprocess(arrow_table)
82
+
83
+
84
def compute_km_data(arrow_table: pa.Table) -> pd.DataFrame:
    """Run ``km_data_query.sql`` on the table and return a pandas DataFrame
    backed by Arrow dtypes.
    """
    result = query_table(arrow_table, "km_data_query.sql")
    return result.to_pandas(types_mapper=pd.ArrowDtype)
88
+
89
+
90
def compute_lap_data(arrow_table: pa.Table) -> pd.DataFrame:
    """Run ``lap_data_query.sql`` on the table and return a pandas DataFrame
    backed by Arrow dtypes.
    """
    result = query_table(arrow_table, "lap_data_query.sql")
    return result.to_pandas(types_mapper=pd.ArrowDtype)
94
+
95
+
96
def compute_overall_stats(arrow_table: pa.Table):
    """Run ``overall_stats.sql`` on the table and return a pandas DataFrame
    backed by Arrow dtypes.
    """
    result = query_table(arrow_table, "overall_stats.sql")
    return result.to_pandas(types_mapper=pd.ArrowDtype)
gpxtractor/_utils.py ADDED
@@ -0,0 +1,32 @@
1
+ import pathlib
2
+ import gzip
3
+
4
+
5
+ def _get_file_extensions(file_path: pathlib.Path) -> str:
6
+ path = pathlib.Path(file_path)
7
+ return "".join(path.suffixes)
8
+
9
+
10
+ def _get_file_type_from_extensions(extensions) -> str:
11
+ match extensions:
12
+ case ".gpx" | ".gpx.gz":
13
+ return "GPX"
14
+ case ".tcx" | ".tcx.gz":
15
+ return "TCX"
16
+ case ".fit" | ".fit.gz":
17
+ return "FIT"
18
+
19
+
20
+ def _handle_gzipped_xml_files(
21
+ file_path: pathlib.Path, extensions, sport_func, extraction_func
22
+ ):
23
+ is_gzipped = ".gz" in extensions
24
+ if is_gzipped:
25
+ with gzip.open(file_path, "rt") as gz:
26
+ sport = sport_func(gz)
27
+ with gzip.GzipFile(file_path, "r") as gz:
28
+ return sport, extraction_func(gz)
29
+ else:
30
+ with open(file_path, "r") as file:
31
+ sport = sport_func(file)
32
+ return sport, extraction_func(file_path)
@@ -0,0 +1,165 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from lxml import etree
4
+ from typing import Union, Type
5
+
6
+
7
+ def _extract_value(
8
+ etree_element: etree.Element, datatype: Union[Type[int], Type[float]]
9
+ ) -> Union[int, float]:
10
+ if etree_element is None:
11
+ return 0 if datatype is int else np.nan
12
+ return datatype(etree_element.text)
13
+
14
+
15
def _get_tcx_cadence(trkpt_ext: etree.Element, ns: dict) -> int:
    """Return the cadence stored in a TCX trackpoint extension.

    ``RunCadence`` takes precedence over ``Cadence``; 0 is returned when
    neither element is present.
    """
    for tag in ("tcxtpx:RunCadence", "tcxtpx:Cadence"):
        node = trkpt_ext.find(tag, ns)
        if node is not None:
            return int(node.text)
    return 0
23
+
24
+
25
def get_sport_from_gpx(open_file):
    """Return the lower-cased content of the first ``<type>`` tag, or None.

    ``open_file`` is iterated line by line. When the closing ``</type>`` is
    not on the same line, the rest of the line is used; previously the
    failed search result (-1) silently dropped the last character.
    """
    open_tag = "<type>"
    close_tag = "</type>"
    for line in open_file:
        start = line.find(open_tag)
        if start == -1:
            continue
        start += len(open_tag)
        end = line.find(close_tag, start)
        value = line[start:] if end == -1 else line[start:end]
        return value.strip().lower()
    return None
32
+
33
+
34
def get_sport_from_tcx(open_file):
    """Return the lower-cased ``Sport`` attribute of the first ``<Activity``
    line that carries one, or None.

    ``open_file`` is iterated line by line. Lines that contain ``<Activity``
    but no ``Sport="`` attribute are skipped; previously the failed search
    result was used as an index and a fragment of the line was returned.
    """
    marker = 'Sport="'
    for line in open_file:
        if "<Activity" not in line:
            continue
        start = line.find(marker)
        if start == -1:
            continue
        start += len(marker)
        end = line.find('"', start)
        value = line[start:] if end == -1 else line[start:end]
        return value.strip().lower()
    return None
41
+
42
+
43
def extract_gpx(gpx_file: str) -> pd.DataFrame:
    """Extract the track points of a GPX 1.1 file into a DataFrame.

    Parameters
    ----------
    gpx_file : str or file object
        Source handed to ``lxml.etree.iterparse``.

    Returns
    -------
    pandas.DataFrame
        One row per ``trkpt`` with the columns timestamp, latitude,
        longitude, altitude, heart_rate and cadence. Missing altitude is
        NaN; missing heart rate or cadence is 0; a missing time is NaT.
    """
    ns = {
        "gpx": "http://www.topografix.com/GPX/1/1",
        "gpxtpx": "http://www.garmin.com/xmlschemas/TrackPointExtension/v1",
    }

    times = []
    lats = []
    lons = []
    eles = []
    hrs = []
    cads = []

    for _event, trkpt in etree.iterparse(
        gpx_file, events=("end",), tag="{http://www.topografix.com/GPX/1/1}trkpt"
    ):
        time_element = trkpt.find("gpx:time", ns)
        times.append(time_element.text if time_element is not None else None)
        lats.append(float(trkpt.attrib["lat"]))
        lons.append(float(trkpt.attrib["lon"]))
        eles.append(_extract_value(trkpt.find("gpx:ele", ns), float))

        # Append a heart rate and a cadence for EVERY point. Skipping points
        # without <extensions> made these lists shorter than the others, so
        # building the DataFrame failed.
        extensions = trkpt.find("gpx:extensions", ns)
        if extensions is None:
            hr_element = None
            cad_element = None
        else:
            hr_element = extensions.find("gpxtpx:TrackPointExtension/gpxtpx:hr", ns)
            cad_element = extensions.find("gpxtpx:TrackPointExtension/gpxtpx:cad", ns)
        hrs.append(_extract_value(hr_element, int))
        cads.append(_extract_value(cad_element, int))

        # Everything needed has been copied out; release the element's content
        # so memory does not grow with the size of the file.
        trkpt.clear()

    return pd.DataFrame(
        {
            "timestamp": pd.to_datetime(times),
            "latitude": np.array(lats, dtype=np.float32),
            "longitude": np.array(lons, dtype=np.float32),
            "altitude": np.array(eles, dtype=np.float32),
            "heart_rate": np.array(hrs, dtype=np.uint8),
            "cadence": np.array(cads, dtype=np.uint8),
        }
    )
93
+
94
+
95
def extract_tcx(tcx_file: str) -> pd.DataFrame:
    """Extract the track points of a TCX file into a DataFrame.

    Parameters
    ----------
    tcx_file : str or file object
        Source handed to ``lxml.etree.iterparse``.

    Returns
    -------
    pandas.DataFrame
        One row per ``Trackpoint`` with the columns lap, timestamp,
        latitude, longitude, distance, speed, altitude, heart_rate and
        cadence. Laps are numbered from 1 in document order. Missing float
        values are NaN; missing heart rate or cadence is 0; a missing time
        is NaT.
    """
    ns = {
        "tcx": "http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2",
        "tcxtpx": "http://www.garmin.com/xmlschemas/ActivityExtension/v2",
    }

    lap_number = 0
    laps = []
    times = []
    lats = []
    lons = []
    eles = []
    dists = []
    speeds = []
    hrs = []
    cads = []

    for _event, lap in etree.iterparse(
        tcx_file,
        events=("end",),
        tag="{http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2}Lap",
    ):
        lap_number += 1
        for trkpt in lap.findall(".//tcx:Trackpoint", ns):
            laps.append(lap_number)
            time_element = trkpt.find("tcx:Time", ns)
            times.append(time_element.text if time_element is not None else None)
            lats.append(
                _extract_value(
                    trkpt.find("tcx:Position/tcx:LatitudeDegrees", ns), float
                )
            )
            lons.append(
                _extract_value(
                    trkpt.find("tcx:Position/tcx:LongitudeDegrees", ns), float
                )
            )
            eles.append(_extract_value(trkpt.find("tcx:AltitudeMeters", ns), float))
            dists.append(_extract_value(trkpt.find("tcx:DistanceMeters", ns), float))
            hrs.append(
                _extract_value(trkpt.find("tcx:HeartRateBpm/tcx:Value", ns), int)
            )

            # Append a speed and a cadence for EVERY point. Skipping points
            # without a TPX extension made these lists shorter than the
            # others, so building the DataFrame failed.
            extensions = trkpt.find("tcx:Extensions", ns)
            tpx = extensions.find("tcxtpx:TPX", ns) if extensions is not None else None
            if tpx is None:
                speeds.append(np.nan)
                cads.append(0)
            else:
                speeds.append(_extract_value(tpx.find("tcxtpx:Speed", ns), float))
                cads.append(_get_tcx_cadence(tpx, ns))

        # All track points of this lap have been copied out; release the
        # element's content so memory does not grow with the size of the file.
        lap.clear()

    return pd.DataFrame(
        {
            "lap": np.array(laps, dtype=np.uint16),
            "timestamp": pd.to_datetime(times),
            "latitude": np.array(lats, dtype=np.float32),
            "longitude": np.array(lons, dtype=np.float32),
            "distance": np.array(dists, dtype=np.float32),
            "speed": np.array(speeds, dtype=np.float32),
            "altitude": np.array(eles, dtype=np.float32),
            "heart_rate": np.array(hrs, dtype=np.uint8),
            "cadence": np.array(cads, dtype=np.uint8),
        }
    )
gpxtractor/cli.py ADDED
@@ -0,0 +1,75 @@
1
+ #!/usr/bin/env python
2
+
3
+ import argparse
4
+ import visidata
5
+
6
+ import gpxtractor
7
+ from gpxtractor import extract_data
8
+
9
+
10
def parse_args():
    """Build the command-line parser and parse ``sys.argv``.

    Returns
    -------
    tuple
        ``(parser, args)``: the parser is returned as well so the caller can
        report usage errors through ``parser.error``.
    """
    parser = argparse.ArgumentParser(
        description="Display a GPX, TCX or FIT file as a dataframe in visidata."
    )
    # Optional at the parser level so that ``--version`` works without a file.
    parser.add_argument(
        "file",
        type=str,
        nargs="?",
        help="Path to a .gpx, .tcx or .fit file (optionally gzipped).",
    )
    parser.add_argument(
        "--raw",
        action="store_true",
        help="Display the data from the file with no transformation in a table.",
    )
    parser.add_argument(
        "--sport",
        action="store_true",
        help="Print the sport or activity type of the file.",
    )
    parser.add_argument(
        "--kms",
        action="store_true",
        help="Display aggregated stats grouped by kilometer.",
    )
    parser.add_argument(
        "--laps",
        action="store_true",
        help="Display aggregated stats grouped by lap.",
    )
    parser.add_argument(
        "--version",
        action="store_true",
        help="Display version and exit.",
    )

    return parser, parser.parse_args()
42
+
43
+
44
def main():
    """Entry point of the command-line interface.

    Handles ``--version`` first, then requires a file. ``--raw`` shows the
    untransformed records and ``--sport`` prints the sport; both return
    before any transformation, since neither needs it. Otherwise the activity
    is fully transformed and the km splits, lap splits or transformed records
    are displayed in visidata.
    """
    parser, args = parse_args()

    if args.version:
        print(f"gpxtractor v{gpxtractor.__version__}")
        return

    if not args.file:
        parser.error("the following arguments are required: file")

    activity = extract_data(file_path=args.file)
    if args.raw:
        visidata.vd.view_pandas(df=activity.records)
        return

    if args.sport:
        # The sport is known right after extraction; no transform required.
        print(activity.sport)
        return

    activity.full_transform()

    if args.kms:
        visidata.vd.view_pandas(df=activity.km_splits)
    elif args.laps:
        if activity.lap_splits is not None:
            visidata.vd.view_pandas(df=activity.lap_splits)
        else:
            print("No laps in file")
    else:
        visidata.vd.view_pandas(df=activity.records)
73
+
74
+ if __name__ == "__main__":
75
+ main()
@@ -0,0 +1,49 @@
1
-- Derives cumulative distance and per-record speed from consecutive GPS fixes.
-- This file is a template: the table placeholder in stage_1 is substituted by
-- Python str.format, so no other curly braces may appear anywhere in it.
-- Requires the haversine(lat1, lon1, lat2, lon2) macro to exist beforehand.
WITH stage_1 AS (
    -- Pair every record with the time and position of the previous record.
    SELECT
        ROW_NUMBER() OVER (ORDER BY timestamp) AS index,
        timestamp,
        latitude,
        longitude,
        LAG(timestamp, 1) OVER (ORDER BY timestamp) AS prev_time,
        LAG(latitude, 1) OVER (ORDER BY timestamp) AS prev_lat,
        LAG(longitude, 1) OVER (ORDER BY timestamp) AS prev_long,
        altitude,
        heart_rate,
        cadence,
        lap,
    FROM {table_name}
    ORDER BY timestamp
), stage_2 AS (
    -- Distance and time elapsed since the previous record (0 for the first).
    SELECT
        timestamp,
        latitude,
        longitude,
        CASE
            WHEN index == 1
                THEN 0.0::FLOAT
            ELSE haversine(prev_lat, prev_long, latitude, longitude)::FLOAT END AS marginal_distance,
        -- INTEGER rather than SMALLINT: a gap of more than 32767 seconds
        -- (a long pause) must not overflow the cast.
        CASE
            WHEN index == 1
                THEN 0::INTEGER
            ELSE date_diff('second', prev_time, timestamp)::INTEGER END AS marginal_time,
        altitude,
        heart_rate,
        cadence,
        lap,
    FROM stage_1
    ORDER BY timestamp
)

SELECT
    timestamp,
    latitude,
    longitude,
    ROUND(SUM(marginal_distance) OVER (ORDER BY timestamp), 2)::FLOAT AS distance, -- in meters
    -- NULLIF keeps the speed NULL when no time has elapsed (first record or
    -- duplicate timestamps) instead of dividing by zero.
    (marginal_distance / NULLIF(marginal_time, 0))::FLOAT AS speed, -- in meters per second
    altitude, -- in meters
    heart_rate,
    cadence, -- strides (2 steps) per minute
    lap,
FROM stage_2
ORDER BY timestamp;
@@ -0,0 +1,49 @@
1
-- Derives per-record speed from the distance column already in the data.
-- This file is a template: the table placeholder in stage_1 is substituted by
-- Python str.format, so no other curly braces may appear anywhere in it.
WITH stage_1 AS (
    -- Pair every record with the time and distance of the previous record.
    SELECT
        ROW_NUMBER() OVER (ORDER BY timestamp) AS index,
        timestamp,
        latitude,
        longitude,
        LAG(timestamp, 1) OVER (ORDER BY timestamp) AS prev_time,
        distance,
        LAG(distance, 1) OVER (ORDER BY timestamp) AS prev_dist,
        altitude,
        heart_rate,
        cadence,
        lap,
    FROM {table_name}
    ORDER BY timestamp
), stage_2 AS (
    -- Distance and time elapsed since the previous record (0 for the first).
    SELECT
        timestamp,
        latitude,
        longitude,
        CASE
            WHEN index == 1
                THEN 0.0::FLOAT
            ELSE (distance - prev_dist)::FLOAT END AS marginal_distance,
        -- INTEGER rather than SMALLINT: a gap of more than 32767 seconds
        -- (a long pause) must not overflow the cast.
        CASE
            WHEN index == 1
                THEN 0::INTEGER
            ELSE date_diff('second', prev_time, timestamp)::INTEGER END AS marginal_time,
        altitude,
        heart_rate,
        cadence,
        lap,
    FROM stage_1
    ORDER BY timestamp
)

SELECT
    timestamp,
    latitude,
    longitude,
    ROUND(SUM(marginal_distance) OVER (ORDER BY timestamp), 2)::FLOAT AS distance, -- in meters
    -- NULLIF keeps the speed NULL when no time has elapsed (first record or
    -- duplicate timestamps) instead of dividing by zero.
    (marginal_distance / NULLIF(marginal_time, 0))::FLOAT AS speed, -- in meters per second
    altitude, -- in meters
    heart_rate,
    cadence, -- strides (2 steps) per minute
    lap,
FROM stage_2
ORDER BY timestamp;
@@ -0,0 +1,8 @@
1
-- Great-circle distance between two points given in degrees, computed with
-- the haversine formula and a sphere radius of 6371000 (metres).
CREATE OR REPLACE FUNCTION haversine(lat1 DOUBLE, lon1 DOUBLE, lat2 DOUBLE, lon2 DOUBLE) AS
    2 * 6371000 * ASIN(
        SQRT(
            -- latitude term
            POW(SIN(RADIANS(lat2 - lat1) / 2), 2) +
            -- longitude term, scaled by the cosines of both latitudes
            COS(RADIANS(lat1)) * COS(RADIANS(lat2)) *
            POW(SIN(RADIANS(lon2 - lon1) / 2), 2)
        )
    );
@@ -0,0 +1,92 @@
1
-- Aggregates the transformed records into one row per kilometre split.
-- This file is a template: the table placeholder below is substituted by
-- Python str.format, so no other curly braces may appear anywhere in it.
-- NOTE(review): assumes distance is expressed in kilometres and diff_alt is
-- the altitude change since the previous record - confirm against the
-- preprocessing queries.
WITH data_with_km_col AS (
    -- Split number: records with distance in [0, 1) belong to km 1, etc.
    SELECT
        *,
        TRUNC(distance) + 1 AS km
    FROM {table_name}
    ORDER BY timestamp
), km_data AS (
    SELECT
        km::SMALLINT AS km,
        MIN(timestamp) AS start_time,
        MAX(timestamp) AS end_time,
        MAX(distance) AS max_distance,
        ROUND(SUM(CASE
            WHEN diff_alt > 0
                THEN diff_alt
            ELSE 0 END))::USMALLINT AS elevation_gain,
        ABS(ROUND(SUM(CASE
            WHEN diff_alt < 0
                THEN diff_alt
            ELSE 0 END)))::USMALLINT AS elevation_loss,
        ROUND(AVG(heart_rate))::UTINYINT AS avg_hr,
        MAX(heart_rate)::UTINYINT AS max_hr,
        ROUND(AVG(cadence))::UTINYINT AS avg_cadence,
        MAX(cadence)::UTINYINT AS max_cadence
    FROM data_with_km_col
    GROUP BY km
    ORDER BY km
), km_data_stage_2 AS (
    -- A split starts where the previous split ended.
    SELECT
        km,
        start_time,
        end_time,
        LAG(end_time, 1) OVER (ORDER BY km) AS km_start_time,
        LAG(max_distance, 1) OVER (ORDER BY km) AS km_start_distance,
        max_distance,
        elevation_gain,
        elevation_loss,
        avg_hr,
        max_hr,
        avg_cadence,
        max_cadence
    FROM km_data
    ORDER BY km
), km_data_stage_3 AS (
    SELECT
        km,
        CASE
            WHEN km > 1
                THEN date_diff('second', km_start_time , end_time)
            ELSE date_diff('second', start_time, end_time) END AS elapsed_time,
        CASE
            WHEN km > 1
                THEN max_distance - km_start_distance
            ELSE max_distance END AS distance_km,
        CASE
            WHEN km > 1
                THEN (km_start_distance + distance_km / 2)
            ELSE (distance_km / 2) END AS midpoint,
        elevation_gain,
        elevation_loss,
        avg_hr,
        max_hr,
        avg_cadence,
        max_cadence
    FROM km_data_stage_2
    ORDER BY km
)

SELECT
    km,
    distance_km AS distance,
    CASE
        WHEN elapsed_time == 0
            THEN 0
        ELSE (distance_km / elapsed_time * 3600) END AS avg_speed_kph,
    -- Pace as mm:ss per km. The seconds are floored before the cast because
    -- casting a fractional value to INT rounds, which could print ":60".
    CASE
        WHEN avg_speed_kph == 0
            THEN NULL
        ELSE printf(
            '%02d:%02d',
            CAST(FLOOR(60 / avg_speed_kph) AS INT),
            CAST(FLOOR((60 / avg_speed_kph - FLOOR(60 / avg_speed_kph)) * 60) AS INT)
        ) END AS avg_pace,
    midpoint AS midpoint,
    elevation_gain,
    elevation_loss,
    avg_hr,
    max_hr,
    avg_cadence,
    max_cadence
FROM km_data_stage_3
ORDER BY km;
@@ -0,0 +1,86 @@
1
-- Aggregates the transformed records into one row per lap.
-- This file is a template: the table placeholder below is substituted by
-- Python str.format, so no other curly braces may appear anywhere in it.
-- NOTE(review): assumes distance is expressed in kilometres and diff_alt is
-- the altitude change since the previous record - confirm against the
-- preprocessing queries.
WITH lap_data AS (
    SELECT
        lap AS lap,
        MIN(timestamp) AS start_time,
        MAX(timestamp) AS end_time,
        MAX(distance) AS max_distance,
        ROUND(SUM(CASE
            WHEN diff_alt > 0
                THEN diff_alt
            ELSE 0 END))::USMALLINT AS elevation_gain,
        ABS(ROUND(SUM(CASE
            WHEN diff_alt < 0
                THEN diff_alt
            ELSE 0 END)))::USMALLINT AS elevation_loss,
        ROUND(AVG(heart_rate))::UTINYINT AS avg_hr,
        MAX(heart_rate)::UTINYINT AS max_hr,
        ROUND(AVG(cadence))::UTINYINT AS avg_cadence,
        MAX(cadence)::UTINYINT AS max_cadence
    FROM {table_name}
    GROUP BY lap
    ORDER BY lap
), stage_2 AS (
    -- A lap starts where the previous lap ended.
    SELECT
        lap,
        start_time,
        end_time,
        LAG(end_time, 1) OVER (ORDER BY lap) AS lap_start_time,
        LAG(max_distance, 1) OVER (ORDER BY lap) AS lap_start_distance,
        max_distance,
        elevation_gain,
        elevation_loss,
        avg_hr,
        max_hr,
        avg_cadence,
        max_cadence
    FROM lap_data
    ORDER BY lap
), stage_3 AS (
    SELECT
        lap,
        CASE
            WHEN lap > 1
                THEN date_diff('second', lap_start_time , end_time)
            ELSE date_diff('second', start_time, end_time) END AS elapsed_time,
        CASE
            WHEN lap > 1
                THEN max_distance - lap_start_distance
            ELSE max_distance END AS distance_km,
        CASE
            WHEN lap > 1
                THEN (lap_start_distance + distance_km / 2)
            ELSE (distance_km / 2) END AS midpoint,
        elevation_gain,
        elevation_loss,
        avg_hr,
        max_hr,
        avg_cadence,
        max_cadence
    FROM stage_2
    ORDER BY lap
)

SELECT
    lap,
    distance_km AS distance,
    CASE
        WHEN elapsed_time == 0
            THEN 0
        ELSE (distance_km / elapsed_time * 3600) END AS avg_speed_kph,
    -- Pace as mm:ss per km. The seconds are floored before the cast because
    -- casting a fractional value to INT rounds, which could print ":60".
    CASE
        WHEN avg_speed_kph == 0
            THEN NULL
        ELSE printf(
            '%02d:%02d',
            CAST(FLOOR(60 / avg_speed_kph) AS INT),
            CAST(FLOOR((60 / avg_speed_kph - FLOOR(60 / avg_speed_kph)) * 60) AS INT)
        ) END AS avg_pace,
    midpoint AS midpoint,
    elevation_gain,
    elevation_loss,
    avg_hr,
    max_hr,
    avg_cadence,
    max_cadence
FROM stage_3
ORDER BY lap;
@@ -0,0 +1,42 @@
1
+ WITH overall_stats_stage1 AS ( -- single-row aggregate over every record of the input table
2
+ SELECT
3
+ MIN(timestamp) AS start_time,
4
+ MAX(timestamp) AS end_time,
5
+ date_diff('second', start_time, end_time)::INT as elapsed_time, -- whole seconds between first and last record
6
+ ROUND(MAX(distance), 2)::FLOAT AS total_distance,
7
+ ROUND((total_distance / NULLIF(elapsed_time, 0)) * 3600, 2)::FLOAT AS avg_speed, -- distance units per hour; NULL when elapsed_time is 0
8
+ MAX(speed)::FLOAT AS max_speed,
9
+ printf( -- pace as 'MM:SS' per distance unit; NULL when avg_speed is 0 or NULL
10
+ '%02d:%02d',
11
+ CAST(ROUND(3600 / NULLIF(avg_speed, 0)) AS INT) // 60, -- whole minutes of the pace rounded to the nearest second
12
+ CAST(ROUND(3600 / NULLIF(avg_speed, 0)) AS INT) % 60 -- leftover seconds, always 0-59 (never prints 'MM:60')
13
+ ) AS avg_pace,
14
+ ROUND(SUM(CASE -- sum of the positive altitude steps only
15
+ WHEN diff_alt > 0
16
+ THEN diff_alt
17
+ ELSE 0 END))::INTEGER AS elevation_gain,
18
+ ABS(ROUND(SUM(CASE -- sum of the negative altitude steps, reported as a positive number
19
+ WHEN diff_alt < 0
20
+ THEN diff_alt
21
+ ELSE 0 END)))::INTEGER AS elevation_loss,
22
+ ROUND(AVG(heart_rate))::UTINYINT as avg_heart_rate,
23
+ MAX(heart_rate) as max_heart_rate,
24
+ ROUND(AVG(cadence))::UTINYINT AS avg_cadence,
25
+ MAX(cadence) AS max_cadence,
26
+ FROM {table_name}
27
+ )
28
+
29
+ SELECT
30
+ start_time,
31
+ elapsed_time,
32
+ total_distance AS distance,
33
+ avg_speed,
34
+ max_speed,
35
+ avg_pace,
36
+ elevation_gain,
37
+ elevation_loss,
38
+ avg_heart_rate,
39
+ max_heart_rate,
40
+ avg_cadence,
41
+ max_cadence,
42
+ FROM overall_stats_stage1;
@@ -0,0 +1,50 @@
1
+ WITH stage_1 AS ( -- pair every record with the previous record's altitude and distance
2
+ SELECT
3
+ timestamp,
4
+ latitude,
5
+ longitude,
6
+ altitude,
7
+ LAG(altitude, 1) OVER (ORDER BY timestamp) AS prev_alt, -- NULL on the first record
8
+ distance,
9
+ LAG(distance, 1) OVER (ORDER BY timestamp) AS prev_dist, -- NULL on the first record
10
+ speed,
11
+ heart_rate::UTINYINT AS heart_rate, -- narrowed to an unsigned 8-bit integer
12
+ cadence::UTINYINT AS cadence, -- narrowed to an unsigned 8-bit integer
13
+ lap::USMALLINT as lap, -- narrowed to an unsigned 16-bit integer
14
+ FROM {table_name}
15
+ ORDER BY timestamp
16
+ ), stage_2 AS ( -- number the records and turn the lagged values into per-record deltas
17
+ SELECT
18
+ ROW_NUMBER() OVER (ORDER BY timestamp) AS index, -- 1-based position in timestamp order
19
+ timestamp,
20
+ latitude,
21
+ longitude,
22
+ altitude,
23
+ altitude - prev_alt AS diff_alt, -- altitude change since the previous record
24
+ distance,
25
+ distance - prev_dist AS diff_dist, -- distance covered since the previous record
26
+ speed AS speed_mps, -- NOTE(review): name implies metres per second; confirm against the extraction code
27
+ heart_rate,
28
+ cadence,
29
+ lap,
30
+ FROM stage_1
31
+ ORDER BY index
32
+ )
33
+
34
+ SELECT
35
+ timestamp,
36
+ latitude,
37
+ longitude,
38
+ ROUND(altitude, 2)::FLOAT AS altitude, -- in meters
39
+ diff_alt,
40
+ CASE
41
+ WHEN index > 1
42
+ THEN ROUND((diff_alt / diff_dist) * 100, 2)::FLOAT -- rise over run; diff_dist is still in the input distance unit here
43
+ ELSE 'NaN'::FLOAT END AS gradient, -- as percentage
44
+ (distance / 1000)::FLOAT AS distance, -- in km
45
+ ROUND(speed_mps * 3.6, 2)::FLOAT AS speed, -- in kph
46
+ heart_rate,
47
+ cadence, -- in revolutions per minute (unmodified)
48
+ lap,
49
+ FROM stage_2
50
+ ORDER BY index;
@@ -0,0 +1,58 @@
1
+ WITH stage_1 AS ( -- pair every record with the previous record's altitude and distance
2
+ SELECT
3
+ timestamp,
4
+ latitude,
5
+ longitude,
6
+ altitude,
7
+ LAG(altitude, 1) OVER (ORDER BY timestamp) AS prev_alt, -- NULL on the first record
8
+ distance,
9
+ LAG(distance, 1) OVER (ORDER BY timestamp) AS prev_dist, -- NULL on the first record
10
+ speed,
11
+ heart_rate::UTINYINT AS heart_rate,
12
+ (cadence * 2)::UTINYINT AS cadence, -- doubled so the final column is in steps per minute
13
+ lap::USMALLINT as lap,
14
+ FROM {table_name}
15
+ ORDER BY timestamp
16
+ ), stage_2 AS ( -- number the records and turn the lagged values into per-record deltas
17
+ SELECT
18
+ ROW_NUMBER() OVER (ORDER BY timestamp) AS index, -- 1-based position in timestamp order
19
+ timestamp,
20
+ latitude,
21
+ longitude,
22
+ altitude,
23
+ altitude - prev_alt AS diff_alt,
24
+ distance,
25
+ distance - prev_dist AS diff_dist,
26
+ speed AS speed_mps,
27
+ heart_rate,
28
+ cadence,
29
+ lap,
30
+ FROM stage_1
31
+ ORDER BY index
32
+ )
33
+
34
+ SELECT
35
+ timestamp,
36
+ latitude,
37
+ longitude,
38
+ ROUND(altitude, 2)::FLOAT AS altitude, -- in meters
39
+ diff_alt,
40
+ CASE
41
+ WHEN index > 1
42
+ THEN ROUND((diff_alt / diff_dist) * 100, 2)::FLOAT
43
+ ELSE 'NaN'::FLOAT END AS gradient, -- as percentage
44
+ (distance / 1000)::FLOAT AS distance, -- in km
45
+ ROUND(speed_mps * 3.6, 2)::FLOAT AS speed, -- in kph
46
+ CASE -- pace as 'MM:SS' per km, derived from the kph speed alias above
47
+ WHEN speed == 0 OR isnan(speed)
48
+ THEN NULL
49
+ ELSE printf(
50
+ '%02d:%02d',
51
+ CAST(ROUND(3600 / speed) AS INT) // 60,
52
+ CAST(ROUND(3600 / speed) AS INT) % 60)
53
+ END AS pace, -- minutes and seconds share one rounded total, so seconds stay within 0-59
54
+ heart_rate,
55
+ cadence, -- in steps per minute
56
+ lap,
57
+ FROM stage_2
58
+ ORDER BY index;
@@ -0,0 +1,79 @@
1
+ Metadata-Version: 2.4
2
+ Name: gpxtractor
3
+ Version: 0.1.0
4
+ Summary: GPX, TCX and FIT data extraction for Python
5
+ Author-email: Charlie Stapylton <278091496+c-stap@users.noreply.github.com>
6
+ Requires-Python: >=3.13
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: lxml>=6.0.2
10
+ Requires-Dist: numpy>=2.3.1
11
+ Requires-Dist: pyarrow>=20.0.0
12
+ Requires-Dist: pandas>=2.3.1
13
+ Requires-Dist: duckdb>=1.4.4
14
+ Provides-Extra: cli
15
+ Requires-Dist: visidata; extra == "cli"
16
+ Dynamic: license-file
17
+
18
+ # gpxtractor
19
+
20
+ **GPX, TCX and FIT data extraction for Python**
21
+
22
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
23
+
24
+ ## Description
25
+
26
+
27
+ ## Features
28
+
29
+
30
+ ## Installation
31
+
32
+ ```bash
33
+ git clone
34
+ cd gpxtractor
35
+ pip install .
36
+ ```
37
+
38
+ ## Usage Example
39
+ Use the `gpxtractor.extract_data` function that returns a gpxtractor.Activity instance.
40
+
41
+ ```python
42
+ import gpxtractor
43
+
44
+ activity = gpxtractor.extract_data("your-gpx-tcx-or-fit_file.gpx")
45
+
46
+ print(activity.sport) # Output: name of the sport in the file as a string
47
+
48
+ ```
49
+
50
+ The `records` attribute is a `pandas.DataFrame` holding the records extracted from the file
51
+ with the `gpxtractor.extract_data` function, so the usual `pandas.DataFrame` methods can be applied:
52
+
53
+ ```python
54
+ print(activity.records.head())
55
+ ```
56
+
57
+ Once an `Activity` instance has been created with the `extract_data` function, the method
58
+ `transform_records` can be used to calculate distance and speed if missing from the file, as well as
59
+ the incremental elevation difference, gradient and, in the case of running activities, pace.
60
+
61
+ ```python
62
+ activity.transform_records()
63
+ print(activity.records.head())
64
+ ```
65
+
66
+ Once the records have been transformed with `transform_records`, the following two
67
+ methods can be used to calculate aggregated data for kilometre and lap splits.
68
+
69
+ ```python
70
+ activity.compute_km_splits()
71
+ print(activity.km_splits)
72
+
73
+ activity.compute_lap_splits()
74
+ print(activity.lap_splits)
75
+ ```
76
+ Note: `compute_lap_splits` will only compute lap splits if the file contains lap data, which is not
77
+ the case for GPX files. Otherwise, it does not update the `lap_splits` attribute.
78
+
79
+
@@ -0,0 +1,21 @@
1
+ gpxtractor/__init__.py,sha256=XDtN5-aHWDjY_wz8lpCWqVbIDGyRZK9dll4KipI5ISs,275
2
+ gpxtractor/_core.py,sha256=aJ4s9fQFxliY-51Yo6g6a2FWq-GVCCzZ3W5mw1uthkw,11069
3
+ gpxtractor/_fit_extraction.py,sha256=Np9rXPXFLmwCHnpJuc1CiCClvaKWD6oiY5JgK2AsHB4,2933
4
+ gpxtractor/_transformation.py,sha256=WVoloI5-b7nYHM3LuxGLy6d_OGBblvfEvnHQaKgoob8,3377
5
+ gpxtractor/_utils.py,sha256=Naa2ZwQm9aBuvN5bEHpAixUgqs_xcKIWE3x0m4ZK4es,887
6
+ gpxtractor/_xml_extraction.py,sha256=29ijNf51iBq08jHSaZtZIAnsRZW9a7b85y6XcTMB0fY,5066
7
+ gpxtractor/cli.py,sha256=2bLVZzDeKfhknKl7leLfdq5t94wvl99W1p1YSKe8vtM,1867
8
+ gpxtractor/sql/compute_distance_and_speed.sql,sha256=bOgO_p1e6R7x-e4QGdloYfjUH2YFVmqZlTSnDWpXMpo,1404
9
+ gpxtractor/sql/compute_speed.sql,sha256=DLxB7HV-cBfGr3pp1uqv30C_tvH8_Y1EvIwC9bJ6_aE,1328
10
+ gpxtractor/sql/haversine_formula.sql,sha256=Rm1MvZ-CohRkG1ydvJl21fjaEnEUUhGlaqMFj4dlVQA,303
11
+ gpxtractor/sql/km_data_query.sql,sha256=KQO2fgpF8jvwaPaXw8qV7ypTOVg7ZLiVTzR_a5ybiPI,2485
12
+ gpxtractor/sql/lap_data_query.sql,sha256=W4eUF3t9fwd5yn8825ZSlj4in4hhmumTesh4h_U6AQ0,2334
13
+ gpxtractor/sql/overall_stats.sql,sha256=U7gZO8EICPagSaUpGYHT197VUhyOysUJYD-00dD1NdE,1295
14
+ gpxtractor/sql/preprocess_data.sql,sha256=lChlMfkIpjNMf6eVEFZME-Vk2_CKCgZDfrLquuIBB5s,1289
15
+ gpxtractor/sql/preprocess_running_data.sql,sha256=obHbGR2I0BwSKv4DmIOAaRDSWjF4ZjG85eYXa4YEULc,1517
16
+ gpxtractor-0.1.0.dist-info/licenses/LICENSE,sha256=yq10OHiKCRtJ9QAdp_Wsdbds46X9ZLfOB4cXjGJOuoM,1067
17
+ gpxtractor-0.1.0.dist-info/METADATA,sha256=tEXhKYPVtpkaqHYmDXt_1sRxg5FFFP_fNi0RYaY5xDw,2143
18
+ gpxtractor-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
19
+ gpxtractor-0.1.0.dist-info/entry_points.txt,sha256=SOAH_X7mtPe7XiwjnYobOrVpE0Hq1l-VZqiUCHon4gY,51
20
+ gpxtractor-0.1.0.dist-info/top_level.txt,sha256=qhnkz7mFbaMSwYDfPSKSgyn3ViXa2t37GenvvHkKSDo,11
21
+ gpxtractor-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ gpxtractor = gpxtractor.cli:main
@@ -0,0 +1,7 @@
1
+ Copyright (c) 2026 Charles Stapylton-Smith
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1 @@
1
+ gpxtractor