fastgpx 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. fastgpx-0.1.0/.clang-format +36 -0
  2. fastgpx-0.1.0/.gitignore +24 -0
  3. fastgpx-0.1.0/.python-version +1 -0
  4. fastgpx-0.1.0/CMakeLists.txt +17 -0
  5. fastgpx-0.1.0/CMakeSettings.json +29 -0
  6. fastgpx-0.1.0/Development.md +152 -0
  7. fastgpx-0.1.0/GPX.md +50 -0
  8. fastgpx-0.1.0/LICENSE.md +22 -0
  9. fastgpx-0.1.0/PKG-INFO +148 -0
  10. fastgpx-0.1.0/README.md +132 -0
  11. fastgpx-0.1.0/benchmarks/benchmark_gpx.py +199 -0
  12. fastgpx-0.1.0/benchmarks/benchmark_polyline.py +66 -0
  13. fastgpx-0.1.0/benchmarks/gpx_parse.md +114 -0
  14. fastgpx-0.1.0/catch2.py +87 -0
  15. fastgpx-0.1.0/coverage.bat +8 -0
  16. fastgpx-0.1.0/datetime.md +118 -0
  17. fastgpx-0.1.0/docs/Makefile +20 -0
  18. fastgpx-0.1.0/docs/make.bat +35 -0
  19. fastgpx-0.1.0/docs/source/api.rst +6 -0
  20. fastgpx-0.1.0/docs/source/conf.py +51 -0
  21. fastgpx-0.1.0/docs/source/index.rst +14 -0
  22. fastgpx-0.1.0/docs/source/overview.rst +30 -0
  23. fastgpx-0.1.0/profiling/profile_polyline.py +72 -0
  24. fastgpx-0.1.0/pyproject.toml +65 -0
  25. fastgpx-0.1.0/pytest.ini +7 -0
  26. fastgpx-0.1.0/src/cpp/CMakeLists.txt +139 -0
  27. fastgpx-0.1.0/src/cpp/app.cpp +31 -0
  28. fastgpx-0.1.0/src/cpp/expected_gpx_data.json +4194 -0
  29. fastgpx-0.1.0/src/cpp/fastgpx/datetime.cpp +1041 -0
  30. fastgpx-0.1.0/src/cpp/fastgpx/datetime.hpp +111 -0
  31. fastgpx-0.1.0/src/cpp/fastgpx/datetime_test.cpp +668 -0
  32. fastgpx-0.1.0/src/cpp/fastgpx/errors.cpp +23 -0
  33. fastgpx-0.1.0/src/cpp/fastgpx/errors.hpp +22 -0
  34. fastgpx-0.1.0/src/cpp/fastgpx/errors_test.cpp +22 -0
  35. fastgpx-0.1.0/src/cpp/fastgpx/fastgpx.cpp +443 -0
  36. fastgpx-0.1.0/src/cpp/fastgpx/fastgpx.hpp +140 -0
  37. fastgpx-0.1.0/src/cpp/fastgpx/fastgpx_test.cpp +283 -0
  38. fastgpx-0.1.0/src/cpp/fastgpx/filesystem.cpp +70 -0
  39. fastgpx-0.1.0/src/cpp/fastgpx/filesystem.hpp +26 -0
  40. fastgpx-0.1.0/src/cpp/fastgpx/filesystem_test.cpp +61 -0
  41. fastgpx-0.1.0/src/cpp/fastgpx/geom.cpp +148 -0
  42. fastgpx-0.1.0/src/cpp/fastgpx/geom.hpp +86 -0
  43. fastgpx-0.1.0/src/cpp/fastgpx/geom_test.cpp +152 -0
  44. fastgpx-0.1.0/src/cpp/fastgpx/polyline.cpp +84 -0
  45. fastgpx-0.1.0/src/cpp/fastgpx/polyline.hpp +25 -0
  46. fastgpx-0.1.0/src/cpp/fastgpx/test_data.cpp +69 -0
  47. fastgpx-0.1.0/src/cpp/fastgpx/test_data.hpp +37 -0
  48. fastgpx-0.1.0/src/cpp/fastgpx/test_data.json +39 -0
  49. fastgpx-0.1.0/src/cpp/fastgpx/test_data_test.cpp +35 -0
  50. fastgpx-0.1.0/src/cpp/python_fastgpx.cpp +258 -0
  51. fastgpx-0.1.0/src/cpp/python_utc_chrono.hpp +270 -0
  52. fastgpx-0.1.0/src/fastgpx/__init__.py +5 -0
  53. fastgpx-0.1.0/src/fastgpx/__init__.pyi +12 -0
  54. fastgpx-0.1.0/src/fastgpx/fastgpx/__init__.pyi +128 -0
  55. fastgpx-0.1.0/src/fastgpx/fastgpx/geo.pyi +9 -0
  56. fastgpx-0.1.0/src/fastgpx/fastgpx/polyline.pyi +54 -0
  57. fastgpx-0.1.0/src/fastgpx/py.typed +0 -0
  58. fastgpx-0.1.0/tests/test_bounds.py +57 -0
  59. fastgpx-0.1.0/tests/test_fastgpx.py +92 -0
  60. fastgpx-0.1.0/tests/test_polyline.py +141 -0
  61. fastgpx-0.1.0/uv.lock +546 -0
@@ -0,0 +1,36 @@
1
+ ---
2
+ AccessModifierOffset: -2
3
+ # AlignConsecutiveAssignments: Consecutive
4
+ AllowShortBlocksOnASingleLine: false
5
+ # AllowShortCaseLabelsOnASingleLine: false
6
+ AllowShortEnumsOnASingleLine: false
7
+ AllowShortFunctionsOnASingleLine: Inline # InlineOnly # Empty
8
+ AllowShortIfStatementsOnASingleLine: Never
9
+ AllowShortLambdasOnASingleLine: All # Inline
10
+ BasedOnStyle: Google
11
+ BreakBeforeBraces: Custom
12
+ BraceWrapping:
13
+ AfterCaseLabel: true
14
+ AfterClass: true
15
+ AfterControlStatement: Always
16
+ AfterEnum: true
17
+ AfterExternBlock: true
18
+ AfterFunction: true
19
+ AfterNamespace: false
20
+ AfterStruct: true
21
+ AfterUnion: true
22
+ BeforeCatch: true
23
+ BeforeElse: true
24
+ BeforeLambdaBody: false
25
+ BeforeWhile: true
26
+ SplitEmptyFunction: false
27
+ ColumnLimit: 100
28
+ DerivePointerAlignment: false
29
+ IncludeBlocks: Preserve
30
+ IndentCaseLabels: false
31
+ IndentExternBlock: NoIndent
32
+ IndentPPDirectives: BeforeHash
33
+ PointerAlignment: Left
34
+ ReferenceAlignment: Left
35
+ SpaceAfterTemplateKeyword: false
36
+ SpacesBeforeTrailingComments: 1
@@ -0,0 +1,24 @@
1
+ __pycache__
2
+ .venv
3
+ .env
4
+ env39/
5
+
6
+ /dist/
7
+
8
+ /build/
9
+ /output/
10
+
11
+ /.vs/
12
+ /out/
13
+
14
+ .mypy_cache/
15
+ .pytest_cache/
16
+
17
+ /*.png
18
+
19
+ /profiling/*.prof
20
+
21
+ /coverage/
22
+ LastCoverageResults.log
23
+
24
+ /docs/build/
@@ -0,0 +1 @@
1
+ 3.11
@@ -0,0 +1,17 @@
1
+ cmake_minimum_required(VERSION 3.30.2)
2
+
3
+ project(fastgpx)
4
+
5
+ # > The CTest module defines a BUILD_TESTING cache variable which defaults to true.
6
+ # > It is used to decide whether the module calls enable_testing() or not, so the
7
+ # > project does not have to make its own explicit call to enable_testing(). The
8
+ # > project can also use this cache variable to perform certain processing only
9
+ # > if testing is enabled. If the project has many tests that take a long time
10
+ # > to build, this can be a useful way to avoid adding them to the build when
11
+ # > they are not needed.
12
+ #
13
+ # The BUILD_TESTING is set to false in pyproject.toml, so the tests will not build
14
+ # when building the Python package.
15
+ include(CTest)
16
+
17
+ add_subdirectory(src/cpp)
@@ -0,0 +1,29 @@
1
+ {
2
+ "configurations": [
3
+ {
4
+ "name": "x64-Debug",
5
+ "generator": "Ninja",
6
+ "configurationType": "Debug",
7
+ "inheritEnvironments": [ "msvc_x64_x64" ],
8
+ "buildRoot": "${projectDir}\\out\\build\\${name}",
9
+ "installRoot": "${projectDir}\\out\\install\\${name}",
10
+ "cmakeCommandArgs": "-DPYTHON_EXECUTABLE=.venv/Scripts/python.exe -DCMAKE_PREFIX_PATH=${projectDir}/.venv/Lib/site-packages/pybind11/share/cmake/pybind11",
11
+ "buildCommandArgs": "",
12
+ "ctestCommandArgs": "",
13
+ "cmakeExecutable": "C:/Program Files/CMake/bin/cmake.exe"
14
+ },
15
+ {
16
+ "name": "x64-Release",
17
+ "generator": "Ninja",
18
+ "configurationType": "RelWithDebInfo",
19
+ "buildRoot": "${projectDir}\\out\\build\\${name}",
20
+ "installRoot": "${projectDir}\\out\\install\\${name}",
21
+ "cmakeExecutable": "C:/Program Files/CMake/bin/cmake.exe",
22
+ "cmakeCommandArgs": "-DPYTHON_EXECUTABLE=.venv/Scripts/python.exe -DCMAKE_PREFIX_PATH=${projectDir}/.venv/Lib/site-packages/pybind11/share/cmake/pybind11",
23
+ "buildCommandArgs": "",
24
+ "ctestCommandArgs": "",
25
+ "inheritEnvironments": [ "msvc_x64_x64" ],
26
+ "variables": []
27
+ }
28
+ ]
29
+ }
@@ -0,0 +1,152 @@
1
+ # Development
2
+
3
+ ## Python
4
+
5
+ ### Import Order
6
+
7
+ The typical organization of Python imports follows a structured convention to improve readability and maintainability of the code. This organization often adheres to the PEP 8 style guide, which is widely adopted in the Python community. Here is the recommended order and format:
8
+
9
+ Python imports are generally organized into three main sections, each separated by a blank line:
10
+
11
+ 1. Standard Library Imports: These are modules that are part of Python's standard library, such as `os`, `sys`, `datetime`, etc.
12
+
13
+ 2. Third-Party Imports: These are external libraries that are not part of the standard library, such as `numpy`, `requests`, etc.
14
+
15
+ 3. Local Application or Project-Specific Imports: These are your own modules that are part of the project.
16
+
17
+ ```py
18
+ # Standard library imports
19
+ import os
20
+ import sys
21
+ from datetime import datetime
22
+
23
+ # Third-party imports
24
+ import requests
25
+ import numpy as np
26
+
27
+ # Local application imports
28
+ from my_project.module import my_function
29
+ from . import another_module
30
+ ```
31
+
32
+
33
+ ### Python C Extension
34
+
35
+ Once set up, you can build the C++ extension with a simple `pip install .` or `pip install --editable .` for development builds.
36
+
37
+ ```sh
38
+ pip install --editable .
39
+ ```
40
+
41
+ ```sh
42
+ pybind11-stubgen fastgpx -o src
43
+
44
+ pybind11-stubgen fastgpx --enum-class-locations "Precision:fastgpx.polyline" -o src
45
+ ```
46
+
47
+ ### Python Profiling
48
+
49
+ https://learn.microsoft.com/en-us/visualstudio/python/profiling-python-code-in-visual-studio?view=vs-2022
50
+
51
+ ```sh
52
+ snakeviz profiling/fastgpx_polyline_encode.prof
53
+ ```
54
+
55
+ ```sh
56
+ snakeviz profiling/polyline_encode.prof
57
+ ```
58
+
59
+ ### pyproject.toml
60
+
61
+ > Installing Dependencies with pyproject.toml
62
+ >
63
+ > You no longer need to use `pip install -r requirements.txt`. Instead, you can simply install dependencies directly using:
64
+ >
65
+ > ```sh
66
+ > pip install .
67
+ > ```
68
+
69
+ > For Development Dependencies:
70
+ >
71
+ > To install development dependencies (like `pytest` and `pybind11-stubgen`), you can use pip's extras syntax (assuming you defined them under dev):
72
+ >
73
+ > ```sh
74
+ > pip install .[dev]
75
+ > ```
76
+ >
77
+ > This will install both the main dependencies and the development dependencies defined in the dev section of `pyproject.toml`.
78
+
79
+ ### Locally test build wheel
80
+
81
+ ```sh
82
+ pip install --upgrade build twine wheel
83
+ ```
84
+
85
+ ```sh
86
+ python -m build
87
+ twine check dist/*
88
+ ```
89
+
90
+ ## C++
91
+
92
+ ### Include order
93
+
94
+ The organization of C++ includes follows certain conventions that are similar in spirit to Python import conventions. Well-structured includes can improve readability, reduce compile times, and minimize dependencies. Here are the typical guidelines and best practices for organizing C++ includes:
95
+
96
+ C++ includes are generally organized in a specific order, often grouped and separated by blank lines. The typical order is as follows:
97
+
98
+ 1. Header File for the Current Implementation File (if applicable)
99
+ 2. Standard Library Headers (e.g., `<iostream>`, `<vector>`)
100
+ 3. Third-Party Library Headers (e.g., `boost`, or other external dependencies)
101
+ 4. Project-Specific Headers (e.g., your own modules or classes)
102
+
103
+ This organization helps to ensure that your file includes what it needs directly, and minimizes the chance of accidentally relying on transitive includes from other files.
104
+
105
+ ```cpp
106
+ // Current implementation file's corresponding header
107
+ #include "my_class.h"
108
+
109
+ // Standard library headers
110
+ #include <iostream>
111
+ #include <vector>
112
+ #include <string>
113
+
114
+ // Third-party library headers
115
+ #include <boost/algorithm/string.hpp>
116
+
117
+ // Project-specific headers
118
+ #include "utils.h"
119
+ #include "data_processing.h"
120
+ ```
121
+
122
+ ### Reformat all C++ sources
123
+
124
+ ```sh
125
+ cd src\cpp
126
+ for /R %f in (*.cpp *.hpp) do "C:\Program Files\LLVM\bin\clang-format.exe" -i "%f"
127
+ ```
128
+
129
+ ### Coverage (C++ OpenCppCoverage)
130
+
131
+ ```sh
132
+ coverage.bat ~[real_world]
133
+ ```
134
+
135
+ ### Catch2 Tests
136
+
137
+ If the output prints UTF-8 characters the terminal needs to be set to UTF-8 mode on Windows:
138
+
139
+ ```sh
140
+ chcp 65001
141
+ ```
142
+ #### Running Tests
143
+
144
+ ```sh
145
+ build\src\cpp\RelWithDebInfo\fastgpx_test.exe
146
+ ```
147
+
148
+ #### Running Benchmarks
149
+
150
+ ```sh
151
+ build\src\cpp\RelWithDebInfo\fastgpx_test.exe [!benchmark]
152
+ ```
fastgpx-0.1.0/GPX.md ADDED
@@ -0,0 +1,50 @@
1
+
2
+ # GPX
3
+
4
+ ## GPX for Developers
5
+
6
+ https://www.topografix.com/gpx_for_developers.asp
7
+
8
+ > The standard in the gpx files for the time zone format is not completely
9
+ > clear. For instance some applications generate time formats in the track
10
+ > points of the form
11
+ >
12
+ > <time>2008-07-18T16:07:50.000+02:00</time>
13
+ >
14
+ > this is slightly substandard because the standard as described in
15
+ > http://www.topografix.com/gpx.asp says that
16
+ >
17
+ > "Date and time in are in Univeral Coordinated Time (UTC), not local time!
18
+ > Conforms to ISO 8601 specification for date/time representation."
19
+
20
+ https://web.archive.org/web/20130725164436/http://tech.groups.yahoo.com/group/gpsxml/message/1090?l=1
21
+
22
+ ## GPS Exchange Format (GPX): A Comprehensive Guide
23
+
24
+ https://mapscaping.com/gps-exchange-format-gpx/
25
+
26
+ ## Assumed typical <time> representation in .GPX files
27
+
28
+ To simplify logic and maximize performance, maybe one can assume a more narrow
29
+ scope of ISO 8601.
30
+
31
+ By using the length of the string one can make assumptions about the variation of
32
+ the format and directly extract the numeric components of the string. Probably
33
+ worth validating the separator characters as a minimal validation safety check
34
+ to eliminate pure junk input.
35
+
36
+ Variations observed:
37
+ * Only Extended Format.
38
+ * With or without milliseconds. (3 fractional decimals)
39
+ * Zulu hours are the norm, but timezone offsets might occur.
40
+ * Some mention of missing timezone notation altogether.
41
+ While ISO 8601 describes this as local time, one can probably assume this is
42
+ an omission by the author and it really should be Zulu hours.
43
+
44
+ | Example | Length | |
45
+ |-------------------------------|--------|-------------------------------------|
46
+ | 2008-07-18T16:07:50.000+02:00 | 29 | Not per GPX definition. |
47
+ | 2008-07-18T16:07:50.000Z | 24 | |
48
+ | 2008-07-18T16:07:50+02:00 | 25 | Not per GPX definition. |
49
+ | 2008-07-18T16:07:50Z | 20 | |
50
+ | 2008-07-18T16:07:50 | 19 | Assume Zulu time? |
@@ -0,0 +1,22 @@
1
+
2
+ The MIT License (MIT)
3
+
4
+ Copyright (c) 2024-2025 Thomas Thomassen
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
fastgpx-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,148 @@
1
+ Metadata-Version: 2.4
2
+ Name: fastgpx
3
+ Version: 0.1.0
4
+ Summary: An experimental Python library for parsing GPX files fast.
5
+ Keywords: gpx,parser,fast
6
+ Author: Thomas Thomassen
7
+ License-Expression: MIT
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Topic :: File Formats
11
+ Classifier: Operating System :: Microsoft :: Windows
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Project-URL: Documentation, https://thomthom.github.io/fastgpx/
14
+ Requires-Python: >=3.11
15
+ Description-Content-Type: text/markdown
16
+
17
+ # fastgpx
18
+
19
+ An experimental Python library for parsing GPX files fast.
20
+
21
+ ```py
22
+ # Get the total length of the tracks in a GPX file:
23
+ import fastgpx
24
+
25
+ gpx = fastgpx.parse("example.gpx")
26
+ print(f'{gpx.length_2d()} m')
27
+ ```
28
+
29
+ ```py
30
+ # Iterate over GPX file:
31
+ import fastgpx
32
+
33
+ gpx = fastgpx.parse("example.gpx")
34
+ for track in gpx.tracks:
35
+ print(f'Track: {track.name}')
36
+ print(f'Distance: {track.length_2d()} m')
37
+ if not track.time_bounds().is_empty():
38
+ print(f'Time: {track.time_bounds().start_time} - {track.time_bounds().end_time}')
39
+ for segment in track.segments:
40
+ print(f'Segment: {segment.name}')
41
+ for point in segment.points:
42
+ print(f'Point: {point.latitude}, {point.longitude}')
43
+ ```
44
+
45
+ [Documentation](https://thomthom.github.io/fastgpx/)
46
+
47
+ ## GPX/XML Performance (Background)
48
+
49
+ `gpxpy` appears to be the most popular GPX library for Python.
50
+
51
+ `gpxpy` docs say that it uses `lxml` if available because it is faster than "`minidom`" (`etree`).
52
+ When benchmarking that seemed not to be the case. It appears that the stdlib XML library has gotten
53
+ much better since `gpxpy` was created.
54
+
55
+ Reference: Open ticket on making `etree` default:
56
+ https://github.com/tkrajina/gpxpy/issues/248
57
+
58
+ ## Benchmarks
59
+
60
+ Test machine:
61
+
62
+ * AMD Ryzen 7 5800 8-Core, 3.80 GHz
63
+ * 32 GB memory
64
+ * m2 SSD storage
65
+
66
+ ### gpxpy benchmarks
67
+
68
+ Comparing getting the distance of a GPX file using `gpxpy` vs manually extracting
69
+ the data using `xml_etree`, computing distance between points using `gpxpy`
70
+ distance functions.
71
+
72
+ #### gpxpy without `lxml`
73
+
74
+ ```
75
+ Running benchmark with 3 iterations...
76
+ gpxpy 5463041.784135511 meters
77
+ gpxpy 5463041.784135511 meters
78
+ gpxpy 5463041.784135511 meters
79
+ gpxpy: 11.497863 seconds (Average: 3.832621 seconds)
80
+ ```
81
+
82
+ #### gpxpy with `lxml`
83
+
84
+ ```
85
+ Running benchmark with 3 iterations...
86
+ gpxpy 5463041.784135511 meters
87
+ gpxpy 5463041.784135511 meters
88
+ gpxpy 5463041.784135511 meters
89
+ gpxpy: 37.803625 seconds (Average: 12.601208 seconds)
90
+ ```
91
+
92
+ #### xml_etree data extraction
93
+
94
+ ```
95
+ Running benchmark with 3 iterations...
96
+ xml_etree 5463043.740615641 meters
97
+ xml_etree 5463043.740615641 meters
98
+ xml_etree 5463043.740615641 meters
99
+ xml_etree: 2.333200 seconds (Average: 0.777733 seconds)
100
+ ```
101
+
102
+ Even with `gpxpy` using `etree` to parse the XML it is faster to parse it
103
+ directly with `etree` and use `gpxpy.geo` distance functions to compute the
104
+ distance of a GPX file. Unclear what the extra overhead is, possibly the cost
105
+ of extracting additional data. (There is a minor difference in how the total distance
106
+ is computed in this example, using different options for computing the distance.)
107
+
108
+ ### C++ benchmarks
109
+
110
+ Since XML parsing itself appears to have a significant impact on performance some
111
+ popular C++ XML libraries were tested:
112
+
113
+ #### tinyxml2
114
+ ```
115
+ Total Length: 5456930.710560566
116
+ Elapsed time: 0.4980144 seconds
117
+ ```
118
+
119
+ #### pugixml
120
+ ```
121
+ Total Length: 5456930.710560566
122
+ Elapsed time: 0.1890089 seconds
123
+ ```
124
+
125
+ ### C++ vs Python implementations
126
+
127
+
128
+ ```
129
+ Running 5 benchmarks with 3 iterations...
130
+
131
+ Running gpxpy ...
132
+ gpxpy: 50.182288 seconds (Average: 16.727429 seconds)
133
+
134
+ Running xml_etree ...
135
+ xml_etree: 8.269050 seconds (Average: 2.756350 seconds)
136
+
137
+ Running lxml ...
138
+ lxml: 8.479702 seconds (Average: 2.826567 seconds)
139
+
140
+ Running tinyxml (C++) ...
141
+ tinyxml (C++): 2.699880 seconds (Average: 0.899960 seconds)
142
+
143
+ Running pugixml (C++) ...
144
+ pugixml (C++): 0.381095 seconds (Average: 0.127032 seconds)
145
+ ```
146
+
147
+ For computing the length of a GPX file, `pugixml` in a Python C extension was ~140
148
+ times faster than using `gpxpy`.
@@ -0,0 +1,132 @@
1
+ # fastgpx
2
+
3
+ An experimental Python library for parsing GPX files fast.
4
+
5
+ ```py
6
+ # Get the total length of the tracks in a GPX file:
7
+ import fastgpx
8
+
9
+ gpx = fastgpx.parse("example.gpx")
10
+ print(f'{gpx.length_2d()} m')
11
+ ```
12
+
13
+ ```py
14
+ # Iterate over GPX file:
15
+ import fastgpx
16
+
17
+ gpx = fastgpx.parse("example.gpx")
18
+ for track in gpx.tracks:
19
+ print(f'Track: {track.name}')
20
+ print(f'Distance: {track.length_2d()} m')
21
+ if not track.time_bounds().is_empty():
22
+ print(f'Time: {track.time_bounds().start_time} - {track.time_bounds().end_time}')
23
+ for segment in track.segments:
24
+ print(f'Segment: {segment.name}')
25
+ for point in segment.points:
26
+ print(f'Point: {point.latitude}, {point.longitude}')
27
+ ```
28
+
29
+ [Documentation](https://thomthom.github.io/fastgpx/)
30
+
31
+ ## GPX/XML Performance (Background)
32
+
33
+ `gpxpy` appears to be the most popular GPX library for Python.
34
+
35
+ `gpxpy` docs say that it uses `lxml` if available because it is faster than "`minidom`" (`etree`).
36
+ When benchmarking that seemed not to be the case. It appears that the stdlib XML library has gotten
37
+ much better since `gpxpy` was created.
38
+
39
+ Reference: Open ticket on making `etree` default:
40
+ https://github.com/tkrajina/gpxpy/issues/248
41
+
42
+ ## Benchmarks
43
+
44
+ Test machine:
45
+
46
+ * AMD Ryzen 7 5800 8-Core, 3.80 GHz
47
+ * 32 GB memory
48
+ * m2 SSD storage
49
+
50
+ ### gpxpy benchmarks
51
+
52
+ Comparing getting the distance of a GPX file using `gpxpy` vs manually extracting
53
+ the data using `xml_etree`, computing distance between points using `gpxpy`
54
+ distance functions.
55
+
56
+ #### gpxpy without `lxml`
57
+
58
+ ```
59
+ Running benchmark with 3 iterations...
60
+ gpxpy 5463041.784135511 meters
61
+ gpxpy 5463041.784135511 meters
62
+ gpxpy 5463041.784135511 meters
63
+ gpxpy: 11.497863 seconds (Average: 3.832621 seconds)
64
+ ```
65
+
66
+ #### gpxpy with `lxml`
67
+
68
+ ```
69
+ Running benchmark with 3 iterations...
70
+ gpxpy 5463041.784135511 meters
71
+ gpxpy 5463041.784135511 meters
72
+ gpxpy 5463041.784135511 meters
73
+ gpxpy: 37.803625 seconds (Average: 12.601208 seconds)
74
+ ```
75
+
76
+ #### xml_etree data extraction
77
+
78
+ ```
79
+ Running benchmark with 3 iterations...
80
+ xml_etree 5463043.740615641 meters
81
+ xml_etree 5463043.740615641 meters
82
+ xml_etree 5463043.740615641 meters
83
+ xml_etree: 2.333200 seconds (Average: 0.777733 seconds)
84
+ ```
85
+
86
+ Even with `gpxpy` using `etree` to parse the XML it is faster to parse it
87
+ directly with `etree` and use `gpxpy.geo` distance functions to compute the
88
+ distance of a GPX file. Unclear what the extra overhead is, possibly the cost
89
+ of extracting additional data. (There is a minor difference in how the total distance
90
+ is computed in this example, using different options for computing the distance.)
91
+
92
+ ### C++ benchmarks
93
+
94
+ Since XML parsing itself appears to have a significant impact on performance some
95
+ popular C++ XML libraries were tested:
96
+
97
+ #### tinyxml2
98
+ ```
99
+ Total Length: 5456930.710560566
100
+ Elapsed time: 0.4980144 seconds
101
+ ```
102
+
103
+ #### pugixml
104
+ ```
105
+ Total Length: 5456930.710560566
106
+ Elapsed time: 0.1890089 seconds
107
+ ```
108
+
109
+ ### C++ vs Python implementations
110
+
111
+
112
+ ```
113
+ Running 5 benchmarks with 3 iterations...
114
+
115
+ Running gpxpy ...
116
+ gpxpy: 50.182288 seconds (Average: 16.727429 seconds)
117
+
118
+ Running xml_etree ...
119
+ xml_etree: 8.269050 seconds (Average: 2.756350 seconds)
120
+
121
+ Running lxml ...
122
+ lxml: 8.479702 seconds (Average: 2.826567 seconds)
123
+
124
+ Running tinyxml (C++) ...
125
+ tinyxml (C++): 2.699880 seconds (Average: 0.899960 seconds)
126
+
127
+ Running pugixml (C++) ...
128
+ pugixml (C++): 0.381095 seconds (Average: 0.127032 seconds)
129
+ ```
130
+
131
+ For computing the length of a GPX file, `pugixml` in a Python C extension was ~140
132
+ times faster than using `gpxpy`.