fastgpx 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fastgpx-0.1.0/.clang-format +36 -0
- fastgpx-0.1.0/.gitignore +24 -0
- fastgpx-0.1.0/.python-version +1 -0
- fastgpx-0.1.0/CMakeLists.txt +17 -0
- fastgpx-0.1.0/CMakeSettings.json +29 -0
- fastgpx-0.1.0/Development.md +152 -0
- fastgpx-0.1.0/GPX.md +50 -0
- fastgpx-0.1.0/LICENSE.md +22 -0
- fastgpx-0.1.0/PKG-INFO +148 -0
- fastgpx-0.1.0/README.md +132 -0
- fastgpx-0.1.0/benchmarks/benchmark_gpx.py +199 -0
- fastgpx-0.1.0/benchmarks/benchmark_polyline.py +66 -0
- fastgpx-0.1.0/benchmarks/gpx_parse.md +114 -0
- fastgpx-0.1.0/catch2.py +87 -0
- fastgpx-0.1.0/coverage.bat +8 -0
- fastgpx-0.1.0/datetime.md +118 -0
- fastgpx-0.1.0/docs/Makefile +20 -0
- fastgpx-0.1.0/docs/make.bat +35 -0
- fastgpx-0.1.0/docs/source/api.rst +6 -0
- fastgpx-0.1.0/docs/source/conf.py +51 -0
- fastgpx-0.1.0/docs/source/index.rst +14 -0
- fastgpx-0.1.0/docs/source/overview.rst +30 -0
- fastgpx-0.1.0/profiling/profile_polyline.py +72 -0
- fastgpx-0.1.0/pyproject.toml +65 -0
- fastgpx-0.1.0/pytest.ini +7 -0
- fastgpx-0.1.0/src/cpp/CMakeLists.txt +139 -0
- fastgpx-0.1.0/src/cpp/app.cpp +31 -0
- fastgpx-0.1.0/src/cpp/expected_gpx_data.json +4194 -0
- fastgpx-0.1.0/src/cpp/fastgpx/datetime.cpp +1041 -0
- fastgpx-0.1.0/src/cpp/fastgpx/datetime.hpp +111 -0
- fastgpx-0.1.0/src/cpp/fastgpx/datetime_test.cpp +668 -0
- fastgpx-0.1.0/src/cpp/fastgpx/errors.cpp +23 -0
- fastgpx-0.1.0/src/cpp/fastgpx/errors.hpp +22 -0
- fastgpx-0.1.0/src/cpp/fastgpx/errors_test.cpp +22 -0
- fastgpx-0.1.0/src/cpp/fastgpx/fastgpx.cpp +443 -0
- fastgpx-0.1.0/src/cpp/fastgpx/fastgpx.hpp +140 -0
- fastgpx-0.1.0/src/cpp/fastgpx/fastgpx_test.cpp +283 -0
- fastgpx-0.1.0/src/cpp/fastgpx/filesystem.cpp +70 -0
- fastgpx-0.1.0/src/cpp/fastgpx/filesystem.hpp +26 -0
- fastgpx-0.1.0/src/cpp/fastgpx/filesystem_test.cpp +61 -0
- fastgpx-0.1.0/src/cpp/fastgpx/geom.cpp +148 -0
- fastgpx-0.1.0/src/cpp/fastgpx/geom.hpp +86 -0
- fastgpx-0.1.0/src/cpp/fastgpx/geom_test.cpp +152 -0
- fastgpx-0.1.0/src/cpp/fastgpx/polyline.cpp +84 -0
- fastgpx-0.1.0/src/cpp/fastgpx/polyline.hpp +25 -0
- fastgpx-0.1.0/src/cpp/fastgpx/test_data.cpp +69 -0
- fastgpx-0.1.0/src/cpp/fastgpx/test_data.hpp +37 -0
- fastgpx-0.1.0/src/cpp/fastgpx/test_data.json +39 -0
- fastgpx-0.1.0/src/cpp/fastgpx/test_data_test.cpp +35 -0
- fastgpx-0.1.0/src/cpp/python_fastgpx.cpp +258 -0
- fastgpx-0.1.0/src/cpp/python_utc_chrono.hpp +270 -0
- fastgpx-0.1.0/src/fastgpx/__init__.py +5 -0
- fastgpx-0.1.0/src/fastgpx/__init__.pyi +12 -0
- fastgpx-0.1.0/src/fastgpx/fastgpx/__init__.pyi +128 -0
- fastgpx-0.1.0/src/fastgpx/fastgpx/geo.pyi +9 -0
- fastgpx-0.1.0/src/fastgpx/fastgpx/polyline.pyi +54 -0
- fastgpx-0.1.0/src/fastgpx/py.typed +0 -0
- fastgpx-0.1.0/tests/test_bounds.py +57 -0
- fastgpx-0.1.0/tests/test_fastgpx.py +92 -0
- fastgpx-0.1.0/tests/test_polyline.py +141 -0
- fastgpx-0.1.0/uv.lock +546 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
---
|
2
|
+
AccessModifierOffset: -2
|
3
|
+
# AlignConsecutiveAssignments: Consecutive
|
4
|
+
AllowShortBlocksOnASingleLine: false
|
5
|
+
# AllowShortCaseLabelsOnASingleLine: false
|
6
|
+
AllowShortEnumsOnASingleLine: false
|
7
|
+
AllowShortFunctionsOnASingleLine: Inline # InlineOnly # Empty
|
8
|
+
AllowShortIfStatementsOnASingleLine: Never
|
9
|
+
AllowShortLambdasOnASingleLine: All # Inline
|
10
|
+
BasedOnStyle: Google
|
11
|
+
BreakBeforeBraces: Custom
|
12
|
+
BraceWrapping:
|
13
|
+
AfterCaseLabel: true
|
14
|
+
AfterClass: true
|
15
|
+
AfterControlStatement: Always
|
16
|
+
AfterEnum: true
|
17
|
+
AfterExternBlock: true
|
18
|
+
AfterFunction: true
|
19
|
+
AfterNamespace: false
|
20
|
+
AfterStruct: true
|
21
|
+
AfterUnion: true
|
22
|
+
BeforeCatch: true
|
23
|
+
BeforeElse: true
|
24
|
+
BeforeLambdaBody: false
|
25
|
+
BeforeWhile: true
|
26
|
+
SplitEmptyFunction: false
|
27
|
+
ColumnLimit: 100
|
28
|
+
DerivePointerAlignment: false
|
29
|
+
IncludeBlocks: Preserve
|
30
|
+
IndentCaseLabels: false
|
31
|
+
IndentExternBlock: NoIndent
|
32
|
+
IndentPPDirectives: BeforeHash
|
33
|
+
PointerAlignment: Left
|
34
|
+
ReferenceAlignment: Left
|
35
|
+
SpaceAfterTemplateKeyword: false
|
36
|
+
SpacesBeforeTrailingComments: 1
|
fastgpx-0.1.0/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
3.11
|
@@ -0,0 +1,17 @@
|
|
1
|
+
cmake_minimum_required(VERSION 3.30.2)
|
2
|
+
|
3
|
+
project(fastgpx)
|
4
|
+
|
5
|
+
# > The CTest module defines a BUILD_TESTING cache variable which defaults to true.
|
6
|
+
# > It is used to decide whether the module calls enable_testing() or not, so the
|
7
|
+
# > project does not have to make its own explicit call to enable_testing(). The
|
8
|
+
# > project can also use this cache variable to perform certain processing only
|
9
|
+
# > if testing is enabled. If the project has many tests that take a long time
|
10
|
+
# > to build, this can be a useful way to avoid adding them to the build when
|
11
|
+
# > they are not needed.
|
12
|
+
#
|
13
|
+
# The BUILD_TESTING is set to false in pyproject.toml, so the tests will not build
|
14
|
+
# when building the Python package.
|
15
|
+
include(CTest)
|
16
|
+
|
17
|
+
add_subdirectory(src/cpp)
|
@@ -0,0 +1,29 @@
|
|
1
|
+
{
|
2
|
+
"configurations": [
|
3
|
+
{
|
4
|
+
"name": "x64-Debug",
|
5
|
+
"generator": "Ninja",
|
6
|
+
"configurationType": "Debug",
|
7
|
+
"inheritEnvironments": [ "msvc_x64_x64" ],
|
8
|
+
"buildRoot": "${projectDir}\\out\\build\\${name}",
|
9
|
+
"installRoot": "${projectDir}\\out\\install\\${name}",
|
10
|
+
"cmakeCommandArgs": "-DPYTHON_EXECUTABLE=.venv/Scripts/python.exe -DCMAKE_PREFIX_PATH=${projectDir}/.venv/Lib/site-packages/pybind11/share/cmake/pybind11",
|
11
|
+
"buildCommandArgs": "",
|
12
|
+
"ctestCommandArgs": "",
|
13
|
+
"cmakeExecutable": "C:/Program Files/CMake/bin/cmake.exe"
|
14
|
+
},
|
15
|
+
{
|
16
|
+
"name": "x64-Release",
|
17
|
+
"generator": "Ninja",
|
18
|
+
"configurationType": "RelWithDebInfo",
|
19
|
+
"buildRoot": "${projectDir}\\out\\build\\${name}",
|
20
|
+
"installRoot": "${projectDir}\\out\\install\\${name}",
|
21
|
+
"cmakeExecutable": "C:/Program Files/CMake/bin/cmake.exe",
|
22
|
+
"cmakeCommandArgs": "-DPYTHON_EXECUTABLE=.venv/Scripts/python.exe -DCMAKE_PREFIX_PATH=${projectDir}/.venv/Lib/site-packages/pybind11/share/cmake/pybind11",
|
23
|
+
"buildCommandArgs": "",
|
24
|
+
"ctestCommandArgs": "",
|
25
|
+
"inheritEnvironments": [ "msvc_x64_x64" ],
|
26
|
+
"variables": []
|
27
|
+
}
|
28
|
+
]
|
29
|
+
}
|
@@ -0,0 +1,152 @@
|
|
1
|
+
# Development
|
2
|
+
|
3
|
+
## Python
|
4
|
+
|
5
|
+
### Import Order
|
6
|
+
|
7
|
+
The typical organization of Python imports follows a structured convention to improve readability and maintainability of the code. This organization often adheres to the PEP 8 style guide, which is widely adopted in the Python community. Here is the recommended order and format:
|
8
|
+
|
9
|
+
Python imports are generally organized into three main sections, each separated by a blank line:
|
10
|
+
|
11
|
+
1. Standard Library Imports: These are modules that are part of Python's standard library, such as `os`, `sys`, `datetime`, etc.
|
12
|
+
|
13
|
+
2. Third-Party Imports: These are external libraries that are not part of the standard library, such as `numpy`, `requests`, etc.
|
14
|
+
|
15
|
+
3. Local Application or Project-Specific Imports: These are your own modules that are part of the project.
|
16
|
+
|
17
|
+
```py
|
18
|
+
# Standard library imports
|
19
|
+
import os
|
20
|
+
import sys
|
21
|
+
from datetime import datetime
|
22
|
+
|
23
|
+
# Third-party imports
|
24
|
+
import requests
|
25
|
+
import numpy as np
|
26
|
+
|
27
|
+
# Local application imports
|
28
|
+
from my_project.module import my_function
|
29
|
+
from . import another_module
|
30
|
+
```
|
31
|
+
|
32
|
+
|
33
|
+
### Python C Extension
|
34
|
+
|
35
|
+
Once set up, you can build the C++ extension with a simple `pip install .` or `pip install --editable .` for development builds.
|
36
|
+
|
37
|
+
```sh
|
38
|
+
pip install --editable .
|
39
|
+
```
|
40
|
+
|
41
|
+
```sh
|
42
|
+
pybind11-stubgen fastgpx -o src
|
43
|
+
|
44
|
+
pybind11-stubgen fastgpx --enum-class-locations "Precision:fastgpx.polyline" -o src
|
45
|
+
```
|
46
|
+
|
47
|
+
### Python Profiling
|
48
|
+
|
49
|
+
https://learn.microsoft.com/en-us/visualstudio/python/profiling-python-code-in-visual-studio?view=vs-2022
|
50
|
+
|
51
|
+
```sh
|
52
|
+
snakeviz profiling/fastgpx_polyline_encode.prof
|
53
|
+
```
|
54
|
+
|
55
|
+
```sh
|
56
|
+
snakeviz profiling/polyline_encode.prof
|
57
|
+
```
|
58
|
+
|
59
|
+
### pyproject.toml
|
60
|
+
|
61
|
+
> Installing Dependencies with pyproject.toml
|
62
|
+
>
|
63
|
+
> You no longer need to use `pip install -r requirements.txt`. Instead, you can simply install dependencies directly using:
|
64
|
+
>
|
65
|
+
> ```sh
|
66
|
+
> pip install .
|
67
|
+
> ```
|
68
|
+
|
69
|
+
> For Development Dependencies:
|
70
|
+
>
|
71
|
+
> To install development dependencies (like `pytest` and `pybind11-stubgen`), you can request the `dev` extra using pip's extras syntax (assuming you defined them under dev):
|
72
|
+
>
|
73
|
+
> ```sh
|
74
|
+
> pip install .[dev]
|
75
|
+
> ```
|
76
|
+
>
|
77
|
+
> This will install both the main dependencies and the development dependencies defined in the dev section of `pyproject.toml`.
|
78
|
+
|
79
|
+
### Locally test build wheel
|
80
|
+
|
81
|
+
```sh
|
82
|
+
pip install --upgrade build twine wheel
|
83
|
+
```
|
84
|
+
|
85
|
+
```sh
|
86
|
+
python -m build
|
87
|
+
twine check dist/*
|
88
|
+
```
|
89
|
+
|
90
|
+
## C++
|
91
|
+
|
92
|
+
### Include order
|
93
|
+
|
94
|
+
The organization of C++ includes follows certain conventions that are similar in spirit to Python import conventions. Well-structured includes can improve readability, reduce compile times, and minimize dependencies. Here are the typical guidelines and best practices for organizing C++ includes:
|
95
|
+
|
96
|
+
C++ includes are generally organized in a specific order, often grouped and separated by blank lines. The typical order is as follows:
|
97
|
+
|
98
|
+
1. Header File for the Current Implementation File (if applicable)
|
99
|
+
2. Standard Library Headers (e.g., `<iostream>`, `<vector>`)
|
100
|
+
3. Third-Party Library Headers (e.g., `boost`, or other external dependencies)
|
101
|
+
4. Project-Specific Headers (e.g., your own modules or classes)
|
102
|
+
|
103
|
+
This organization helps to ensure that your file includes what it needs directly, and minimizes the chance of accidentally relying on transitive includes from other files.
|
104
|
+
|
105
|
+
```cpp
|
106
|
+
// Current implementation file's corresponding header
|
107
|
+
#include "my_class.h"
|
108
|
+
|
109
|
+
// Standard library headers
|
110
|
+
#include <iostream>
|
111
|
+
#include <vector>
|
112
|
+
#include <string>
|
113
|
+
|
114
|
+
// Third-party library headers
|
115
|
+
#include <boost/algorithm/string.hpp>
|
116
|
+
|
117
|
+
// Project-specific headers
|
118
|
+
#include "utils.h"
|
119
|
+
#include "data_processing.h"
|
120
|
+
```
|
121
|
+
|
122
|
+
### Reformat all C++ sources
|
123
|
+
|
124
|
+
```sh
|
125
|
+
cd src\cpp
|
126
|
+
for /R %f in (*.cpp *.hpp) do "C:\Program Files\LLVM\bin\clang-format.exe" -i "%f"
|
127
|
+
```
|
128
|
+
|
129
|
+
### Coverage (C++ OpenCppCoverage)
|
130
|
+
|
131
|
+
```sh
|
132
|
+
coverage.bat ~[real_world]
|
133
|
+
```
|
134
|
+
|
135
|
+
### Catch2 Tests
|
136
|
+
|
137
|
+
If the output prints UTF-8 characters the terminal needs to be set to UTF-8 mode on Windows:
|
138
|
+
|
139
|
+
```sh
|
140
|
+
chcp 65001
|
141
|
+
```
|
142
|
+
#### Running Tests
|
143
|
+
|
144
|
+
```sh
|
145
|
+
build\src\cpp\RelWithDebInfo\fastgpx_test.exe
|
146
|
+
```
|
147
|
+
|
148
|
+
#### Running Benchmarks
|
149
|
+
|
150
|
+
```sh
|
151
|
+
build\src\cpp\RelWithDebInfo\fastgpx_test.exe [!benchmark]
|
152
|
+
```
|
fastgpx-0.1.0/GPX.md
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
|
2
|
+
# GPX
|
3
|
+
|
4
|
+
## GPX for Developers
|
5
|
+
|
6
|
+
https://www.topografix.com/gpx_for_developers.asp
|
7
|
+
|
8
|
+
> The standard in the gpx files for the time zone format is not completely
|
9
|
+
> clear. For instance some applications generate time formats in the track
|
10
|
+
> points of the form
|
11
|
+
>
|
12
|
+
> <time>2008-07-18T16:07:50.000+02:00</time>
|
13
|
+
>
|
14
|
+
> this is slightly substandard because the standard as described in
|
15
|
+
> http://www.topografix.com/gpx.asp says that
|
16
|
+
>
|
17
|
+
> "Date and time in are in Univeral Coordinated Time (UTC), not local time!
|
18
|
+
> Conforms to ISO 8601 specification for date/time representation."
|
19
|
+
|
20
|
+
https://web.archive.org/web/20130725164436/http://tech.groups.yahoo.com/group/gpsxml/message/1090?l=1
|
21
|
+
|
22
|
+
## GPS Exchange Format (GPX): A Comprehensive Guide
|
23
|
+
|
24
|
+
https://mapscaping.com/gps-exchange-format-gpx/
|
25
|
+
|
26
|
+
## Assumed typical <time> representation in .GPX files
|
27
|
+
|
28
|
+
To simplify logic and maximize performance, maybe one can assume a more narrow
|
29
|
+
scope of ISO 8601.
|
30
|
+
|
31
|
+
By using the length of the string one can make assumptions about the variation of
|
32
|
+
the format and directly extract the numeric components of the string. Probably
|
33
|
+
worth validating the separator characters as a minimal validation safety check
|
34
|
+
to eliminate pure junk input.
|
35
|
+
|
36
|
+
Variations observed:
|
37
|
+
* Only Extended Format.
|
38
|
+
* With or without milliseconds. (3 fractional decimals)
|
39
|
+
* Zulu hours are the norm, but timezone offsets might occur.
|
40
|
+
* Some mention of missing timezone notation altogether.
|
41
|
+
While ISO 8601 describes this as local time, one can probably assume this is
|
42
|
+
an omission by the author and it really should be Zulu hours.
|
43
|
+
|
44
|
+
| Example | Length | |
|
45
|
+
|-------------------------------|--------|-------------------------------------|
|
46
|
+
| 2008-07-18T16:07:50.000+02:00 | 29 | Not per GPX definition. |
|
47
|
+
| 2008-07-18T16:07:50.000Z | 24 | |
|
48
|
+
| 2008-07-18T16:07:50+02:00 | 25 | Not per GPX definition. |
|
49
|
+
| 2008-07-18T16:07:50Z | 20 | |
|
50
|
+
| 2008-07-18T16:07:50 | 19 | Assume Zulu time? |
|
fastgpx-0.1.0/LICENSE.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
|
2
|
+
The MIT License (MIT)
|
3
|
+
|
4
|
+
Copyright (c) 2024-2025 Thomas Thomassen
|
5
|
+
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
8
|
+
in the Software without restriction, including without limitation the rights
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
11
|
+
furnished to do so, subject to the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be included in all
|
14
|
+
copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
22
|
+
SOFTWARE.
|
fastgpx-0.1.0/PKG-INFO
ADDED
@@ -0,0 +1,148 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: fastgpx
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: An experimental Python library for parsing GPX files fast.
|
5
|
+
Keywords: gpx,parser,fast
|
6
|
+
Author: Thomas Thomassen
|
7
|
+
License-Expression: MIT
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
9
|
+
Classifier: Intended Audience :: Developers
|
10
|
+
Classifier: Topic :: File Formats
|
11
|
+
Classifier: Operating System :: Microsoft :: Windows
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
13
|
+
Project-URL: Documentation, https://thomthom.github.io/fastgpx/
|
14
|
+
Requires-Python: >=3.11
|
15
|
+
Description-Content-Type: text/markdown
|
16
|
+
|
17
|
+
# fastgpx
|
18
|
+
|
19
|
+
An experimental Python library for parsing GPX files fast.
|
20
|
+
|
21
|
+
```py
|
22
|
+
# Get the total length of the tracks in a GPX file:
|
23
|
+
import fastgpx
|
24
|
+
|
25
|
+
gpx = fastgpx.parse("example.gpx")
|
26
|
+
print(f'{gpx.length_2d()} m')
|
27
|
+
```
|
28
|
+
|
29
|
+
```py
|
30
|
+
# Iterate over GPX file:
|
31
|
+
import fastgpx
|
32
|
+
|
33
|
+
gpx = fastgpx.parse("example.gpx")
|
34
|
+
for track in gpx.tracks:
|
35
|
+
print(f'Track: {track.name}')
|
36
|
+
print(f'Distance: {track.length_2d()} m')
|
37
|
+
if not track.time_bounds().is_empty():
|
38
|
+
print(f'Time: {track.time_bounds().start_time} - {track.time_bounds().end_time}')
|
39
|
+
for segment in track.segments:
|
40
|
+
print(f'Segment: {segment.name}')
|
41
|
+
for point in segment.points:
|
42
|
+
print(f'Point: {point.latitude}, {point.longitude}')
|
43
|
+
```
|
44
|
+
|
45
|
+
[Documentation](https://thomthom.github.io/fastgpx/)
|
46
|
+
|
47
|
+
## GPX/XML Performance (Background)
|
48
|
+
|
49
|
+
`gpxpy` appears to be the most popular GPX library for Python.
|
50
|
+
|
51
|
+
`gpxpy` docs say that it uses `lxml` if available because it is faster than "`minidom`" (`etree`).
|
52
|
+
When benchmarking that seemed not to be the case. It appears that the stdlib XML library has gotten
|
53
|
+
much better since `gpxpy` was created.
|
54
|
+
|
55
|
+
Reference: Open ticket on making `etree` default:
|
56
|
+
https://github.com/tkrajina/gpxpy/issues/248
|
57
|
+
|
58
|
+
## Benchmarks
|
59
|
+
|
60
|
+
Test machine:
|
61
|
+
|
62
|
+
* AMD Ryzen 7 5800 8-Core, 3.80 GHz
|
63
|
+
* 32 GB memory
|
64
|
+
* m2 SSD storage
|
65
|
+
|
66
|
+
### gpxpy benchmarks
|
67
|
+
|
68
|
+
Comparing getting the distance of a GPX file using `gpxpy` vs manually extracting
|
69
|
+
the data using `xml_etree`, computing distance between points using `gpxpy`
|
70
|
+
distance functions.
|
71
|
+
|
72
|
+
#### gpxpy without `lxml`
|
73
|
+
|
74
|
+
```
|
75
|
+
Running benchmark with 3 iterations...
|
76
|
+
gpxpy 5463041.784135511 meters
|
77
|
+
gpxpy 5463041.784135511 meters
|
78
|
+
gpxpy 5463041.784135511 meters
|
79
|
+
gpxpy: 11.497863 seconds (Average: 3.832621 seconds)
|
80
|
+
```
|
81
|
+
|
82
|
+
#### gpxpy with `lxml`
|
83
|
+
|
84
|
+
```
|
85
|
+
Running benchmark with 3 iterations...
|
86
|
+
gpxpy 5463041.784135511 meters
|
87
|
+
gpxpy 5463041.784135511 meters
|
88
|
+
gpxpy 5463041.784135511 meters
|
89
|
+
gpxpy: 37.803625 seconds (Average: 12.601208 seconds)
|
90
|
+
```
|
91
|
+
|
92
|
+
#### xml_etree data extraction
|
93
|
+
|
94
|
+
```
|
95
|
+
Running benchmark with 3 iterations...
|
96
|
+
xml_etree 5463043.740615641 meters
|
97
|
+
xml_etree 5463043.740615641 meters
|
98
|
+
xml_etree 5463043.740615641 meters
|
99
|
+
xml_etree: 2.333200 seconds (Average: 0.777733 seconds)
|
100
|
+
```
|
101
|
+
|
102
|
+
Even with `gpxpy` using `etree` to parse the XML it is faster to parse it
|
103
|
+
directly with `etree` and use `gpxpy.geo` distance functions to compute the
|
104
|
+
distance of a GPX file. Unclear what the extra overhead is, possibly the cost
|
105
|
+
of extracting additional data. (Some minor difference in how the total distance
|
106
|
+
is computed in this example. Using different options for computing the distance.)
|
107
|
+
|
108
|
+
### C++ benchmarks
|
109
|
+
|
110
|
+
Since XML parsing itself appears to have a significant impact on performance some
|
111
|
+
popular C++ XML libraries were tested:
|
112
|
+
|
113
|
+
#### tinyxml2
|
114
|
+
```
|
115
|
+
Total Length: 5456930.710560566
|
116
|
+
Elapsed time: 0.4980144 seconds
|
117
|
+
```
|
118
|
+
|
119
|
+
#### pugixml
|
120
|
+
```
|
121
|
+
Total Length: 5456930.710560566
|
122
|
+
Elapsed time: 0.1890089 seconds
|
123
|
+
```
|
124
|
+
|
125
|
+
### C++ vs Python implementations
|
126
|
+
|
127
|
+
|
128
|
+
```
|
129
|
+
Running 5 benchmarks with 3 iterations...
|
130
|
+
|
131
|
+
Running gpxpy ...
|
132
|
+
gpxpy: 50.182288 seconds (Average: 16.727429 seconds)
|
133
|
+
|
134
|
+
Running xml_etree ...
|
135
|
+
xml_etree: 8.269050 seconds (Average: 2.756350 seconds)
|
136
|
+
|
137
|
+
Running lxml ...
|
138
|
+
lxml: 8.479702 seconds (Average: 2.826567 seconds)
|
139
|
+
|
140
|
+
Running tinyxml (C++) ...
|
141
|
+
tinyxml (C++): 2.699880 seconds (Average: 0.899960 seconds)
|
142
|
+
|
143
|
+
Running pugixml (C++) ...
|
144
|
+
pugixml (C++): 0.381095 seconds (Average: 0.127032 seconds)
|
145
|
+
```
|
146
|
+
|
147
|
+
For computing the length of a GPX file, `pugixml` in a Python C extension was ~130
|
148
|
+
times faster than using `gpxpy`.
|
fastgpx-0.1.0/README.md
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
# fastgpx
|
2
|
+
|
3
|
+
An experimental Python library for parsing GPX files fast.
|
4
|
+
|
5
|
+
```py
|
6
|
+
# Get the total length of the tracks in a GPX file:
|
7
|
+
import fastgpx
|
8
|
+
|
9
|
+
gpx = fastgpx.parse("example.gpx")
|
10
|
+
print(f'{gpx.length_2d()} m')
|
11
|
+
```
|
12
|
+
|
13
|
+
```py
|
14
|
+
# Iterate over GPX file:
|
15
|
+
import fastgpx
|
16
|
+
|
17
|
+
gpx = fastgpx.parse("example.gpx")
|
18
|
+
for track in gpx.tracks:
|
19
|
+
print(f'Track: {track.name}')
|
20
|
+
print(f'Distance: {track.length_2d()} m')
|
21
|
+
if not track.time_bounds().is_empty():
|
22
|
+
print(f'Time: {track.time_bounds().start_time} - {track.time_bounds().end_time}')
|
23
|
+
for segment in track.segments:
|
24
|
+
print(f'Segment: {segment.name}')
|
25
|
+
for point in segment.points:
|
26
|
+
print(f'Point: {point.latitude}, {point.longitude}')
|
27
|
+
```
|
28
|
+
|
29
|
+
[Documentation](https://thomthom.github.io/fastgpx/)
|
30
|
+
|
31
|
+
## GPX/XML Performance (Background)
|
32
|
+
|
33
|
+
`gpxpy` appears to be the most popular GPX library for Python.
|
34
|
+
|
35
|
+
`gpxpy` docs say that it uses `lxml` if available because it is faster than "`minidom`" (`etree`).
|
36
|
+
When benchmarking that seemed not to be the case. It appears that the stdlib XML library has gotten
|
37
|
+
much better since `gpxpy` was created.
|
38
|
+
|
39
|
+
Reference: Open ticket on making `etree` default:
|
40
|
+
https://github.com/tkrajina/gpxpy/issues/248
|
41
|
+
|
42
|
+
## Benchmarks
|
43
|
+
|
44
|
+
Test machine:
|
45
|
+
|
46
|
+
* AMD Ryzen 7 5800 8-Core, 3.80 GHz
|
47
|
+
* 32 GB memory
|
48
|
+
* m2 SSD storage
|
49
|
+
|
50
|
+
### gpxpy benchmarks
|
51
|
+
|
52
|
+
Comparing getting the distance of a GPX file using `gpxpy` vs manually extracting
|
53
|
+
the data using `xml_etree`, computing distance between points using `gpxpy`
|
54
|
+
distance functions.
|
55
|
+
|
56
|
+
#### gpxpy without `lxml`
|
57
|
+
|
58
|
+
```
|
59
|
+
Running benchmark with 3 iterations...
|
60
|
+
gpxpy 5463041.784135511 meters
|
61
|
+
gpxpy 5463041.784135511 meters
|
62
|
+
gpxpy 5463041.784135511 meters
|
63
|
+
gpxpy: 11.497863 seconds (Average: 3.832621 seconds)
|
64
|
+
```
|
65
|
+
|
66
|
+
#### gpxpy with `lxml`
|
67
|
+
|
68
|
+
```
|
69
|
+
Running benchmark with 3 iterations...
|
70
|
+
gpxpy 5463041.784135511 meters
|
71
|
+
gpxpy 5463041.784135511 meters
|
72
|
+
gpxpy 5463041.784135511 meters
|
73
|
+
gpxpy: 37.803625 seconds (Average: 12.601208 seconds)
|
74
|
+
```
|
75
|
+
|
76
|
+
#### xml_etree data extraction
|
77
|
+
|
78
|
+
```
|
79
|
+
Running benchmark with 3 iterations...
|
80
|
+
xml_etree 5463043.740615641 meters
|
81
|
+
xml_etree 5463043.740615641 meters
|
82
|
+
xml_etree 5463043.740615641 meters
|
83
|
+
xml_etree: 2.333200 seconds (Average: 0.777733 seconds)
|
84
|
+
```
|
85
|
+
|
86
|
+
Even with `gpxpy` using `etree` to parse the XML it is faster to parse it
|
87
|
+
directly with `etree` and use `gpxpy.geo` distance functions to compute the
|
88
|
+
distance of a GPX file. Unclear what the extra overhead is, possibly the cost
|
89
|
+
of extracting additional data. (Some minor difference in how the total distance
|
90
|
+
is computed in this example. Using different options for computing the distance.)
|
91
|
+
|
92
|
+
### C++ benchmarks
|
93
|
+
|
94
|
+
Since XML parsing itself appears to have a significant impact on performance some
|
95
|
+
popular C++ XML libraries were tested:
|
96
|
+
|
97
|
+
#### tinyxml2
|
98
|
+
```
|
99
|
+
Total Length: 5456930.710560566
|
100
|
+
Elapsed time: 0.4980144 seconds
|
101
|
+
```
|
102
|
+
|
103
|
+
#### pugixml
|
104
|
+
```
|
105
|
+
Total Length: 5456930.710560566
|
106
|
+
Elapsed time: 0.1890089 seconds
|
107
|
+
```
|
108
|
+
|
109
|
+
### C++ vs Python implementations
|
110
|
+
|
111
|
+
|
112
|
+
```
|
113
|
+
Running 5 benchmarks with 3 iterations...
|
114
|
+
|
115
|
+
Running gpxpy ...
|
116
|
+
gpxpy: 50.182288 seconds (Average: 16.727429 seconds)
|
117
|
+
|
118
|
+
Running xml_etree ...
|
119
|
+
xml_etree: 8.269050 seconds (Average: 2.756350 seconds)
|
120
|
+
|
121
|
+
Running lxml ...
|
122
|
+
lxml: 8.479702 seconds (Average: 2.826567 seconds)
|
123
|
+
|
124
|
+
Running tinyxml (C++) ...
|
125
|
+
tinyxml (C++): 2.699880 seconds (Average: 0.899960 seconds)
|
126
|
+
|
127
|
+
Running pugixml (C++) ...
|
128
|
+
pugixml (C++): 0.381095 seconds (Average: 0.127032 seconds)
|
129
|
+
```
|
130
|
+
|
131
|
+
For computing the length of a GPX file, `pugixml` in a Python C extension was ~130
|
132
|
+
times faster than using `gpxpy`.
|