osmsg 0.3.0__tar.gz → 1.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- osmsg-1.0.3/PKG-INFO +177 -0
- osmsg-1.0.3/README.md +145 -0
- osmsg-1.0.3/osmsg/__init__.py +95 -0
- osmsg-1.0.3/osmsg/__version__.py +1 -0
- osmsg-1.0.3/osmsg/_http.py +41 -0
- osmsg-1.0.3/osmsg/auth.py +118 -0
- osmsg-1.0.3/osmsg/boundary.py +37 -0
- osmsg-1.0.3/osmsg/cli.py +268 -0
- osmsg-1.0.3/osmsg/db/__init__.py +43 -0
- osmsg-1.0.3/osmsg/db/ingest.py +144 -0
- osmsg-1.0.3/osmsg/db/queries.py +265 -0
- osmsg-1.0.3/osmsg/db/schema.py +111 -0
- osmsg-1.0.3/osmsg/exceptions.py +37 -0
- osmsg-1.0.3/osmsg/export/__init__.py +19 -0
- osmsg-1.0.3/osmsg/export/csv.py +34 -0
- osmsg-1.0.3/osmsg/export/json.py +14 -0
- osmsg-1.0.3/osmsg/export/markdown.py +154 -0
- osmsg-1.0.3/osmsg/export/parquet.py +64 -0
- osmsg-1.0.3/osmsg/export/psql.py +89 -0
- osmsg-1.0.3/osmsg/fetch.py +49 -0
- osmsg-1.0.3/osmsg/geofabrik.py +41 -0
- osmsg-1.0.3/osmsg/handlers.py +205 -0
- osmsg-1.0.3/osmsg/models.py +143 -0
- osmsg-1.0.3/osmsg/pipeline.py +484 -0
- osmsg-1.0.3/osmsg/replication.py +162 -0
- osmsg-1.0.3/osmsg/tm.py +71 -0
- osmsg-1.0.3/osmsg/ui.py +65 -0
- osmsg-1.0.3/osmsg/workers.py +97 -0
- osmsg-1.0.3/pyproject.toml +113 -0
- osmsg-0.3.0/PKG-INFO +0 -104
- osmsg-0.3.0/README.md +0 -82
- osmsg-0.3.0/osmsg/__version__.py +0 -1
- osmsg-0.3.0/osmsg/app.py +0 -1550
- osmsg-0.3.0/osmsg/changefiles.py +0 -231
- osmsg-0.3.0/osmsg/changesets.py +0 -156
- osmsg-0.3.0/osmsg/login.py +0 -170
- osmsg-0.3.0/osmsg/utils.py +0 -845
- osmsg-0.3.0/pyproject.toml +0 -142
- {osmsg-0.3.0 → osmsg-1.0.3}/LICENSE +0 -0
- /osmsg-0.3.0/osmsg/__init__.py → /osmsg-1.0.3/osmsg/py.typed +0 -0
osmsg-1.0.3/PKG-INFO
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: osmsg
|
|
3
|
+
Version: 1.0.3
|
|
4
|
+
Summary: OpenStreetMap Stats Generator: Commandline
|
|
5
|
+
Keywords: osm,stats,commandline,openstreetmap
|
|
6
|
+
Author: Kshitij Raj Sharma
|
|
7
|
+
Author-email: Kshitij Raj Sharma <skshitizraj@gmail.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: Topic :: Utilities
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: GIS
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Dist: duckdb>=1.5.2
|
|
16
|
+
Requires-Dist: osmium>=4.3.1
|
|
17
|
+
Requires-Dist: platformdirs>=4.5.1
|
|
18
|
+
Requires-Dist: pyarrow>=24.0.0
|
|
19
|
+
Requires-Dist: pydantic>=2.13.3
|
|
20
|
+
Requires-Dist: python-dotenv>=1.2.2
|
|
21
|
+
Requires-Dist: pytz>=2024.1
|
|
22
|
+
Requires-Dist: requests>=2.32.5
|
|
23
|
+
Requires-Dist: rich>=13.0
|
|
24
|
+
Requires-Dist: shapely>=2.1.2
|
|
25
|
+
Requires-Dist: typer>=0.25.0
|
|
26
|
+
Requires-Dist: typer-config[yaml]>=1.5.1
|
|
27
|
+
Requires-Python: >=3.11
|
|
28
|
+
Project-URL: documentation, https://github.com/osgeonepal/osmsg
|
|
29
|
+
Project-URL: homepage, https://github.com/osgeonepal/osmsg
|
|
30
|
+
Project-URL: repository, https://github.com/osgeonepal/osmsg
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
|
|
33
|
+
# osmsg
|
|
34
|
+
|
|
35
|
+
[CI](https://github.com/osgeonepal/osmsg/actions/workflows/ci.yml)
|
|
36
|
+
[Docker](https://github.com/osgeonepal/osmsg/actions/workflows/docker.yml)
|
|
37
|
+
[PyPI](https://pypi.org/project/osmsg/)
|
|
38
|
+
[Python](https://www.python.org/downloads/)
|
|
39
|
+
[License: MIT](./LICENSE)
|
|
40
|
+
[Ruff](https://github.com/astral-sh/ruff)
|
|
41
|
+
[uv](https://github.com/astral-sh/uv)
|
|
42
|
+
[Container image](https://github.com/osgeonepal/osmsg/pkgs/container/osmsg)
|
|
43
|
+
|
|
44
|
+
**OpenStreetMap Stats Generator.** A tiny CLI (and Python library) that turns OSM history into per-user counts
|
|
45
|
+
of nodes, ways, and relations created, modified, or deleted, written to parquet, csv, json, markdown, or Postgres.
|
|
46
|
+
|
|
47
|
+
A project of [OSGeo Nepal](https://osgeonepal.org).
|
|
48
|
+
|
|
49
|
+
## What you get
|
|
50
|
+
|
|
51
|
+
- Per-user create/modify/delete counts over any time window.
|
|
52
|
+
- Tag and hashtag breakdowns (e.g. `building`, `#hotosm`).
|
|
53
|
+
- Country and custom-boundary filters via Geofabrik.
|
|
54
|
+
- Cron-friendly resume with `--update`.
|
|
55
|
+
- Outputs you can query: parquet, csv, json, markdown, DuckDB, Postgres.
|
|
56
|
+
|
|
57
|
+
## Install
|
|
58
|
+
|
|
59
|
+
Pick the one that fits how you work.
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
uvx --from osmsg osmsg --last hour # zero-install, one-shot run
|
|
63
|
+
pip install osmsg # into your project
|
|
64
|
+
uv tool install osmsg # standalone CLI
|
|
65
|
+
docker run --rm -v "$PWD:/work" -w /work ghcr.io/osgeonepal/osmsg:latest --last hour
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
`uvx` can run osmsg in a throwaway environment: no install, no virtualenv to manage. It works
|
|
69
|
+
with any flag combination, e.g. `uvx --from osmsg osmsg --last hour --tags building --summary -f parquet -f markdown`.
|
|
70
|
+
|
|
71
|
+
## Quick start
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
osmsg --last hour # planet, last hour
|
|
75
|
+
osmsg --last day --tags building # last day with a tag breakdown
|
|
76
|
+
osmsg --hashtags hotosm --last day # only changesets tagged #hotosm
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
That's it. A `stats.duckdb` and a `stats.parquet` show up in your current folder.
|
|
80
|
+
|
|
81
|
+
## Tutorials
|
|
82
|
+
|
|
83
|
+
### 1. Stats for a country
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
osmsg --country nepal --last day
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
`--country` resolves through Geofabrik and needs an OSM account. Set `OSM_USERNAME` and `OSM_PASSWORD`
|
|
90
|
+
in your shell or a `.env` file:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
export OSM_USERNAME=you
|
|
94
|
+
export OSM_PASSWORD=secret
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### 2. A custom date range with summaries
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
osmsg --start "2026-04-01" --end "2026-04-08" \
|
|
101
|
+
--tags building --tags highway --summary
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
`--summary` adds a daily rollup file alongside the per-changeset stats.
|
|
105
|
+
|
|
106
|
+
### 3. Run on a schedule
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
osmsg --country nepal --update # picks up where the last run stopped
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Drop that into cron or a GitHub Actions schedule. State is stored inside the DuckDB file, so reruns are safe.
|
|
113
|
+
|
|
114
|
+
### 4. Query the output
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
duckdb stats.duckdb -c "SELECT username, SUM(nodes_created) AS n
|
|
118
|
+
FROM users JOIN changeset_stats USING (uid)
|
|
119
|
+
GROUP BY username ORDER BY n DESC LIMIT 10"
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Same schema in DuckDB and Postgres: `users`, `changesets`, `changeset_stats`, `state`.
|
|
123
|
+
|
|
124
|
+
### 5. Use it as a library
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
from datetime import datetime, UTC
|
|
128
|
+
from osmsg import RunConfig, run
|
|
129
|
+
|
|
130
|
+
result = run(RunConfig(
|
|
131
|
+
name="nepal",
|
|
132
|
+
countries=["nepal"],
|
|
133
|
+
start_date=datetime(2026, 4, 25, tzinfo=UTC),
|
|
134
|
+
end_date=datetime(2026, 4, 26, tzinfo=UTC),
|
|
135
|
+
))
|
|
136
|
+
print(result["files"]["parquet"])
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Same pipeline as the CLI.
|
|
140
|
+
|
|
141
|
+
### 6. Long flag lists? Use a config
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
osmsg --config nepal.yaml
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Any flag works as a YAML key. See [docs/Manual.md](./docs/Manual.md) for the full list.
|
|
148
|
+
|
|
149
|
+
## Output formats
|
|
150
|
+
|
|
151
|
+
Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via
|
|
152
|
+
`-f parquet|csv|json|markdown|psql`. Parquet is the default. Open it with duckdb, polars, pandas, anything.
|
|
153
|
+
|
|
154
|
+
## Documentation
|
|
155
|
+
|
|
156
|
+
- [Installation](./docs/Installation.md)
|
|
157
|
+
- [Manual](./docs/Manual.md) (every flag, with examples)
|
|
158
|
+
- [Version control / release notes](./docs/Version_control.md)
|
|
159
|
+
|
|
160
|
+
## Contributing
|
|
161
|
+
|
|
162
|
+
Pull requests are welcome. Quick path:
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
git clone https://github.com/osgeonepal/osmsg && cd osmsg
|
|
166
|
+
git switch develop
|
|
167
|
+
uv sync
|
|
168
|
+
uv run pre-commit install
|
|
169
|
+
uv run pytest -m "not network"
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Please read [CONTRIBUTING.md](./CONTRIBUTING.md) and the [Code of Conduct](./CODE_OF_CONDUCT.md) before opening a PR.
|
|
173
|
+
Use [Conventional Commits](https://www.conventionalcommits.org/) (`cz commit`).
|
|
174
|
+
|
|
175
|
+
## License
|
|
176
|
+
|
|
177
|
+
[MIT](./LICENSE) © OSGeo Nepal contributors.
|
osmsg-1.0.3/README.md
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# osmsg
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/osgeonepal/osmsg/actions/workflows/ci.yml)
|
|
4
|
+
[Docker](https://github.com/osgeonepal/osmsg/actions/workflows/docker.yml)
|
|
5
|
+
[PyPI](https://pypi.org/project/osmsg/)
|
|
6
|
+
[Python](https://www.python.org/downloads/)
|
|
7
|
+
[License: MIT](./LICENSE)
|
|
8
|
+
[Ruff](https://github.com/astral-sh/ruff)
|
|
9
|
+
[uv](https://github.com/astral-sh/uv)
|
|
10
|
+
[Container image](https://github.com/osgeonepal/osmsg/pkgs/container/osmsg)
|
|
11
|
+
|
|
12
|
+
**OpenStreetMap Stats Generator.** A tiny CLI (and Python library) that turns OSM history into per-user counts
|
|
13
|
+
of nodes, ways, and relations created, modified, or deleted, written to parquet, csv, json, markdown, or Postgres.
|
|
14
|
+
|
|
15
|
+
A project of [OSGeo Nepal](https://osgeonepal.org).
|
|
16
|
+
|
|
17
|
+
## What you get
|
|
18
|
+
|
|
19
|
+
- Per-user create/modify/delete counts over any time window.
|
|
20
|
+
- Tag and hashtag breakdowns (e.g. `building`, `#hotosm`).
|
|
21
|
+
- Country and custom-boundary filters via Geofabrik.
|
|
22
|
+
- Cron-friendly resume with `--update`.
|
|
23
|
+
- Outputs you can query: parquet, csv, json, markdown, DuckDB, Postgres.
|
|
24
|
+
|
|
25
|
+
## Install
|
|
26
|
+
|
|
27
|
+
Pick the one that fits how you work.
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
uvx --from osmsg osmsg --last hour # zero-install, one-shot run
|
|
31
|
+
pip install osmsg # into your project
|
|
32
|
+
uv tool install osmsg # standalone CLI
|
|
33
|
+
docker run --rm -v "$PWD:/work" -w /work ghcr.io/osgeonepal/osmsg:latest --last hour
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
`uvx` can run osmsg in a throwaway environment: no install, no virtualenv to manage. It works
|
|
37
|
+
with any flag combination, e.g. `uvx --from osmsg osmsg --last hour --tags building --summary -f parquet -f markdown`.
|
|
38
|
+
|
|
39
|
+
## Quick start
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
osmsg --last hour # planet, last hour
|
|
43
|
+
osmsg --last day --tags building # last day with a tag breakdown
|
|
44
|
+
osmsg --hashtags hotosm --last day # only changesets tagged #hotosm
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
That's it. A `stats.duckdb` and a `stats.parquet` show up in your current folder.
|
|
48
|
+
|
|
49
|
+
## Tutorials
|
|
50
|
+
|
|
51
|
+
### 1. Stats for a country
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
osmsg --country nepal --last day
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
`--country` resolves through Geofabrik and needs an OSM account. Set `OSM_USERNAME` and `OSM_PASSWORD`
|
|
58
|
+
in your shell or a `.env` file:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
export OSM_USERNAME=you
|
|
62
|
+
export OSM_PASSWORD=secret
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### 2. A custom date range with summaries
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
osmsg --start "2026-04-01" --end "2026-04-08" \
|
|
69
|
+
--tags building --tags highway --summary
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
`--summary` adds a daily rollup file alongside the per-changeset stats.
|
|
73
|
+
|
|
74
|
+
### 3. Run on a schedule
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
osmsg --country nepal --update # picks up where the last run stopped
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Drop that into cron or a GitHub Actions schedule. State is stored inside the DuckDB file, so reruns are safe.
|
|
81
|
+
|
|
82
|
+
### 4. Query the output
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
duckdb stats.duckdb -c "SELECT username, SUM(nodes_created) AS n
|
|
86
|
+
FROM users JOIN changeset_stats USING (uid)
|
|
87
|
+
GROUP BY username ORDER BY n DESC LIMIT 10"
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Same schema in DuckDB and Postgres: `users`, `changesets`, `changeset_stats`, `state`.
|
|
91
|
+
|
|
92
|
+
### 5. Use it as a library
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from datetime import datetime, UTC
|
|
96
|
+
from osmsg import RunConfig, run
|
|
97
|
+
|
|
98
|
+
result = run(RunConfig(
|
|
99
|
+
name="nepal",
|
|
100
|
+
countries=["nepal"],
|
|
101
|
+
start_date=datetime(2026, 4, 25, tzinfo=UTC),
|
|
102
|
+
end_date=datetime(2026, 4, 26, tzinfo=UTC),
|
|
103
|
+
))
|
|
104
|
+
print(result["files"]["parquet"])
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Same pipeline as the CLI.
|
|
108
|
+
|
|
109
|
+
### 6. Long flag lists? Use a config
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
osmsg --config nepal.yaml
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Any flag works as a YAML key. See [docs/Manual.md](./docs/Manual.md) for the full list.
|
|
116
|
+
|
|
117
|
+
## Output formats
|
|
118
|
+
|
|
119
|
+
Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via
|
|
120
|
+
`-f parquet|csv|json|markdown|psql`. Parquet is the default. Open it with duckdb, polars, pandas, anything.
|
|
121
|
+
|
|
122
|
+
## Documentation
|
|
123
|
+
|
|
124
|
+
- [Installation](./docs/Installation.md)
|
|
125
|
+
- [Manual](./docs/Manual.md) (every flag, with examples)
|
|
126
|
+
- [Version control / release notes](./docs/Version_control.md)
|
|
127
|
+
|
|
128
|
+
## Contributing
|
|
129
|
+
|
|
130
|
+
Pull requests are welcome. Quick path:
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
git clone https://github.com/osgeonepal/osmsg && cd osmsg
|
|
134
|
+
git switch develop
|
|
135
|
+
uv sync
|
|
136
|
+
uv run pre-commit install
|
|
137
|
+
uv run pytest -m "not network"
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Please read [CONTRIBUTING.md](./CONTRIBUTING.md) and the [Code of Conduct](./CODE_OF_CONDUCT.md) before opening a PR.
|
|
141
|
+
Use [Conventional Commits](https://www.conventionalcommits.org/) (`cz commit`).
|
|
142
|
+
|
|
143
|
+
## License
|
|
144
|
+
|
|
145
|
+
[MIT](./LICENSE) © OSGeo Nepal contributors.
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""OpenStreetMap stats generator. Parquet-first, OAuth 2.0, UTC-only.
|
|
2
|
+
|
|
3
|
+
Library usage::
|
|
4
|
+
|
|
5
|
+
from osmsg import RunConfig, run, OsmsgError
|
|
6
|
+
|
|
7
|
+
cfg = RunConfig(
|
|
8
|
+
name="nepal",
|
|
9
|
+
countries=["nepal"],
|
|
10
|
+
start_date=datetime(2026, 4, 25, tzinfo=UTC),
|
|
11
|
+
end_date=datetime(2026, 4, 26, tzinfo=UTC),
|
|
12
|
+
formats=["parquet"],
|
|
13
|
+
osm_username="...",
|
|
14
|
+
osm_password="...",
|
|
15
|
+
)
|
|
16
|
+
try:
|
|
17
|
+
result = run(cfg)
|
|
18
|
+
except OsmsgError as exc:
|
|
19
|
+
...
|
|
20
|
+
print(result["files"]["parquet"]) # → 'nepal.parquet'
|
|
21
|
+
|
|
22
|
+
CLI entry point: ``osmsg`` (defined in ``osmsg.cli``).
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
from .__version__ import __version__
|
|
28
|
+
from .db import (
|
|
29
|
+
attach_metadata,
|
|
30
|
+
attach_tag_stats,
|
|
31
|
+
connect,
|
|
32
|
+
create_tables,
|
|
33
|
+
daily_summary,
|
|
34
|
+
get_state,
|
|
35
|
+
upsert_state,
|
|
36
|
+
user_stats,
|
|
37
|
+
)
|
|
38
|
+
from .exceptions import (
|
|
39
|
+
CredentialsRequiredError,
|
|
40
|
+
GeofabrikAuthError,
|
|
41
|
+
NoDataFoundError,
|
|
42
|
+
OsmsgError,
|
|
43
|
+
UnknownRegionError,
|
|
44
|
+
)
|
|
45
|
+
from .export import (
|
|
46
|
+
summary_markdown,
|
|
47
|
+
table_markdown,
|
|
48
|
+
to_csv,
|
|
49
|
+
to_json,
|
|
50
|
+
to_parquet,
|
|
51
|
+
to_psql,
|
|
52
|
+
)
|
|
53
|
+
from .geofabrik import country_update_url, load_index
|
|
54
|
+
from .models import (
|
|
55
|
+
Action,
|
|
56
|
+
Changeset,
|
|
57
|
+
ChangesetStats,
|
|
58
|
+
ElementStat,
|
|
59
|
+
TagValueStat,
|
|
60
|
+
User,
|
|
61
|
+
)
|
|
62
|
+
from .pipeline import RunConfig, run
|
|
63
|
+
|
|
64
|
+
__all__ = [
|
|
65
|
+
"Action",
|
|
66
|
+
"Changeset",
|
|
67
|
+
"ChangesetStats",
|
|
68
|
+
"CredentialsRequiredError",
|
|
69
|
+
"ElementStat",
|
|
70
|
+
"GeofabrikAuthError",
|
|
71
|
+
"NoDataFoundError",
|
|
72
|
+
"OsmsgError",
|
|
73
|
+
"RunConfig",
|
|
74
|
+
"TagValueStat",
|
|
75
|
+
"UnknownRegionError",
|
|
76
|
+
"User",
|
|
77
|
+
"__version__",
|
|
78
|
+
"attach_metadata",
|
|
79
|
+
"attach_tag_stats",
|
|
80
|
+
"connect",
|
|
81
|
+
"country_update_url",
|
|
82
|
+
"create_tables",
|
|
83
|
+
"daily_summary",
|
|
84
|
+
"get_state",
|
|
85
|
+
"load_index",
|
|
86
|
+
"run",
|
|
87
|
+
"summary_markdown",
|
|
88
|
+
"table_markdown",
|
|
89
|
+
"to_csv",
|
|
90
|
+
"to_json",
|
|
91
|
+
"to_parquet",
|
|
92
|
+
"to_psql",
|
|
93
|
+
"upsert_state",
|
|
94
|
+
"user_stats",
|
|
95
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.0.3"
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""Shared `requests.Session` with retry policy + connect/read timeouts.
|
|
2
|
+
|
|
3
|
+
Every HTTP call in osmsg goes through this session so retry behaviour and
|
|
4
|
+
timeout defaults are consistent. Per-request `timeout=` still wins.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
from requests.adapters import HTTPAdapter
|
|
11
|
+
from urllib3.util.retry import Retry
|
|
12
|
+
|
|
13
|
+
USER_AGENT = "osmsg"
|
|
14
|
+
DEFAULT_TIMEOUT = (10, 60) # (connect, read) seconds
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class _TimeoutSession(requests.Session):
    """`requests.Session` that falls back to `DEFAULT_TIMEOUT` when no timeout is given."""

    def request(self, method, url, *args, **kwargs):
        # An explicit `timeout=` keyword from the caller always takes precedence.
        if "timeout" not in kwargs:
            kwargs["timeout"] = DEFAULT_TIMEOUT
        return super().request(method, url, *args, **kwargs)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def make_session() -> requests.Session:
    """Build a fresh session carrying the standard timeout and retry policy.

    Intended for flows that need a cookie jar of their own.
    """
    policy = Retry(
        total=5,
        backoff_factor=0.5,
        status_forcelist=(429, 500, 502, 503, 504),
        allowed_methods=frozenset({"GET", "POST", "HEAD"}),
    )
    transport = HTTPAdapter(max_retries=policy, pool_maxsize=32)

    sess = _TimeoutSession()
    # One adapter instance serves both schemes.
    for scheme in ("https://", "http://"):
        sess.mount(scheme, transport)
    sess.headers["User-Agent"] = USER_AGENT
    return sess
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
session = make_session()
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""OAuth 2.0 cookie client for Geofabrik internal download server.
|
|
2
|
+
|
|
3
|
+
Mirrors https://github.com/geofabrik/sendfile_osm_oauth_protector
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import urllib.parse
|
|
9
|
+
from html.parser import HTMLParser
|
|
10
|
+
|
|
11
|
+
from ._http import make_session
|
|
12
|
+
from ._http import session as shared_session
|
|
13
|
+
from .exceptions import GeofabrikAuthError
|
|
14
|
+
|
|
15
|
+
DEFAULT_OSM_HOST = "https://www.openstreetmap.org"
|
|
16
|
+
DEFAULT_CONSUMER_URL = "https://osm-internal.download.geofabrik.de/get_cookie"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class _CsrfFinder(HTMLParser):
|
|
20
|
+
def __init__(self) -> None:
|
|
21
|
+
super().__init__()
|
|
22
|
+
self.token: str | None = None
|
|
23
|
+
|
|
24
|
+
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
|
|
25
|
+
if tag != "meta" or self.token is not None:
|
|
26
|
+
return
|
|
27
|
+
a = dict(attrs)
|
|
28
|
+
if a.get("name") == "csrf-token":
|
|
29
|
+
content = a.get("content")
|
|
30
|
+
if content:
|
|
31
|
+
self.token = content
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _csrf(html: str) -> str:
    """Return the CSRF token found in *html*, raising `GeofabrikAuthError` if there is none."""
    finder = _CsrfFinder()
    finder.feed(html)
    token = finder.token
    if token is not None:
        return token
    raise GeofabrikAuthError("authenticity_token not found in OSM response")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_geofabrik_cookie(
    username: str,
    password: str,
    osm_host: str = DEFAULT_OSM_HOST,
    consumer_url: str = DEFAULT_CONSUMER_URL,
) -> str:
    """Log in to OSM and exchange the OAuth 2.0 grant for a Geofabrik cookie.

    The flow is: ask the consumer for an authorization URL, log in to
    ``osm_host`` with a private session, authorize the client (submitting the
    consent form when OSM answers 200 instead of redirecting), log out, then
    fetch the redirect target with ``format=http`` to obtain the cookie text.

    Args:
        username: OSM account name.
        password: OSM account password.
        osm_host: Base URL of the OSM website used for login/logout.
        consumer_url: Geofabrik ``get_cookie`` endpoint.

    Returns:
        The stripped body of the cookie-exchange response.

    Raises:
        GeofabrikAuthError: If credentials are empty, any step returns an
            unexpected HTTP status, the authorization response is not a JSON
            object with the expected fields, or the final body is empty.
            Exceptions raised by the HTTP calls themselves are not wrapped.
    """
    if not username or not password:
        raise GeofabrikAuthError("OSM username and password required")

    r = shared_session.post(f"{consumer_url}?action=get_authorization_url", timeout=30)
    if r.status_code != 200:
        raise GeofabrikAuthError(f"get_authorization_url returned HTTP {r.status_code}")
    try:
        payload = r.json()
    except ValueError as exc:
        # A 200 with a non-JSON body must surface as an auth error, not a raw ValueError.
        raise GeofabrikAuthError("get_authorization_url returned a non-JSON body") from exc
    try:
        authz_url = payload["authorization_url"]
        state = payload["state"]
        redirect_uri = payload["redirect_uri"]
        client_id = payload["client_id"]
    except (KeyError, TypeError) as exc:
        # TypeError covers a JSON body that is not an object (e.g. a list or string).
        raise GeofabrikAuthError(f"missing field in authorization response: {exc}") from exc

    # Private session so the OSM login cookies never touch the shared session;
    # the context manager closes its connection pool on every exit path.
    with make_session() as s:
        r = s.get(f"{osm_host}/login?cookie_test=true", timeout=30)
        if r.status_code != 200:
            raise GeofabrikAuthError(f"GET /login returned HTTP {r.status_code}")

        r = s.post(
            f"{osm_host}/login",
            data={
                "username": username,
                "password": password,
                "referer": "/",
                "commit": "Login",
                "authenticity_token": _csrf(r.text),
            },
            allow_redirects=False,
            timeout=30,
        )
        if r.status_code != 302:
            raise GeofabrikAuthError(f"OSM login failed (HTTP {r.status_code}); check credentials")

        r = s.get(authz_url, allow_redirects=False, timeout=30)
        if r.status_code != 302:
            # No immediate redirect: a 200 here is treated as the consent form to submit.
            if r.status_code != 200:
                raise GeofabrikAuthError(f"GET authorize returned HTTP {r.status_code}")
            r = s.post(
                authz_url,
                data={
                    "client_id": client_id,
                    "redirect_uri": redirect_uri,
                    "authenticity_token": _csrf(r.text),
                    "state": state,
                    "response_type": "code",
                    "scope": "read_prefs",
                    "nonce": "",
                    "code_challenge": "",
                    "code_challenge_method": "",
                    "commit": "Authorize",
                },
                allow_redirects=False,
                timeout=30,
            )
            if r.status_code != 302:
                raise GeofabrikAuthError(f"POST authorize returned HTTP {r.status_code}")

        location = r.headers.get("location") or ""
        if "?" not in location:
            raise GeofabrikAuthError("authorization redirect missing query string")

        s.get(f"{osm_host}/logout", timeout=30)

    # `location` already has a query string (checked above), so append with "&".
    final_url = f"{location}&{urllib.parse.urlencode({'format': 'http'})}"
    r = shared_session.get(final_url, timeout=30)
    if r.status_code != 200 or not r.text.strip():
        raise GeofabrikAuthError(f"cookie exchange failed (HTTP {r.status_code})")
    return r.text.strip()
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Geometry helpers: boundary parsing + bbox centroid."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from shapely.geometry import MultiPolygon, Polygon, box, shape
|
|
10
|
+
from shapely.geometry.base import BaseGeometry
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def load_boundary(input_data: str) -> BaseGeometry:
    """Parse a polygonal boundary from inline GeoJSON text or a GeoJSON file path.

    The input is first tried as JSON text; if that fails it is treated as a
    path to a file containing JSON. The payload may be a bare geometry or an
    object with a ``geometry`` member (e.g. a Feature).

    Args:
        input_data: GeoJSON text, or a path to a GeoJSON file.

    Returns:
        The boundary as a shapely ``Polygon`` or ``MultiPolygon``.

    Raises:
        ValueError: If the input is neither valid JSON nor an existing file,
            or the geometry is not a Polygon/MultiPolygon object.
    """
    try:
        payload: Any = json.loads(input_data)
    except json.JSONDecodeError as exc:
        path = Path(input_data)
        try:
            is_file = path.is_file()
        except OSError:
            # e.g. malformed inline JSON that is too long to be a file name.
            is_file = False
        if not is_file:
            raise ValueError(f"Not valid JSON or a file path: {input_data!r}") from exc
        # GeoJSON is UTF-8 (RFC 7946); do not depend on the platform locale.
        payload = json.loads(path.read_text(encoding="utf-8"))

    # Valid JSON that is not an object (list, number, string) cannot be a boundary.
    if not isinstance(payload, dict):
        raise ValueError("Boundary must be a Polygon or MultiPolygon GeoJSON.")

    geometry = payload.get("geometry") if "geometry" in payload else payload
    if not isinstance(geometry, dict) or geometry.get("type") not in ("Polygon", "MultiPolygon"):
        raise ValueError("Boundary must be a Polygon or MultiPolygon GeoJSON.")
    geom = shape(geometry)
    if isinstance(geom, (Polygon, MultiPolygon)):
        return geom
    raise ValueError(f"Unexpected geometry type: {type(geom).__name__}")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def bbox_centroid(bounds) -> tuple[float, float] | None:
    """Return the (x, y) centroid of an osmium bounding box, or None when the box is invalid."""
    if bounds.valid():
        lower, upper = bounds.bottom_left, bounds.top_right
        # x comes from longitude and y from latitude, as passed to `box`.
        center = box(lower.lon, lower.lat, upper.lon, upper.lat).centroid
        return center.x, center.y
    return None
|