osmsg 1.0.2__tar.gz → 1.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {osmsg-1.0.2 → osmsg-1.0.3}/PKG-INFO +13 -5
- {osmsg-1.0.2 → osmsg-1.0.3}/README.md +12 -4
- osmsg-1.0.3/osmsg/__version__.py +1 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/cli.py +1 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/export/markdown.py +26 -1
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/pipeline.py +50 -6
- {osmsg-1.0.2 → osmsg-1.0.3}/pyproject.toml +1 -1
- osmsg-1.0.2/osmsg/__version__.py +0 -1
- {osmsg-1.0.2 → osmsg-1.0.3}/LICENSE +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/__init__.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/_http.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/auth.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/boundary.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/db/__init__.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/db/ingest.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/db/queries.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/db/schema.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/exceptions.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/export/__init__.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/export/csv.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/export/json.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/export/parquet.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/export/psql.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/fetch.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/geofabrik.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/handlers.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/models.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/py.typed +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/replication.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/tm.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/ui.py +0 -0
- {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/workers.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: osmsg
|
|
3
|
-
Version: 1.0.2
|
|
3
|
+
Version: 1.0.3
|
|
4
4
|
Summary: OpenStreetMap Stats Generator: Commandline
|
|
5
5
|
Keywords: osm,stats,commandline,openstreetmap
|
|
6
6
|
Author: Kshitij Raj Sharma
|
|
@@ -41,7 +41,8 @@ Description-Content-Type: text/markdown
|
|
|
41
41
|
[](https://github.com/astral-sh/uv)
|
|
42
42
|
[](https://github.com/osgeonepal/osmsg/pkgs/container/osmsg)
|
|
43
43
|
|
|
44
|
-
**OpenStreetMap Stats Generator.** A tiny CLI (and Python library) that turns OSM history into per-user counts
|
|
44
|
+
**OpenStreetMap Stats Generator.** A tiny CLI (and Python library) that turns OSM history into per-user counts
|
|
45
|
+
of nodes, ways, and relations created, modified, or deleted, written to parquet, csv, json, markdown, or Postgres.
|
|
45
46
|
|
|
46
47
|
A Project of [OSGeo Nepal](https://osgeonepal.org).
|
|
47
48
|
|
|
@@ -58,11 +59,15 @@ A Project of [OSGeo Nepal](https://osgeonepal.org).
|
|
|
58
59
|
Pick the one that fits how you work.
|
|
59
60
|
|
|
60
61
|
```bash
|
|
62
|
+
uvx --from osmsg osmsg --last hour # zero-install, one-shot run
|
|
61
63
|
pip install osmsg # into your project
|
|
62
64
|
uv tool install osmsg # standalone CLI
|
|
63
65
|
docker run --rm -v "$PWD:/work" -w /work ghcr.io/osgeonepal/osmsg:latest --last hour
|
|
64
66
|
```
|
|
65
67
|
|
|
68
|
+
`uvx` can run osmsg in a throwaway environment, no install, no virtualenv to manage. Works
|
|
69
|
+
with any flag combination, e.g. `uvx --from osmsg osmsg --last hour --tags building --summary -f parquet -f markdown`.
|
|
70
|
+
|
|
66
71
|
## Quick start
|
|
67
72
|
|
|
68
73
|
```bash
|
|
@@ -81,7 +86,8 @@ That's it. A `stats.duckdb` and a `stats.parquet` show up in your current folder
|
|
|
81
86
|
osmsg --country nepal --last day
|
|
82
87
|
```
|
|
83
88
|
|
|
84
|
-
`--country` resolves through Geofabrik and needs an OSM account. Set `OSM_USERNAME` and `OSM_PASSWORD`
|
|
89
|
+
`--country` resolves through Geofabrik and needs an OSM account. Set `OSM_USERNAME` and `OSM_PASSWORD`
|
|
90
|
+
in your shell or a `.env` file:
|
|
85
91
|
|
|
86
92
|
```bash
|
|
87
93
|
export OSM_USERNAME=you
|
|
@@ -142,7 +148,8 @@ Any flag works as a YAML key. See [docs/Manual.md](./docs/Manual.md) for the ful
|
|
|
142
148
|
|
|
143
149
|
## Output formats
|
|
144
150
|
|
|
145
|
-
Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via
|
|
151
|
+
Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via
|
|
152
|
+
`-f parquet|csv|json|markdown|psql`. Parquet is the default. Open it with duckdb, polars, pandas, anything.
|
|
146
153
|
|
|
147
154
|
## Documentation
|
|
148
155
|
|
|
@@ -162,7 +169,8 @@ uv run pre-commit install
|
|
|
162
169
|
uv run pytest -m "not network"
|
|
163
170
|
```
|
|
164
171
|
|
|
165
|
-
Please read [CONTRIBUTING.md](./CONTRIBUTING.md) and the [Code of Conduct](./CODE_OF_CONDUCT.md) before opening a PR.
|
|
172
|
+
Please read [CONTRIBUTING.md](./CONTRIBUTING.md) and the [Code of Conduct](./CODE_OF_CONDUCT.md) before opening a PR.
|
|
173
|
+
Use [Conventional Commits](https://www.conventionalcommits.org/) (`cz commit`).
|
|
166
174
|
|
|
167
175
|
## License
|
|
168
176
|
|
|
@@ -9,7 +9,8 @@
|
|
|
9
9
|
[](https://github.com/astral-sh/uv)
|
|
10
10
|
[](https://github.com/osgeonepal/osmsg/pkgs/container/osmsg)
|
|
11
11
|
|
|
12
|
-
**OpenStreetMap Stats Generator.** A tiny CLI (and Python library) that turns OSM history into per-user counts
|
|
12
|
+
**OpenStreetMap Stats Generator.** A tiny CLI (and Python library) that turns OSM history into per-user counts
|
|
13
|
+
of nodes, ways, and relations created, modified, or deleted, written to parquet, csv, json, markdown, or Postgres.
|
|
13
14
|
|
|
14
15
|
A Project of [OSGeo Nepal](https://osgeonepal.org).
|
|
15
16
|
|
|
@@ -26,11 +27,15 @@ A Project of [OSGeo Nepal](https://osgeonepal.org).
|
|
|
26
27
|
Pick the one that fits how you work.
|
|
27
28
|
|
|
28
29
|
```bash
|
|
30
|
+
uvx --from osmsg osmsg --last hour # zero-install, one-shot run
|
|
29
31
|
pip install osmsg # into your project
|
|
30
32
|
uv tool install osmsg # standalone CLI
|
|
31
33
|
docker run --rm -v "$PWD:/work" -w /work ghcr.io/osgeonepal/osmsg:latest --last hour
|
|
32
34
|
```
|
|
33
35
|
|
|
36
|
+
`uvx` can run osmsg in a throwaway environment, no install, no virtualenv to manage. Works
|
|
37
|
+
with any flag combination, e.g. `uvx --from osmsg osmsg --last hour --tags building --summary -f parquet -f markdown`.
|
|
38
|
+
|
|
34
39
|
## Quick start
|
|
35
40
|
|
|
36
41
|
```bash
|
|
@@ -49,7 +54,8 @@ That's it. A `stats.duckdb` and a `stats.parquet` show up in your current folder
|
|
|
49
54
|
osmsg --country nepal --last day
|
|
50
55
|
```
|
|
51
56
|
|
|
52
|
-
`--country` resolves through Geofabrik and needs an OSM account. Set `OSM_USERNAME` and `OSM_PASSWORD`
|
|
57
|
+
`--country` resolves through Geofabrik and needs an OSM account. Set `OSM_USERNAME` and `OSM_PASSWORD`
|
|
58
|
+
in your shell or a `.env` file:
|
|
53
59
|
|
|
54
60
|
```bash
|
|
55
61
|
export OSM_USERNAME=you
|
|
@@ -110,7 +116,8 @@ Any flag works as a YAML key. See [docs/Manual.md](./docs/Manual.md) for the ful
|
|
|
110
116
|
|
|
111
117
|
## Output formats
|
|
112
118
|
|
|
113
|
-
Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via
|
|
119
|
+
Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via
|
|
120
|
+
`-f parquet|csv|json|markdown|psql`. Parquet is the default. Open it with duckdb, polars, pandas, anything.
|
|
114
121
|
|
|
115
122
|
## Documentation
|
|
116
123
|
|
|
@@ -130,7 +137,8 @@ uv run pre-commit install
|
|
|
130
137
|
uv run pytest -m "not network"
|
|
131
138
|
```
|
|
132
139
|
|
|
133
|
-
Please read [CONTRIBUTING.md](./CONTRIBUTING.md) and the [Code of Conduct](./CODE_OF_CONDUCT.md) before opening a PR.
|
|
140
|
+
Please read [CONTRIBUTING.md](./CONTRIBUTING.md) and the [Code of Conduct](./CODE_OF_CONDUCT.md) before opening a PR.
|
|
141
|
+
Use [Conventional Commits](https://www.conventionalcommits.org/) (`cz commit`).
|
|
134
142
|
|
|
135
143
|
## License
|
|
136
144
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.0.3"
|
|
@@ -80,8 +80,33 @@ def summary_markdown(
|
|
|
80
80
|
parts.append(f"\nFull stats: `{fname}.parquet`")
|
|
81
81
|
|
|
82
82
|
parts.append("\n#### Top 5 users")
|
|
83
|
+
user_cols = (
|
|
84
|
+
("rank", "rank"),
|
|
85
|
+
("name", "name"),
|
|
86
|
+
("changesets", "changesets"),
|
|
87
|
+
("map_changes", "map changes"),
|
|
88
|
+
("nodes_create", "nodes created"),
|
|
89
|
+
("ways_create", "ways created"),
|
|
90
|
+
("rels_create", "rels created"),
|
|
91
|
+
("poi_create", "poi created"),
|
|
92
|
+
("hashtags", "hashtags"),
|
|
93
|
+
)
|
|
94
|
+
parts.append("| " + " | ".join(label for _, label in user_cols) + " |")
|
|
95
|
+
parts.append("| " + " | ".join("---" for _ in user_cols) + " |")
|
|
83
96
|
for r in rows[:5]:
|
|
84
|
-
|
|
97
|
+
cells: list[str] = []
|
|
98
|
+
for key, _ in user_cols:
|
|
99
|
+
v = r.get(key)
|
|
100
|
+
if key == "hashtags":
|
|
101
|
+
hts = v or []
|
|
102
|
+
cells.append(", ".join(hts[:3]) + (f" (+{len(hts) - 3})" if len(hts) > 3 else ""))
|
|
103
|
+
elif key == "name":
|
|
104
|
+
cells.append(str(v or ""))
|
|
105
|
+
elif key == "rank":
|
|
106
|
+
cells.append(str(v if v is not None else ""))
|
|
107
|
+
else:
|
|
108
|
+
cells.append(_human(int(v or 0)))
|
|
109
|
+
parts.append("| " + " | ".join(cells) + " |")
|
|
85
110
|
|
|
86
111
|
if tm_stats and any("tasks_mapped" in r for r in rows):
|
|
87
112
|
parts.append("\n#### Top 5 TM mappers")
|
|
@@ -24,7 +24,7 @@ from .exceptions import CredentialsRequiredError, NoDataFoundError, OsmsgError
|
|
|
24
24
|
from .export import summary_markdown, to_csv, to_json, to_parquet, to_psql
|
|
25
25
|
from .fetch import download_osm_file
|
|
26
26
|
from .geofabrik import country_update_url
|
|
27
|
-
from .replication import ChangesetReplication, changefile_download_urls, resolve_url
|
|
27
|
+
from .replication import SHORTCUTS, ChangesetReplication, changefile_download_urls, resolve_url
|
|
28
28
|
from .ui import info, progress_bar, warn
|
|
29
29
|
|
|
30
30
|
UTC = dt.UTC
|
|
@@ -49,6 +49,7 @@ class RunConfig:
|
|
|
49
49
|
end_date: dt.datetime | None = None
|
|
50
50
|
countries: list[str] | None = None
|
|
51
51
|
urls: list[str] = field(default_factory=lambda: ["https://planet.openstreetmap.org/replication/minute"])
|
|
52
|
+
url_explicit: bool = False
|
|
52
53
|
workers: int | None = None
|
|
53
54
|
additional_tags: list[str] | None = None
|
|
54
55
|
hashtags: list[str] | None = None
|
|
@@ -83,6 +84,34 @@ def _normalize_urls(cfg: RunConfig) -> None:
|
|
|
83
84
|
cfg.urls = list(dict.fromkeys(resolve_url(u) for u in cfg.urls))
|
|
84
85
|
|
|
85
86
|
|
|
87
|
+
def _pick_replication_for_span(span: dt.timedelta) -> str:
|
|
88
|
+
span_h = span.total_seconds() / 3600
|
|
89
|
+
if span_h < 6:
|
|
90
|
+
return "minute"
|
|
91
|
+
if span_h < 24 * 7:
|
|
92
|
+
return "hour"
|
|
93
|
+
return "day"
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _auto_switch_replication(cfg: RunConfig, span: dt.timedelta) -> None:
|
|
97
|
+
"""Swap a single planet-shortcut --url for the cheapest one that covers `span`."""
|
|
98
|
+
if cfg.url_explicit or cfg.update or cfg.countries or len(cfg.urls) != 1:
|
|
99
|
+
return
|
|
100
|
+
cur = cfg.urls[0]
|
|
101
|
+
if cur not in SHORTCUTS.values():
|
|
102
|
+
return
|
|
103
|
+
target_label = _pick_replication_for_span(span)
|
|
104
|
+
target_url = SHORTCUTS[target_label]
|
|
105
|
+
if target_url == cur:
|
|
106
|
+
return
|
|
107
|
+
cur_label = next(label for label, url in SHORTCUTS.items() if url == cur)
|
|
108
|
+
warn(
|
|
109
|
+
f"Span is {span}; auto-switching --url from '{cur_label}' to '{target_label}' to reduce load. "
|
|
110
|
+
f"Pass --url {cur_label} to keep '{cur_label}'."
|
|
111
|
+
)
|
|
112
|
+
cfg.urls = [target_url]
|
|
113
|
+
|
|
114
|
+
|
|
86
115
|
def _canonical_hashtags(hashtags: list[str]) -> list[str]:
|
|
87
116
|
# Force leading '#' so 'hotosm' and '#hotosm' both match the '#hotosm' tokens in changeset comments.
|
|
88
117
|
return ["#" + h.lstrip("#") for h in hashtags]
|
|
@@ -96,7 +125,18 @@ def _resolve_url_starts(conn, cfg: RunConfig) -> dict[str, dt.datetime]:
|
|
|
96
125
|
for url in cfg.urls:
|
|
97
126
|
last = get_state(conn, url)
|
|
98
127
|
if not last:
|
|
99
|
-
|
|
128
|
+
known = [r[0] for r in conn.execute("SELECT source_url FROM state").fetchall()]
|
|
129
|
+
hint = (
|
|
130
|
+
f" Existing state in this DuckDB is for: {', '.join(known)}. "
|
|
131
|
+
"Re-run --update with one of those URLs, or start fresh under a different --name."
|
|
132
|
+
if known
|
|
133
|
+
else " Run osmsg once without --update to seed state."
|
|
134
|
+
)
|
|
135
|
+
raise OsmsgError(
|
|
136
|
+
f"--update cannot switch replication URL: no prior state for {url}.{hint} "
|
|
137
|
+
"(Replaying the same window through a different granularity would double-count "
|
|
138
|
+
"via the changeset_stats (seq_id, changeset_id) key.)"
|
|
139
|
+
)
|
|
100
140
|
starts[url] = last["last_ts"]
|
|
101
141
|
return starts
|
|
102
142
|
if cfg.start_date is None:
|
|
@@ -219,14 +259,17 @@ def run(cfg: RunConfig) -> dict[str, Any]:
|
|
|
219
259
|
dbmod.create_tables(conn)
|
|
220
260
|
info(f"DuckDB: {db_path}")
|
|
221
261
|
|
|
262
|
+
if cfg.end_date is None:
|
|
263
|
+
cfg.end_date = dt.datetime.now(UTC)
|
|
264
|
+
if cfg.start_date is not None:
|
|
265
|
+
_auto_switch_replication(cfg, cfg.end_date - cfg.start_date)
|
|
266
|
+
|
|
222
267
|
url_starts = _resolve_url_starts(conn, cfg)
|
|
223
268
|
if cfg.update:
|
|
224
269
|
# Changeset-replication reads one planet-wide source; widest window covers every URL.
|
|
225
270
|
cfg.start_date = min(url_starts.values())
|
|
226
271
|
info(f"--update: resuming each source from its own state row (earliest: {cfg.start_date.isoformat()})")
|
|
227
272
|
|
|
228
|
-
if cfg.end_date is None:
|
|
229
|
-
cfg.end_date = dt.datetime.now(UTC)
|
|
230
273
|
# _resolve_url_starts guarantees start_date is set (or raised); narrow for ty.
|
|
231
274
|
assert cfg.start_date is not None
|
|
232
275
|
if cfg.start_date >= cfg.end_date:
|
|
@@ -235,8 +278,9 @@ def run(cfg: RunConfig) -> dict[str, Any]:
|
|
|
235
278
|
span = cfg.end_date - cfg.start_date
|
|
236
279
|
info(f"Range: {cfg.start_date.isoformat()} → {cfg.end_date.isoformat()} ({span})")
|
|
237
280
|
span_hours = span.total_seconds() / 3600
|
|
238
|
-
#
|
|
239
|
-
|
|
281
|
+
# When auto-switch was suppressed (--url explicit, --update, --country, multi-URL), a long
|
|
282
|
+
# span on minute replication still floods the network. Hint the user.
|
|
283
|
+
if span_hours >= 72 and any(u == SHORTCUTS["minute"] for u in cfg.urls):
|
|
240
284
|
warn(
|
|
241
285
|
f"Range spans {span_hours:.0f}h on minute replication "
|
|
242
286
|
f"(~{int(span_hours * 60):,} files). Consider --url hour or --url day."
|
osmsg-1.0.2/osmsg/__version__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.0.2"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|