osmsg 1.0.2__tar.gz → 1.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {osmsg-1.0.2 → osmsg-1.0.3}/PKG-INFO +13 -5
  2. {osmsg-1.0.2 → osmsg-1.0.3}/README.md +12 -4
  3. osmsg-1.0.3/osmsg/__version__.py +1 -0
  4. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/cli.py +1 -0
  5. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/export/markdown.py +26 -1
  6. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/pipeline.py +50 -6
  7. {osmsg-1.0.2 → osmsg-1.0.3}/pyproject.toml +1 -1
  8. osmsg-1.0.2/osmsg/__version__.py +0 -1
  9. {osmsg-1.0.2 → osmsg-1.0.3}/LICENSE +0 -0
  10. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/__init__.py +0 -0
  11. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/_http.py +0 -0
  12. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/auth.py +0 -0
  13. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/boundary.py +0 -0
  14. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/db/__init__.py +0 -0
  15. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/db/ingest.py +0 -0
  16. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/db/queries.py +0 -0
  17. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/db/schema.py +0 -0
  18. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/exceptions.py +0 -0
  19. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/export/__init__.py +0 -0
  20. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/export/csv.py +0 -0
  21. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/export/json.py +0 -0
  22. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/export/parquet.py +0 -0
  23. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/export/psql.py +0 -0
  24. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/fetch.py +0 -0
  25. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/geofabrik.py +0 -0
  26. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/handlers.py +0 -0
  27. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/models.py +0 -0
  28. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/py.typed +0 -0
  29. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/replication.py +0 -0
  30. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/tm.py +0 -0
  31. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/ui.py +0 -0
  32. {osmsg-1.0.2 → osmsg-1.0.3}/osmsg/workers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: osmsg
3
- Version: 1.0.2
3
+ Version: 1.0.3
4
4
  Summary: OpenStreetMap Stats Generator: Commandline
5
5
  Keywords: osm,stats,commandline,openstreetmap
6
6
  Author: Kshitij Raj Sharma
@@ -41,7 +41,8 @@ Description-Content-Type: text/markdown
41
41
  [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
42
42
  [![Container](https://img.shields.io/badge/ghcr.io-osgeonepal%2Fosmsg-2496ED?logo=docker)](https://github.com/osgeonepal/osmsg/pkgs/container/osmsg)
43
43
 
44
- **OpenStreetMap Stats Generator.** A tiny CLI (and Python library) that turns OSM history into per-user counts of nodes, ways, and relations created, modified, or deleted, written to parquet, csv, json, markdown, or Postgres.
44
+ **OpenStreetMap Stats Generator.** A tiny CLI (and Python library) that turns OSM history into per-user counts
45
+ of nodes, ways, and relations created, modified, or deleted, written to parquet, csv, json, markdown, or Postgres.
45
46
 
46
47
  A Project of [OSGeo Nepal](https://osgeonepal.org).
47
48
 
@@ -58,11 +59,15 @@ A Project of [OSGeo Nepal](https://osgeonepal.org).
58
59
  Pick the one that fits how you work.
59
60
 
60
61
  ```bash
62
+ uvx --from osmsg osmsg --last hour # zero-install, one-shot run
61
63
  pip install osmsg # into your project
62
64
  uv tool install osmsg # standalone CLI
63
65
  docker run --rm -v "$PWD:/work" -w /work ghcr.io/osgeonepal/osmsg:latest --last hour
64
66
  ```
65
67
 
68
+ `uvx` can run osmsg in a throwaway environment — no install, no virtualenv to manage. Works
69
+ with any flag combination, e.g. `uvx --from osmsg osmsg --last hour --tags building --summary -f parquet -f markdown`.
70
+
66
71
  ## Quick start
67
72
 
68
73
  ```bash
@@ -81,7 +86,8 @@ That's it. A `stats.duckdb` and a `stats.parquet` show up in your current folder
81
86
  osmsg --country nepal --last day
82
87
  ```
83
88
 
84
- `--country` resolves through Geofabrik and needs an OSM account. Set `OSM_USERNAME` and `OSM_PASSWORD` in your shell or a `.env` file:
89
+ `--country` resolves through Geofabrik and needs an OSM account. Set `OSM_USERNAME` and `OSM_PASSWORD`
90
+ in your shell or a `.env` file:
85
91
 
86
92
  ```bash
87
93
  export OSM_USERNAME=you
@@ -142,7 +148,8 @@ Any flag works as a YAML key. See [docs/Manual.md](./docs/Manual.md) for the ful
142
148
 
143
149
  ## Output formats
144
150
 
145
- Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via `-f parquet|csv|json|markdown|psql`. Parquet is the default. Open it with duckdb, polars, pandas, anything.
151
+ Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via
152
+ `-f parquet|csv|json|markdown|psql`. Parquet is the default. Open it with duckdb, polars, pandas, anything.
146
153
 
147
154
  ## Documentation
148
155
 
@@ -162,7 +169,8 @@ uv run pre-commit install
162
169
  uv run pytest -m "not network"
163
170
  ```
164
171
 
165
- Please read [CONTRIBUTING.md](./CONTRIBUTING.md) and the [Code of Conduct](./CODE_OF_CONDUCT.md) before opening a PR. Use [Conventional Commits](https://www.conventionalcommits.org/) (`cz commit`).
172
+ Please read [CONTRIBUTING.md](./CONTRIBUTING.md) and the [Code of Conduct](./CODE_OF_CONDUCT.md) before opening a PR.
173
+ Use [Conventional Commits](https://www.conventionalcommits.org/) (`cz commit`).
166
174
 
167
175
  ## License
168
176
 
@@ -9,7 +9,8 @@
9
9
  [![uv](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json)](https://github.com/astral-sh/uv)
10
10
  [![Container](https://img.shields.io/badge/ghcr.io-osgeonepal%2Fosmsg-2496ED?logo=docker)](https://github.com/osgeonepal/osmsg/pkgs/container/osmsg)
11
11
 
12
- **OpenStreetMap Stats Generator.** A tiny CLI (and Python library) that turns OSM history into per-user counts of nodes, ways, and relations created, modified, or deleted, written to parquet, csv, json, markdown, or Postgres.
12
+ **OpenStreetMap Stats Generator.** A tiny CLI (and Python library) that turns OSM history into per-user counts
13
+ of nodes, ways, and relations created, modified, or deleted, written to parquet, csv, json, markdown, or Postgres.
13
14
 
14
15
  A Project of [OSGeo Nepal](https://osgeonepal.org).
15
16
 
@@ -26,11 +27,15 @@ A Project of [OSGeo Nepal](https://osgeonepal.org).
26
27
  Pick the one that fits how you work.
27
28
 
28
29
  ```bash
30
+ uvx --from osmsg osmsg --last hour # zero-install, one-shot run
29
31
  pip install osmsg # into your project
30
32
  uv tool install osmsg # standalone CLI
31
33
  docker run --rm -v "$PWD:/work" -w /work ghcr.io/osgeonepal/osmsg:latest --last hour
32
34
  ```
33
35
 
36
+ `uvx` can run osmsg in a throwaway environment — no install, no virtualenv to manage. Works
37
+ with any flag combination, e.g. `uvx --from osmsg osmsg --last hour --tags building --summary -f parquet -f markdown`.
38
+
34
39
  ## Quick start
35
40
 
36
41
  ```bash
@@ -49,7 +54,8 @@ That's it. A `stats.duckdb` and a `stats.parquet` show up in your current folder
49
54
  osmsg --country nepal --last day
50
55
  ```
51
56
 
52
- `--country` resolves through Geofabrik and needs an OSM account. Set `OSM_USERNAME` and `OSM_PASSWORD` in your shell or a `.env` file:
57
+ `--country` resolves through Geofabrik and needs an OSM account. Set `OSM_USERNAME` and `OSM_PASSWORD`
58
+ in your shell or a `.env` file:
53
59
 
54
60
  ```bash
55
61
  export OSM_USERNAME=you
@@ -110,7 +116,8 @@ Any flag works as a YAML key. See [docs/Manual.md](./docs/Manual.md) for the ful
110
116
 
111
117
  ## Output formats
112
118
 
113
- Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via `-f parquet|csv|json|markdown|psql`. Parquet is the default. Open it with duckdb, polars, pandas, anything.
119
+ Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via
120
+ `-f parquet|csv|json|markdown|psql`. Parquet is the default. Open it with duckdb, polars, pandas, anything.
114
121
 
115
122
  ## Documentation
116
123
 
@@ -130,7 +137,8 @@ uv run pre-commit install
130
137
  uv run pytest -m "not network"
131
138
  ```
132
139
 
133
- Please read [CONTRIBUTING.md](./CONTRIBUTING.md) and the [Code of Conduct](./CODE_OF_CONDUCT.md) before opening a PR. Use [Conventional Commits](https://www.conventionalcommits.org/) (`cz commit`).
140
+ Please read [CONTRIBUTING.md](./CONTRIBUTING.md) and the [Code of Conduct](./CODE_OF_CONDUCT.md) before opening a PR.
141
+ Use [Conventional Commits](https://www.conventionalcommits.org/) (`cz commit`).
134
142
 
135
143
  ## License
136
144
 
@@ -0,0 +1 @@
1
+ __version__ = "1.0.3"
@@ -194,6 +194,7 @@ def main(
194
194
  end_date=_parse_dt(end),
195
195
  countries=country,
196
196
  urls=url or ["minute"],
197
+ url_explicit=url is not None,
197
198
  workers=workers,
198
199
  additional_tags=tags,
199
200
  hashtags=hashtags,
@@ -80,8 +80,33 @@ def summary_markdown(
80
80
  parts.append(f"\nFull stats: `{fname}.parquet`")
81
81
 
82
82
  parts.append("\n#### Top 5 users")
83
+ user_cols = (
84
+ ("rank", "rank"),
85
+ ("name", "name"),
86
+ ("changesets", "changesets"),
87
+ ("map_changes", "map changes"),
88
+ ("nodes_create", "nodes created"),
89
+ ("ways_create", "ways created"),
90
+ ("rels_create", "rels created"),
91
+ ("poi_create", "poi created"),
92
+ ("hashtags", "hashtags"),
93
+ )
94
+ parts.append("| " + " | ".join(label for _, label in user_cols) + " |")
95
+ parts.append("| " + " | ".join("---" for _ in user_cols) + " |")
83
96
  for r in rows[:5]:
84
- parts.append(f"- {r['name']}: {_human(int(r.get('map_changes', 0) or 0))} map changes")
97
+ cells: list[str] = []
98
+ for key, _ in user_cols:
99
+ v = r.get(key)
100
+ if key == "hashtags":
101
+ hts = v or []
102
+ cells.append(", ".join(hts[:3]) + (f" (+{len(hts) - 3})" if len(hts) > 3 else ""))
103
+ elif key == "name":
104
+ cells.append(str(v or ""))
105
+ elif key == "rank":
106
+ cells.append(str(v if v is not None else ""))
107
+ else:
108
+ cells.append(_human(int(v or 0)))
109
+ parts.append("| " + " | ".join(cells) + " |")
85
110
 
86
111
  if tm_stats and any("tasks_mapped" in r for r in rows):
87
112
  parts.append("\n#### Top 5 TM mappers")
@@ -24,7 +24,7 @@ from .exceptions import CredentialsRequiredError, NoDataFoundError, OsmsgError
24
24
  from .export import summary_markdown, to_csv, to_json, to_parquet, to_psql
25
25
  from .fetch import download_osm_file
26
26
  from .geofabrik import country_update_url
27
- from .replication import ChangesetReplication, changefile_download_urls, resolve_url
27
+ from .replication import SHORTCUTS, ChangesetReplication, changefile_download_urls, resolve_url
28
28
  from .ui import info, progress_bar, warn
29
29
 
30
30
  UTC = dt.UTC
@@ -49,6 +49,7 @@ class RunConfig:
49
49
  end_date: dt.datetime | None = None
50
50
  countries: list[str] | None = None
51
51
  urls: list[str] = field(default_factory=lambda: ["https://planet.openstreetmap.org/replication/minute"])
52
+ url_explicit: bool = False
52
53
  workers: int | None = None
53
54
  additional_tags: list[str] | None = None
54
55
  hashtags: list[str] | None = None
@@ -83,6 +84,34 @@ def _normalize_urls(cfg: RunConfig) -> None:
83
84
  cfg.urls = list(dict.fromkeys(resolve_url(u) for u in cfg.urls))
84
85
 
85
86
 
87
+ def _pick_replication_for_span(span: dt.timedelta) -> str:
88
+ span_h = span.total_seconds() / 3600
89
+ if span_h < 6:
90
+ return "minute"
91
+ if span_h < 24 * 7:
92
+ return "hour"
93
+ return "day"
94
+
95
+
96
+ def _auto_switch_replication(cfg: RunConfig, span: dt.timedelta) -> None:
97
+ """Swap a single planet-shortcut --url for the cheapest one that covers `span`."""
98
+ if cfg.url_explicit or cfg.update or cfg.countries or len(cfg.urls) != 1:
99
+ return
100
+ cur = cfg.urls[0]
101
+ if cur not in SHORTCUTS.values():
102
+ return
103
+ target_label = _pick_replication_for_span(span)
104
+ target_url = SHORTCUTS[target_label]
105
+ if target_url == cur:
106
+ return
107
+ cur_label = next(label for label, url in SHORTCUTS.items() if url == cur)
108
+ warn(
109
+ f"Span is {span}; auto-switching --url from '{cur_label}' to '{target_label}' to reduce load. "
110
+ f"Pass --url {cur_label} to keep '{cur_label}'."
111
+ )
112
+ cfg.urls = [target_url]
113
+
114
+
86
115
  def _canonical_hashtags(hashtags: list[str]) -> list[str]:
87
116
  # Force leading '#' so 'hotosm' and '#hotosm' both match the '#hotosm' tokens in changeset comments.
88
117
  return ["#" + h.lstrip("#") for h in hashtags]
@@ -96,7 +125,18 @@ def _resolve_url_starts(conn, cfg: RunConfig) -> dict[str, dt.datetime]:
96
125
  for url in cfg.urls:
97
126
  last = get_state(conn, url)
98
127
  if not last:
99
- raise OsmsgError(f"--update has no prior state for {url}. Run osmsg without --update first to seed it.")
128
+ known = [r[0] for r in conn.execute("SELECT source_url FROM state").fetchall()]
129
+ hint = (
130
+ f" Existing state in this DuckDB is for: {', '.join(known)}. "
131
+ "Re-run --update with one of those URLs, or start fresh under a different --name."
132
+ if known
133
+ else " Run osmsg once without --update to seed state."
134
+ )
135
+ raise OsmsgError(
136
+ f"--update cannot switch replication URL: no prior state for {url}.{hint} "
137
+ "(Replaying the same window through a different granularity would double-count "
138
+ "via the changeset_stats (seq_id, changeset_id) key.)"
139
+ )
100
140
  starts[url] = last["last_ts"]
101
141
  return starts
102
142
  if cfg.start_date is None:
@@ -219,14 +259,17 @@ def run(cfg: RunConfig) -> dict[str, Any]:
219
259
  dbmod.create_tables(conn)
220
260
  info(f"DuckDB: {db_path}")
221
261
 
262
+ if cfg.end_date is None:
263
+ cfg.end_date = dt.datetime.now(UTC)
264
+ if cfg.start_date is not None:
265
+ _auto_switch_replication(cfg, cfg.end_date - cfg.start_date)
266
+
222
267
  url_starts = _resolve_url_starts(conn, cfg)
223
268
  if cfg.update:
224
269
  # Changeset-replication reads one planet-wide source; widest window covers every URL.
225
270
  cfg.start_date = min(url_starts.values())
226
271
  info(f"--update: resuming each source from its own state row (earliest: {cfg.start_date.isoformat()})")
227
272
 
228
- if cfg.end_date is None:
229
- cfg.end_date = dt.datetime.now(UTC)
230
273
  # _resolve_url_starts guarantees start_date is set (or raised); narrow for ty.
231
274
  assert cfg.start_date is not None
232
275
  if cfg.start_date >= cfg.end_date:
@@ -235,8 +278,9 @@ def run(cfg: RunConfig) -> dict[str, Any]:
235
278
  span = cfg.end_date - cfg.start_date
236
279
  info(f"Range: {cfg.start_date.isoformat()} → {cfg.end_date.isoformat()} ({span})")
237
280
  span_hours = span.total_seconds() / 3600
238
- # 72h on minute replication is ~4,300 files; beyond that, hour/day replication is much cheaper.
239
- if span_hours >= 72 and any("minute" in u.lower() for u in cfg.urls):
281
+ # When auto-switch was suppressed (--url explicit, --update, --country, multi-URL), a long
282
+ # span on minute replication still floods the network. Hint the user.
283
+ if span_hours >= 72 and any(u == SHORTCUTS["minute"] for u in cfg.urls):
240
284
  warn(
241
285
  f"Range spans {span_hours:.0f}h on minute replication "
242
286
  f"(~{int(span_hours * 60):,} files). Consider --url hour or --url day."
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "osmsg"
3
- version = "1.0.2"
3
+ version = "1.0.3"
4
4
  description = "OpenStreetMap Stats Generator: Commandline"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -1 +0,0 @@
1
- __version__ = "1.0.2"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes