osmsg 1.2.1__tar.gz → 1.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. {osmsg-1.2.1 → osmsg-1.2.5}/PKG-INFO +8 -5
  2. {osmsg-1.2.1 → osmsg-1.2.5}/README.md +7 -4
  3. osmsg-1.2.5/osmsg/__version__.py +1 -0
  4. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/_http.py +7 -3
  5. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/cli.py +1 -1
  6. osmsg-1.2.5/osmsg/gui.py +272 -0
  7. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/history.py +41 -28
  8. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/pipeline.py +37 -11
  9. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/replication.py +1 -1
  10. {osmsg-1.2.1 → osmsg-1.2.5}/pyproject.toml +4 -1
  11. osmsg-1.2.1/osmsg/__version__.py +0 -1
  12. {osmsg-1.2.1 → osmsg-1.2.5}/LICENSE +0 -0
  13. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/__init__.py +0 -0
  14. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/_tick.py +0 -0
  15. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/auth.py +0 -0
  16. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/boundary.py +0 -0
  17. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/db/__init__.py +0 -0
  18. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/db/duckdb_schema.py +0 -0
  19. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/db/ingest.py +0 -0
  20. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/db/queries.py +0 -0
  21. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/db/schema.py +0 -0
  22. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/exceptions.py +0 -0
  23. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/export/__init__.py +0 -0
  24. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/export/csv.py +0 -0
  25. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/export/json.py +0 -0
  26. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/export/markdown.py +0 -0
  27. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/export/parquet.py +0 -0
  28. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/export/psql.py +0 -0
  29. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/fetch.py +0 -0
  30. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/geofabrik.py +0 -0
  31. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/handlers.py +0 -0
  32. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/maintain/__init__.py +0 -0
  33. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/maintain/cli.py +0 -0
  34. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/maintain/convert.py +0 -0
  35. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/maintain/manifest.py +0 -0
  36. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/maintain/month.py +0 -0
  37. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/maintain/parquet.py +0 -0
  38. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/maintain/pbf_split.py +0 -0
  39. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/models.py +0 -0
  40. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/pg_schema.py +0 -0
  41. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/py.typed +0 -0
  42. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/tm.py +0 -0
  43. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/ui.py +0 -0
  44. {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/workers.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: osmsg
3
- Version: 1.2.1
3
+ Version: 1.2.5
4
4
  Summary: OpenStreetMap Stats Generator: Commandline
5
5
  Keywords: osm,stats,commandline,openstreetmap
6
6
  Author: Kshitij Raj Sharma
@@ -78,7 +78,9 @@ brew install osgeonepal/tap/osmsg # macOS / Linux (Homebrew tap)
78
78
  ```
79
79
 
80
80
  On Windows, download `osmsg.exe` from the [latest release](https://github.com/osgeonepal/osmsg/releases)
81
- and run it directly, no Python required.
81
+ and double-click it to open the desktop app. Pick a Quick range (last hour, day, week, month, year, or
82
+ all time) or type your own dates, set the options, click Compute, and open the output folder. The CLI
83
+ below is for macOS, Linux, and pip/uv users.
82
84
 
83
85
  ## Quick start
84
86
 
@@ -206,7 +208,8 @@ Same pipeline as the CLI.
206
208
  osmsg --config nepal.yaml
207
209
  ```
208
210
 
209
- Any flag works as a YAML key. See [docs/Manual.md](./docs/Manual.md) for the full list.
211
+ Each option is a YAML key written with its underscore name: `output_dir`, `history_url`, `all_stats`,
212
+ `formats`, `psql_dsn`, and so on (not the dashed flag). See [docs/Manual.md](./docs/Manual.md).
210
213
 
211
214
  ## Output formats
212
215
 
@@ -227,7 +230,7 @@ docker-compose `environment:` block all reach the same setting. CLI flag wins ov
227
230
  | `--country` | `OSMSG_COUNTRY` | unset | Geofabrik region id(s). Comma-separated when set via env. |
228
231
  | `--boundary` | `OSMSG_BOUNDARY` | unset | GeoJSON path or inline GeoJSON. |
229
232
  | `--url` | `OSMSG_URL` | `minute` | `minute`/`hour`/`day` shortcut or full URL. Comma-separated when set via env. |
230
- | `--workers` | `OSMSG_WORKERS` | cpu count | Parallel workers. |
233
+ | `--workers` | `OSMSG_WORKERS` | cpu count | Parallel parse workers. |
231
234
  | `--cache-dir` | `OSMSG_CACHE_DIR` | platform cache | Where downloaded OSM files are kept across runs. |
232
235
  | `--output-dir` | `OSMSG_OUTPUT_DIR` | `.` | Where `<name>.duckdb` and exports are written. |
233
236
  | `--format` / `-f` | `OSMSG_FORMAT` | `parquet` | Repeat for multiple. Comma-separated when set via env. |
@@ -237,7 +240,7 @@ docker-compose `environment:` block all reach the same setting. CLI flag wins ov
237
240
  | `--history` / `--no-history` | `OSMSG_HISTORY` | on | Read covered months from the published dataset. |
238
241
  | `--history-url` | `OSMSG_HISTORY_URL` | `osmsg-history` | Published dataset location. |
239
242
  | `--insert` | (none) | off | Load history into the store and seed resume, then exit. No window loads all of it. |
240
- | `--osh-file` / `--changeset-file` | (none) | unset | Insert from local planet history + changeset files instead of the dataset. |
243
+ | `--osh-file` / `--changeset-file` | (none) | unset | Insert from local planet history + changeset files. |
241
244
  | `--changeset-pad-hours` | `OSMSG_CHANGESET_PAD_HOURS` | `1` | See below. |
242
245
  | (auto-bootstrap on `--update`) | `OSMSG_BOOTSTRAP` | `hour` | `hour`, `day`, or `week`. Used when `--update` runs against an empty DB. |
243
246
  | (auto-bootstrap on `--update`) | `OSMSG_BOOTSTRAP_DAYS` | unset | Integer N; overrides `OSMSG_BOOTSTRAP`. |
@@ -46,7 +46,9 @@ brew install osgeonepal/tap/osmsg # macOS / Linux (Homebrew tap)
46
46
  ```
47
47
 
48
48
  On Windows, download `osmsg.exe` from the [latest release](https://github.com/osgeonepal/osmsg/releases)
49
- and run it directly, no Python required.
49
+ and double-click it to open the desktop app. Pick a Quick range (last hour, day, week, month, year, or
50
+ all time) or type your own dates, set the options, click Compute, and open the output folder. The CLI
51
+ below is for macOS, Linux, and pip/uv users.
50
52
 
51
53
  ## Quick start
52
54
 
@@ -174,7 +176,8 @@ Same pipeline as the CLI.
174
176
  osmsg --config nepal.yaml
175
177
  ```
176
178
 
177
- Any flag works as a YAML key. See [docs/Manual.md](./docs/Manual.md) for the full list.
179
+ Each option is a YAML key written with its underscore name: `output_dir`, `history_url`, `all_stats`,
180
+ `formats`, `psql_dsn`, and so on (not the dashed flag). See [docs/Manual.md](./docs/Manual.md).
178
181
 
179
182
  ## Output formats
180
183
 
@@ -195,7 +198,7 @@ docker-compose `environment:` block all reach the same setting. CLI flag wins ov
195
198
  | `--country` | `OSMSG_COUNTRY` | unset | Geofabrik region id(s). Comma-separated when set via env. |
196
199
  | `--boundary` | `OSMSG_BOUNDARY` | unset | GeoJSON path or inline GeoJSON. |
197
200
  | `--url` | `OSMSG_URL` | `minute` | `minute`/`hour`/`day` shortcut or full URL. Comma-separated when set via env. |
198
- | `--workers` | `OSMSG_WORKERS` | cpu count | Parallel workers. |
201
+ | `--workers` | `OSMSG_WORKERS` | cpu count | Parallel parse workers. |
199
202
  | `--cache-dir` | `OSMSG_CACHE_DIR` | platform cache | Where downloaded OSM files are kept across runs. |
200
203
  | `--output-dir` | `OSMSG_OUTPUT_DIR` | `.` | Where `<name>.duckdb` and exports are written. |
201
204
  | `--format` / `-f` | `OSMSG_FORMAT` | `parquet` | Repeat for multiple. Comma-separated when set via env. |
@@ -205,7 +208,7 @@ docker-compose `environment:` block all reach the same setting. CLI flag wins ov
205
208
  | `--history` / `--no-history` | `OSMSG_HISTORY` | on | Read covered months from the published dataset. |
206
209
  | `--history-url` | `OSMSG_HISTORY_URL` | `osmsg-history` | Published dataset location. |
207
210
  | `--insert` | (none) | off | Load history into the store and seed resume, then exit. No window loads all of it. |
208
- | `--osh-file` / `--changeset-file` | (none) | unset | Insert from local planet history + changeset files instead of the dataset. |
211
+ | `--osh-file` / `--changeset-file` | (none) | unset | Insert from local planet history + changeset files. |
209
212
  | `--changeset-pad-hours` | `OSMSG_CHANGESET_PAD_HOURS` | `1` | See below. |
210
213
  | (auto-bootstrap on `--update`) | `OSMSG_BOOTSTRAP` | `hour` | `hour`, `day`, or `week`. Used when `--update` runs against an empty DB. |
211
214
  | (auto-bootstrap on `--update`) | `OSMSG_BOOTSTRAP_DAYS` | unset | Integer N; overrides `OSMSG_BOOTSTRAP`. |
@@ -0,0 +1 @@
1
+ __version__ = "1.2.5"
@@ -11,7 +11,7 @@ from requests.adapters import HTTPAdapter
11
11
  from urllib3.util.retry import Retry
12
12
 
13
13
  USER_AGENT = "osmsg"
14
- DEFAULT_TIMEOUT = (10, 60) # (connect, read) seconds
14
+ DEFAULT_TIMEOUT = (30, 120) # (connect, read) seconds
15
15
 
16
16
 
17
17
  class _TimeoutSession(requests.Session):
@@ -26,10 +26,14 @@ def make_session() -> requests.Session:
26
26
  """Fresh session with the standard timeout + retry policy (use when a flow needs its own cookie jar)."""
27
27
  s = _TimeoutSession()
28
28
  retry = Retry(
29
- total=5,
30
- backoff_factor=0.5,
29
+ total=10,
30
+ connect=10,
31
+ read=10,
32
+ backoff_factor=1.0,
33
+ backoff_max=120,
31
34
  status_forcelist=(429, 500, 502, 503, 504),
32
35
  allowed_methods=frozenset({"GET", "POST", "HEAD"}),
36
+ respect_retry_after_header=True,
33
37
  )
34
38
  adapter = HTTPAdapter(max_retries=retry, pool_maxsize=32)
35
39
  s.mount("https://", adapter)
@@ -150,7 +150,7 @@ def main(
150
150
  ] = None,
151
151
  workers: Annotated[
152
152
  int | None,
153
- typer.Option(envvar="OSMSG_WORKERS", help="Parallel workers (default: cpu count)."),
153
+ typer.Option(envvar="OSMSG_WORKERS", help="Parallel parse workers (default: cpu count)."),
154
154
  ] = None,
155
155
  rows: Annotated[
156
156
  int | None,
@@ -0,0 +1,272 @@
1
+ """Minimal tkinter desktop UI for running osmsg and saving the output."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import datetime as dt
6
+ import os
7
+ import queue
8
+ import sys
9
+ import threading
10
+ import webbrowser
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from .__version__ import __version__
15
+ from .exceptions import NoDataFoundError, OsmsgError
16
+ from .pipeline import RunConfig, run
17
+
18
# Shorthand for the UTC tzinfo attached to the datetimes this module creates.
UTC = dt.UTC
# Output formats offered as checkboxes, in display order.
FORMATS = ["parquet", "csv", "json", "markdown"]
# (link text, URL) pairs shown as clickable labels in the About window.
ABOUT_LINKS = [
    ("Star osmsg on GitHub", "https://github.com/osgeonepal/osmsg"),
    ("Report a bug or request a feature", "https://github.com/osgeonepal/osmsg/issues"),
    ("Sponsor the developer", "https://github.com/sponsors/kshitijrajsharma"),
]
25
+ PRESETS = ["Last hour", "Last day", "Last week", "Last month", "Last year", "All time"]
26
+ _PRESET_DELTAS = {
27
+ "Last hour": dt.timedelta(hours=1),
28
+ "Last day": dt.timedelta(days=1),
29
+ "Last week": dt.timedelta(days=7),
30
+ "Last month": dt.timedelta(days=30),
31
+ "Last year": dt.timedelta(days=365),
32
+ }
33
+
34
+
35
+ def preset_range(name: str, now: dt.datetime | None = None) -> tuple[dt.datetime, dt.datetime]:
36
+ """Resolve a quick-range label to a (start, end) window."""
37
+ now = now or dt.datetime.now(UTC)
38
+ if name == "All time":
39
+ return dt.datetime(2005, 1, 1, tzinfo=UTC), now
40
+ return now - _PRESET_DELTAS[name], now
41
+
42
+
43
def _fmt(when: dt.datetime) -> str:
    """Render *when* as ``YYYY-MM-DD HH:MM:SS`` (no timezone suffix)."""
    return f"{when:%Y-%m-%d %H:%M:%S}"
45
+
46
+
47
+ def _parse_date(value: str) -> dt.datetime | None:
48
+ value = value.strip()
49
+ if not value:
50
+ return None
51
+ for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d"):
52
+ try:
53
+ return dt.datetime.strptime(value, fmt).replace(tzinfo=UTC)
54
+ except ValueError:
55
+ continue
56
+ raise OsmsgError(f"Unrecognized date: {value!r}. Use YYYY-MM-DD.")
57
+
58
+
59
def _split(value: str | None) -> list[str] | None:
    """Split a comma-separated field into trimmed, non-empty items.

    Returns ``None`` rather than an empty list when nothing usable remains.
    """
    if not value:
        return None
    cleaned = [piece for piece in map(str.strip, value.split(",")) if piece]
    return cleaned or None
62
+
63
+
64
def _parse_int(value: object, field: str) -> int | None:
    """Read an optional positive whole number from a form field.

    A falsy or blank *value* yields ``None``. Text that ``int()`` rejects, or a
    number below 1, raises ``OsmsgError`` with *field* named in the message.
    """
    text = str(value or "").strip()
    if text == "":
        return None
    try:
        number = int(text)
    except ValueError as exc:
        raise OsmsgError(f"{field} must be a whole number.") from exc
    if number >= 1:
        return number
    raise OsmsgError(f"{field} must be at least 1.")
75
+
76
+
77
def build_config(form: dict[str, object], output_dir: str) -> RunConfig:
    """Map the form fields to a RunConfig, raising OsmsgError on invalid input.

    *form* maps field names to their current values: text fields as strings,
    checkboxes (including one entry per name in ``FORMATS``) as truthy/falsy
    values. *output_dir* is the destination folder; an empty string means the
    current directory.

    Raises ``OsmsgError`` when no format is selected, when the start date is
    missing, or when a date or the worker count cannot be parsed.
    """

    def text(key: str) -> str:
        # Treat a missing key, None and "" alike. A plain str(form.get(key, ""))
        # would turn an explicit None into the text "None" and then fail date
        # parsing with a confusing "Unrecognized date: 'None'".
        return str(form.get(key) or "")

    formats = [name for name in FORMATS if form.get(name)]
    if not formats:
        raise OsmsgError("Pick at least one output format.")
    start = _parse_date(text("start"))
    if start is None:
        raise OsmsgError("Start date is required (YYYY-MM-DD).")
    return RunConfig(
        name=str(form.get("name") or "stats"),
        start_date=start,
        end_date=_parse_date(text("end")),  # None when the field is left blank
        hashtags=_split(text("hashtags")),
        additional_tags=_split(text("tags")),
        tag_mode="all" if form.get("all_tags") else "none",
        summary=bool(form.get("summary")),
        formats=formats,
        workers=_parse_int(form.get("workers"), "Workers"),
        output_dir=Path(output_dir or "."),
    )
97
+
98
+
99
def _open_folder(path: Path) -> None:
    """Show *path* in the platform file manager, creating the folder if needed.

    Uses ``os.startfile`` on Windows, ``open`` on macOS and ``xdg-open``
    elsewhere. The opener's exit status is ignored (``check=False``).
    """
    import subprocess

    # The default output folder may not exist before the first run; opening a
    # missing path fails, so create it up front.
    path.mkdir(parents=True, exist_ok=True)
    if sys.platform == "win32":
        os.startfile(path)  # noqa: S606
        return
    opener = "open" if sys.platform == "darwin" else "xdg-open"
    subprocess.run([opener, str(path)], check=False)
110
+
111
+
112
class _Redirector:
    """Minimal write-only text stream that forwards output to a queue.

    Each non-empty chunk is enqueued as ``("log", text)`` on *sink*.
    """

    def __init__(self, sink: queue.Queue) -> None:
        self.sink = sink

    def write(self, text: str) -> int:
        """Enqueue *text* (unless empty) and return the number of characters accepted."""
        if text:
            self.sink.put(("log", text))
        # Text streams report how many characters were written; returning None
        # breaks callers that use the result.
        return len(text)

    def flush(self) -> None:
        """No-op: nothing is buffered here."""

    def isatty(self) -> bool:
        """Report that this stream is not a terminal."""
        return False
125
+
126
+
127
class App:
    """Tkinter window that collects run options, runs osmsg on a background thread, and logs its output."""

    def __init__(self) -> None:
        """Build the window and its widgets, then schedule the first poll of the event queue."""
        # tkinter is imported inside the constructor rather than at module level.
        # NOTE(review): presumably so importing this module does not require tkinter — confirm.
        import tkinter as tk
        from tkinter import filedialog, scrolledtext, ttk

        self._tk = tk
        self._ttk = ttk
        self._filedialog = filedialog
        # Messages from the worker thread, as ("log", text) or ("done", text); consumed by _drain().
        self.events: queue.Queue = queue.Queue()
        self.out_dir = str(Path.home() / "osmsg")

        self.root = tk.Tk()
        self.root.title("osmsg")
        # Form state keyed by field name; every value is a tkinter variable read with .get().
        self.vars: dict[str, Any] = {}
        frame = ttk.Frame(self.root, padding=12)
        frame.grid(sticky="nsew")

        # (label, form key, default text) for the text-entry rows, top to bottom (grid rows 0-5).
        rows = [
            ("Name", "name", "stats"),
            ("Start (YYYY-MM-DD)", "start", ""),
            ("End (blank = now)", "end", ""),
            ("Hashtags (comma-sep)", "hashtags", ""),
            ("Tags (comma-sep)", "tags", ""),
            ("Workers", "workers", str(os.cpu_count() or 4)),
        ]
        for i, (label, key, default) in enumerate(rows):
            ttk.Label(frame, text=label).grid(row=i, column=0, sticky="w", pady=2)
            var = tk.StringVar(value=default)
            ttk.Entry(frame, textvariable=var, width=40).grid(row=i, column=1, columnspan=3, sticky="we", pady=2)
            self.vars[key] = var

        preset_frame = ttk.LabelFrame(frame, text="Quick range", padding=6)
        preset_frame.grid(row=6, column=0, columnspan=4, sticky="we", pady=6)
        for i, name in enumerate(PRESETS):
            # n=name binds the label at definition time; a plain closure would see only the last name.
            ttk.Button(preset_frame, text=name, width=11, command=lambda n=name: self._apply_preset(n)).grid(
                row=0, column=i, padx=2
            )

        self.vars["all_tags"] = tk.BooleanVar()
        self.vars["summary"] = tk.BooleanVar()
        ttk.Checkbutton(frame, text="All tags", variable=self.vars["all_tags"]).grid(row=7, column=0, sticky="w")
        ttk.Checkbutton(frame, text="Daily summary", variable=self.vars["summary"]).grid(row=7, column=1, sticky="w")

        fmt_frame = ttk.LabelFrame(frame, text="Formats", padding=6)
        fmt_frame.grid(row=8, column=0, columnspan=4, sticky="we", pady=6)
        for i, name in enumerate(FORMATS):
            # parquet and csv start ticked; the other formats start unticked.
            var = tk.BooleanVar(value=name in ("parquet", "csv"))
            ttk.Checkbutton(fmt_frame, text=name, variable=var).grid(row=0, column=i, padx=4)
            self.vars[name] = var

        self.out_label = ttk.Label(frame, text=f"Output: {self.out_dir}")
        self.out_label.grid(row=9, column=0, columnspan=3, sticky="w")
        ttk.Button(frame, text="Choose folder", command=self._choose_folder).grid(row=9, column=3, sticky="e")

        self.run_btn = ttk.Button(frame, text="Compute", command=self._on_run)
        self.run_btn.grid(row=10, column=0, pady=8, sticky="w")
        # The lambda reads self.out_dir at click time, so it follows a later folder change.
        self.open_btn = ttk.Button(frame, text="Open output folder", command=lambda: _open_folder(Path(self.out_dir)))
        self.open_btn.grid(row=10, column=1, pady=8, sticky="w")
        self.spinner = ttk.Progressbar(frame, mode="indeterminate", length=160)
        self.spinner.grid(row=10, column=2, columnspan=2, pady=8, sticky="we")

        # Read-only log pane; _append() enables it briefly to insert text.
        self.log = scrolledtext.ScrolledText(frame, width=70, height=14, state="disabled")
        self.log.grid(row=11, column=0, columnspan=4, sticky="nsew")

        ttk.Button(frame, text="About", command=self._show_about).grid(row=12, column=0, pady=(6, 0), sticky="w")
        ttk.Label(frame, text="A project of OSGeo Nepal").grid(row=12, column=1, columnspan=3, pady=(6, 0), sticky="e")
        # Start polling the worker's event queue.
        self.root.after(120, self._drain)

    def _show_about(self) -> None:
        """Open a child window with the version, a short description, and the ABOUT_LINKS entries."""
        tk, ttk = self._tk, self._ttk
        win = tk.Toplevel(self.root)
        win.title("About osmsg")
        box = ttk.Frame(win, padding=16)
        box.grid(sticky="nsew")
        ttk.Label(box, text=f"osmsg {__version__}", font=("", 12, "bold")).grid(sticky="w")
        ttk.Label(box, text="OpenStreetMap Stats Generator").grid(sticky="w")
        ttk.Label(box, text="A project of OSGeo Nepal").grid(sticky="w", pady=(0, 10))
        for text, url in ABOUT_LINKS:
            link = ttk.Label(box, text=text, foreground="#1a73e8", cursor="hand2")
            link.grid(sticky="w", pady=2)
            # target=url binds this link's URL now instead of the loop's final value.
            link.bind("<Button-1>", lambda _event, target=url: webbrowser.open(target))
        ttk.Button(box, text="Close", command=win.destroy).grid(sticky="e", pady=(12, 0))

    def _apply_preset(self, name: str) -> None:
        """Fill the Start and End fields from the quick range called *name*."""
        start, end = preset_range(name)
        self.vars["start"].set(_fmt(start))
        self.vars["end"].set(_fmt(end))

    def _choose_folder(self) -> None:
        """Ask for an output folder; keep the current one if the dialog returns nothing."""
        chosen = self._filedialog.askdirectory(initialdir=self.out_dir)
        if chosen:
            self.out_dir = chosen
            self.out_label.config(text=f"Output: {self.out_dir}")

    def _append(self, text: str) -> None:
        """Append *text* to the log pane and scroll to the end."""
        # The widget is kept disabled; enable it only for the insert.
        self.log.config(state="normal")
        self.log.insert("end", text)
        self.log.see("end")
        self.log.config(state="disabled")

    def _on_run(self) -> None:
        """Validate the form and, if it is valid, start the run on a background thread."""
        try:
            cfg = build_config({k: v.get() for k, v in self.vars.items()}, self.out_dir)
        except OsmsgError as exc:
            # Invalid input: report it in the log and leave the UI ready for another try.
            self._append(f"\n{exc}\n")
            return
        # The button stays disabled until _drain() sees the "done" event.
        self.run_btn.config(state="disabled", text="Running...")
        self.spinner.start(12)
        self._append(f"\nComputing into {self.out_dir} ...\n")
        threading.Thread(target=self._worker, args=(cfg,), daemon=True).start()

    def _worker(self, cfg: RunConfig) -> None:
        """Run the pipeline (called on the background thread) and report via the event queue.

        Exactly one ("done", message) event is queued whether the run succeeds or fails.
        """
        # sys.stdout/sys.stderr are process-wide, so every thread's output is captured
        # until the originals are restored in the finally block.
        saved = sys.stdout, sys.stderr
        sys.stdout = sys.stderr = _Redirector(self.events)  # type: ignore[assignment]
        try:
            result = run(cfg)
            self.events.put(("done", f"Done. {result['rows']} rows. Files in {self.out_dir}"))
        except NoDataFoundError:
            self.events.put(("done", "No data found for that range."))
        except OsmsgError as exc:
            self.events.put(("done", f"Error: {exc}"))
        except Exception as exc:
            # Catch-all so an unexpected failure still produces a "done" event.
            self.events.put(("done", f"Unexpected error: {type(exc).__name__}: {exc}"))
        finally:
            sys.stdout, sys.stderr = saved

    def _drain(self) -> None:
        """Move queued worker events into the UI, then reschedule itself in 120 ms."""
        try:
            while True:
                kind, payload = self.events.get_nowait()
                if kind == "log":
                    self._append(payload)
                else:
                    # Any other kind (the worker sends "done") ends the run: show the
                    # message and make the UI idle again.
                    self._append(f"\n{payload}\n")
                    self.spinner.stop()
                    self.run_btn.config(state="normal", text="Compute")
        except queue.Empty:
            # Queue exhausted for this tick.
            pass
        self.root.after(120, self._drain)

    def run(self) -> None:
        """Enter the Tk main loop."""
        self.root.mainloop()
269
+
270
+
271
def launch() -> None:
    """Entry point for the ``osmsg-gui`` script: build the window and run its event loop."""
    window = App()
    window.run()
@@ -5,6 +5,7 @@ path (a glob would make DuckDB list every partition over the HF API)."""
5
5
  import datetime as dt
6
6
  import json
7
7
  import pathlib
8
+ import time
8
9
  from dataclasses import dataclass
9
10
 
10
11
  import duckdb
@@ -16,6 +17,7 @@ UTC = dt.UTC
16
17
  SCHEMA_VERSION = 1
17
18
  DEFAULT_HISTORY_URL = "hf://datasets/kshitijrajsharma/osmsg-history"
18
19
  HISTORY_SEQ_ID = 0
20
+ MONTH_READ_ATTEMPTS = 4
19
21
 
20
22
 
21
23
  @dataclass
@@ -185,36 +187,47 @@ def ingest_remote(
185
187
 
186
188
  info(f"history: remote ingest {start_iso} -> {end_iso} ({len(months)} month partitions) from {history_url}")
187
189
 
190
+ def ingest_month(month: tuple[int, int]) -> None:
191
+ changesets_src = _partition_list(history_url, "changesets", [month])
192
+ changefiles_src = _partition_list(history_url, "changefiles", [month])
193
+ if changesets_src is not None:
194
+ conn.execute(
195
+ f"""INSERT INTO users
196
+ SELECT uid, any_value(username) FROM {changesets_src}
197
+ WHERE {in_window} AND username IS NOT NULL
198
+ GROUP BY uid ON CONFLICT (uid) DO NOTHING"""
199
+ )
200
+ conn.execute(
201
+ f"""INSERT INTO changesets
202
+ SELECT changeset_id, uid, created_at, hashtags, editor,
203
+ CASE WHEN min_lon IS NOT NULL
204
+ THEN ST_MakeEnvelope(min_lon, min_lat, max_lon, max_lat) END
205
+ FROM {changesets_src} WHERE {changeset_where}
206
+ ON CONFLICT (changeset_id) DO NOTHING"""
207
+ )
208
+ if changefiles_src is not None:
209
+ conn.execute(
210
+ f"""INSERT INTO changeset_stats
211
+ SELECT changeset_id, {HISTORY_SEQ_ID} AS seq_id, uid,
212
+ nodes_created, nodes_modified, nodes_deleted,
213
+ ways_created, ways_modified, ways_deleted,
214
+ rels_created, rels_modified, rels_deleted,
215
+ poi_created, poi_modified, tag_stats
216
+ FROM {changefiles_src} WHERE {stats_where}
217
+ ON CONFLICT (seq_id, changeset_id) DO NOTHING"""
218
+ )
219
+
188
220
  with progress_bar(len(months), unit="months", description="Reading history") as advance:
189
221
  for month in months:
190
- changesets_src = _partition_list(history_url, "changesets", [month])
191
- changefiles_src = _partition_list(history_url, "changefiles", [month])
192
- if changesets_src is not None:
193
- conn.execute(
194
- f"""INSERT INTO users
195
- SELECT uid, any_value(username) FROM {changesets_src}
196
- WHERE {in_window} AND username IS NOT NULL
197
- GROUP BY uid ON CONFLICT (uid) DO NOTHING"""
198
- )
199
- conn.execute(
200
- f"""INSERT INTO changesets
201
- SELECT changeset_id, uid, created_at, hashtags, editor,
202
- CASE WHEN min_lon IS NOT NULL
203
- THEN ST_MakeEnvelope(min_lon, min_lat, max_lon, max_lat) END
204
- FROM {changesets_src} WHERE {changeset_where}
205
- ON CONFLICT (changeset_id) DO NOTHING"""
206
- )
207
- if changefiles_src is not None:
208
- conn.execute(
209
- f"""INSERT INTO changeset_stats
210
- SELECT changeset_id, {HISTORY_SEQ_ID} AS seq_id, uid,
211
- nodes_created, nodes_modified, nodes_deleted,
212
- ways_created, ways_modified, ways_deleted,
213
- rels_created, rels_modified, rels_deleted,
214
- poi_created, poi_modified, tag_stats
215
- FROM {changefiles_src} WHERE {stats_where}
216
- ON CONFLICT (seq_id, changeset_id) DO NOTHING"""
217
- )
222
+ for attempt in range(MONTH_READ_ATTEMPTS):
223
+ try:
224
+ ingest_month(month)
225
+ break
226
+ except duckdb.Error as exc:
227
+ if attempt == MONTH_READ_ATTEMPTS - 1:
228
+ raise
229
+ warn(f"history: {month[0]}-{month[1]:02d} read failed ({type(exc).__name__}); retrying.")
230
+ time.sleep(2 * (attempt + 1))
218
231
  advance()
219
232
 
220
233
  row = conn.execute(f"SELECT count(*) FROM changeset_stats WHERE seq_id = {HISTORY_SEQ_ID}").fetchone()
@@ -14,6 +14,7 @@ from pathlib import Path
14
14
  from typing import Any
15
15
 
16
16
  import duckdb
17
+ import requests
17
18
  from platformdirs import user_cache_dir
18
19
  from shapely.ops import unary_union
19
20
 
@@ -539,21 +540,30 @@ def _processing_config(cfg: RunConfig, *, parquet_dir: Path, geom_wkt: str | Non
539
540
  }
540
541
 
541
542
 
543
+ _DOWNLOAD_WORKERS = 4
544
+
545
+
542
546
  def _download_all(
543
547
  urls: list[str],
544
548
  mode: str,
545
- max_workers: int,
549
+ workers: int,
546
550
  cookie: str | None,
547
551
  cache_dir: Path,
548
552
  label: str,
549
553
  description: str = "downloading",
550
554
  ) -> None:
551
- with (
552
- progress_bar(len(urls), unit=label, description=description) as advance,
553
- concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool,
554
- ):
555
- for _ in pool.map(lambda u: download_osm_file(u, mode=mode, cookie=cookie, cache_dir=cache_dir), urls):
556
- advance()
555
+ try:
556
+ with (
557
+ progress_bar(len(urls), unit=label, description=description) as advance,
558
+ concurrent.futures.ThreadPoolExecutor(max_workers=workers) as pool,
559
+ ):
560
+ for _ in pool.map(lambda u: download_osm_file(u, mode=mode, cookie=cookie, cache_dir=cache_dir), urls):
561
+ advance()
562
+ except requests.exceptions.RequestException as exc:
563
+ raise OsmsgError(
564
+ f"Network error downloading {label} after retries ({type(exc).__name__}). "
565
+ "Re-run to resume: finished downloads are cached, so it continues from where it stopped."
566
+ ) from exc
557
567
 
558
568
 
559
569
  def _process_all(
@@ -701,10 +711,13 @@ def run(cfg: RunConfig) -> dict[str, Any]:
701
711
  if run_live:
702
712
  _auto_switch_replication(cfg, cfg.end_date - cfg.start_date)
703
713
  except duckdb.Error as exc:
704
- warn(f"history: remote ingest failed ({type(exc).__name__}: {exc}); using live path.")
705
714
  for tbl in ("changeset_stats", "changesets", "users"):
706
715
  conn.execute(f"DELETE FROM {tbl}")
707
- run_live = True
716
+ dbmod.close(conn)
717
+ raise OsmsgError(
718
+ f"Reading the published history failed after retries ({type(exc).__name__}). "
719
+ "Re-run to try again, narrow the date range, or pass --no-history for the live path."
720
+ ) from exc
708
721
 
709
722
  max_workers = cfg.workers or _cpu_count()
710
723
  info(f"Workers: {max_workers}")
@@ -727,6 +740,13 @@ def run(cfg: RunConfig) -> dict[str, Any]:
727
740
  else f"first run with {cfg.changeset_pad_hours}h backward pad"
728
741
  )
729
742
  info(f"Changesets: {len(urls)} files (seq {cs_start}-{cs_end}), {pad_note}.")
743
+ if len(urls) > 5000:
744
+ warn(
745
+ f"Hashtag/changeset filtering downloads the per-minute changeset stream for the live "
746
+ f"tail ({len(urls):,} files here). This is slow over a busy network and resumes from "
747
+ f"cache if interrupted; a shorter range or waiting for the dataset to cover more months "
748
+ f"reduces it."
749
+ )
730
750
 
731
751
  cs_frontier_ts = cs_repl.sequence_to_timestamp(cs_end)
732
752
 
@@ -736,7 +756,13 @@ def run(cfg: RunConfig) -> dict[str, Any]:
736
756
  cs_config["window_start_utc"] = cfg.start_date.astimezone(UTC)
737
757
 
738
758
  _download_all(
739
- urls, "changeset", max_workers, None, cfg.cache_dir, "changesets", description="Downloading changesets"
759
+ urls,
760
+ "changeset",
761
+ _DOWNLOAD_WORKERS,
762
+ None,
763
+ cfg.cache_dir,
764
+ "changesets",
765
+ description="Downloading changesets",
740
766
  )
741
767
  _process_all(
742
768
  urls,
@@ -799,7 +825,7 @@ def run(cfg: RunConfig) -> dict[str, Any]:
799
825
  _download_all(
800
826
  urls,
801
827
  "changefiles",
802
- max_workers,
828
+ _DOWNLOAD_WORKERS,
803
829
  cookie,
804
830
  cfg.cache_dir,
805
831
  "changefiles",
@@ -145,7 +145,7 @@ class ChangesetReplication:
145
145
  def timestamp_to_sequence(self, ts: datetime) -> int:
146
146
  cur_seq, last_run = self._state()
147
147
  wanted = int((ts - last_run).total_seconds() / 60) + cur_seq
148
- return min(wanted, cur_seq)
148
+ return max(1, min(wanted, cur_seq))
149
149
 
150
150
  def sequence_to_timestamp(self, seq: int) -> datetime:
151
151
  txt = session.get(self.state_url(seq)).text
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "osmsg"
3
- version = "1.2.1"
3
+ version = "1.2.5"
4
4
  description = "OpenStreetMap Stats Generator: Commandline"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -41,6 +41,9 @@ repository = "https://github.com/osgeonepal/osmsg"
41
41
  [project.scripts]
42
42
  osmsg = "osmsg.cli:app"
43
43
 
44
+ [project.gui-scripts]
45
+ osmsg-gui = "osmsg.gui:launch"
46
+
44
47
  [build-system]
45
48
  requires = ["uv_build>=0.5.15,<0.9"]
46
49
  build-backend = "uv_build"
@@ -1 +0,0 @@
1
- __version__ = "1.2.1"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes