osmsg 1.2.1__tar.gz → 1.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {osmsg-1.2.1 → osmsg-1.2.5}/PKG-INFO +8 -5
- {osmsg-1.2.1 → osmsg-1.2.5}/README.md +7 -4
- osmsg-1.2.5/osmsg/__version__.py +1 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/_http.py +7 -3
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/cli.py +1 -1
- osmsg-1.2.5/osmsg/gui.py +272 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/history.py +41 -28
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/pipeline.py +37 -11
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/replication.py +1 -1
- {osmsg-1.2.1 → osmsg-1.2.5}/pyproject.toml +4 -1
- osmsg-1.2.1/osmsg/__version__.py +0 -1
- {osmsg-1.2.1 → osmsg-1.2.5}/LICENSE +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/__init__.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/_tick.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/auth.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/boundary.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/db/__init__.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/db/duckdb_schema.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/db/ingest.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/db/queries.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/db/schema.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/exceptions.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/export/__init__.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/export/csv.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/export/json.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/export/markdown.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/export/parquet.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/export/psql.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/fetch.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/geofabrik.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/handlers.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/maintain/__init__.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/maintain/cli.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/maintain/convert.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/maintain/manifest.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/maintain/month.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/maintain/parquet.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/maintain/pbf_split.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/models.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/pg_schema.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/py.typed +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/tm.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/ui.py +0 -0
- {osmsg-1.2.1 → osmsg-1.2.5}/osmsg/workers.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: osmsg
|
|
3
|
-
Version: 1.2.1
|
|
3
|
+
Version: 1.2.5
|
|
4
4
|
Summary: OpenStreetMap Stats Generator: Commandline
|
|
5
5
|
Keywords: osm,stats,commandline,openstreetmap
|
|
6
6
|
Author: Kshitij Raj Sharma
|
|
@@ -78,7 +78,9 @@ brew install osgeonepal/tap/osmsg # macOS / Linux (Homebrew tap)
|
|
|
78
78
|
```
|
|
79
79
|
|
|
80
80
|
On Windows, download `osmsg.exe` from the [latest release](https://github.com/osgeonepal/osmsg/releases)
|
|
81
|
-
and
|
|
81
|
+
and double-click it to open the desktop app. Pick a Quick range (last hour, day, week, month, year, or
|
|
82
|
+
all time) or type your own dates, set the options, click Compute, and open the output folder. The CLI
|
|
83
|
+
below is for macOS, Linux, and pip/uv users.
|
|
82
84
|
|
|
83
85
|
## Quick start
|
|
84
86
|
|
|
@@ -206,7 +208,8 @@ Same pipeline as the CLI.
|
|
|
206
208
|
osmsg --config nepal.yaml
|
|
207
209
|
```
|
|
208
210
|
|
|
209
|
-
|
|
211
|
+
Each option is a YAML key written with its underscore name: `output_dir`, `history_url`, `all_stats`,
|
|
212
|
+
`formats`, `psql_dsn`, and so on (not the dashed flag). See [docs/Manual.md](./docs/Manual.md).
|
|
210
213
|
|
|
211
214
|
## Output formats
|
|
212
215
|
|
|
@@ -227,7 +230,7 @@ docker-compose `environment:` block all reach the same setting. CLI flag wins ov
|
|
|
227
230
|
| `--country` | `OSMSG_COUNTRY` | unset | Geofabrik region id(s). Comma-separated when set via env. |
|
|
228
231
|
| `--boundary` | `OSMSG_BOUNDARY` | unset | GeoJSON path or inline GeoJSON. |
|
|
229
232
|
| `--url` | `OSMSG_URL` | `minute` | `minute`/`hour`/`day` shortcut or full URL. Comma-separated when set via env. |
|
|
230
|
-
| `--workers` | `OSMSG_WORKERS` | cpu count | Parallel workers. |
|
|
233
|
+
| `--workers` | `OSMSG_WORKERS` | cpu count | Parallel parse workers. |
|
|
231
234
|
| `--cache-dir` | `OSMSG_CACHE_DIR` | platform cache | Where downloaded OSM files are kept across runs. |
|
|
232
235
|
| `--output-dir` | `OSMSG_OUTPUT_DIR` | `.` | Where `<name>.duckdb` and exports are written. |
|
|
233
236
|
| `--format` / `-f` | `OSMSG_FORMAT` | `parquet` | Repeat for multiple. Comma-separated when set via env. |
|
|
@@ -237,7 +240,7 @@ docker-compose `environment:` block all reach the same setting. CLI flag wins ov
|
|
|
237
240
|
| `--history` / `--no-history` | `OSMSG_HISTORY` | on | Read covered months from the published dataset. |
|
|
238
241
|
| `--history-url` | `OSMSG_HISTORY_URL` | `osmsg-history` | Published dataset location. |
|
|
239
242
|
| `--insert` | (none) | off | Load history into the store and seed resume, then exit. No window loads all of it. |
|
|
240
|
-
| `--osh-file` / `--changeset-file` | (none) | unset | Insert from local planet history + changeset files
|
|
243
|
+
| `--osh-file` / `--changeset-file` | (none) | unset | Insert from local planet history + changeset files. |
|
|
241
244
|
| `--changeset-pad-hours` | `OSMSG_CHANGESET_PAD_HOURS` | `1` | See below. |
|
|
242
245
|
| (auto-bootstrap on `--update`) | `OSMSG_BOOTSTRAP` | `hour` | `hour`, `day`, or `week`. Used when `--update` runs against an empty DB. |
|
|
243
246
|
| (auto-bootstrap on `--update`) | `OSMSG_BOOTSTRAP_DAYS` | unset | Integer N; overrides `OSMSG_BOOTSTRAP`. |
|
|
@@ -46,7 +46,9 @@ brew install osgeonepal/tap/osmsg # macOS / Linux (Homebrew tap)
|
|
|
46
46
|
```
|
|
47
47
|
|
|
48
48
|
On Windows, download `osmsg.exe` from the [latest release](https://github.com/osgeonepal/osmsg/releases)
|
|
49
|
-
and
|
|
49
|
+
and double-click it to open the desktop app. Pick a Quick range (last hour, day, week, month, year, or
|
|
50
|
+
all time) or type your own dates, set the options, click Compute, and open the output folder. The CLI
|
|
51
|
+
below is for macOS, Linux, and pip/uv users.
|
|
50
52
|
|
|
51
53
|
## Quick start
|
|
52
54
|
|
|
@@ -174,7 +176,8 @@ Same pipeline as the CLI.
|
|
|
174
176
|
osmsg --config nepal.yaml
|
|
175
177
|
```
|
|
176
178
|
|
|
177
|
-
|
|
179
|
+
Each option is a YAML key written with its underscore name: `output_dir`, `history_url`, `all_stats`,
|
|
180
|
+
`formats`, `psql_dsn`, and so on (not the dashed flag). See [docs/Manual.md](./docs/Manual.md).
|
|
178
181
|
|
|
179
182
|
## Output formats
|
|
180
183
|
|
|
@@ -195,7 +198,7 @@ docker-compose `environment:` block all reach the same setting. CLI flag wins ov
|
|
|
195
198
|
| `--country` | `OSMSG_COUNTRY` | unset | Geofabrik region id(s). Comma-separated when set via env. |
|
|
196
199
|
| `--boundary` | `OSMSG_BOUNDARY` | unset | GeoJSON path or inline GeoJSON. |
|
|
197
200
|
| `--url` | `OSMSG_URL` | `minute` | `minute`/`hour`/`day` shortcut or full URL. Comma-separated when set via env. |
|
|
198
|
-
| `--workers` | `OSMSG_WORKERS` | cpu count | Parallel workers. |
|
|
201
|
+
| `--workers` | `OSMSG_WORKERS` | cpu count | Parallel parse workers. |
|
|
199
202
|
| `--cache-dir` | `OSMSG_CACHE_DIR` | platform cache | Where downloaded OSM files are kept across runs. |
|
|
200
203
|
| `--output-dir` | `OSMSG_OUTPUT_DIR` | `.` | Where `<name>.duckdb` and exports are written. |
|
|
201
204
|
| `--format` / `-f` | `OSMSG_FORMAT` | `parquet` | Repeat for multiple. Comma-separated when set via env. |
|
|
@@ -205,7 +208,7 @@ docker-compose `environment:` block all reach the same setting. CLI flag wins ov
|
|
|
205
208
|
| `--history` / `--no-history` | `OSMSG_HISTORY` | on | Read covered months from the published dataset. |
|
|
206
209
|
| `--history-url` | `OSMSG_HISTORY_URL` | `osmsg-history` | Published dataset location. |
|
|
207
210
|
| `--insert` | (none) | off | Load history into the store and seed resume, then exit. No window loads all of it. |
|
|
208
|
-
| `--osh-file` / `--changeset-file` | (none) | unset | Insert from local planet history + changeset files
|
|
211
|
+
| `--osh-file` / `--changeset-file` | (none) | unset | Insert from local planet history + changeset files. |
|
|
209
212
|
| `--changeset-pad-hours` | `OSMSG_CHANGESET_PAD_HOURS` | `1` | See below. |
|
|
210
213
|
| (auto-bootstrap on `--update`) | `OSMSG_BOOTSTRAP` | `hour` | `hour`, `day`, or `week`. Used when `--update` runs against an empty DB. |
|
|
211
214
|
| (auto-bootstrap on `--update`) | `OSMSG_BOOTSTRAP_DAYS` | unset | Integer N; overrides `OSMSG_BOOTSTRAP`. |
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.5"
|
|
@@ -11,7 +11,7 @@ from requests.adapters import HTTPAdapter
|
|
|
11
11
|
from urllib3.util.retry import Retry
|
|
12
12
|
|
|
13
13
|
USER_AGENT = "osmsg"
|
|
14
|
-
DEFAULT_TIMEOUT = (
|
|
14
|
+
DEFAULT_TIMEOUT = (30, 120) # (connect, read) seconds
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class _TimeoutSession(requests.Session):
|
|
@@ -26,10 +26,14 @@ def make_session() -> requests.Session:
|
|
|
26
26
|
"""Fresh session with the standard timeout + retry policy (use when a flow needs its own cookie jar)."""
|
|
27
27
|
s = _TimeoutSession()
|
|
28
28
|
retry = Retry(
|
|
29
|
-
total=
|
|
30
|
-
|
|
29
|
+
total=10,
|
|
30
|
+
connect=10,
|
|
31
|
+
read=10,
|
|
32
|
+
backoff_factor=1.0,
|
|
33
|
+
backoff_max=120,
|
|
31
34
|
status_forcelist=(429, 500, 502, 503, 504),
|
|
32
35
|
allowed_methods=frozenset({"GET", "POST", "HEAD"}),
|
|
36
|
+
respect_retry_after_header=True,
|
|
33
37
|
)
|
|
34
38
|
adapter = HTTPAdapter(max_retries=retry, pool_maxsize=32)
|
|
35
39
|
s.mount("https://", adapter)
|
|
@@ -150,7 +150,7 @@ def main(
|
|
|
150
150
|
] = None,
|
|
151
151
|
workers: Annotated[
|
|
152
152
|
int | None,
|
|
153
|
-
typer.Option(envvar="OSMSG_WORKERS", help="Parallel workers (default: cpu count)."),
|
|
153
|
+
typer.Option(envvar="OSMSG_WORKERS", help="Parallel parse workers (default: cpu count)."),
|
|
154
154
|
] = None,
|
|
155
155
|
rows: Annotated[
|
|
156
156
|
int | None,
|
osmsg-1.2.5/osmsg/gui.py
ADDED
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
"""Minimal tkinter desktop UI for running osmsg and saving the output."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import datetime as dt
|
|
6
|
+
import os
|
|
7
|
+
import queue
|
|
8
|
+
import sys
|
|
9
|
+
import threading
|
|
10
|
+
import webbrowser
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from .__version__ import __version__
|
|
15
|
+
from .exceptions import NoDataFoundError, OsmsgError
|
|
16
|
+
from .pipeline import RunConfig, run
|
|
17
|
+
|
|
18
|
+
UTC = dt.UTC
|
|
19
|
+
FORMATS = ["parquet", "csv", "json", "markdown"]
|
|
20
|
+
ABOUT_LINKS = [
|
|
21
|
+
("Star osmsg on GitHub", "https://github.com/osgeonepal/osmsg"),
|
|
22
|
+
("Report a bug or request a feature", "https://github.com/osgeonepal/osmsg/issues"),
|
|
23
|
+
("Sponsor the developer", "https://github.com/sponsors/kshitijrajsharma"),
|
|
24
|
+
]
|
|
25
|
+
PRESETS = ["Last hour", "Last day", "Last week", "Last month", "Last year", "All time"]
|
|
26
|
+
_PRESET_DELTAS = {
|
|
27
|
+
"Last hour": dt.timedelta(hours=1),
|
|
28
|
+
"Last day": dt.timedelta(days=1),
|
|
29
|
+
"Last week": dt.timedelta(days=7),
|
|
30
|
+
"Last month": dt.timedelta(days=30),
|
|
31
|
+
"Last year": dt.timedelta(days=365),
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def preset_range(name: str, now: dt.datetime | None = None) -> tuple[dt.datetime, dt.datetime]:
|
|
36
|
+
"""Resolve a quick-range label to a (start, end) window."""
|
|
37
|
+
now = now or dt.datetime.now(UTC)
|
|
38
|
+
if name == "All time":
|
|
39
|
+
return dt.datetime(2005, 1, 1, tzinfo=UTC), now
|
|
40
|
+
return now - _PRESET_DELTAS[name], now
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _fmt(when: dt.datetime) -> str:
    """Render *when* as ``YYYY-MM-DD HH:MM:SS`` (no timezone suffix), the form the date entries display."""
    return f"{when:%Y-%m-%d %H:%M:%S}"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _parse_date(value: str) -> dt.datetime | None:
    """Parse a date typed into the form.

    Accepts ``YYYY-MM-DD HH:MM:SS`` or ``YYYY-MM-DD`` (surrounding whitespace is
    ignored) and tags the result with UTC.

    Returns:
        The parsed datetime, or ``None`` when the field is blank.

    Raises:
        OsmsgError: If the text is non-blank and matches neither layout.
    """
    text = value.strip()
    if text == "":
        return None

    parsed: dt.datetime | None = None
    for layout in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d"):
        try:
            parsed = dt.datetime.strptime(text, layout)
        except ValueError:
            continue
        break

    if parsed is None:
        raise OsmsgError(f"Unrecognized date: {text!r}. Use YYYY-MM-DD.")
    return parsed.replace(tzinfo=UTC)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _split(value: str | None) -> list[str] | None:
    """Break a comma-separated string into trimmed, non-empty items.

    Returns ``None`` (not an empty list) when nothing usable remains, including
    for ``None`` or blank input.
    """
    if not value:
        return None
    pieces = list(filter(None, map(str.strip, value.split(","))))
    return pieces or None
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _parse_int(value: object, field: str) -> int | None:
    """Parse an optional positive whole number from a form value.

    Args:
        value: Raw form value; its ``str()`` form is what gets parsed.
        field: Human-readable field name used in error messages.

    Returns:
        The parsed integer, or ``None`` when the value is ``None`` or blank.

    Raises:
        OsmsgError: If the text is not an integer, or the integer is below 1.
    """
    # Test for None explicitly rather than truthiness: a numeric 0 must reach
    # the range check below instead of being mistaken for "left blank".
    text = "" if value is None else str(value).strip()
    if not text:
        return None
    try:
        number = int(text)
    except ValueError as exc:
        raise OsmsgError(f"{field} must be a whole number.") from exc
    if number < 1:
        raise OsmsgError(f"{field} must be at least 1.")
    return number
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def build_config(form: dict[str, object], output_dir: str) -> RunConfig:
    """Map the form fields to a RunConfig, raising OsmsgError on invalid input.

    Args:
        form: Field values keyed by form name: ``name``, ``start``, ``end``,
            ``hashtags``, ``tags``, ``workers``, ``all_tags``, ``summary``, plus
            one truthy/falsy entry per name in ``FORMATS``.
        output_dir: Destination folder; an empty string falls back to ``"."``.

    Returns:
        A RunConfig populated from the form.

    Raises:
        OsmsgError: If no format is selected, the start date is missing, a date
            is unparseable, or the worker count is invalid.
    """
    formats = [name for name in FORMATS if form.get(name)]
    if not formats:
        raise OsmsgError("Pick at least one output format.")

    # `or ""` (as the hashtag/tag fields already do) so an explicit None is
    # treated as blank rather than being stringified to "None" and rejected
    # as an unrecognized date.
    start = _parse_date(str(form.get("start") or ""))
    if start is None:
        raise OsmsgError("Start date is required (YYYY-MM-DD).")
    end = _parse_date(str(form.get("end") or ""))

    # A whitespace-only name is as good as no name; fall back to the default.
    name = str(form.get("name") or "").strip() or "stats"

    return RunConfig(
        name=name,
        start_date=start,
        end_date=end,
        hashtags=_split(str(form.get("hashtags") or "")),
        additional_tags=_split(str(form.get("tags") or "")),
        tag_mode="all" if form.get("all_tags") else "none",
        summary=bool(form.get("summary")),
        formats=formats,
        workers=_parse_int(form.get("workers"), "Workers"),
        output_dir=Path(output_dir or "."),
    )
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _open_folder(path: Path) -> None:
    """Open *path* with the platform's default handler.

    Uses ``os.startfile`` on Windows, the ``open`` command on macOS and
    ``xdg-open`` everywhere else. The exit status of the external command is
    not checked.
    """
    if sys.platform == "win32":
        os.startfile(path)  # noqa: S606
        return

    import subprocess

    opener = "open" if sys.platform == "darwin" else "xdg-open"
    subprocess.run([opener, str(path)], check=False)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class _Redirector:
    """Minimal writable stream that forwards text to a queue as ``("log", text)`` events."""

    def __init__(self, sink: queue.Queue) -> None:
        self.sink = sink

    def write(self, text: str) -> None:
        """Enqueue *text*; empty writes are dropped."""
        if not text:
            return
        self.sink.put(("log", text))

    def flush(self) -> None:
        """Do nothing: each write is enqueued immediately, so there is no buffer to flush."""
        return None

    def isatty(self) -> bool:
        """Report that this stream is not a terminal."""
        return False
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class App:
    """Tk window that collects run options, runs the pipeline on a worker thread, and shows its output.

    The worker never touches widgets: it posts ``(kind, payload)`` tuples to
    ``self.events`` and ``_drain`` (scheduled on the Tk loop) applies them.
    """

    def __init__(self) -> None:
        """Build the window and its widgets, then start polling the event queue."""
        # Imported here rather than at module level, so importing this module
        # does not by itself import tkinter.
        import tkinter as tk
        from tkinter import filedialog, scrolledtext, ttk

        self._tk = tk
        self._ttk = ttk
        self._filedialog = filedialog
        # Worker thread -> UI channel of (kind, payload) tuples, consumed by _drain.
        self.events: queue.Queue = queue.Queue()
        self.out_dir = str(Path.home() / "osmsg")

        self.root = tk.Tk()
        self.root.title("osmsg")
        # Form state keyed by field name; every value is a tk variable read via .get() in _on_run.
        self.vars: dict[str, Any] = {}
        frame = ttk.Frame(self.root, padding=12)
        frame.grid(sticky="nsew")

        # (label, form key, initial text) for the free-text entries, one grid row each.
        rows = [
            ("Name", "name", "stats"),
            ("Start (YYYY-MM-DD)", "start", ""),
            ("End (blank = now)", "end", ""),
            ("Hashtags (comma-sep)", "hashtags", ""),
            ("Tags (comma-sep)", "tags", ""),
            ("Workers", "workers", str(os.cpu_count() or 4)),
        ]
        for i, (label, key, default) in enumerate(rows):
            ttk.Label(frame, text=label).grid(row=i, column=0, sticky="w", pady=2)
            var = tk.StringVar(value=default)
            ttk.Entry(frame, textvariable=var, width=40).grid(row=i, column=1, columnspan=3, sticky="we", pady=2)
            self.vars[key] = var

        preset_frame = ttk.LabelFrame(frame, text="Quick range", padding=6)
        preset_frame.grid(row=6, column=0, columnspan=4, sticky="we", pady=6)
        for i, name in enumerate(PRESETS):
            # n=name binds the label now; a plain closure would see only the final loop value.
            ttk.Button(preset_frame, text=name, width=11, command=lambda n=name: self._apply_preset(n)).grid(
                row=0, column=i, padx=2
            )

        self.vars["all_tags"] = tk.BooleanVar()
        self.vars["summary"] = tk.BooleanVar()
        ttk.Checkbutton(frame, text="All tags", variable=self.vars["all_tags"]).grid(row=7, column=0, sticky="w")
        ttk.Checkbutton(frame, text="Daily summary", variable=self.vars["summary"]).grid(row=7, column=1, sticky="w")

        fmt_frame = ttk.LabelFrame(frame, text="Formats", padding=6)
        fmt_frame.grid(row=8, column=0, columnspan=4, sticky="we", pady=6)
        for i, name in enumerate(FORMATS):
            # parquet and csv start ticked; the other formats start unticked.
            var = tk.BooleanVar(value=name in ("parquet", "csv"))
            ttk.Checkbutton(fmt_frame, text=name, variable=var).grid(row=0, column=i, padx=4)
            self.vars[name] = var

        self.out_label = ttk.Label(frame, text=f"Output: {self.out_dir}")
        self.out_label.grid(row=9, column=0, columnspan=3, sticky="w")
        ttk.Button(frame, text="Choose folder", command=self._choose_folder).grid(row=9, column=3, sticky="e")

        self.run_btn = ttk.Button(frame, text="Compute", command=self._on_run)
        self.run_btn.grid(row=10, column=0, pady=8, sticky="w")
        self.open_btn = ttk.Button(frame, text="Open output folder", command=self._open_output)
        self.open_btn.grid(row=10, column=1, pady=8, sticky="w")
        self.spinner = ttk.Progressbar(frame, mode="indeterminate", length=160)
        self.spinner.grid(row=10, column=2, columnspan=2, pady=8, sticky="we")

        # Kept disabled so the user cannot type into it; _append enables it briefly to insert text.
        self.log = scrolledtext.ScrolledText(frame, width=70, height=14, state="disabled")
        self.log.grid(row=11, column=0, columnspan=4, sticky="nsew")

        ttk.Button(frame, text="About", command=self._show_about).grid(row=12, column=0, pady=(6, 0), sticky="w")
        ttk.Label(frame, text="A project of OSGeo Nepal").grid(row=12, column=1, columnspan=3, pady=(6, 0), sticky="e")
        self.root.after(120, self._drain)

    def _show_about(self) -> None:
        """Open the About window with the version and the clickable ABOUT_LINKS."""
        tk, ttk = self._tk, self._ttk
        win = tk.Toplevel(self.root)
        win.title("About osmsg")
        box = ttk.Frame(win, padding=16)
        box.grid(sticky="nsew")
        ttk.Label(box, text=f"osmsg {__version__}", font=("", 12, "bold")).grid(sticky="w")
        ttk.Label(box, text="OpenStreetMap Stats Generator").grid(sticky="w")
        ttk.Label(box, text="A project of OSGeo Nepal").grid(sticky="w", pady=(0, 10))
        for text, url in ABOUT_LINKS:
            link = ttk.Label(box, text=text, foreground="#1a73e8", cursor="hand2")
            link.grid(sticky="w", pady=2)
            # target=url binds the URL per link (same late-binding concern as the preset buttons).
            link.bind("<Button-1>", lambda _event, target=url: webbrowser.open(target))
        ttk.Button(box, text="Close", command=win.destroy).grid(sticky="e", pady=(12, 0))

    def _apply_preset(self, name: str) -> None:
        """Fill the start and end entries from the quick-range label *name*."""
        start, end = preset_range(name)
        self.vars["start"].set(_fmt(start))
        self.vars["end"].set(_fmt(end))

    def _choose_folder(self) -> None:
        """Ask for an output folder; keep the current one if the dialog returns nothing."""
        chosen = self._filedialog.askdirectory(initialdir=self.out_dir)
        if chosen:
            self.out_dir = chosen
            self.out_label.config(text=f"Output: {self.out_dir}")

    def _open_output(self) -> None:
        """Open the output folder, reporting failures in the log.

        This runs as a Tk button callback, so an exception raised here would
        give the user no feedback in the window. The folder may not exist yet
        (nothing in this class creates it), and launching the platform opener
        can raise ``OSError``.
        """
        target = Path(self.out_dir)
        if not target.is_dir():
            self._append(f"\nOutput folder does not exist yet: {target}\n")
            return
        try:
            _open_folder(target)
        except OSError as exc:
            self._append(f"\nCould not open {target}: {exc}\n")

    def _append(self, text: str) -> None:
        """Append *text* to the log widget and scroll to the end."""
        self.log.config(state="normal")
        self.log.insert("end", text)
        self.log.see("end")
        self.log.config(state="disabled")

    def _on_run(self) -> None:
        """Validate the form and, if it is valid, start the pipeline on a daemon thread."""
        try:
            cfg = build_config({k: v.get() for k, v in self.vars.items()}, self.out_dir)
        except OsmsgError as exc:
            self._append(f"\n{exc}\n")
            return
        # Disabled until _drain sees the worker's "done" event.
        self.run_btn.config(state="disabled", text="Running...")
        self.spinner.start(12)
        self._append(f"\nComputing into {self.out_dir} ...\n")
        threading.Thread(target=self._worker, args=(cfg,), daemon=True).start()

    def _worker(self, cfg: RunConfig) -> None:
        """Run the pipeline off the UI thread and post the outcome as a "done" event.

        ``sys.stdout`` and ``sys.stderr`` are replaced process-wide for the
        duration of the run so pipeline output lands in the log, and are
        restored afterwards. Any ``Exception`` is turned into a "done" message
        rather than propagating out of the thread.
        """
        saved = sys.stdout, sys.stderr
        sys.stdout = sys.stderr = _Redirector(self.events)  # type: ignore[assignment]
        try:
            result = run(cfg)
            self.events.put(("done", f"Done. {result['rows']} rows. Files in {self.out_dir}"))
        except NoDataFoundError:
            self.events.put(("done", "No data found for that range."))
        except OsmsgError as exc:
            self.events.put(("done", f"Error: {exc}"))
        except Exception as exc:
            self.events.put(("done", f"Unexpected error: {type(exc).__name__}: {exc}"))
        finally:
            sys.stdout, sys.stderr = saved

    def _drain(self) -> None:
        """Apply all queued events to the UI, then reschedule itself in 120 ms.

        "log" events are appended verbatim; any other kind is treated as the end
        of a run: its payload is appended, the spinner stops and the Compute
        button is re-enabled.
        """
        try:
            while True:
                kind, payload = self.events.get_nowait()
                if kind == "log":
                    self._append(payload)
                else:
                    self._append(f"\n{payload}\n")
                    self.spinner.stop()
                    self.run_btn.config(state="normal", text="Compute")
        except queue.Empty:
            pass
        self.root.after(120, self._drain)

    def run(self) -> None:
        """Enter the Tk main loop; returns when the window is closed."""
        self.root.mainloop()
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def launch() -> None:
    """Create the application window and block in its event loop until it is closed."""
    window = App()
    window.run()
|
|
@@ -5,6 +5,7 @@ path (a glob would make DuckDB list every partition over the HF API)."""
|
|
|
5
5
|
import datetime as dt
|
|
6
6
|
import json
|
|
7
7
|
import pathlib
|
|
8
|
+
import time
|
|
8
9
|
from dataclasses import dataclass
|
|
9
10
|
|
|
10
11
|
import duckdb
|
|
@@ -16,6 +17,7 @@ UTC = dt.UTC
|
|
|
16
17
|
SCHEMA_VERSION = 1
|
|
17
18
|
DEFAULT_HISTORY_URL = "hf://datasets/kshitijrajsharma/osmsg-history"
|
|
18
19
|
HISTORY_SEQ_ID = 0
|
|
20
|
+
MONTH_READ_ATTEMPTS = 4
|
|
19
21
|
|
|
20
22
|
|
|
21
23
|
@dataclass
|
|
@@ -185,36 +187,47 @@ def ingest_remote(
|
|
|
185
187
|
|
|
186
188
|
info(f"history: remote ingest {start_iso} -> {end_iso} ({len(months)} month partitions) from {history_url}")
|
|
187
189
|
|
|
190
|
+
def ingest_month(month: tuple[int, int]) -> None:
|
|
191
|
+
changesets_src = _partition_list(history_url, "changesets", [month])
|
|
192
|
+
changefiles_src = _partition_list(history_url, "changefiles", [month])
|
|
193
|
+
if changesets_src is not None:
|
|
194
|
+
conn.execute(
|
|
195
|
+
f"""INSERT INTO users
|
|
196
|
+
SELECT uid, any_value(username) FROM {changesets_src}
|
|
197
|
+
WHERE {in_window} AND username IS NOT NULL
|
|
198
|
+
GROUP BY uid ON CONFLICT (uid) DO NOTHING"""
|
|
199
|
+
)
|
|
200
|
+
conn.execute(
|
|
201
|
+
f"""INSERT INTO changesets
|
|
202
|
+
SELECT changeset_id, uid, created_at, hashtags, editor,
|
|
203
|
+
CASE WHEN min_lon IS NOT NULL
|
|
204
|
+
THEN ST_MakeEnvelope(min_lon, min_lat, max_lon, max_lat) END
|
|
205
|
+
FROM {changesets_src} WHERE {changeset_where}
|
|
206
|
+
ON CONFLICT (changeset_id) DO NOTHING"""
|
|
207
|
+
)
|
|
208
|
+
if changefiles_src is not None:
|
|
209
|
+
conn.execute(
|
|
210
|
+
f"""INSERT INTO changeset_stats
|
|
211
|
+
SELECT changeset_id, {HISTORY_SEQ_ID} AS seq_id, uid,
|
|
212
|
+
nodes_created, nodes_modified, nodes_deleted,
|
|
213
|
+
ways_created, ways_modified, ways_deleted,
|
|
214
|
+
rels_created, rels_modified, rels_deleted,
|
|
215
|
+
poi_created, poi_modified, tag_stats
|
|
216
|
+
FROM {changefiles_src} WHERE {stats_where}
|
|
217
|
+
ON CONFLICT (seq_id, changeset_id) DO NOTHING"""
|
|
218
|
+
)
|
|
219
|
+
|
|
188
220
|
with progress_bar(len(months), unit="months", description="Reading history") as advance:
|
|
189
221
|
for month in months:
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
conn.execute(
|
|
200
|
-
f"""INSERT INTO changesets
|
|
201
|
-
SELECT changeset_id, uid, created_at, hashtags, editor,
|
|
202
|
-
CASE WHEN min_lon IS NOT NULL
|
|
203
|
-
THEN ST_MakeEnvelope(min_lon, min_lat, max_lon, max_lat) END
|
|
204
|
-
FROM {changesets_src} WHERE {changeset_where}
|
|
205
|
-
ON CONFLICT (changeset_id) DO NOTHING"""
|
|
206
|
-
)
|
|
207
|
-
if changefiles_src is not None:
|
|
208
|
-
conn.execute(
|
|
209
|
-
f"""INSERT INTO changeset_stats
|
|
210
|
-
SELECT changeset_id, {HISTORY_SEQ_ID} AS seq_id, uid,
|
|
211
|
-
nodes_created, nodes_modified, nodes_deleted,
|
|
212
|
-
ways_created, ways_modified, ways_deleted,
|
|
213
|
-
rels_created, rels_modified, rels_deleted,
|
|
214
|
-
poi_created, poi_modified, tag_stats
|
|
215
|
-
FROM {changefiles_src} WHERE {stats_where}
|
|
216
|
-
ON CONFLICT (seq_id, changeset_id) DO NOTHING"""
|
|
217
|
-
)
|
|
222
|
+
for attempt in range(MONTH_READ_ATTEMPTS):
|
|
223
|
+
try:
|
|
224
|
+
ingest_month(month)
|
|
225
|
+
break
|
|
226
|
+
except duckdb.Error as exc:
|
|
227
|
+
if attempt == MONTH_READ_ATTEMPTS - 1:
|
|
228
|
+
raise
|
|
229
|
+
warn(f"history: {month[0]}-{month[1]:02d} read failed ({type(exc).__name__}); retrying.")
|
|
230
|
+
time.sleep(2 * (attempt + 1))
|
|
218
231
|
advance()
|
|
219
232
|
|
|
220
233
|
row = conn.execute(f"SELECT count(*) FROM changeset_stats WHERE seq_id = {HISTORY_SEQ_ID}").fetchone()
|
|
@@ -14,6 +14,7 @@ from pathlib import Path
|
|
|
14
14
|
from typing import Any
|
|
15
15
|
|
|
16
16
|
import duckdb
|
|
17
|
+
import requests
|
|
17
18
|
from platformdirs import user_cache_dir
|
|
18
19
|
from shapely.ops import unary_union
|
|
19
20
|
|
|
@@ -539,21 +540,30 @@ def _processing_config(cfg: RunConfig, *, parquet_dir: Path, geom_wkt: str | Non
|
|
|
539
540
|
}
|
|
540
541
|
|
|
541
542
|
|
|
543
|
+
_DOWNLOAD_WORKERS = 4
|
|
544
|
+
|
|
545
|
+
|
|
542
546
|
def _download_all(
|
|
543
547
|
urls: list[str],
|
|
544
548
|
mode: str,
|
|
545
|
-
|
|
549
|
+
workers: int,
|
|
546
550
|
cookie: str | None,
|
|
547
551
|
cache_dir: Path,
|
|
548
552
|
label: str,
|
|
549
553
|
description: str = "downloading",
|
|
550
554
|
) -> None:
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
555
|
+
try:
|
|
556
|
+
with (
|
|
557
|
+
progress_bar(len(urls), unit=label, description=description) as advance,
|
|
558
|
+
concurrent.futures.ThreadPoolExecutor(max_workers=workers) as pool,
|
|
559
|
+
):
|
|
560
|
+
for _ in pool.map(lambda u: download_osm_file(u, mode=mode, cookie=cookie, cache_dir=cache_dir), urls):
|
|
561
|
+
advance()
|
|
562
|
+
except requests.exceptions.RequestException as exc:
|
|
563
|
+
raise OsmsgError(
|
|
564
|
+
f"Network error downloading {label} after retries ({type(exc).__name__}). "
|
|
565
|
+
"Re-run to resume: finished downloads are cached, so it continues from where it stopped."
|
|
566
|
+
) from exc
|
|
557
567
|
|
|
558
568
|
|
|
559
569
|
def _process_all(
|
|
@@ -701,10 +711,13 @@ def run(cfg: RunConfig) -> dict[str, Any]:
|
|
|
701
711
|
if run_live:
|
|
702
712
|
_auto_switch_replication(cfg, cfg.end_date - cfg.start_date)
|
|
703
713
|
except duckdb.Error as exc:
|
|
704
|
-
warn(f"history: remote ingest failed ({type(exc).__name__}: {exc}); using live path.")
|
|
705
714
|
for tbl in ("changeset_stats", "changesets", "users"):
|
|
706
715
|
conn.execute(f"DELETE FROM {tbl}")
|
|
707
|
-
|
|
716
|
+
dbmod.close(conn)
|
|
717
|
+
raise OsmsgError(
|
|
718
|
+
f"Reading the published history failed after retries ({type(exc).__name__}). "
|
|
719
|
+
"Re-run to try again, narrow the date range, or pass --no-history for the live path."
|
|
720
|
+
) from exc
|
|
708
721
|
|
|
709
722
|
max_workers = cfg.workers or _cpu_count()
|
|
710
723
|
info(f"Workers: {max_workers}")
|
|
@@ -727,6 +740,13 @@ def run(cfg: RunConfig) -> dict[str, Any]:
|
|
|
727
740
|
else f"first run with {cfg.changeset_pad_hours}h backward pad"
|
|
728
741
|
)
|
|
729
742
|
info(f"Changesets: {len(urls)} files (seq {cs_start}-{cs_end}), {pad_note}.")
|
|
743
|
+
if len(urls) > 5000:
|
|
744
|
+
warn(
|
|
745
|
+
f"Hashtag/changeset filtering downloads the per-minute changeset stream for the live "
|
|
746
|
+
f"tail ({len(urls):,} files here). This is slow over a busy network and resumes from "
|
|
747
|
+
f"cache if interrupted; a shorter range or waiting for the dataset to cover more months "
|
|
748
|
+
f"reduces it."
|
|
749
|
+
)
|
|
730
750
|
|
|
731
751
|
cs_frontier_ts = cs_repl.sequence_to_timestamp(cs_end)
|
|
732
752
|
|
|
@@ -736,7 +756,13 @@ def run(cfg: RunConfig) -> dict[str, Any]:
|
|
|
736
756
|
cs_config["window_start_utc"] = cfg.start_date.astimezone(UTC)
|
|
737
757
|
|
|
738
758
|
_download_all(
|
|
739
|
-
urls,
|
|
759
|
+
urls,
|
|
760
|
+
"changeset",
|
|
761
|
+
_DOWNLOAD_WORKERS,
|
|
762
|
+
None,
|
|
763
|
+
cfg.cache_dir,
|
|
764
|
+
"changesets",
|
|
765
|
+
description="Downloading changesets",
|
|
740
766
|
)
|
|
741
767
|
_process_all(
|
|
742
768
|
urls,
|
|
@@ -799,7 +825,7 @@ def run(cfg: RunConfig) -> dict[str, Any]:
|
|
|
799
825
|
_download_all(
|
|
800
826
|
urls,
|
|
801
827
|
"changefiles",
|
|
802
|
-
|
|
828
|
+
_DOWNLOAD_WORKERS,
|
|
803
829
|
cookie,
|
|
804
830
|
cfg.cache_dir,
|
|
805
831
|
"changefiles",
|
|
@@ -145,7 +145,7 @@ class ChangesetReplication:
|
|
|
145
145
|
def timestamp_to_sequence(self, ts: datetime) -> int:
|
|
146
146
|
cur_seq, last_run = self._state()
|
|
147
147
|
wanted = int((ts - last_run).total_seconds() / 60) + cur_seq
|
|
148
|
-
return min(wanted, cur_seq)
|
|
148
|
+
return max(1, min(wanted, cur_seq))
|
|
149
149
|
|
|
150
150
|
def sequence_to_timestamp(self, seq: int) -> datetime:
|
|
151
151
|
txt = session.get(self.state_url(seq)).text
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "osmsg"
|
|
3
|
-
version = "1.2.1"
|
|
3
|
+
version = "1.2.5"
|
|
4
4
|
description = "OpenStreetMap Stats Generator: Commandline"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -41,6 +41,9 @@ repository = "https://github.com/osgeonepal/osmsg"
|
|
|
41
41
|
[project.scripts]
|
|
42
42
|
osmsg = "osmsg.cli:app"
|
|
43
43
|
|
|
44
|
+
[project.gui-scripts]
|
|
45
|
+
osmsg-gui = "osmsg.gui:launch"
|
|
46
|
+
|
|
44
47
|
[build-system]
|
|
45
48
|
requires = ["uv_build>=0.5.15,<0.9"]
|
|
46
49
|
build-backend = "uv_build"
|
osmsg-1.2.1/osmsg/__version__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.1"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|