osmsg 1.2.0__tar.gz → 1.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of osmsg might be problematic. Click here for more details.
- {osmsg-1.2.0 → osmsg-1.2.2}/PKG-INFO +8 -3
- {osmsg-1.2.0 → osmsg-1.2.2}/README.md +7 -2
- osmsg-1.2.2/osmsg/__version__.py +1 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/cli.py +9 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/export/psql.py +0 -13
- osmsg-1.2.2/osmsg/gui.py +195 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/history.py +46 -63
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/maintain/convert.py +5 -18
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/maintain/month.py +0 -3
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/maintain/parquet.py +2 -6
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/maintain/pbf_split.py +0 -1
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/pipeline.py +169 -108
- {osmsg-1.2.0 → osmsg-1.2.2}/pyproject.toml +4 -1
- osmsg-1.2.0/osmsg/__version__.py +0 -1
- {osmsg-1.2.0 → osmsg-1.2.2}/LICENSE +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/__init__.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/_http.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/_tick.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/auth.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/boundary.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/db/__init__.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/db/duckdb_schema.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/db/ingest.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/db/queries.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/db/schema.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/exceptions.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/export/__init__.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/export/csv.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/export/json.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/export/markdown.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/export/parquet.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/fetch.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/geofabrik.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/handlers.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/maintain/__init__.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/maintain/cli.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/maintain/manifest.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/models.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/pg_schema.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/py.typed +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/replication.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/tm.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/ui.py +0 -0
- {osmsg-1.2.0 → osmsg-1.2.2}/osmsg/workers.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: osmsg
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.2.2
|
|
4
4
|
Summary: OpenStreetMap Stats Generator: Commandline
|
|
5
5
|
Keywords: osm,stats,commandline,openstreetmap
|
|
6
6
|
Author: Kshitij Raj Sharma
|
|
@@ -78,7 +78,8 @@ brew install osgeonepal/tap/osmsg # macOS / Linux (Homebrew tap)
|
|
|
78
78
|
```
|
|
79
79
|
|
|
80
80
|
On Windows, download `osmsg.exe` from the [latest release](https://github.com/osgeonepal/osmsg/releases)
|
|
81
|
-
and
|
|
81
|
+
and double-click it to open the desktop app. Fill in the dates and options, click Run, and open the
|
|
82
|
+
output folder. The CLI below is for macOS, Linux, and pip/uv users.
|
|
82
83
|
|
|
83
84
|
## Quick start
|
|
84
85
|
|
|
@@ -213,6 +214,9 @@ Any flag works as a YAML key. See [docs/Manual.md](./docs/Manual.md) for the ful
|
|
|
213
214
|
Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via
|
|
214
215
|
`-f parquet|csv|json|markdown|psql`. Parquet is the default. Open it with duckdb, polars, pandas, anything.
|
|
215
216
|
|
|
217
|
+
Rerunning the same query with a different `-f` re-exports from the existing `<name>.duckdb` instead of
|
|
218
|
+
refetching, so adding a format is instant. Pass `--overwrite` to force a fresh recompute.
|
|
219
|
+
|
|
216
220
|
## Configuration
|
|
217
221
|
|
|
218
222
|
Every meaningful flag has a matching `OSMSG_*` env var so the CLI, a `.env` file, and a
|
|
@@ -228,12 +232,13 @@ docker-compose `environment:` block all reach the same setting. CLI flag wins ov
|
|
|
228
232
|
| `--cache-dir` | `OSMSG_CACHE_DIR` | platform cache | Where downloaded OSM files are kept across runs. |
|
|
229
233
|
| `--output-dir` | `OSMSG_OUTPUT_DIR` | `.` | Where `<name>.duckdb` and exports are written. |
|
|
230
234
|
| `--format` / `-f` | `OSMSG_FORMAT` | `parquet` | Repeat for multiple. Comma-separated when set via env. |
|
|
235
|
+
| `--overwrite` | (none) | off | Recompute even if `<name>.duckdb` already holds this exact query. |
|
|
231
236
|
| `--psql-dsn` | `OSMSG_PSQL_DSN` | unset | libpq DSN for `-f psql`. |
|
|
232
237
|
| `--psql-bulk` | `OSMSG_PSQL_BULK` | off | Faster first full load to Postgres. |
|
|
233
238
|
| `--history` / `--no-history` | `OSMSG_HISTORY` | on | Read covered months from the published dataset. |
|
|
234
239
|
| `--history-url` | `OSMSG_HISTORY_URL` | `osmsg-history` | Published dataset location. |
|
|
235
240
|
| `--insert` | (none) | off | Load history into the store and seed resume, then exit. No window loads all of it. |
|
|
236
|
-
| `--osh-file` / `--changeset-file` | (none) | unset | Insert from local planet history + changeset files
|
|
241
|
+
| `--osh-file` / `--changeset-file` | (none) | unset | Insert from local planet history + changeset files. |
|
|
237
242
|
| `--changeset-pad-hours` | `OSMSG_CHANGESET_PAD_HOURS` | `1` | See below. |
|
|
238
243
|
| (auto-bootstrap on `--update`) | `OSMSG_BOOTSTRAP` | `hour` | `hour`, `day`, or `week`. Used when `--update` runs against an empty DB. |
|
|
239
244
|
| (auto-bootstrap on `--update`) | `OSMSG_BOOTSTRAP_DAYS` | unset | Integer N; overrides `OSMSG_BOOTSTRAP`. |
|
|
@@ -46,7 +46,8 @@ brew install osgeonepal/tap/osmsg # macOS / Linux (Homebrew tap)
|
|
|
46
46
|
```
|
|
47
47
|
|
|
48
48
|
On Windows, download `osmsg.exe` from the [latest release](https://github.com/osgeonepal/osmsg/releases)
|
|
49
|
-
and
|
|
49
|
+
and double-click it to open the desktop app. Fill in the dates and options, click Run, and open the
|
|
50
|
+
output folder. The CLI below is for macOS, Linux, and pip/uv users.
|
|
50
51
|
|
|
51
52
|
## Quick start
|
|
52
53
|
|
|
@@ -181,6 +182,9 @@ Any flag works as a YAML key. See [docs/Manual.md](./docs/Manual.md) for the ful
|
|
|
181
182
|
Every run writes `stats.duckdb` (or `<--name>.duckdb`) plus the formats you ask for via
|
|
182
183
|
`-f parquet|csv|json|markdown|psql`. Parquet is the default. Open it with duckdb, polars, pandas, anything.
|
|
183
184
|
|
|
185
|
+
Rerunning the same query with a different `-f` re-exports from the existing `<name>.duckdb` instead of
|
|
186
|
+
refetching, so adding a format is instant. Pass `--overwrite` to force a fresh recompute.
|
|
187
|
+
|
|
184
188
|
## Configuration
|
|
185
189
|
|
|
186
190
|
Every meaningful flag has a matching `OSMSG_*` env var so the CLI, a `.env` file, and a
|
|
@@ -196,12 +200,13 @@ docker-compose `environment:` block all reach the same setting. CLI flag wins ov
|
|
|
196
200
|
| `--cache-dir` | `OSMSG_CACHE_DIR` | platform cache | Where downloaded OSM files are kept across runs. |
|
|
197
201
|
| `--output-dir` | `OSMSG_OUTPUT_DIR` | `.` | Where `<name>.duckdb` and exports are written. |
|
|
198
202
|
| `--format` / `-f` | `OSMSG_FORMAT` | `parquet` | Repeat for multiple. Comma-separated when set via env. |
|
|
203
|
+
| `--overwrite` | (none) | off | Recompute even if `<name>.duckdb` already holds this exact query. |
|
|
199
204
|
| `--psql-dsn` | `OSMSG_PSQL_DSN` | unset | libpq DSN for `-f psql`. |
|
|
200
205
|
| `--psql-bulk` | `OSMSG_PSQL_BULK` | off | Faster first full load to Postgres. |
|
|
201
206
|
| `--history` / `--no-history` | `OSMSG_HISTORY` | on | Read covered months from the published dataset. |
|
|
202
207
|
| `--history-url` | `OSMSG_HISTORY_URL` | `osmsg-history` | Published dataset location. |
|
|
203
208
|
| `--insert` | (none) | off | Load history into the store and seed resume, then exit. No window loads all of it. |
|
|
204
|
-
| `--osh-file` / `--changeset-file` | (none) | unset | Insert from local planet history + changeset files
|
|
209
|
+
| `--osh-file` / `--changeset-file` | (none) | unset | Insert from local planet history + changeset files. |
|
|
205
210
|
| `--changeset-pad-hours` | `OSMSG_CHANGESET_PAD_HOURS` | `1` | See below. |
|
|
206
211
|
| (auto-bootstrap on `--update`) | `OSMSG_BOOTSTRAP` | `hour` | `hour`, `day`, or `week`. Used when `--update` runs against an empty DB. |
|
|
207
212
|
| (auto-bootstrap on `--update`) | `OSMSG_BOOTSTRAP_DAYS` | unset | Integer N; overrides `OSMSG_BOOTSTRAP`. |
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.2"
|
|
@@ -271,6 +271,14 @@ def main(
|
|
|
271
271
|
str | None,
|
|
272
272
|
typer.Option("--changeset-file", help="Changeset dump (.osm.bz2) paired with --osh-file."),
|
|
273
273
|
] = None,
|
|
274
|
+
overwrite: Annotated[
|
|
275
|
+
bool,
|
|
276
|
+
typer.Option(
|
|
277
|
+
"--overwrite",
|
|
278
|
+
help="Recompute even if <name>.duckdb already holds this exact query; otherwise a rerun "
|
|
279
|
+
"that only changes the output format re-exports from the existing store.",
|
|
280
|
+
),
|
|
281
|
+
] = False,
|
|
274
282
|
) -> None:
|
|
275
283
|
"""Run osmsg. With no subcommand this generates stats (or loads history with --insert)."""
|
|
276
284
|
if ctx.invoked_subcommand is not None:
|
|
@@ -338,6 +346,7 @@ def main(
|
|
|
338
346
|
insert=insert,
|
|
339
347
|
osh_file=osh_file,
|
|
340
348
|
changeset_file=changeset_file,
|
|
349
|
+
overwrite=overwrite,
|
|
341
350
|
)
|
|
342
351
|
|
|
343
352
|
if last is not None:
|
|
@@ -5,10 +5,6 @@ import duckdb
|
|
|
5
5
|
from ..exceptions import OsmsgError
|
|
6
6
|
from ..pg_schema import PG_SCHEMA
|
|
7
7
|
|
|
8
|
-
# Secondary indexes and foreign keys that make a row-by-row insert slow. For a one-time bulk load
|
|
9
|
-
# they are dropped before the COPY and rebuilt once after (one index build + one FK validation,
|
|
10
|
-
# instead of maintaining them per row). Primary keys stay, because the ON CONFLICT upserts need them.
|
|
11
|
-
# Indexes are (name, create-sql); foreign keys are (table, name, add-clause).
|
|
12
8
|
_BULK_INDEXES = [
|
|
13
9
|
("idx_changesets_created_at", "CREATE INDEX idx_changesets_created_at ON changesets (created_at)"),
|
|
14
10
|
("idx_changesets_geom", "CREATE INDEX idx_changesets_geom ON changesets USING GIST (geom)"),
|
|
@@ -25,8 +21,6 @@ _BULK_FKS = [
|
|
|
25
21
|
]
|
|
26
22
|
|
|
27
23
|
|
|
28
|
-
# Bulk loads push the big tables in this many changeset_id ranges, each its own statement and so its
|
|
29
|
-
# own commit, so a failure costs one range instead of rolling back the whole multi-GB load.
|
|
30
24
|
_BULK_COMMIT_CHUNKS = 32
|
|
31
25
|
|
|
32
26
|
|
|
@@ -102,9 +96,6 @@ def to_psql(conn: duckdb.DuckDBPyConnection, dsn: str, *, bulk_load: bool = Fals
|
|
|
102
96
|
)
|
|
103
97
|
|
|
104
98
|
if bulk_load:
|
|
105
|
-
# Stream rows instead of buffering them to preserve order; buffering 180M+ JSON-bearing
|
|
106
|
-
# rows is what exhausts memory in a single INSERT. Then drop the secondary indexes and
|
|
107
|
-
# foreign keys so the load does not maintain them per row.
|
|
108
99
|
conn.execute("SET preserve_insertion_order = false")
|
|
109
100
|
for table, name, _add in _BULK_FKS:
|
|
110
101
|
_pg(conn, f"ALTER TABLE {table} DROP CONSTRAINT IF EXISTS {name}")
|
|
@@ -114,8 +105,6 @@ def to_psql(conn: duckdb.DuckDBPyConnection, dsn: str, *, bulk_load: bool = Fals
|
|
|
114
105
|
_push_chunked(conn, "changesets", _push_changesets)
|
|
115
106
|
_push_chunked(conn, "changeset_stats", _push_changeset_stats)
|
|
116
107
|
elif _pg_has_history(conn):
|
|
117
|
-
# The history layer (seq_id=0) is already in PG from the bulk load and never changes, so an
|
|
118
|
-
# incremental --update pushes only the live layer and its parents, not the 180M history rows.
|
|
119
108
|
live_ids = "changeset_id IN (SELECT changeset_id FROM changeset_stats WHERE seq_id <> 0)"
|
|
120
109
|
conn.execute(
|
|
121
110
|
"INSERT INTO pg_target.users SELECT * FROM users "
|
|
@@ -124,7 +113,6 @@ def to_psql(conn: duckdb.DuckDBPyConnection, dsn: str, *, bulk_load: bool = Fals
|
|
|
124
113
|
_push_changesets(conn, f"WHERE {live_ids}")
|
|
125
114
|
_push_changeset_stats(conn, "WHERE seq_id <> 0")
|
|
126
115
|
else:
|
|
127
|
-
# No history in PG (a plain live target): push everything (live rows are all seq_id<>0).
|
|
128
116
|
conn.execute("INSERT INTO pg_target.users SELECT * FROM users ON CONFLICT DO NOTHING")
|
|
129
117
|
_push_changesets(conn)
|
|
130
118
|
_push_changeset_stats(conn)
|
|
@@ -141,7 +129,6 @@ def to_psql(conn: duckdb.DuckDBPyConnection, dsn: str, *, bulk_load: bool = Fals
|
|
|
141
129
|
)
|
|
142
130
|
|
|
143
131
|
if bulk_load:
|
|
144
|
-
# Rebuild once, with more memory for the sort-based index builds, then refresh planner stats.
|
|
145
132
|
for table, name, add in _BULK_FKS:
|
|
146
133
|
_pg(conn, f"ALTER TABLE {table} ADD CONSTRAINT {name} {add}")
|
|
147
134
|
for _name, create in _BULK_INDEXES:
|
osmsg-1.2.2/osmsg/gui.py
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
"""Minimal tkinter desktop UI for running osmsg and saving the output."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import datetime as dt
|
|
6
|
+
import os
|
|
7
|
+
import queue
|
|
8
|
+
import sys
|
|
9
|
+
import threading
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from .exceptions import NoDataFoundError, OsmsgError
|
|
14
|
+
from .pipeline import RunConfig, run
|
|
15
|
+
|
|
16
|
+
# UTC tzinfo attached to every date parsed from the form.
UTC = dt.UTC
# Export formats offered as checkboxes, in display order; build_config keeps the ticked ones.
FORMATS = ["parquet", "csv", "json", "markdown"]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _parse_date(value: str) -> dt.datetime | None:
    """Parse a form date into a UTC-aware datetime.

    Blank (or whitespace-only) input yields None. Accepted layouts are
    ``YYYY-MM-DD HH:MM:SS`` and ``YYYY-MM-DD``; anything else raises OsmsgError.
    """
    text = value.strip()
    if text == "":
        return None
    for layout in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d"):
        try:
            naive = dt.datetime.strptime(text, layout)
        except ValueError:
            # Not this layout; fall through to the next candidate.
            continue
        return naive.replace(tzinfo=UTC)
    raise OsmsgError(f"Unrecognized date: {text!r}. Use YYYY-MM-DD.")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _split(value: str | None) -> list[str] | None:
|
|
33
|
+
items: list[str] = [part.strip() for part in (value or "").split(",") if part.strip()]
|
|
34
|
+
return items if items else None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def build_config(form: dict[str, object], output_dir: str) -> RunConfig:
    """Map the form fields to a RunConfig, raising OsmsgError on invalid input.

    Args:
        form: Field name -> value. Reads ``name``, ``start``, ``end``, ``hashtags``,
            ``tags``, ``all_tags``, ``summary`` and one truthy flag per entry in FORMATS.
        output_dir: Passed on as RunConfig.output_dir; an empty string falls back to ".".

    Returns:
        The RunConfig built from the form values.

    Raises:
        OsmsgError: No format is ticked, the start date is blank, or a date does not parse.
    """
    formats = [name for name in FORMATS if form.get(name)]
    if not formats:
        raise OsmsgError("Pick at least one output format.")
    # Use `or ""` rather than a .get() default: a key that is present but None must
    # count as blank instead of being stringified to "None" and rejected as a bad date.
    start = _parse_date(str(form.get("start") or ""))
    if start is None:
        raise OsmsgError("Start date is required (YYYY-MM-DD).")
    return RunConfig(
        name=str(form.get("name") or "stats"),
        start_date=start,
        end_date=_parse_date(str(form.get("end") or "")),
        hashtags=_split(str(form.get("hashtags") or "")),
        additional_tags=_split(str(form.get("tags") or "")),
        tag_mode="all" if form.get("all_tags") else "none",
        summary=bool(form.get("summary")),
        formats=formats,
        output_dir=Path(output_dir or "."),
    )
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _open_folder(path: Path) -> None:
    """Open `path` with the platform opener.

    Uses ``os.startfile`` on Windows, ``open`` on macOS and ``xdg-open`` everywhere
    else. The opener's exit status is ignored (``check=False``).
    """
    if sys.platform == "win32":
        os.startfile(path)  # noqa: S606
        return

    # Imported lazily: only the non-Windows branches spawn a process.
    import subprocess

    opener = "open" if sys.platform == "darwin" else "xdg-open"
    subprocess.run([opener, str(path)], check=False)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class _Redirector:
|
|
72
|
+
def __init__(self, sink: queue.Queue) -> None:
|
|
73
|
+
self.sink = sink
|
|
74
|
+
|
|
75
|
+
def write(self, text: str) -> None:
|
|
76
|
+
if text:
|
|
77
|
+
self.sink.put(("log", text))
|
|
78
|
+
|
|
79
|
+
def flush(self) -> None:
|
|
80
|
+
pass
|
|
81
|
+
|
|
82
|
+
def isatty(self) -> bool:
|
|
83
|
+
return False
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class App:
    """Tkinter window that collects run options, runs osmsg on a worker thread, and shows its log."""

    def __init__(self) -> None:
        """Build the form, the buttons and the log pane, then start polling the event queue."""
        # Imported here rather than at module level, so importing this module
        # does not pull in tkinter.
        import tkinter as tk
        from tkinter import filedialog, scrolledtext, ttk

        self._tk = tk
        self._filedialog = filedialog
        # Worker thread -> UI thread channel of (kind, payload) tuples.
        self.events: queue.Queue = queue.Queue()
        self.out_dir = str(Path.home() / "osmsg")

        self.root = tk.Tk()
        self.root.title("osmsg")
        # Form key -> tkinter variable; read in bulk by _on_run.
        self.vars: dict[str, Any] = {}
        frame = ttk.Frame(self.root, padding=12)
        frame.grid(sticky="nsew")

        # (label, form key, default text) for each free-text entry.
        rows = [
            ("Name", "name", "stats"),
            ("Start (YYYY-MM-DD)", "start", ""),
            ("End (blank = now)", "end", ""),
            ("Hashtags (comma-sep)", "hashtags", ""),
            ("Tags (comma-sep)", "tags", ""),
        ]
        for i, (label, key, default) in enumerate(rows):
            ttk.Label(frame, text=label).grid(row=i, column=0, sticky="w", pady=2)
            var = tk.StringVar(value=default)
            ttk.Entry(frame, textvariable=var, width=40).grid(row=i, column=1, columnspan=3, sticky="we", pady=2)
            self.vars[key] = var

        # Remaining widgets are laid out below the text entries; derive their rows
        # from the entry count so adding an entry cannot overlap them.
        options_row = len(rows)
        formats_row = options_row + 1
        output_row = options_row + 2
        buttons_row = options_row + 3
        log_row = options_row + 4

        self.vars["all_tags"] = tk.BooleanVar()
        self.vars["summary"] = tk.BooleanVar()
        ttk.Checkbutton(frame, text="All tags", variable=self.vars["all_tags"]).grid(
            row=options_row, column=0, sticky="w"
        )
        ttk.Checkbutton(frame, text="Daily summary", variable=self.vars["summary"]).grid(
            row=options_row, column=1, sticky="w"
        )

        fmt_frame = ttk.LabelFrame(frame, text="Formats", padding=6)
        fmt_frame.grid(row=formats_row, column=0, columnspan=4, sticky="we", pady=6)
        for i, name in enumerate(FORMATS):
            # parquet and csv start ticked; the other formats are opt-in.
            var = tk.BooleanVar(value=name in ("parquet", "csv"))
            ttk.Checkbutton(fmt_frame, text=name, variable=var).grid(row=0, column=i, padx=4)
            self.vars[name] = var

        self.out_label = ttk.Label(frame, text=f"Output: {self.out_dir}")
        self.out_label.grid(row=output_row, column=0, columnspan=3, sticky="w")
        ttk.Button(frame, text="Choose folder", command=self._choose_folder).grid(
            row=output_row, column=3, sticky="e"
        )

        self.run_btn = ttk.Button(frame, text="Run", command=self._on_run)
        self.run_btn.grid(row=buttons_row, column=0, pady=8, sticky="w")
        self.open_btn = ttk.Button(frame, text="Open output folder", command=self._open_output)
        self.open_btn.grid(row=buttons_row, column=1, pady=8, sticky="w")

        self.log = scrolledtext.ScrolledText(frame, width=70, height=14, state="disabled")
        self.log.grid(row=log_row, column=0, columnspan=4, sticky="nsew")
        self.root.after(120, self._drain)

    def _choose_folder(self) -> None:
        """Ask for an output directory and, if one was picked, store and display it."""
        chosen = self._filedialog.askdirectory(initialdir=self.out_dir)
        if chosen:
            self.out_dir = chosen
            self.out_label.config(text=f"Output: {self.out_dir}")

    def _open_output(self) -> None:
        """Open the output folder, reporting failures in the log pane.

        The default folder is not created by this window, so it may not exist yet;
        the platform opener can also be missing. Either case would otherwise raise
        inside a Tk callback where the user never sees it.
        """
        target = Path(self.out_dir)
        if not target.is_dir():
            self._append(f"\nOutput folder does not exist yet: {self.out_dir}\n")
            return
        try:
            _open_folder(target)
        except OSError as exc:
            self._append(f"\nCould not open {self.out_dir}: {exc}\n")

    def _append(self, text: str) -> None:
        """Append `text` to the log pane and scroll to the end."""
        # The widget is kept disabled so the user cannot edit it; enable it only to insert.
        self.log.config(state="normal")
        self.log.insert("end", text)
        self.log.see("end")
        self.log.config(state="disabled")

    def _on_run(self) -> None:
        """Validate the form and start the run on a daemon thread.

        Validation errors are written to the log and nothing is started. Otherwise
        the Run button stays disabled until _drain sees the worker's final event.
        """
        try:
            cfg = build_config({k: v.get() for k, v in self.vars.items()}, self.out_dir)
        except OsmsgError as exc:
            self._append(f"\n{exc}\n")
            return
        self.run_btn.config(state="disabled")
        self._append(f"\nRunning into {self.out_dir} ...\n")
        threading.Thread(target=self._worker, args=(cfg,), daemon=True).start()

    def _worker(self, cfg: RunConfig) -> None:
        """Run the pipeline off the UI thread and post exactly one "done" event.

        sys.stdout and sys.stderr are swapped for a _Redirector for the duration of
        the run, so anything printed is queued as "log" events, and are restored
        afterwards even on failure.
        """
        saved = sys.stdout, sys.stderr
        sys.stdout = sys.stderr = _Redirector(self.events)  # type: ignore[assignment]
        try:
            result = run(cfg)
            self.events.put(("done", f"Done. {result['rows']} rows. Files in {self.out_dir}"))
        except NoDataFoundError:
            self.events.put(("done", "No data found for that range."))
        except OsmsgError as exc:
            self.events.put(("done", f"Error: {exc}"))
        except Exception as exc:
            # Thread boundary: report anything unexpected instead of dying silently,
            # otherwise the Run button would never be re-enabled.
            self.events.put(("done", f"Unexpected error: {type(exc).__name__}: {exc}"))
        finally:
            sys.stdout, sys.stderr = saved

    def _drain(self) -> None:
        """Apply all queued events to the UI, then reschedule itself in 120 ms.

        "log" payloads are appended verbatim; any other kind is treated as the
        final status line and re-enables the Run button.
        """
        try:
            while True:
                kind, payload = self.events.get_nowait()
                if kind == "log":
                    self._append(payload)
                else:
                    self._append(f"\n{payload}\n")
                    self.run_btn.config(state="normal")
        except queue.Empty:
            # Queue exhausted for this tick.
            pass
        self.root.after(120, self._drain)

    def run(self) -> None:
        """Enter the Tk main loop."""
        self.root.mainloop()
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def launch() -> None:
    """Create the App window and enter its Tk main loop."""
    app = App()
    app.run()
|
|
@@ -10,26 +10,26 @@ from dataclasses import dataclass
|
|
|
10
10
|
import duckdb
|
|
11
11
|
import requests
|
|
12
12
|
|
|
13
|
-
from .ui import info, warn
|
|
13
|
+
from .ui import info, progress_bar, warn
|
|
14
14
|
|
|
15
15
|
UTC = dt.UTC
|
|
16
16
|
SCHEMA_VERSION = 1
|
|
17
17
|
DEFAULT_HISTORY_URL = "hf://datasets/kshitijrajsharma/osmsg-history"
|
|
18
|
-
HISTORY_SEQ_ID = 0
|
|
18
|
+
HISTORY_SEQ_ID = 0
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
@dataclass
|
|
22
22
|
class Manifest:
|
|
23
23
|
schema_version: int
|
|
24
|
-
min_month: dt.datetime
|
|
25
|
-
frontier: dt.datetime
|
|
24
|
+
min_month: dt.datetime
|
|
25
|
+
frontier: dt.datetime
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
@dataclass
|
|
29
29
|
class WindowSplit:
|
|
30
30
|
remote_start: dt.datetime | None
|
|
31
|
-
remote_end: dt.datetime | None
|
|
32
|
-
live_start: dt.datetime
|
|
31
|
+
remote_end: dt.datetime | None
|
|
32
|
+
live_start: dt.datetime
|
|
33
33
|
|
|
34
34
|
@property
|
|
35
35
|
def has_remote(self) -> bool:
|
|
@@ -51,7 +51,6 @@ class RemoteFilters:
|
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
def _manifest_http_url(history_url: str) -> str:
|
|
54
|
-
# hf://datasets/<repo> -> https://huggingface.co/datasets/<repo>/resolve/main/manifest.json
|
|
55
54
|
if history_url.startswith("hf://datasets/"):
|
|
56
55
|
repo = history_url[len("hf://datasets/") :]
|
|
57
56
|
return f"https://huggingface.co/datasets/{repo}/resolve/main/manifest.json"
|
|
@@ -78,7 +77,7 @@ def fetch_manifest(history_url: str, timeout: int = 15) -> Manifest | None:
|
|
|
78
77
|
return None
|
|
79
78
|
payload = response.json()
|
|
80
79
|
else:
|
|
81
|
-
with open(url) as handle:
|
|
80
|
+
with open(url) as handle:
|
|
82
81
|
payload = json.load(handle)
|
|
83
82
|
except (requests.RequestException, OSError, ValueError) as exc:
|
|
84
83
|
warn(f"history: manifest unreachable ({type(exc).__name__}); using live path.")
|
|
@@ -124,9 +123,8 @@ def _months(start: dt.datetime, end: dt.datetime) -> list[tuple[int, int]]:
|
|
|
124
123
|
|
|
125
124
|
|
|
126
125
|
def _partition_list(base: str, dataset: str, months: list[tuple[int, int]]) -> str | None:
|
|
127
|
-
"""Direct read_parquet() over the
|
|
128
|
-
|
|
129
|
-
since a converted slice may lack a partition (e.g. a month with metadata but no counted edits)."""
|
|
126
|
+
"""Direct read_parquet() over the given month partitions (local bases filtered to existing files),
|
|
127
|
+
or None when none exist."""
|
|
130
128
|
root = base.rstrip("/")
|
|
131
129
|
remote = root.startswith(("hf://", "http://", "https://", "s3://"))
|
|
132
130
|
files = [f"{root}/{dataset}/year={year}/month={month}/data.parquet" for (year, month) in months]
|
|
@@ -138,9 +136,7 @@ def _partition_list(base: str, dataset: str, months: list[tuple[int, int]]) -> s
|
|
|
138
136
|
|
|
139
137
|
|
|
140
138
|
def _hashtag_predicate(hashtags: list[str], exact_lookup: bool) -> str:
|
|
141
|
-
"""SQL predicate
|
|
142
|
-
Whole-token (case-insensitive) with exact_lookup, otherwise substring. hashtags are already
|
|
143
|
-
canonicalised to a leading '#'."""
|
|
139
|
+
"""SQL predicate matching the changesets `hashtags` list: whole-token with exact_lookup, else substring."""
|
|
144
140
|
needles = [h.lower() for h in hashtags]
|
|
145
141
|
if exact_lookup:
|
|
146
142
|
terms = ", ".join(f"'{n}'" for n in needles)
|
|
@@ -160,10 +156,6 @@ def ingest_remote(
|
|
|
160
156
|
if split.remote_start is None or split.remote_end is None:
|
|
161
157
|
return 0
|
|
162
158
|
months = _months(split.remote_start, split.remote_end)
|
|
163
|
-
changesets_src = _partition_list(history_url, "changesets", months)
|
|
164
|
-
changefiles_src = _partition_list(history_url, "changefiles", months)
|
|
165
|
-
if changesets_src is None and changefiles_src is None:
|
|
166
|
-
return 0
|
|
167
159
|
start_iso = split.remote_start.astimezone(UTC).isoformat()
|
|
168
160
|
end_iso = split.remote_end.astimezone(UTC).isoformat()
|
|
169
161
|
in_window = f"created_at >= TIMESTAMPTZ '{start_iso}' AND created_at < TIMESTAMPTZ '{end_iso}'"
|
|
@@ -172,21 +164,8 @@ def ingest_remote(
|
|
|
172
164
|
conn.execute("INSTALL spatial; LOAD spatial;")
|
|
173
165
|
if history_url.startswith(("hf://", "http://", "https://", "s3://")):
|
|
174
166
|
conn.execute("INSTALL httpfs; LOAD httpfs;")
|
|
175
|
-
# Ride out HF rate-limits on multi-partition reads instead of failing the run.
|
|
176
167
|
conn.execute("SET http_retries=10; SET http_retry_wait_ms=2000; SET http_retry_backoff=1.5;")
|
|
177
168
|
|
|
178
|
-
info(f"history: remote ingest {start_iso} -> {end_iso} ({len(months)} month partitions) from {history_url}")
|
|
179
|
-
|
|
180
|
-
if changesets_src is not None:
|
|
181
|
-
# Names for everyone in the window; every changeset_stats uid has a changeset row here.
|
|
182
|
-
conn.execute(
|
|
183
|
-
f"""INSERT INTO users
|
|
184
|
-
SELECT uid, any_value(username) FROM {changesets_src}
|
|
185
|
-
WHERE {in_window} AND username IS NOT NULL
|
|
186
|
-
GROUP BY uid
|
|
187
|
-
ON CONFLICT (uid) DO NOTHING"""
|
|
188
|
-
)
|
|
189
|
-
|
|
190
169
|
changeset_preds = [in_window]
|
|
191
170
|
if filters.hashtags:
|
|
192
171
|
changeset_preds.append(_hashtag_predicate(filters.hashtags, filters.exact_lookup))
|
|
@@ -199,51 +178,55 @@ def ingest_remote(
|
|
|
199
178
|
changeset_preds.append(f"uid IN (SELECT uid FROM users WHERE username IN ({names}))")
|
|
200
179
|
changeset_where = " AND ".join(changeset_preds)
|
|
201
180
|
|
|
202
|
-
# Always populate changesets: every changeset_stats row needs a parent row (the live path keeps
|
|
203
|
-
# this invariant via stubs, and Postgres enforces it as a foreign key). A metadata filter narrows
|
|
204
|
-
# which changesets (and thus which stats) are kept; a plain run keeps all in the window.
|
|
205
|
-
if changesets_src is not None:
|
|
206
|
-
conn.execute(
|
|
207
|
-
f"""INSERT INTO changesets
|
|
208
|
-
SELECT changeset_id, uid, created_at, hashtags, editor,
|
|
209
|
-
CASE WHEN min_lon IS NOT NULL
|
|
210
|
-
THEN ST_MakeEnvelope(min_lon, min_lat, max_lon, max_lat) END
|
|
211
|
-
FROM {changesets_src} WHERE {changeset_where}
|
|
212
|
-
ON CONFLICT (changeset_id) DO NOTHING"""
|
|
213
|
-
)
|
|
214
|
-
|
|
215
181
|
stats_preds = [in_window]
|
|
216
182
|
if filters.has_metadata_filter:
|
|
217
|
-
# Keep element stats only for changesets that passed the metadata filter above.
|
|
218
183
|
stats_preds.append("changeset_id IN (SELECT changeset_id FROM changesets)")
|
|
219
184
|
stats_where = " AND ".join(stats_preds)
|
|
220
185
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
186
|
+
info(f"history: remote ingest {start_iso} -> {end_iso} ({len(months)} month partitions) from {history_url}")
|
|
187
|
+
|
|
188
|
+
with progress_bar(len(months), unit="months", description="Reading history") as advance:
|
|
189
|
+
for month in months:
|
|
190
|
+
changesets_src = _partition_list(history_url, "changesets", [month])
|
|
191
|
+
changefiles_src = _partition_list(history_url, "changefiles", [month])
|
|
192
|
+
if changesets_src is not None:
|
|
193
|
+
conn.execute(
|
|
194
|
+
f"""INSERT INTO users
|
|
195
|
+
SELECT uid, any_value(username) FROM {changesets_src}
|
|
196
|
+
WHERE {in_window} AND username IS NOT NULL
|
|
197
|
+
GROUP BY uid ON CONFLICT (uid) DO NOTHING"""
|
|
198
|
+
)
|
|
199
|
+
conn.execute(
|
|
200
|
+
f"""INSERT INTO changesets
|
|
201
|
+
SELECT changeset_id, uid, created_at, hashtags, editor,
|
|
202
|
+
CASE WHEN min_lon IS NOT NULL
|
|
203
|
+
THEN ST_MakeEnvelope(min_lon, min_lat, max_lon, max_lat) END
|
|
204
|
+
FROM {changesets_src} WHERE {changeset_where}
|
|
205
|
+
ON CONFLICT (changeset_id) DO NOTHING"""
|
|
206
|
+
)
|
|
207
|
+
if changefiles_src is not None:
|
|
208
|
+
conn.execute(
|
|
209
|
+
f"""INSERT INTO changeset_stats
|
|
210
|
+
SELECT changeset_id, {HISTORY_SEQ_ID} AS seq_id, uid,
|
|
211
|
+
nodes_created, nodes_modified, nodes_deleted,
|
|
212
|
+
ways_created, ways_modified, ways_deleted,
|
|
213
|
+
rels_created, rels_modified, rels_deleted,
|
|
214
|
+
poi_created, poi_modified, tag_stats
|
|
215
|
+
FROM {changefiles_src} WHERE {stats_where}
|
|
216
|
+
ON CONFLICT (seq_id, changeset_id) DO NOTHING"""
|
|
217
|
+
)
|
|
218
|
+
advance()
|
|
219
|
+
|
|
232
220
|
row = conn.execute(f"SELECT count(*) FROM changeset_stats WHERE seq_id = {HISTORY_SEQ_ID}").fetchone()
|
|
233
221
|
return row[0] if row else 0
|
|
234
222
|
|
|
235
223
|
|
|
236
|
-
# Resume one day before the frontier, not at it. A changeset can stay open for up to 24h, so its
|
|
237
|
-
# edits can straddle the frontier, and converting a date to a replication sequence is not exact. The
|
|
238
|
-
# re-scanned day overlaps the history layer, which the seq_id=0 dedup removes, so this never misses an
|
|
239
|
-
# edit and never double counts.
|
|
240
224
|
RESUME_SAFETY = dt.timedelta(days=1)
|
|
241
225
|
|
|
242
226
|
|
|
243
227
|
def seed_resume_at(conn: duckdb.DuckDBPyConnection, resume_at: dt.datetime, replication_url: str) -> dt.datetime | None:
|
|
244
|
-
"""Seed
|
|
245
|
-
|
|
246
|
-
resume timestamp, or None if no sequence resolves at that time."""
|
|
228
|
+
"""Seed `state` so `osmsg --update` resumes at `resume_at` on `replication_url`. Returns resume_at,
|
|
229
|
+
or None if no sequence resolves."""
|
|
247
230
|
from osmium.replication.server import ReplicationServer
|
|
248
231
|
|
|
249
232
|
from .db.schema import upsert_state
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
"""Convert a planet .osh history plus a changeset dump into the changefiles/changesets parquet
|
|
2
|
-
datasets, out of core via osmsg's own DuckDB tables.
|
|
3
|
-
batches, then aggregates and joins in DuckDB (a changeset's edits are scattered across the .osh, so an
|
|
4
|
-
in-memory pass OOMs at planet scale)."""
|
|
2
|
+
datasets, out of core via osmsg's own DuckDB tables."""
|
|
5
3
|
|
|
6
4
|
import concurrent.futures as cf
|
|
7
5
|
import datetime as dt
|
|
@@ -20,11 +18,8 @@ from .pbf_split import split_pbf
|
|
|
20
18
|
|
|
21
19
|
BATCH = 1_000_000
|
|
22
20
|
CREATE, MODIFY, DELETE = 0, 1, 2
|
|
23
|
-
# Out-of-core settings for planet-scale aggregation. Leave headroom below physical RAM; spill to disk.
|
|
24
21
|
DUCKDB_MEMORY_LIMIT = "40GB"
|
|
25
22
|
DUCKDB_THREADS = 24
|
|
26
|
-
# A global GROUP BY over all string-keyed tag rows OOMs even with spill, and json_group_object does
|
|
27
|
-
# not spill. Shard raw tags to disk by changeset_id % K, then aggregate each shard independently.
|
|
28
23
|
TAG_SHARDS = 64
|
|
29
24
|
|
|
30
25
|
ELEM_SCHEMA = pa.schema(
|
|
@@ -162,9 +157,7 @@ def stream_changesets(dump: str, start: dt.datetime, end: dt.datetime, work: pat
|
|
|
162
157
|
|
|
163
158
|
|
|
164
159
|
def build_tables(con: duckdb.DuckDBPyConnection, work: pathlib.Path) -> None:
|
|
165
|
-
"""Populate osmsg's tables (users, changesets, changeset_stats) from the streamed raw rows.
|
|
166
|
-
raw_elements_*/raw_tags_* so single-process and split-parallel runs both work: one global GROUP BY
|
|
167
|
-
recombines each changeset's edits across parts."""
|
|
160
|
+
"""Populate osmsg's tables (users, changesets, changeset_stats) from the streamed raw rows."""
|
|
168
161
|
con.execute("INSTALL json; LOAD json;")
|
|
169
162
|
work = pathlib.Path(work)
|
|
170
163
|
cs = (work / "raw_changesets.parquet").as_posix()
|
|
@@ -209,8 +202,6 @@ def build_tables(con: duckdb.DuckDBPyConnection, work: pathlib.Path) -> None:
|
|
|
209
202
|
a.rels_created, a.rels_modified, a.rels_deleted,
|
|
210
203
|
a.poi_created, a.poi_modified"""
|
|
211
204
|
for b in range(TAG_SHARDS):
|
|
212
|
-
# Insert this shard's agg changesets; attach tag_stats only if the shard has tags (tiny inputs
|
|
213
|
-
# and edit-only changesets carry none).
|
|
214
205
|
shard_dir = shards / f"shard={b}"
|
|
215
206
|
if shard_dir.is_dir():
|
|
216
207
|
shard_glob = (shard_dir / "*.parquet").as_posix()
|
|
@@ -244,11 +235,8 @@ def build_tables(con: duckdb.DuckDBPyConnection, work: pathlib.Path) -> None:
|
|
|
244
235
|
|
|
245
236
|
|
|
246
237
|
def export_parquet(con: duckdb.DuckDBPyConnection, out: pathlib.Path) -> None:
|
|
247
|
-
"""Materialise the two datasets as persisted tables
|
|
248
|
-
partition; a TEMP table would hold 180M JSON rows in RAM), then write Morton-sorted partitions."""
|
|
238
|
+
"""Materialise the two datasets as persisted tables, then write Morton-sorted partitions."""
|
|
249
239
|
con.execute(MORTON_MACROS)
|
|
250
|
-
# changefiles created_at falls back to the element edit time when the changeset predates the window,
|
|
251
|
-
# so in-window edits are never dropped.
|
|
252
240
|
con.execute(
|
|
253
241
|
f"""CREATE TABLE changefiles_all AS
|
|
254
242
|
SELECT s.* EXCLUDE (seq_id),
|
|
@@ -292,9 +280,8 @@ def aggregate(work: pathlib.Path, out: pathlib.Path) -> pathlib.Path:
|
|
|
292
280
|
def convert(
|
|
293
281
|
osh: str, changesets: str, start: dt.datetime, end: dt.datetime, work_dir: pathlib.Path, parts: int = 1
|
|
294
282
|
) -> pathlib.Path:
|
|
295
|
-
"""Convert one .osh history + changeset dump to the two parquet datasets under `work_dir/out
|
|
296
|
-
With parts>1 the history is split
|
|
297
|
-
directory holding changefiles/, changesets/, and stats.duckdb."""
|
|
283
|
+
"""Convert one .osh history + changeset dump to the two parquet datasets under `work_dir/out`,
|
|
284
|
+
returned as a path. With parts>1 the history is split and streamed concurrently."""
|
|
298
285
|
work = pathlib.Path(work_dir)
|
|
299
286
|
raw = work / "raw"
|
|
300
287
|
raw.mkdir(parents=True, exist_ok=True)
|
|
@@ -12,9 +12,6 @@ from ..exceptions import OsmsgError
|
|
|
12
12
|
from .parquet import GEOM_COLS, MORTON_MACROS, write_partitions
|
|
13
13
|
|
|
14
14
|
UTC = dt.UTC
|
|
15
|
-
# Planet-wide edits are continuous, so a complete month reaches within minutes of its end. A larger
|
|
16
|
-
# shortfall means the source day diffs did not cover the whole month (a mid-day snapshot or lagging
|
|
17
|
-
# replication), so the partition would be published short, the exact gap the read-side backstep masks.
|
|
18
15
|
COMPLETENESS_TOLERANCE = dt.timedelta(hours=1)
|
|
19
16
|
|
|
20
17
|
|
|
@@ -6,8 +6,6 @@ import duckdb
|
|
|
6
6
|
|
|
7
7
|
ROW_GROUP_SIZE = 100_000
|
|
8
8
|
|
|
9
|
-
# Morton(centroid) as native SQL macros (vectorized): scale lon/lat to 16-bit and interleave the bits
|
|
10
|
-
# so 2D locality maps to a contiguous 1D key. A Python UDF in ORDER BY is ~10x slower at planet scale.
|
|
11
9
|
MORTON_MACROS = """
|
|
12
10
|
CREATE OR REPLACE MACRO _s1(v) AS ((v | (v << 8)) & 16711935);
|
|
13
11
|
CREATE OR REPLACE MACRO _s2(v) AS ((_s1(v) | (_s1(v) << 4)) & 252645135);
|
|
@@ -19,7 +17,6 @@ CREATE OR REPLACE MACRO morton2(lon, lat) AS (
|
|
|
19
17
|
);
|
|
20
18
|
"""
|
|
21
19
|
|
|
22
|
-
# lon/lat centroid plus bbox min/max derived from changesets.geom (osmsg stores the bbox envelope).
|
|
23
20
|
GEOM_COLS = (
|
|
24
21
|
"ST_X(ST_Centroid(c.geom)) AS lon, ST_Y(ST_Centroid(c.geom)) AS lat, "
|
|
25
22
|
"ST_XMin(c.geom) AS min_lon, ST_YMin(c.geom) AS min_lat, "
|
|
@@ -30,9 +27,8 @@ GEOM_COLS = (
|
|
|
30
27
|
def write_partitions(
|
|
31
28
|
con: duckdb.DuckDBPyConnection, view: str, base: pathlib.Path, order_by: str = "morton2(lon, lat)"
|
|
32
29
|
) -> None:
|
|
33
|
-
"""Write one parquet file per year/month partition, each sorted by `order_by`.
|
|
34
|
-
|
|
35
|
-
min/max. `view` must expose integer `y`, `m` partition columns."""
|
|
30
|
+
"""Write one parquet file per year/month partition, each sorted by `order_by`. `view` must expose
|
|
31
|
+
integer `y`, `m` partition columns."""
|
|
36
32
|
base.mkdir(parents=True, exist_ok=True)
|
|
37
33
|
for year, month in con.execute(f"SELECT DISTINCT y, m FROM {view} ORDER BY y, m").fetchall():
|
|
38
34
|
out = base / f"year={year}" / f"month={month}"
|
|
@@ -19,7 +19,6 @@ def read_blob(handle) -> tuple[bytes, bytes, str] | None:
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
def _parse_blobheader(buf: bytes) -> tuple[str, int]:
|
|
22
|
-
# BlobHeader: field 1 = type (length-delimited string), field 3 = datasize (varint).
|
|
23
22
|
blob_type, datasize, i = "", 0, 0
|
|
24
23
|
while i < len(buf):
|
|
25
24
|
key = buf[i]
|
|
@@ -5,6 +5,8 @@ from __future__ import annotations
|
|
|
5
5
|
import concurrent.futures
|
|
6
6
|
import copy
|
|
7
7
|
import datetime as dt
|
|
8
|
+
import hashlib
|
|
9
|
+
import json
|
|
8
10
|
import os
|
|
9
11
|
import shutil
|
|
10
12
|
from dataclasses import dataclass, field
|
|
@@ -95,6 +97,7 @@ class RunConfig:
|
|
|
95
97
|
insert: bool = False
|
|
96
98
|
osh_file: str | None = None
|
|
97
99
|
changeset_file: str | None = None
|
|
100
|
+
overwrite: bool = False
|
|
98
101
|
|
|
99
102
|
|
|
100
103
|
def _resolve_country_urls(countries: list[str]) -> list[str]:
|
|
@@ -232,9 +235,6 @@ def _seed_history_resume(conn, cfg: RunConfig) -> None:
|
|
|
232
235
|
seed_resume_state(conn, cfg.history_url, url)
|
|
233
236
|
|
|
234
237
|
|
|
235
|
-
# minute < hour < day. Day diffs land at 00:00 UTC, which is also an hour and minute boundary, so
|
|
236
|
-
# resuming a finer source at a coarser source's last_ts is disjoint (no edit double-counted or
|
|
237
|
-
# skipped). The reverse can skip the partial current period, so --update only ever auto-refines.
|
|
238
238
|
_GRANULARITY_RANK = {SHORTCUTS["minute"]: 0, SHORTCUTS["hour"]: 1, SHORTCUTS["day"]: 2}
|
|
239
239
|
|
|
240
240
|
|
|
@@ -244,9 +244,7 @@ def _tracked_sources(conn) -> list[str]:
|
|
|
244
244
|
|
|
245
245
|
|
|
246
246
|
def _switch_source(conn, from_url: str, to_url: str) -> None:
|
|
247
|
-
"""
|
|
248
|
-
from_url, so the two sequence spaces never overlap (double count) or gap. Each granularity is a
|
|
249
|
-
separate sequence, so the disjoint-coverage invariant is what keeps stats correct across a switch."""
|
|
247
|
+
"""Resume to_url at from_url's last_ts and retire from_url, so the granularities never overlap or gap."""
|
|
250
248
|
state = get_state(conn, from_url)
|
|
251
249
|
if state is None:
|
|
252
250
|
return
|
|
@@ -258,12 +256,11 @@ def _switch_source(conn, from_url: str, to_url: str) -> None:
|
|
|
258
256
|
|
|
259
257
|
|
|
260
258
|
def _select_update_source(conn, cfg: RunConfig, now: dt.datetime) -> None:
|
|
261
|
-
"""Pick the source `--update` continues
|
|
262
|
-
to a finer granularity as the backlog shrinks; with `--url`, switch to it
|
|
263
|
-
handoffs and a store tracks one planet source at a time, so granularities never overlap."""
|
|
259
|
+
"""Pick the source `--update` continues: without `--url`, continue the tracked source and auto-refine
|
|
260
|
+
to a finer granularity as the backlog shrinks; with `--url`, switch to it via a clean handoff."""
|
|
264
261
|
tracked = _tracked_sources(conn)
|
|
265
262
|
if not tracked:
|
|
266
|
-
return
|
|
263
|
+
return
|
|
267
264
|
if len(tracked) > 1:
|
|
268
265
|
if not cfg.url_explicit:
|
|
269
266
|
cfg.urls = tracked
|
|
@@ -286,9 +283,8 @@ def _select_update_source(conn, cfg: RunConfig, now: dt.datetime) -> None:
|
|
|
286
283
|
|
|
287
284
|
|
|
288
285
|
def _history_live_start(split: WindowSplit, frontier: dt.datetime) -> dt.datetime:
|
|
289
|
-
"""Where the live tail begins after a remote ingest
|
|
290
|
-
|
|
291
|
-
re-scanning lets the seq_id=0 dedup recover the shortfall (the overlap it did cover is dropped)."""
|
|
286
|
+
"""Where the live tail begins after a remote ingest: back up by the safety window when the query
|
|
287
|
+
reached the frontier (the final month may be short), else the split boundary."""
|
|
292
288
|
if split.remote_end == frontier:
|
|
293
289
|
return frontier - RESUME_SAFETY
|
|
294
290
|
return split.live_start
|
|
@@ -351,8 +347,6 @@ def _run_insert(cfg: RunConfig, conn: duckdb.DuckDBPyConnection, db_path: Path)
|
|
|
351
347
|
if cfg.url_explicit:
|
|
352
348
|
seed_urls = cfg.urls
|
|
353
349
|
else:
|
|
354
|
-
# The catch-up gap (now - frontier) is usually weeks; seed the granularity --update can clear
|
|
355
|
-
# quickly instead of crawling minute diffs. --update continues this same source.
|
|
356
350
|
seed_urls = [resolve_url(_pick_replication_for_span(dt.datetime.now(UTC) - split.remote_end))]
|
|
357
351
|
for url in seed_urls:
|
|
358
352
|
seed_resume_at(conn, resume_at, url)
|
|
@@ -367,6 +361,136 @@ def _run_insert(cfg: RunConfig, conn: duckdb.DuckDBPyConnection, db_path: Path)
|
|
|
367
361
|
return {"rows": n, "files": written, "rows_data": [], "summary": None, "start_seq": None, "end_seq": None}
|
|
368
362
|
|
|
369
363
|
|
|
364
|
+
def _query_fingerprint(cfg: RunConfig) -> str:
|
|
365
|
+
"""Stable hash of the query's data-affecting params, excluding output formats."""
|
|
366
|
+
key = {
|
|
367
|
+
"start": cfg.start_date.isoformat() if cfg.start_date else None,
|
|
368
|
+
"end": cfg.end_date.isoformat() if cfg.end_date else None,
|
|
369
|
+
"urls": sorted(cfg.urls),
|
|
370
|
+
"countries": sorted(cfg.countries) if cfg.countries else None,
|
|
371
|
+
"boundary": cfg.boundary,
|
|
372
|
+
"hashtags": sorted(cfg.hashtags) if cfg.hashtags else None,
|
|
373
|
+
"exact_lookup": cfg.exact_lookup,
|
|
374
|
+
"users": sorted(cfg.users_filter) if cfg.users_filter else None,
|
|
375
|
+
"tag_mode": cfg.tag_mode,
|
|
376
|
+
"additional_tags": sorted(cfg.additional_tags) if cfg.additional_tags else None,
|
|
377
|
+
"length_tags": sorted(cfg.length_tags) if cfg.length_tags else None,
|
|
378
|
+
"changeset": cfg.changeset,
|
|
379
|
+
"summary": cfg.summary,
|
|
380
|
+
"tm_stats": cfg.tm_stats,
|
|
381
|
+
"history_mode": cfg.history_mode,
|
|
382
|
+
}
|
|
383
|
+
return hashlib.sha256(json.dumps(key, sort_keys=True).encode()).hexdigest()
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def _read_fingerprint(conn: duckdb.DuckDBPyConnection) -> str | None:
|
|
387
|
+
"""The query fingerprint stamped on an existing store, or None if absent."""
|
|
388
|
+
present = conn.execute("SELECT 1 FROM information_schema.tables WHERE table_name = 'osmsg_run_meta'").fetchone()
|
|
389
|
+
if not present:
|
|
390
|
+
return None
|
|
391
|
+
row = conn.execute("SELECT fingerprint FROM osmsg_run_meta LIMIT 1").fetchone()
|
|
392
|
+
return row[0] if row else None
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def _store_fingerprint(conn: duckdb.DuckDBPyConnection, fingerprint: str) -> None:
|
|
396
|
+
conn.execute("CREATE TABLE IF NOT EXISTS osmsg_run_meta (fingerprint VARCHAR)")
|
|
397
|
+
conn.execute("DELETE FROM osmsg_run_meta")
|
|
398
|
+
conn.execute("INSERT INTO osmsg_run_meta VALUES (?)", [fingerprint])
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _finalize(
|
|
402
|
+
cfg: RunConfig,
|
|
403
|
+
conn: duckdb.DuckDBPyConnection,
|
|
404
|
+
fingerprint: str,
|
|
405
|
+
*,
|
|
406
|
+
start_date_utc: dt.datetime,
|
|
407
|
+
end_date_utc: dt.datetime,
|
|
408
|
+
start_seq: int | None,
|
|
409
|
+
end_seq: int | None,
|
|
410
|
+
) -> dict[str, Any]:
|
|
411
|
+
"""Aggregate the populated tables into user stats and write the requested formats."""
|
|
412
|
+
rows = user_stats(conn, top_n=None)
|
|
413
|
+
if not rows:
|
|
414
|
+
dbmod.close(conn)
|
|
415
|
+
# Raised so the CLI can map "no new data" to exit 0.
|
|
416
|
+
raise NoDataFoundError("No stats produced for the requested time range.")
|
|
417
|
+
_store_fingerprint(conn, fingerprint)
|
|
418
|
+
|
|
419
|
+
if cfg.changeset or cfg.hashtags:
|
|
420
|
+
attach_metadata(conn, rows)
|
|
421
|
+
if cfg.additional_tags or cfg.tag_mode != "none" or cfg.length_tags:
|
|
422
|
+
attach_tag_stats(
|
|
423
|
+
conn,
|
|
424
|
+
rows,
|
|
425
|
+
additional_tags=cfg.additional_tags,
|
|
426
|
+
tag_mode=cfg.tag_mode,
|
|
427
|
+
length_tags=cfg.length_tags,
|
|
428
|
+
)
|
|
429
|
+
if cfg.tm_stats:
|
|
430
|
+
rows = tm.enrich(rows)
|
|
431
|
+
|
|
432
|
+
out = cfg.output_dir
|
|
433
|
+
written: dict[str, str] = {}
|
|
434
|
+
if "parquet" in cfg.formats:
|
|
435
|
+
written["parquet"] = str(to_parquet(rows, out / f"{cfg.name}.parquet"))
|
|
436
|
+
if "csv" in cfg.formats:
|
|
437
|
+
written["csv"] = str(to_csv(rows, out / f"{cfg.name}.csv"))
|
|
438
|
+
if "json" in cfg.formats:
|
|
439
|
+
written["json"] = str(to_json(rows, out / f"{cfg.name}.json"))
|
|
440
|
+
if "markdown" in cfg.formats:
|
|
441
|
+
md_path = out / f"{cfg.name}.md"
|
|
442
|
+
table_markdown(rows, output_path=md_path)
|
|
443
|
+
written["markdown"] = str(md_path)
|
|
444
|
+
|
|
445
|
+
summary_rows: list[dict[str, Any]] | None = None
|
|
446
|
+
if cfg.summary:
|
|
447
|
+
summary_rows = daily_summary(
|
|
448
|
+
conn,
|
|
449
|
+
additional_tags=cfg.additional_tags,
|
|
450
|
+
tag_mode=cfg.tag_mode,
|
|
451
|
+
length_tags=cfg.length_tags,
|
|
452
|
+
)
|
|
453
|
+
if summary_rows:
|
|
454
|
+
if "parquet" in cfg.formats:
|
|
455
|
+
written["summary_parquet"] = str(to_parquet(summary_rows, out / f"{cfg.name}_summary.parquet"))
|
|
456
|
+
if "csv" in cfg.formats:
|
|
457
|
+
written["summary_csv"] = str(to_csv(summary_rows, out / f"{cfg.name}_summary.csv"))
|
|
458
|
+
if "json" in cfg.formats:
|
|
459
|
+
written["summary_json"] = str(to_json(summary_rows, out / f"{cfg.name}_summary.json"))
|
|
460
|
+
if "markdown" in cfg.formats:
|
|
461
|
+
summary_md_path = out / f"{cfg.name}_summary.md"
|
|
462
|
+
summary_markdown(
|
|
463
|
+
rows,
|
|
464
|
+
output_path=summary_md_path,
|
|
465
|
+
start_date=start_date_utc,
|
|
466
|
+
end_date=end_date_utc,
|
|
467
|
+
additional_tags=cfg.additional_tags,
|
|
468
|
+
length_tags=cfg.length_tags,
|
|
469
|
+
tag_mode=cfg.tag_mode,
|
|
470
|
+
fname=cfg.name,
|
|
471
|
+
tm_stats=cfg.tm_stats,
|
|
472
|
+
)
|
|
473
|
+
written["summary_md"] = str(summary_md_path)
|
|
474
|
+
# psql: skipped on purpose, daily_summary is a query over the four base tables.
|
|
475
|
+
|
|
476
|
+
if "psql" in cfg.formats:
|
|
477
|
+
if not cfg.psql_dsn:
|
|
478
|
+
raise OsmsgError("'psql' format requires a libpq DSN (--psql-dsn / RunConfig.psql_dsn=...).")
|
|
479
|
+
info(f"Pushing to PostgreSQL: {cfg.psql_dsn.split()[0]}…")
|
|
480
|
+
to_psql(conn, cfg.psql_dsn, bulk_load=cfg.psql_bulk)
|
|
481
|
+
written["psql"] = cfg.psql_dsn
|
|
482
|
+
|
|
483
|
+
dbmod.close(conn)
|
|
484
|
+
return {
|
|
485
|
+
"rows": len(rows),
|
|
486
|
+
"files": written,
|
|
487
|
+
"rows_data": rows,
|
|
488
|
+
"summary": summary_rows,
|
|
489
|
+
"start_seq": start_seq,
|
|
490
|
+
"end_seq": end_seq,
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
|
|
370
494
|
def _ensure_credentials(cfg: RunConfig) -> str | None:
|
|
371
495
|
"""Resolve OSM credentials and exchange them for a Geofabrik OAuth 2.0 cookie.
|
|
372
496
|
|
|
@@ -482,15 +606,33 @@ def run(cfg: RunConfig) -> dict[str, Any]:
|
|
|
482
606
|
cookie = _ensure_credentials(cfg)
|
|
483
607
|
|
|
484
608
|
db_path = cfg.output_dir / f"{cfg.name}.duckdb"
|
|
609
|
+
|
|
610
|
+
if cfg.end_date is None:
|
|
611
|
+
cfg.end_date = dt.datetime.now(UTC)
|
|
612
|
+
fingerprint = _query_fingerprint(cfg)
|
|
613
|
+
|
|
614
|
+
if not cfg.update and not cfg.insert and not cfg.overwrite and db_path.exists():
|
|
615
|
+
existing = dbmod.connect(str(db_path))
|
|
616
|
+
if _read_fingerprint(existing) == fingerprint:
|
|
617
|
+
info(f"Reusing {db_path} (same query); re-exporting. Pass --overwrite to recompute.")
|
|
618
|
+
start_utc = (cfg.start_date or cfg.end_date).astimezone(UTC)
|
|
619
|
+
return _finalize(
|
|
620
|
+
cfg,
|
|
621
|
+
existing,
|
|
622
|
+
fingerprint,
|
|
623
|
+
start_date_utc=start_utc,
|
|
624
|
+
end_date_utc=cfg.end_date.astimezone(UTC),
|
|
625
|
+
start_seq=None,
|
|
626
|
+
end_seq=None,
|
|
627
|
+
)
|
|
628
|
+
dbmod.close(existing)
|
|
629
|
+
|
|
485
630
|
if not cfg.update and db_path.exists():
|
|
486
631
|
db_path.unlink()
|
|
487
632
|
conn = dbmod.connect(str(db_path))
|
|
488
633
|
dbmod.create_tables(conn)
|
|
489
634
|
info(f"DuckDB: {db_path}")
|
|
490
635
|
|
|
491
|
-
if cfg.end_date is None:
|
|
492
|
-
cfg.end_date = dt.datetime.now(UTC)
|
|
493
|
-
|
|
494
636
|
if cfg.insert:
|
|
495
637
|
return _run_insert(cfg, conn, db_path)
|
|
496
638
|
|
|
@@ -533,8 +675,6 @@ def run(cfg: RunConfig) -> dict[str, Any]:
|
|
|
533
675
|
if (cfg.tm_stats or cfg.summary or cfg.tag_mode == "all") and not cfg.changeset and not cfg.hashtags:
|
|
534
676
|
cfg.changeset = True
|
|
535
677
|
|
|
536
|
-
# Hybrid-auto history: serve the covered months from the published parquet, leaving only the
|
|
537
|
-
# uncovered recent tail to the live diff path. Falls back to full live on any problem.
|
|
538
678
|
run_live = True
|
|
539
679
|
if cfg.history_mode == "auto" and not cfg.update:
|
|
540
680
|
if cfg.length_tags:
|
|
@@ -562,7 +702,6 @@ def run(cfg: RunConfig) -> dict[str, Any]:
|
|
|
562
702
|
_auto_switch_replication(cfg, cfg.end_date - cfg.start_date)
|
|
563
703
|
except duckdb.Error as exc:
|
|
564
704
|
warn(f"history: remote ingest failed ({type(exc).__name__}: {exc}); using live path.")
|
|
565
|
-
# Discard any partial remote rows so the live path is the sole source.
|
|
566
705
|
for tbl in ("changeset_stats", "changesets", "users"):
|
|
567
706
|
conn.execute(f"DELETE FROM {tbl}")
|
|
568
707
|
run_live = True
|
|
@@ -700,9 +839,6 @@ def run(cfg: RunConfig) -> dict[str, Any]:
|
|
|
700
839
|
if sub.exists():
|
|
701
840
|
shutil.rmtree(sub, ignore_errors=True)
|
|
702
841
|
|
|
703
|
-
# History rows (seq_id=0) hold a changeset's COMPLETE lifetime counts. If the live tail re-saw
|
|
704
|
-
# some of those edits for a changeset that straddles the frontier, drop the live duplicates: the
|
|
705
|
-
# history row already counts them, so this is no-loss and prevents double counting.
|
|
706
842
|
history_row = conn.execute("SELECT count(*) FROM changeset_stats WHERE seq_id = 0").fetchone()
|
|
707
843
|
has_history = bool(history_row and history_row[0] > 0)
|
|
708
844
|
if run_live and has_history:
|
|
@@ -720,90 +856,15 @@ def run(cfg: RunConfig) -> dict[str, Any]:
|
|
|
720
856
|
else:
|
|
721
857
|
start_date_utc = cfg.start_date.astimezone(UTC)
|
|
722
858
|
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
attach_tag_stats(
|
|
733
|
-
conn,
|
|
734
|
-
rows,
|
|
735
|
-
additional_tags=cfg.additional_tags,
|
|
736
|
-
tag_mode=cfg.tag_mode,
|
|
737
|
-
length_tags=cfg.length_tags,
|
|
738
|
-
)
|
|
739
|
-
|
|
740
|
-
if cfg.tm_stats:
|
|
741
|
-
rows = tm.enrich(rows)
|
|
742
|
-
|
|
743
|
-
out = cfg.output_dir
|
|
744
|
-
written: dict[str, str] = {}
|
|
745
|
-
if "parquet" in cfg.formats:
|
|
746
|
-
written["parquet"] = str(to_parquet(rows, out / f"{cfg.name}.parquet"))
|
|
747
|
-
if "csv" in cfg.formats:
|
|
748
|
-
written["csv"] = str(to_csv(rows, out / f"{cfg.name}.csv"))
|
|
749
|
-
if "json" in cfg.formats:
|
|
750
|
-
written["json"] = str(to_json(rows, out / f"{cfg.name}.json"))
|
|
751
|
-
|
|
752
|
-
if "markdown" in cfg.formats:
|
|
753
|
-
md_path = out / f"{cfg.name}.md"
|
|
754
|
-
table_markdown(
|
|
755
|
-
rows,
|
|
756
|
-
output_path=md_path,
|
|
757
|
-
)
|
|
758
|
-
written["markdown"] = str(md_path)
|
|
759
|
-
|
|
760
|
-
summary_rows: list[dict[str, Any]] | None = None
|
|
761
|
-
if cfg.summary:
|
|
762
|
-
summary_rows = daily_summary(
|
|
763
|
-
conn,
|
|
764
|
-
additional_tags=cfg.additional_tags,
|
|
765
|
-
tag_mode=cfg.tag_mode,
|
|
766
|
-
length_tags=cfg.length_tags,
|
|
767
|
-
)
|
|
768
|
-
if summary_rows:
|
|
769
|
-
if "parquet" in cfg.formats:
|
|
770
|
-
written["summary_parquet"] = str(to_parquet(summary_rows, out / f"{cfg.name}_summary.parquet"))
|
|
771
|
-
if "csv" in cfg.formats:
|
|
772
|
-
written["summary_csv"] = str(to_csv(summary_rows, out / f"{cfg.name}_summary.csv"))
|
|
773
|
-
if "json" in cfg.formats:
|
|
774
|
-
written["summary_json"] = str(to_json(summary_rows, out / f"{cfg.name}_summary.json"))
|
|
775
|
-
if "markdown" in cfg.formats:
|
|
776
|
-
summary_md_path = out / f"{cfg.name}_summary.md"
|
|
777
|
-
summary_markdown(
|
|
778
|
-
rows,
|
|
779
|
-
output_path=summary_md_path,
|
|
780
|
-
start_date=start_date_utc,
|
|
781
|
-
end_date=end_date_utc,
|
|
782
|
-
additional_tags=cfg.additional_tags,
|
|
783
|
-
length_tags=cfg.length_tags,
|
|
784
|
-
tag_mode=cfg.tag_mode,
|
|
785
|
-
fname=cfg.name,
|
|
786
|
-
tm_stats=cfg.tm_stats,
|
|
787
|
-
)
|
|
788
|
-
written["summary_md"] = str(summary_md_path)
|
|
789
|
-
# psql: skipped on purpose, daily_summary is a query over the four base tables.
|
|
790
|
-
|
|
791
|
-
if "psql" in cfg.formats:
|
|
792
|
-
if not cfg.psql_dsn:
|
|
793
|
-
raise OsmsgError("'psql' format requires a libpq DSN (--psql-dsn / RunConfig.psql_dsn=...).")
|
|
794
|
-
info(f"Pushing to PostgreSQL: {cfg.psql_dsn.split()[0]}…")
|
|
795
|
-
to_psql(conn, cfg.psql_dsn, bulk_load=cfg.psql_bulk)
|
|
796
|
-
written["psql"] = cfg.psql_dsn
|
|
797
|
-
|
|
798
|
-
dbmod.close(conn)
|
|
799
|
-
return {
|
|
800
|
-
"rows": len(rows),
|
|
801
|
-
"files": written,
|
|
802
|
-
"rows_data": rows,
|
|
803
|
-
"summary": summary_rows,
|
|
804
|
-
"start_seq": start_seq,
|
|
805
|
-
"end_seq": end_seq,
|
|
806
|
-
}
|
|
859
|
+
return _finalize(
|
|
860
|
+
cfg,
|
|
861
|
+
conn,
|
|
862
|
+
fingerprint,
|
|
863
|
+
start_date_utc=start_date_utc,
|
|
864
|
+
end_date_utc=end_date_utc,
|
|
865
|
+
start_seq=start_seq,
|
|
866
|
+
end_seq=end_seq,
|
|
867
|
+
)
|
|
807
868
|
|
|
808
869
|
|
|
809
870
|
__all__ = ["RunConfig", "run"]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "osmsg"
|
|
3
|
-
version = "1.2.0"
|
|
3
|
+
version = "1.2.2"
|
|
4
4
|
description = "OpenStreetMap Stats Generator: Commandline"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [
|
|
@@ -41,6 +41,9 @@ repository = "https://github.com/osgeonepal/osmsg"
|
|
|
41
41
|
[project.scripts]
|
|
42
42
|
osmsg = "osmsg.cli:app"
|
|
43
43
|
|
|
44
|
+
[project.gui-scripts]
|
|
45
|
+
osmsg-gui = "osmsg.gui:launch"
|
|
46
|
+
|
|
44
47
|
[build-system]
|
|
45
48
|
requires = ["uv_build>=0.5.15,<0.9"]
|
|
46
49
|
build-backend = "uv_build"
|
osmsg-1.2.0/osmsg/__version__.py
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.0"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|