geosplit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geo_splitter/__init__.py +6 -0
- geo_splitter/__main__.py +3 -0
- geo_splitter/cli.py +72 -0
- geo_splitter/convert.py +55 -0
- geo_splitter/core.py +117 -0
- geosplit-0.1.0.dist-info/METADATA +88 -0
- geosplit-0.1.0.dist-info/RECORD +10 -0
- geosplit-0.1.0.dist-info/WHEEL +4 -0
- geosplit-0.1.0.dist-info/entry_points.txt +2 -0
- geosplit-0.1.0.dist-info/licenses/LICENSE +21 -0
geo_splitter/__init__.py
ADDED
geo_splitter/__main__.py
ADDED
geo_splitter/cli.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Command-line interface for GeoSplit."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from .convert import convert_file
|
|
9
|
+
from .core import GeoSplitterError, parse_size, split_geojson
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _size_argument(value: str) -> int:
    """Adapt ``parse_size`` for use as an argparse ``type=`` callable.

    argparse only echoes the message of an ``ArgumentTypeError``; any other
    ``ValueError`` raised by a type callable is replaced with a generic
    "invalid ... value" text. Re-raising keeps the hint from ``parse_size``
    visible to the user.
    """
    try:
        return parse_size(value)
    except GeoSplitterError as error:
        raise argparse.ArgumentTypeError(str(error)) from error


def _parsers() -> tuple[argparse.ArgumentParser, dict[str, argparse.ArgumentParser]]:
    """Build the command-line parsers.

    Returns:
        A pair ``(parser, commands)``: the top-level ``geosplit`` parser and
        a mapping from subcommand name (``split``, ``convert``, ``help``) to
        that subcommand's parser.
    """
    parser = argparse.ArgumentParser(
        prog="geosplit", description="Split GeoJSON and convert GeoJSON <-> GeoPackage."
    )
    commands = parser.add_subparsers(dest="command", required=True)

    split = commands.add_parser("split", help="split a GeoJSON FeatureCollection")
    split.add_argument("input", help="source .geojson file")
    split.add_argument("output_dir", help="directory for split files")
    # Exactly one split mode has to be chosen on the command line.
    mode = split.add_mutually_exclusive_group(required=True)
    mode.add_argument("--features", type=int, metavar="COUNT", help="maximum features per file")
    mode.add_argument("--size", type=_size_argument, metavar="SIZE", help="maximum size, e.g. 10MB or 512KiB")
    split.add_argument("--prefix", help="output filename prefix (default: input filename)")
    split.add_argument("--force", action="store_true", help="replace matching output files")

    convert = commands.add_parser("convert", help="convert GeoJSON to or from GeoPackage")
    convert.add_argument("input", help="source .geojson, .json, or .gpkg file")
    convert.add_argument("output", help="destination .geojson, .json, or .gpkg file")
    convert.add_argument("--layer", help="source GeoPackage layer")
    convert.add_argument("--output-layer", help="destination GeoPackage layer name")
    convert.add_argument("--force", action="store_true", help="replace the output file")

    help_command = commands.add_parser("help", help="show general or command-specific help")
    help_command.add_argument("topic", nargs="?", choices=("split", "convert"), help="command to explain")
    return parser, commands.choices
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def help_text(topic: str | None = None) -> str:
    """Render help for the whole program or for a single subcommand.

    A falsy ``topic`` (``None`` or an empty string) yields the top-level
    help. Any other value is looked up among the registered subcommand
    parsers; an unknown name propagates the resulting ``KeyError``.
    """
    root, subparsers = _parsers()
    if not topic:
        return root.format_help()
    return subparsers[topic].format_help()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the command-line interface.

    Args:
        argv: Arguments to parse; ``None`` is handed to argparse unchanged.

    Returns:
        ``0`` when the requested command completes. A ``GeoSplitterError``
        is reported as ``error: ...`` through ``parser.exit`` with status 2.
    """
    cli, _ = _parsers()
    try:
        options = cli.parse_args(argv)
        command = options.command
        if command == "help":
            # Formatted help already ends with a newline.
            print(help_text(options.topic), end="")
        elif command == "split":
            written = split_geojson(
                options.input,
                options.output_dir,
                features_per_file=options.features,
                max_bytes=options.size,
                prefix=options.prefix,
                force=options.force,
            )
            print(f"Created {len(written)} file(s) in {written[0].parent}")
        else:
            created = convert_file(
                options.input,
                options.output,
                layer=options.layer,
                output_layer=options.output_layer,
                force=options.force,
            )
            print(f"Created {created}")
    except GeoSplitterError as problem:
        cli.exit(2, f"error: {problem}\n")
    else:
        return 0
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
|
geo_splitter/convert.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Optional GeoJSON and GeoPackage conversion support."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from .core import GeoSplitterError
|
|
8
|
+
|
|
9
|
+
# Lower-cased file suffixes that are treated as GeoJSON.
_GEOJSON = {".geojson", ".json"}


def convert_file(
    source: str | Path,
    destination: str | Path,
    *,
    layer: str | None = None,
    output_layer: str | None = None,
    force: bool = False,
) -> Path:
    """Convert a GeoJSON file to GeoPackage, or a GeoPackage layer to GeoJSON.

    The direction is chosen from the two file suffixes (compared
    case-insensitively): exactly one side must be ``.gpkg`` and the other
    ``.geojson`` or ``.json``.

    Args:
        source: Existing input file.
        destination: File to create; missing parent directories are created.
        layer: Layer to read; only valid when ``source`` is a GeoPackage.
        output_layer: Layer name to write; only valid when ``destination``
            is a GeoPackage. Defaults to the destination's stem.
        force: Replace ``destination`` if it already exists.

    Returns:
        ``destination`` as a ``Path``.

    Raises:
        GeoSplitterError: On an unsupported suffix pair, a missing input, a
            misapplied layer option, an existing destination without
            ``force``, missing ``geopandas``, or an unusable layer selection.
    """
    source = Path(source)
    destination = Path(destination)
    in_suffix = source.suffix.lower()
    out_suffix = destination.suffix.lower()
    reads_gpkg = in_suffix == ".gpkg"
    writes_gpkg = out_suffix == ".gpkg"

    recognised = _GEOJSON | {".gpkg"}
    if reads_gpkg == writes_gpkg or not {in_suffix, out_suffix} <= recognised:
        raise GeoSplitterError("Conversion requires one .geojson/.json file and one .gpkg file.")
    if not source.is_file():
        raise GeoSplitterError(f"Input does not exist or is not a file: {source}")
    if layer and not source_is_gpkg_guard(reads_gpkg):
        raise GeoSplitterError("--layer only applies when reading a GeoPackage.")
    if output_layer and not writes_gpkg:
        raise GeoSplitterError("--output-layer only applies when writing a GeoPackage.")
    if destination.exists() and not force:
        raise GeoSplitterError(f"Output already exists: {destination}. Use --force to replace it.")

    # Imported lazily so the rest of the package works without geopandas.
    try:
        import geopandas as gpd
    except ImportError as error:
        raise GeoSplitterError("GeoPackage support is not installed. Run: pip install 'geosplit[gpkg]'") from error

    if reads_gpkg:
        available = gpd.list_layers(source)["name"].tolist()
        if not available:
            raise GeoSplitterError("GeoPackage contains no layers.")
        if layer is None:
            # Without an explicit choice the layer must be unambiguous.
            if len(available) != 1:
                raise GeoSplitterError(
                    f"GeoPackage has multiple layers; choose one with --layer: {', '.join(available)}"
                )
        elif layer not in available:
            raise GeoSplitterError(f"Layer {layer!r} not found. Available layers: {', '.join(available)}")
        frame = gpd.read_file(source, layer=layer or available[0])
        driver = "GeoJSON"
        write_options = {}
    else:
        frame = gpd.read_file(source)
        driver = "GPKG"
        write_options = {"layer": output_layer or destination.stem}

    destination.parent.mkdir(parents=True, exist_ok=True)
    # Only reachable with an existing file when force is set; remove it first.
    if destination.exists():
        destination.unlink()
    frame.to_file(destination, driver=driver, index=False, **write_options)
    return destination


def source_is_gpkg_guard(reads_gpkg: bool) -> bool:
    """Return ``reads_gpkg`` unchanged; names the --layer precondition."""
    return reads_gpkg
|
geo_splitter/core.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Core GeoJSON splitting logic."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
# Alias for a decoded JSON object: string keys mapped to arbitrary values.
JsonObject = dict[str, Any]

# A non-negative decimal number, optional whitespace, then an optional unit
# suffix; matched case-insensitively.
_SIZE = re.compile(r"^(\d+(?:\.\d+)?)\s*(B|KB|KIB|MB|MIB|GB|GIB)?$", re.IGNORECASE)

# Bytes per unit, keyed by the upper-cased suffix: decimal multiples
# (KB/MB/GB) and binary multiples (KIB/MIB/GIB).
_UNITS = {
    "B": 1,
    "KB": 10**3,
    "KIB": 2**10,
    "MB": 10**6,
    "MIB": 2**20,
    "GB": 10**9,
    "GIB": 2**30,
}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class GeoSplitterError(ValueError):
    """Signal an invalid input or a request that cannot be carried out.

    Derives from ``ValueError``, so callers that already handle
    ``ValueError`` will also catch it.
    """
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def parse_size(value: str) -> int:
    """Convert values such as ``500KB`` or ``2.5MiB`` to bytes.

    Args:
        value: A non-negative decimal number, optionally followed by a unit
            (``B``, ``KB``, ``KiB``, ``MB``, ``MiB``, ``GB``, ``GiB``; any
            letter case). A bare number is taken as bytes.

    Returns:
        The size in whole bytes; a fractional byte is truncated.

    Raises:
        GeoSplitterError: If ``value`` is not a recognised size or works out
            to less than one byte.
    """
    from fractions import Fraction

    if not (match := _SIZE.fullmatch(value.strip())):
        raise GeoSplitterError(f"Invalid size {value!r}; try 500KB, 2MB, or 1GiB.")
    amount, unit = match.groups()
    # Case-insensitive matching can accept a few non-ASCII look-alike letters
    # (such as the Kelvin sign) whose upper-case form is not a known unit;
    # report those as invalid instead of failing with a KeyError.
    multiplier = _UNITS.get(unit.upper() if unit else "B")
    if multiplier is None:
        raise GeoSplitterError(f"Invalid size {value!r}; try 500KB, 2MB, or 1GiB.")
    # Exact rational arithmetic: binary floating point can land just below
    # the true product (truncating one byte short) and overflows to infinity
    # on very long digit strings.
    size = int(Fraction(amount) * multiplier)
    if size < 1:
        raise GeoSplitterError("Size must be at least 1 byte.")
    return size
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _compact(value: Any) -> bytes:
    """Serialize ``value`` as JSON without optional whitespace, as UTF-8 bytes.

    Non-ASCII characters are written literally rather than as ``\\u`` escapes.
    """
    text = json.dumps(value, separators=(",", ":"), ensure_ascii=False)
    return text.encode()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _load(path: Path) -> JsonObject:
    """Read ``path`` and return it as a validated GeoJSON FeatureCollection.

    The file is decoded as UTF-8, with a leading byte-order mark tolerated.

    Raises:
        GeoSplitterError: If the file cannot be read or decoded, is not valid
            JSON, is not an object whose ``type`` is ``FeatureCollection``,
            or has a ``features`` member that is not an array.
    """
    try:
        text = path.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeError) as error:
        raise GeoSplitterError(f"Cannot read {path}: {error}") from error

    try:
        document = json.loads(text)
    except json.JSONDecodeError as error:
        raise GeoSplitterError(f"Invalid JSON in {path}: {error}") from error

    is_collection = isinstance(document, dict) and document.get("type") == "FeatureCollection"
    if not is_collection:
        raise GeoSplitterError("Input must be a GeoJSON FeatureCollection.")
    if not isinstance(document.get("features"), list):
        raise GeoSplitterError("GeoJSON 'features' must be an array.")
    return document
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _chunks_by_size(metadata: JsonObject, features: list[Any], limit: int) -> list[list[Any]]:
    """Group ``features`` greedily so each serialized document fits ``limit``.

    Sizes are measured on the compact encoding: the document with an empty
    ``features`` array plus a trailing newline, then each feature, plus one
    byte for the comma before every feature after the first in a group.

    Returns:
        The groups in input order. With no features the result is a single
        empty group.

    Raises:
        GeoSplitterError: If the metadata alone, or any single feature
            together with the metadata, exceeds ``limit``.
    """
    # Bytes every output file needs before any feature is added; the +1 is
    # the newline that ends each file.
    overhead = len(_compact({**metadata, "features": []})) + 1
    if overhead > limit:
        raise GeoSplitterError(f"The GeoJSON metadata alone exceeds the {limit}-byte limit.")

    current: list[Any] = []
    groups: list[list[Any]] = [current]
    size = overhead
    for position, feature in enumerate(features, start=1):
        encoded = len(_compact(feature))
        if overhead + encoded > limit:
            raise GeoSplitterError(f"Feature {position} cannot fit within the {limit}-byte limit.")
        # A comma is only needed when the group already holds a feature.
        separator = 1 if current else 0
        if size + encoded + separator > limit:
            current = []
            groups.append(current)
            size = overhead
            separator = 0
        current.append(feature)
        size += encoded + separator
    return groups
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def split_geojson(
    source: str | Path,
    output_dir: str | Path,
    *,
    features_per_file: int | None = None,
    max_bytes: int | None = None,
    prefix: str | None = None,
    force: bool = False,
) -> list[Path]:
    """Write a FeatureCollection as several smaller ones and list the files.

    Exactly one of ``features_per_file`` and ``max_bytes`` must be given.
    A byte limit applies to the whole compact document including its final
    newline.

    Args:
        source: GeoJSON FeatureCollection to split.
        output_dir: Directory for the parts; created if missing.
        features_per_file: Maximum number of features in each part.
        max_bytes: Maximum size in bytes of each part.
        prefix: Stem for output names; defaults to the stem of ``source``.
        force: Overwrite output files that already exist.

    Returns:
        Paths of the written files, named ``<stem>_NNN.geojson`` and numbered
        from 1. An input with no features still produces one file.

    Raises:
        GeoSplitterError: On an invalid mode or limit, an unreadable or
            invalid input, a feature that cannot fit, or an existing output
            file when ``force`` is false.
    """
    count_mode = features_per_file is not None
    if count_mode == (max_bytes is not None):
        raise GeoSplitterError("Choose exactly one split mode: feature count or file size.")
    if features_per_file is not None and features_per_file < 1:
        raise GeoSplitterError("Features per file must be at least 1.")
    if max_bytes is not None and max_bytes < 1:
        raise GeoSplitterError("Maximum file size must be at least 1 byte.")

    source = Path(source)
    output_dir = Path(output_dir)
    document = _load(source)
    features = document.pop("features")
    # A bbox covering the whole collection would be wrong for each part.
    document.pop("bbox", None)

    if features_per_file is not None:
        chunks = [
            features[start : start + features_per_file]
            for start in range(0, len(features), features_per_file)
        ]
        if not chunks:
            # No features at all: still emit one (empty) collection.
            chunks = [[]]
    else:
        chunks = _chunks_by_size(document, features, max_bytes)  # type: ignore[arg-type]

    stem = prefix or source.stem
    # Zero-pad to at least three digits, more if the part count needs it.
    width = max(3, len(str(len(chunks))))
    paths = [output_dir / f"{stem}_{number:0{width}d}.geojson" for number in range(1, len(chunks) + 1)]
    if not force:
        for candidate in paths:
            if candidate.exists():
                raise GeoSplitterError(f"Output already exists: {candidate}. Use --force to replace it.")

    output_dir.mkdir(parents=True, exist_ok=True)
    for path, chunk in zip(paths, chunks, strict=True):
        path.write_bytes(_compact({**document, "features": chunk}) + b"\n")
    return paths
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: geosplit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Split GeoJSON by feature count or file size and convert GeoJSON to and from GeoPackage
|
|
5
|
+
Author: GeoSplit contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: geojson,geopackage,gis,splitter
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Environment :: Console
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: GIS
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
16
|
+
Requires-Dist: ruff>=0.6; extra == 'dev'
|
|
17
|
+
Provides-Extra: gpkg
|
|
18
|
+
Requires-Dist: geopandas>=1.0; extra == 'gpkg'
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
|
|
21
|
+
# GeoSplit
|
|
22
|
+
|
|
23
|
+
A focused command-line tool that:
|
|
24
|
+
|
|
25
|
+
- splits a GeoJSON `FeatureCollection` by feature count;
|
|
26
|
+
- splits it by a strict maximum file size; and
|
|
27
|
+
- converts GeoJSON to GeoPackage, or a GeoPackage layer back to GeoJSON.
|
|
28
|
+
|
|
29
|
+
The splitter has no runtime dependencies. GeoPackage support is optional.
|
|
30
|
+
|
|
31
|
+
View general or command-specific help at any time:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
geosplit help
|
|
35
|
+
geosplit help split
|
|
36
|
+
geosplit help convert
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
|
|
41
|
+
Requires Python 3.10 or newer. Download or clone the project, open a terminal in its folder, then run:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install .
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Include conversion support:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install ".[gpkg]"
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Split GeoJSON
|
|
54
|
+
|
|
55
|
+
By feature count:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
geosplit split world.geojson output --features 1000
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
By maximum file size:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
geosplit split world.geojson output --size 10MB
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Sizes accept `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, and `GiB`. Each resulting file is a complete, compact GeoJSON document whose on-disk size does not exceed the requested limit. If one feature cannot fit by itself, the command stops with a clear error.
|
|
68
|
+
|
|
69
|
+
Use `--prefix countries` to customize output names or `--force` to replace matching files. A source collection's top-level `bbox` is omitted because it would no longer describe each split collection; other top-level metadata is retained.
|
|
70
|
+
|
|
71
|
+
## Convert formats
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# GeoJSON to GeoPackage
|
|
75
|
+
geosplit convert roads.geojson roads.gpkg
|
|
76
|
+
|
|
77
|
+
# Choose the new GeoPackage layer name
|
|
78
|
+
geosplit convert roads.geojson map.gpkg --output-layer roads
|
|
79
|
+
|
|
80
|
+
# GeoPackage to GeoJSON
|
|
81
|
+
geosplit convert map.gpkg roads.geojson --layer roads
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
If a GeoPackage contains exactly one layer, `--layer` is optional. Existing destinations are protected unless `--force` is supplied.
|
|
85
|
+
|
|
86
|
+
## License
|
|
87
|
+
|
|
88
|
+
MIT
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
geo_splitter/__init__.py,sha256=3Jbw4scUmWEAjkugMgCfOS2wfFlyof_8NOx5KcDxa6A,191
|
|
2
|
+
geo_splitter/__main__.py,sha256=k1ocEWawweo1qCJWNFAAvyxz3tcY13dzvCenHszij30,48
|
|
3
|
+
geo_splitter/cli.py,sha256=36gmV16sk0K-GCfz9P9-LSVDnciTBgwwqEc_99ZT_EM,3046
|
|
4
|
+
geo_splitter/convert.py,sha256=8MJdgzCPmEpeLMjZdEJsQs7SK_EH5VHJtOjKAZ1KV7E,2531
|
|
5
|
+
geo_splitter/core.py,sha256=1M-OiJTEHTYqtE6uXOwxepAip6D6wBviEK2STujQNJU,4517
|
|
6
|
+
geosplit-0.1.0.dist-info/METADATA,sha256=4x4syE3TK2KGfSOJLBB9rwrYzXMMWFx2ZysiU8yH4m4,2414
|
|
7
|
+
geosplit-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
8
|
+
geosplit-0.1.0.dist-info/entry_points.txt,sha256=c5A6pF1agZaBjT9-xvYs_HFKkCLmpTVUXVFI0fW3FRs,51
|
|
9
|
+
geosplit-0.1.0.dist-info/licenses/LICENSE,sha256=EX3PsbXz3IlIdxy4w15CY9QAcfuocWvfs29IA_DHtBM,1078
|
|
10
|
+
geosplit-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 GeoSplit contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|