geosplit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+
7
+ jobs:
8
+ test:
9
+ runs-on: ubuntu-latest
10
+ strategy:
11
+ matrix:
12
+ python-version: ["3.10", "3.12", "3.13"]
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ - uses: actions/setup-python@v5
16
+ with:
17
+ python-version: ${{ matrix.python-version }}
18
+ cache: pip
19
+ - run: pip install -e ".[dev]"
20
+ - run: pytest
21
+ - run: ruff check .
@@ -0,0 +1,12 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ .pytest_cache/
5
+ .ruff_cache/
6
+ .venv/
7
+ venv/
8
+ build/
9
+ dist/
10
+ .test-output/
11
+ *.gpkg-shm
12
+ *.gpkg-wal
geosplit-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 GeoSplit contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,88 @@
1
+ Metadata-Version: 2.4
2
+ Name: geosplit
3
+ Version: 0.1.0
4
+ Summary: Split GeoJSON by feature count or file size and convert GeoJSON to and from GeoPackage
5
+ Author: GeoSplit contributors
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Keywords: geojson,geopackage,gis,splitter
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Environment :: Console
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Topic :: Scientific/Engineering :: GIS
13
+ Requires-Python: >=3.10
14
+ Provides-Extra: dev
15
+ Requires-Dist: pytest>=8; extra == 'dev'
16
+ Requires-Dist: ruff>=0.6; extra == 'dev'
17
+ Provides-Extra: gpkg
18
+ Requires-Dist: geopandas>=1.0; extra == 'gpkg'
19
+ Description-Content-Type: text/markdown
20
+
21
+ # GeoSplit
22
+
23
+ A focused command-line tool that:
24
+
25
+ - splits a GeoJSON `FeatureCollection` by feature count;
26
+ - splits it by a strict maximum file size; and
27
+ - converts GeoJSON to GeoPackage, or a GeoPackage layer back to GeoJSON.
28
+
29
+ The splitter has no runtime dependencies. GeoPackage support is optional.
30
+
31
+ View general or command-specific help at any time:
32
+
33
+ ```bash
34
+ geosplit help
35
+ geosplit help split
36
+ geosplit help convert
37
+ ```
38
+
39
+ ## Install
40
+
41
+ Requires Python 3.10 or newer. Download or clone the project, open a terminal in its folder, then run:
42
+
43
+ ```bash
44
+ pip install .
45
+ ```
46
+
47
+ Include conversion support:
48
+
49
+ ```bash
50
+ pip install ".[gpkg]"
51
+ ```
52
+
53
+ ## Split GeoJSON
54
+
55
+ By feature count:
56
+
57
+ ```bash
58
+ geosplit split world.geojson output --features 1000
59
+ ```
60
+
61
+ By maximum file size:
62
+
63
+ ```bash
64
+ geosplit split world.geojson output --size 10MB
65
+ ```
66
+
67
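+ Sizes accept `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, and `GiB` (case-insensitive). `KB`, `MB`, and `GB` are powers of 1000; `KiB`, `MiB`, and `GiB` are powers of 1024; a number without a unit is read as bytes. Each resulting file is a complete, compact GeoJSON document whose on-disk size does not exceed the requested limit. If one feature cannot fit by itself, the command stops with a clear error.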
68
+
69
+ Use `--prefix countries` to customize output names or `--force` to replace matching files. A source collection's top-level `bbox` is omitted because it would no longer describe each split collection; other top-level metadata is retained.
70
+
71
+ ## Convert formats
72
+
73
+ ```bash
74
+ # GeoJSON to GeoPackage
75
+ geosplit convert roads.geojson roads.gpkg
76
+
77
+ # Choose the new GeoPackage layer name
78
+ geosplit convert roads.geojson map.gpkg --output-layer roads
79
+
80
+ # GeoPackage to GeoJSON
81
+ geosplit convert map.gpkg roads.geojson --layer roads
82
+ ```
83
+
84
+ If a GeoPackage contains exactly one layer, `--layer` is optional. Existing destinations are protected unless `--force` is supplied.
85
+
86
+ ## License
87
+
88
+ MIT
@@ -0,0 +1,68 @@
1
+ # GeoSplit
2
+
3
+ A focused command-line tool that:
4
+
5
+ - splits a GeoJSON `FeatureCollection` by feature count;
6
+ - splits it by a strict maximum file size; and
7
+ - converts GeoJSON to GeoPackage, or a GeoPackage layer back to GeoJSON.
8
+
9
+ The splitter has no runtime dependencies. GeoPackage support is optional.
10
+
11
+ View general or command-specific help at any time:
12
+
13
+ ```bash
14
+ geosplit help
15
+ geosplit help split
16
+ geosplit help convert
17
+ ```
18
+
19
+ ## Install
20
+
21
+ Requires Python 3.10 or newer. Download or clone the project, open a terminal in its folder, then run:
22
+
23
+ ```bash
24
+ pip install .
25
+ ```
26
+
27
+ Include conversion support:
28
+
29
+ ```bash
30
+ pip install ".[gpkg]"
31
+ ```
32
+
33
+ ## Split GeoJSON
34
+
35
+ By feature count:
36
+
37
+ ```bash
38
+ geosplit split world.geojson output --features 1000
39
+ ```
40
+
41
+ By maximum file size:
42
+
43
+ ```bash
44
+ geosplit split world.geojson output --size 10MB
45
+ ```
46
+
47
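+ Sizes accept `B`, `KB`, `KiB`, `MB`, `MiB`, `GB`, and `GiB` (case-insensitive). `KB`, `MB`, and `GB` are powers of 1000; `KiB`, `MiB`, and `GiB` are powers of 1024; a number without a unit is read as bytes. Each resulting file is a complete, compact GeoJSON document whose on-disk size does not exceed the requested limit. If one feature cannot fit by itself, the command stops with a clear error.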
48
+
49
+ Use `--prefix countries` to customize output names or `--force` to replace matching files. A source collection's top-level `bbox` is omitted because it would no longer describe each split collection; other top-level metadata is retained.
50
+
51
+ ## Convert formats
52
+
53
+ ```bash
54
+ # GeoJSON to GeoPackage
55
+ geosplit convert roads.geojson roads.gpkg
56
+
57
+ # Choose the new GeoPackage layer name
58
+ geosplit convert roads.geojson map.gpkg --output-layer roads
59
+
60
+ # GeoPackage to GeoJSON
61
+ geosplit convert map.gpkg roads.geojson --layer roads
62
+ ```
63
+
64
+ If a GeoPackage contains exactly one layer, `--layer` is optional. Existing destinations are protected unless `--force` is supplied.
65
+
66
+ ## License
67
+
68
+ MIT
@@ -0,0 +1,37 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.25"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "geosplit"
7
+ version = "0.1.0"
8
+ description = "Split GeoJSON by feature count or file size and convert GeoJSON to and from GeoPackage"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+ authors = [{ name = "GeoSplit contributors" }]
13
+ keywords = ["geojson", "geopackage", "gis", "splitter"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Environment :: Console",
17
+ "Programming Language :: Python :: 3",
18
+ "Topic :: Scientific/Engineering :: GIS",
19
+ ]
20
+
21
+ [project.optional-dependencies]
22
+ gpkg = ["geopandas>=1.0"]
23
+ dev = ["pytest>=8", "ruff>=0.6"]
24
+
25
+ [project.scripts]
26
+ geosplit = "geo_splitter.cli:main"
27
+
28
+ [tool.hatch.build.targets.wheel]
29
+ packages = ["src/geo_splitter"]
30
+
31
+ [tool.pytest.ini_options]
32
+ addopts = "-q"
33
+ testpaths = ["tests"]
34
+
35
+ [tool.ruff]
36
+ line-length = 120
37
+ target-version = "py310"
@@ -0,0 +1,6 @@
1
+ """Split GeoJSON files and convert GeoJSON to or from GeoPackage."""
2
+
3
+ from .core import GeoSplitterError, split_geojson
4
+
5
# Names exported by a star-import of this package.
__all__ = [
    "GeoSplitterError",
    "split_geojson",
]

# Package version string.
__version__ = "0.1.0"
@@ -0,0 +1,3 @@
1
from .cli import main

# Run the command-line interface and exit with the status it returns.
exit_status = main()
raise SystemExit(exit_status)
@@ -0,0 +1,72 @@
1
+ """Command-line interface for GeoSplit."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import sys
7
+
8
+ from .convert import convert_file
9
+ from .core import GeoSplitterError, parse_size, split_geojson
10
+
11
+
12
def _parsers() -> tuple[argparse.ArgumentParser, dict[str, argparse.ArgumentParser]]:
    """Build the top-level parser and its sub-command parsers.

    Returns:
        A ``(parser, commands)`` pair, where ``commands`` maps each
        sub-command name (``split``, ``convert``, ``help``) to its parser.
    """

    def size(text: str) -> int:
        # GeoSplitterError is a ValueError, and argparse replaces the message
        # of any ValueError raised by a ``type=`` callable with a generic
        # "invalid ... value" error.  ArgumentTypeError is the one exception
        # whose message argparse shows verbatim, so re-raise as that to keep
        # parse_size's explanation visible to the user.
        try:
            return parse_size(text)
        except GeoSplitterError as error:
            raise argparse.ArgumentTypeError(str(error)) from error

    parser = argparse.ArgumentParser(
        prog="geosplit", description="Split GeoJSON and convert GeoJSON <-> GeoPackage."
    )
    commands = parser.add_subparsers(dest="command", required=True)

    split = commands.add_parser("split", help="split a GeoJSON FeatureCollection")
    split.add_argument("input", help="source .geojson file")
    split.add_argument("output_dir", help="directory for split files")
    # Exactly one split mode must be chosen on the command line.
    mode = split.add_mutually_exclusive_group(required=True)
    mode.add_argument("--features", type=int, metavar="COUNT", help="maximum features per file")
    mode.add_argument("--size", type=size, metavar="SIZE", help="maximum size, e.g. 10MB or 512KiB")
    split.add_argument("--prefix", help="output filename prefix (default: input filename)")
    split.add_argument("--force", action="store_true", help="replace matching output files")

    convert = commands.add_parser("convert", help="convert GeoJSON to or from GeoPackage")
    convert.add_argument("input", help="source .geojson, .json, or .gpkg file")
    convert.add_argument("output", help="destination .geojson, .json, or .gpkg file")
    convert.add_argument("--layer", help="source GeoPackage layer")
    convert.add_argument("--output-layer", help="destination GeoPackage layer name")
    convert.add_argument("--force", action="store_true", help="replace the output file")

    help_command = commands.add_parser("help", help="show general or command-specific help")
    help_command.add_argument("topic", nargs="?", choices=("split", "convert"), help="command to explain")
    return parser, commands.choices
37
+
38
+
39
def help_text(topic: str | None = None) -> str:
    """Render help for the whole program or for one sub-command.

    When ``topic`` is ``None`` or empty the top-level help is returned;
    otherwise ``topic`` is looked up among the registered sub-commands.
    """
    root, subparsers = _parsers()
    if topic:
        return subparsers[topic].format_help()
    return root.format_help()
43
+
44
+
45
+ def main(argv: list[str] | None = None) -> int:
46
+ parser, _ = _parsers()
47
+ try:
48
+ args = parser.parse_args(argv)
49
+ if args.command == "help":
50
+ print(help_text(args.topic), end="")
51
+ elif args.command == "split":
52
+ paths = split_geojson(
53
+ args.input,
54
+ args.output_dir,
55
+ features_per_file=args.features,
56
+ max_bytes=args.size,
57
+ prefix=args.prefix,
58
+ force=args.force,
59
+ )
60
+ print(f"Created {len(paths)} file(s) in {paths[0].parent}")
61
+ else:
62
+ path = convert_file(
63
+ args.input, args.output, layer=args.layer, output_layer=args.output_layer, force=args.force
64
+ )
65
+ print(f"Created {path}")
66
+ return 0
67
+ except GeoSplitterError as error:
68
+ parser.exit(2, f"error: {error}\n")
69
+
70
+
71
if __name__ == "__main__":
    # Executed as a script: exit with the status returned by main().
    raise SystemExit(main())
@@ -0,0 +1,55 @@
1
+ """Optional GeoJSON and GeoPackage conversion support."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ from .core import GeoSplitterError
8
+
9
# Lower-case file suffixes that are treated as GeoJSON.
_GEOJSON = {".geojson", ".json"}


def convert_file(
    source: str | Path,
    destination: str | Path,
    *,
    layer: str | None = None,
    output_layer: str | None = None,
    force: bool = False,
) -> Path:
    """Convert one GeoJSON file to GeoPackage, or one GeoPackage layer to GeoJSON.

    The direction is inferred from the file suffixes (compared
    case-insensitively): exactly one of ``source`` and ``destination`` must
    end in ``.gpkg`` and the other in ``.geojson`` or ``.json``.

    Args:
        source: Existing input file.
        destination: Output file; missing parent directories are created.
        layer: GeoPackage layer to read.  May be omitted when the GeoPackage
            contains exactly one layer.
        output_layer: Layer name to write into a GeoPackage; defaults to the
            destination's file stem.
        force: Replace ``destination`` if it already exists.

    Returns:
        The destination path.

    Raises:
        GeoSplitterError: If the suffix combination is unsupported, the
            source is missing, a layer option does not apply to the chosen
            direction, the destination exists (without ``force``) or is not
            a regular file, geopandas is not installed, or the requested
            layer cannot be determined.
    """
    source, destination = Path(source), Path(destination)
    source_suffix, destination_suffix = source.suffix.lower(), destination.suffix.lower()
    source_is_gpkg = source_suffix == ".gpkg"
    destination_is_gpkg = destination_suffix == ".gpkg"
    unsupported = {source_suffix, destination_suffix} - (_GEOJSON | {".gpkg"})
    # Exactly one side must be a GeoPackage and both suffixes must be known.
    if source_is_gpkg == destination_is_gpkg or unsupported:
        raise GeoSplitterError("Conversion requires one .geojson/.json file and one .gpkg file.")
    if not source.is_file():
        raise GeoSplitterError(f"Input does not exist or is not a file: {source}")
    if layer and not source_is_gpkg:
        raise GeoSplitterError("--layer only applies when reading a GeoPackage.")
    if output_layer and not destination_is_gpkg:
        raise GeoSplitterError("--output-layer only applies when writing a GeoPackage.")
    if destination.exists():
        if not force:
            raise GeoSplitterError(f"Output already exists: {destination}. Use --force to replace it.")
        # Fail early and clearly: unlink() below cannot remove a directory and
        # would otherwise surface as a raw OSError after the source was read.
        if not destination.is_file():
            raise GeoSplitterError(f"Output exists and is not a file: {destination}")
    # Imported lazily so that splitting works without the optional dependency.
    try:
        import geopandas as gpd
    except ImportError as error:
        raise GeoSplitterError("GeoPackage support is not installed. Run: pip install 'geosplit[gpkg]'") from error

    if source_is_gpkg:
        layers = gpd.list_layers(source)["name"].tolist()
        if not layers:
            raise GeoSplitterError("GeoPackage contains no layers.")
        if layer is None and len(layers) != 1:
            raise GeoSplitterError(f"GeoPackage has multiple layers; choose one with --layer: {', '.join(layers)}")
        if layer is not None and layer not in layers:
            raise GeoSplitterError(f"Layer {layer!r} not found. Available layers: {', '.join(layers)}")
        frame, driver, kwargs = gpd.read_file(source, layer=layer or layers[0]), "GeoJSON", {}
    else:
        frame, driver, kwargs = gpd.read_file(source), "GPKG", {"layer": output_layer or destination.stem}

    destination.parent.mkdir(parents=True, exist_ok=True)
    # The source has been read successfully by now, so the old output can go.
    if destination.exists():
        destination.unlink()
    frame.to_file(destination, driver=driver, index=False, **kwargs)
    return destination
@@ -0,0 +1,117 @@
1
+ """Core GeoJSON splitting logic."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
JsonObject = dict[str, Any]

# A non-negative decimal amount, optional whitespace, then an optional unit.
# Matching is case-insensitive; a missing unit means bytes.
_SIZE = re.compile(r"^(\d+(?:\.\d+)?)\s*(B|KB|KIB|MB|MIB|GB|GIB)?$", re.I)

# Bytes per unit, keyed by the upper-cased unit name.
_UNITS = {
    "B": 1,
    "KB": 1_000,
    "KIB": 1_024,
    "MB": 1_000_000,
    "MIB": 1_048_576,
    "GB": 1_000_000_000,
    "GIB": 1_073_741_824,
}


class GeoSplitterError(ValueError):
    """Raised when an input or requested operation is invalid."""


def parse_size(value: str) -> int:
    """Convert values such as ``500KB`` or ``2.5MiB`` to a whole number of bytes.

    Surrounding whitespace is ignored, the unit is case-insensitive, and a
    bare number is read as bytes.  A fractional byte count is truncated.

    Raises:
        GeoSplitterError: If ``value`` is not a recognised size or resolves
            to less than one byte.
    """
    # Local import keeps this function self-contained.
    from fractions import Fraction

    if not (match := _SIZE.fullmatch(value.strip())):
        raise GeoSplitterError(f"Invalid size {value!r}; try 500KB, 2MB, or 1GiB.")
    amount, unit = match.groups()
    # Exact rational arithmetic: going through float would corrupt integers
    # above 2**53, can truncate a fractional product one byte low, and
    # overflows on very long digit strings.
    size = int(Fraction(amount) * _UNITS[unit.upper() if unit else "B"])
    if size < 1:
        raise GeoSplitterError("Size must be at least 1 byte.")
    return size
36
+
37
+
38
+ def _compact(value: Any) -> bytes:
39
+ return json.dumps(value, ensure_ascii=False, separators=(",", ":")).encode()
40
+
41
+
42
def _load(path: Path) -> JsonObject:
    """Read ``path`` and return it as a validated FeatureCollection object.

    The file is decoded as UTF-8, tolerating a leading byte-order mark.

    Raises:
        GeoSplitterError: If the file cannot be read or decoded, is not
            valid JSON, is not a ``FeatureCollection`` object, or has a
            ``features`` member that is not an array.
    """
    try:
        raw = path.read_text(encoding="utf-8-sig")
        document = json.loads(raw)
    except json.JSONDecodeError as error:
        raise GeoSplitterError(f"Invalid JSON in {path}: {error}") from error
    except (OSError, UnicodeError) as error:
        raise GeoSplitterError(f"Cannot read {path}: {error}") from error

    is_collection = isinstance(document, dict) and document.get("type") == "FeatureCollection"
    if not is_collection:
        raise GeoSplitterError("Input must be a GeoJSON FeatureCollection.")

    features = document.get("features")
    if not isinstance(features, list):
        raise GeoSplitterError("GeoJSON 'features' must be an array.")
    return document
54
+
55
+
56
def _chunks_by_size(metadata: JsonObject, features: list[Any], limit: int) -> list[list[Any]]:
    """Group ``features`` greedily so each serialised file fits in ``limit`` bytes.

    A file's size is that of the compact metadata with an empty ``features``
    array, plus a trailing newline, plus every feature and the commas
    between them.  At least one (possibly empty) chunk is always returned.

    Raises:
        GeoSplitterError: If the metadata alone, or any single feature
            together with the metadata, exceeds ``limit``.
    """
    # Cost of a file before any feature is added; +1 for the final newline.
    envelope = len(_compact({**metadata, "features": []})) + 1
    if envelope > limit:
        raise GeoSplitterError(f"The GeoJSON metadata alone exceeds the {limit}-byte limit.")

    current: list[Any] = []
    chunks: list[list[Any]] = [current]
    used = envelope
    for index, feature in enumerate(features, 1):
        feature_size = len(_compact(feature))
        if envelope + feature_size > limit:
            raise GeoSplitterError(f"Feature {index} cannot fit within the {limit}-byte limit.")
        # Every feature after the first in a chunk costs one separating comma.
        separator = 1 if current else 0
        if used + feature_size + separator > limit:
            # Start a new chunk; its first feature needs no comma.
            current = []
            chunks.append(current)
            used = envelope
            separator = 0
        current.append(feature)
        used += feature_size + separator
    return chunks
76
+
77
+
78
def split_geojson(
    source: str | Path,
    output_dir: str | Path,
    *,
    features_per_file: int | None = None,
    max_bytes: int | None = None,
    prefix: str | None = None,
    force: bool = False,
) -> list[Path]:
    """Split a FeatureCollection and return the output paths.

    Exactly one of ``features_per_file`` and ``max_bytes`` must be supplied.
    Size limits include the complete compact GeoJSON document and final newline.

    Args:
        source: GeoJSON FeatureCollection to split.
        output_dir: Directory for the parts; created if missing.
        features_per_file: Maximum number of features in each part.
        max_bytes: Maximum on-disk size of each part, in bytes.
        prefix: File-name prefix for the parts; defaults to the source's
            stem.  Must be a plain name without directory components.
        force: Overwrite parts that already exist.

    Returns:
        The written paths, named ``<prefix>_<NNN>.geojson`` and numbered
        from 1.  An empty collection still yields one file.

    Raises:
        GeoSplitterError: If the split mode or ``prefix`` is invalid, the
            source is not a readable FeatureCollection, the size limit
            cannot be met, or an output exists and ``force`` is false.
    """
    if (features_per_file is None) == (max_bytes is None):
        raise GeoSplitterError("Choose exactly one split mode: feature count or file size.")
    if features_per_file is not None and features_per_file < 1:
        raise GeoSplitterError("Features per file must be at least 1.")
    if max_bytes is not None and max_bytes < 1:
        raise GeoSplitterError("Maximum file size must be at least 1 byte.")
    # The prefix is joined into the output path, so a value with directory
    # components (e.g. "../x" or "sub/x") would write outside output_dir or
    # fail with a raw FileNotFoundError.
    if prefix and Path(prefix).name != prefix:
        raise GeoSplitterError("Prefix must be a plain file name without directory separators.")

    source, output_dir = Path(source), Path(output_dir)
    document = _load(source)
    features = document.pop("features")
    # A collection-wide bbox becomes incorrect after splitting, so omit it.
    document.pop("bbox", None)
    if features_per_file is not None:
        # ``or [[]]`` keeps one (empty) part for an empty collection.
        chunks = [features[i : i + features_per_file] for i in range(0, len(features), features_per_file)] or [[]]
    else:
        chunks = _chunks_by_size(document, features, max_bytes)

    # Zero-pad to at least three digits, more when there are 1000+ parts.
    stem, width = prefix or source.stem, max(3, len(str(len(chunks))))
    paths = [output_dir / f"{stem}_{i:0{width}d}.geojson" for i in range(1, len(chunks) + 1)]
    # Check every target before writing anything so a refusal leaves no partial output.
    if not force and (existing := next((path for path in paths if path.exists()), None)):
        raise GeoSplitterError(f"Output already exists: {existing}. Use --force to replace it.")

    output_dir.mkdir(parents=True, exist_ok=True)
    for path, chunk in zip(paths, chunks, strict=True):
        path.write_bytes(_compact({**document, "features": chunk}) + b"\n")
    return paths
@@ -0,0 +1,12 @@
1
+ {
2
+ "type": "FeatureCollection",
3
+ "name": "sample",
4
+ "bbox": [0, 0, 4, 4],
5
+ "features": [
6
+ {"type": "Feature", "properties": {"id": 0}, "geometry": {"type": "Point", "coordinates": [0, 0]}},
7
+ {"type": "Feature", "properties": {"id": 1}, "geometry": {"type": "Point", "coordinates": [1, 1]}},
8
+ {"type": "Feature", "properties": {"id": 2}, "geometry": {"type": "Point", "coordinates": [2, 2]}},
9
+ {"type": "Feature", "properties": {"id": 3}, "geometry": {"type": "Point", "coordinates": [3, 3]}},
10
+ {"type": "Feature", "properties": {"id": 4}, "geometry": {"type": "Point", "coordinates": [4, 4]}}
11
+ ]
12
+ }
@@ -0,0 +1,17 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ from geo_splitter.cli import help_text, main
5
+
6
+
7
def test_split_command(tmp_path: Path, capsys) -> None:
    """The ``split`` sub-command succeeds and reports the file it created."""
    empty_collection = {"type": "FeatureCollection", "features": []}
    source = tmp_path / "data.geojson"
    source.write_text(json.dumps(empty_collection), encoding="utf-8")
    destination = tmp_path / "out"

    exit_code = main(["split", str(source), str(destination), "--features", "10"])

    assert exit_code == 0
    assert "Created 1 file" in capsys.readouterr().out
12
+
13
+
14
def test_help_command(capsys) -> None:
    """``help split`` prints the split options; general help lists ``convert``."""
    status = main(["help", "split"])
    printed = capsys.readouterr().out

    assert status == 0
    assert "--features" in printed
    assert "convert" in help_text()
@@ -0,0 +1,20 @@
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+
5
+ from geo_splitter.convert import convert_file
6
+ from geo_splitter.core import GeoSplitterError
7
+
8
+
9
def test_requires_geojson_and_geopackage(tmp_path: Path) -> None:
    """Converting between two GeoJSON-style suffixes is rejected."""
    geojson_in = tmp_path / "input.geojson"
    geojson_in.write_text('{"type":"FeatureCollection","features":[]}', encoding="utf-8")
    json_out = tmp_path / "output.json"

    with pytest.raises(GeoSplitterError, match="requires one"):
        convert_file(geojson_in, json_out)
14
+
15
+
16
def test_rejects_layer_for_geojson_input(tmp_path: Path) -> None:
    """``layer`` is refused when the source is GeoJSON rather than GeoPackage."""
    geojson_in = tmp_path / "input.geojson"
    geojson_in.write_text('{"type":"FeatureCollection","features":[]}', encoding="utf-8")
    gpkg_out = tmp_path / "output.gpkg"

    with pytest.raises(GeoSplitterError, match="only applies"):
        convert_file(geojson_in, gpkg_out, layer="places")
@@ -0,0 +1,55 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ import pytest
5
+
6
+ from geo_splitter.core import GeoSplitterError, parse_size, split_geojson
7
+
8
+
9
@pytest.fixture
def collection(tmp_path: Path) -> Path:
    """Write a five-point FeatureCollection with ``name`` and ``bbox``; return its path."""
    points = []
    for i in range(5):
        geometry = {"type": "Point", "coordinates": [i, i]}
        points.append({"type": "Feature", "properties": {"id": i}, "geometry": geometry})

    document = {
        "type": "FeatureCollection",
        "name": "places",
        "bbox": [0, 0, 4, 4],
        "features": points,
    }
    target = tmp_path / "places.geojson"
    target.write_text(json.dumps(document), encoding="utf-8")
    return target
27
+
28
+
29
def test_parse_size() -> None:
    """Decimal (MB) and binary (MiB) units resolve to the expected byte counts."""
    expected = {"2 MB": 2_000_000, "2MiB": 2_097_152}
    for text, size in expected.items():
        assert parse_size(text) == size
32
+
33
+
34
def test_split_by_feature_count(collection: Path, tmp_path: Path) -> None:
    """Five features at two per file give parts of 2, 2 and 1, none with a bbox."""
    parts = split_geojson(collection, tmp_path / "out", features_per_file=2)
    documents = [json.loads(part.read_text()) for part in parts]

    assert [len(document["features"]) for document in documents] == [2, 2, 1]
    assert not any("bbox" in document for document in documents)
38
+
39
+
40
def test_split_by_exact_size(collection: Path, tmp_path: Path) -> None:
    """A size limit yields several parts that keep every feature and respect the limit."""
    limit = 260
    parts = split_geojson(collection, tmp_path / "out", max_bytes=limit)

    feature_total = 0
    for part in parts:
        feature_total += len(json.loads(part.read_text())["features"])

    assert len(parts) > 1
    assert feature_total == 5
    assert not any(part.stat().st_size > limit for part in parts)
45
+
46
+
47
def test_rejects_oversized_feature(collection: Path, tmp_path: Path) -> None:
    """A limit too small for the first feature is reported against that feature."""
    destination = tmp_path / "out"
    with pytest.raises(GeoSplitterError, match="Feature 1"):
        split_geojson(collection, destination, max_bytes=130)
50
+
51
+
52
def test_does_not_overwrite_by_default(collection: Path, tmp_path: Path) -> None:
    """Repeating a split into the same directory without ``force`` is refused."""
    destination = tmp_path / "out"
    split_geojson(collection, destination, features_per_file=5)

    with pytest.raises(GeoSplitterError, match="already exists"):
        split_geojson(collection, destination, features_per_file=5)