makegis 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- makegis/__init__.py +1 -0
- makegis/cli.py +235 -0
- makegis/config/__init__.py +2 -0
- makegis/config/makegis.py +253 -0
- makegis/config/root.py +69 -0
- makegis/config/utils.py +24 -0
- makegis/core/__init__.py +0 -0
- makegis/core/commands.py +7 -0
- makegis/core/load.py +77 -0
- makegis/core/transforms.py +7 -0
- makegis/dag/__init__.py +1 -0
- makegis/dag/builder.py +308 -0
- makegis/dag/dag.py +297 -0
- makegis/dag/sql.py +279 -0
- makegis/errors.py +6 -0
- makegis/journal.py +63 -0
- makegis/targets/__init__.py +1 -0
- makegis/targets/postgis.py +655 -0
- makegis/targets/target.py +47 -0
- makegis/utils.py +16 -0
- makegis-0.1.0.dist-info/METADATA +284 -0
- makegis-0.1.0.dist-info/RECORD +25 -0
- makegis-0.1.0.dist-info/WHEEL +4 -0
- makegis-0.1.0.dist-info/entry_points.txt +2 -0
- makegis-0.1.0.dist-info/licenses/LICENSE +21 -0
makegis/__init__.py
ADDED
@@ -0,0 +1 @@
__version__ = "0.1.0"
makegis/cli.py
ADDED
@@ -0,0 +1,235 @@
import argparse
import logging
from pathlib import Path
import sys

import dotenv
from rich.console import Console
from rich.logging import RichHandler

from . import __version__
from .config import RootConfig
from .dag.builder import Builder
from .targets import Target
from . import errors

console = Console()

log = logging.getLogger("makegis")


def cli():

    # Handle the general -v/--verbose and --debug options outside of argparse.
    args = sys.argv[1:]
    verbose_flags = ["-v", "--verbose"]
    debug_flags = ["--debug"]
    debug = any([flag in args for flag in debug_flags])
    verbose = not debug and any([flag in args for flag in verbose_flags])
    args = [a for a in args if a not in verbose_flags + debug_flags]

    # Configure logger
    level = logging.WARN
    format = "%(message)s"
    datefmt = "[%X]"
    show_time = False
    show_path = False
    if debug:
        level = logging.DEBUG
        show_path = True
    elif verbose:
        level = logging.INFO
    logging.basicConfig(
        level=level,
        format=format,
        datefmt=datefmt,
        handlers=[
            RichHandler(
                console=console,
                rich_tracebacks=True,
                show_path=show_path,
                show_time=show_time,
            )
        ],
    )
    log.info(f"makegis {__version__}")

    parser = argparse.ArgumentParser(prog="mkgs")

    # The --verbose and --debug options are parsed outside of argparse but we still declare
    # them here so they show up as general options in the generated help.
    parser.add_argument("-v", "--verbose", action="store_true", help="verbose messages")
    parser.add_argument("--debug", action="store_true", help="debug messages")

    subparsers = parser.add_subparsers(dest="command", help="commands")

    def add_target_argument(parser):
        parser.add_argument(
            "-t",
            "--target",
            action="store",
            type=str,
            default=None,
            help="db instance to target",
        )

    init_parser = subparsers.add_parser("init", help="create schemas and journal table")
    add_target_argument(init_parser)
    init_parser.set_defaults(func=init)

    list_parser = subparsers.add_parser("ls", help="list nodes")
    list_parser.add_argument("pattern", type=str, help="DAG selection pattern")
    list_parser.set_defaults(func=show)

    outdated_parser = subparsers.add_parser("outdated", help="report outdated nodes")
    add_target_argument(outdated_parser)
    outdated_parser.set_defaults(func=outdated)

    run_parser = subparsers.add_parser("run", help="run nodes")
    run_parser.add_argument("pattern", type=str, help="DAG selection pattern")
    add_target_argument(run_parser)
    run_parser.add_argument(
        "-d",
        "--dry-run",
        action="store_true",
        help="process nodes without actually running them",
    )
    run_parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        help="also run fresh nodes",
    )
    run_parser.set_defaults(func=run)

    # Load .env
    dotenv.load_dotenv(".env")

    # Parse preprocessed args
    args = parser.parse_args(args)

    # Inject verbose or debug option
    args.verbose = verbose
    args.debug = debug

    # Call handler
    if hasattr(args, "func"):
        args.func(args)
    else:
        parser.print_help()


def init(args):
    cfg = load_root_config()
    target_id = args.target or cfg.defaults.target
    assert target_id is not None
    log.info(f"using target {target_id}")
    target = Target(cfg.targets[target_id])

    dag = Builder(cfg).build()
    target.ensure_schemas(dag.list_schemas())
    target.init_journal()


def outdated(args):
    cfg = load_root_config()
    target_id = args.target or cfg.defaults.target
    assert target_id is not None
    log.info(f"using target {target_id}")
    target = Target(cfg.targets[target_id])

    dag = Builder(cfg).build()
    node_ids = dag.get_outdated(target)
    if not node_ids:
        print("All nodes are up to date.")

    for node_id in node_ids:
        print(dag.render_node(node_id))


def run(args):
    cfg = load_root_config()

    target_id = args.target or cfg.defaults.target
    assert target_id is not None
    log.info(f"using target {target_id}")
    target = Target(cfg.targets[target_id])

    dry_run = args.dry_run == True
    if args.dry_run:
        log.info("dry run - target will not be modified")

    dag = Builder(cfg).build()
    node_ids = dag.select_nodes(args.pattern)
    if not node_ids:
        print("No nodes matching selection pattern.")
        return

    if not args.force:
        outdated = dag.get_outdated(target, limit_to=node_ids)
        node_ids = outdated

    if not node_ids:
        print("All selected nodes are up to date. Use --force to run anyway.")
        return

    n = len(node_ids)
    with console.status("") as status:
        for inode, node_id in enumerate(node_ids):
            status.update(f"Running node {inode + 1}/{n}: {node_id}")
            if args.dry_run:
                log.info(f"dry running node '{node_id}'")
                continue
            log.info(f"running node '{node_id}'")
            try:
                dag.run_node(node_id, target)
            except errors.FailedNodeRun as e:
                log.error(e.message)
                return
            except Exception:
                log.exception(f"node '{node_id}' run failed!")
                return
    if args.dry_run:
        print(f"Dry run done. Would've run {n} node(s).")
    else:
        print(f"Done. Ran {n} node(s).")


def show(args):
    cfg = load_root_config()
    dag = Builder(cfg).build()
    node_ids = dag.select_nodes(args.pattern)
    if not node_ids:
        print("No matching nodes.")
        return

    for node_id in node_ids:
        print(dag.render_node(node_id))


def load_root_config():
    cfg_path = find_root_config()

    # Load .env in same dir as makegis.root.yml
    cfg_dir = cfg_path.parent
    dotenv.load_dotenv(cfg_dir / ".env")

    return RootConfig.from_file(cfg_path)


def find_root_config(cwd: Path = Path(".").resolve()):
    """
    Returns path to first makegis.root.yml file found in current dir or parents.
    """
    path = cwd / "makegis.root.yml"
    if path.exists():
        return path
    parent = cwd.parent
    if parent == cwd:
        log.error("Found no makegis root file in current directory or its parents.")
        exit(1)
    return find_root_config(cwd=parent)


if __name__ == "__main__":
    cli()
makegis/config/makegis.py
ADDED
@@ -0,0 +1,253 @@
import logging
from pathlib import Path
from typing import Dict
from typing import List
from typing import Optional
from typing import Literal

from pydantic import BaseModel
from pydantic import model_validator
from pydantic import ValidationError
import yaml

try:
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader

from .utils import expand_dict_strings

log = logging.getLogger("makegis")


class LoadDefaults(BaseModel):
    epsg: int | str | None = None
    geom_index: bool | None = None
    geom_column: str | None = None
    raster_index: bool | None = None
    raster_column: str | None = None
    raster_constraints: bool | None = None
    tile_size: int | None = None


class BaseSourceBlock(BaseModel):
    epsg: int | str | None = None
    # Name of column to use as primary key
    pk: str | None = None


class VectorSourceBlock(BaseModel):
    geom_index: bool | None = None
    geom_column: str | None = None


class CSVSourceBlock(BaseSourceBlock, VectorSourceBlock):
    type: Literal["csv"] = "csv"
    path: Path
    # TODO:
    # x_column: str | None = None
    # y_column: str | None = None
    # keep_xy_columns: bool = False


class EsriSourceBlock(BaseSourceBlock, VectorSourceBlock):
    type: Literal["esri"] = "esri"
    url: str
    f: Literal["pjson", "pgeojson"] = "pjson"


class DuckDBSourceBlock(BaseSourceBlock, VectorSourceBlock):
    type: Literal["duckdb"] = "duckdb"
    path: Path
    table: Optional[str] = None


class FileSourceBlock(BaseSourceBlock, VectorSourceBlock):
    type: Literal["file"] = "file"
    path: Path
    layer: str | None = None


class RasterSourceBlock(BaseSourceBlock):
    type: Literal["raster"] = "raster"
    path: Path
    raster_index: bool | None = None
    raster_column: str | None = None
    raster_constraints: bool | None = None
    tile_size: int | None = None


class WFSSourceBlock(BaseSourceBlock, VectorSourceBlock):
    type: Literal["wfs"] = "wfs"
    url: str


type SourceBlock = CSVSourceBlock | EsriSourceBlock | DuckDBSourceBlock | FileSourceBlock | RasterSourceBlock | WFSSourceBlock

SOURCE_KEYS = set(["csv", "esri", "duckdb", "file", "raster", "wfs"])


class LoadItem(BaseModel):
    name: str
    src: SourceBlock
    meta: Dict[str, str | int | float | None]

    @classmethod
    def from_kv(cls, k: str, v: Dict):
        name = k
        meta = v.pop("meta", {})
        matched_source_keys = [sk for sk in SOURCE_KEYS if sk in v]
        if len(matched_source_keys) == 0:
            raise RuntimeError(
                f"Missing source key in load block item, expecting one of {SOURCE_KEYS}"
            )
        elif len(matched_source_keys) > 1:
            raise RuntimeError(
                f"Too many source keys in load block item, expecting exactly one of {SOURCE_KEYS}"
            )
        if "csv" in matched_source_keys:
            path = v.pop("csv")
            src = CSVSourceBlock(path=path, **v)
        elif "esri" in matched_source_keys:
            url = v.pop("esri")
            src = EsriSourceBlock(url=url, **v)
        elif "duckdb" in matched_source_keys:
            path = v.pop("duckdb")
            src = DuckDBSourceBlock(path=path, **v)
        elif "file" in matched_source_keys:
            path = v.pop("file")
            src = FileSourceBlock(path=path, **v)
        elif "raster" in matched_source_keys:
            path = v.pop("raster")
            src = RasterSourceBlock(path=path, **v)
        elif "wfs" in matched_source_keys:
            url = v.pop("wfs")
            src = WFSSourceBlock(url=url, **v)
        else:
            raise NotImplementedError("Unhandled source key in load block item")

        return LoadItem(name=name, src=src, meta=meta)


class SQLTransform(BaseModel):
    path: Path


class LoadBlock(BaseModel):
    defaults: LoadDefaults
    items: List[LoadItem]

    @classmethod
    def from_dict(cls, d: Dict):
        defaults = LoadDefaults(**d.pop("defaults", {}))
        items = [LoadItem.from_kv(k, v) for k, v in d.items()]
        return LoadBlock(defaults=defaults, items=items)


class TransformBlock(BaseModel):
    transforms: List[SQLTransform]

    @classmethod
    def from_sequence(cls, s: List):
        transforms = [SQLTransform(path=p) for p in s]
        return TransformBlock(transforms=transforms)


class DatabaseItem(BaseModel):
    type: Literal["table", "function"]
    name: str

    @classmethod
    def from_dict(cls, d: Dict):
        assert (
            len(d) == 1
        ), "each item in a 'creates' or 'deps' block must have exactly 1 key e.g. - table: name"
        k, v = next(iter(d.items()))
        return DatabaseItem(type=k, name=v)


class RunTask(BaseModel):
    cmd: str
    creates: List[DatabaseItem]

    @classmethod
    def from_dict(cls, d: Dict):
        creates = [DatabaseItem.from_dict(item) for item in d.pop("creates", [])]
        return RunTask(creates=creates, **d)


class DoBlock(BaseModel):
    """The 'do' key in a 'node' block"""

    load: Optional[LoadBlock] = None
    run: Optional[List[RunTask]] = None

    @model_validator(mode="after")
    def at_least_one(self):
        if self.load is None and self.run is None:
            raise ValidationError(
                "A node's do block must have a 'load' and/or a 'run' key"
            )
        return self

    @classmethod
    def from_dict(cls, d: Dict):
        load, tasks = None, None
        if "load" in d:
            load = LoadBlock.from_dict(d["load"])
        if "run" in d:
            tasks = [RunTask.from_dict(t) for t in d["run"]]
        return DoBlock(load=load, run=tasks)


class NodeBlock(BaseModel):
    """Top-level 'node' block in a makegis.yml file"""

    deps: List[DatabaseItem] | None = []
    prep: List[str] | None = []
    do: DoBlock
    post: List[str] | None = []
    cleanup: List[str] | None = []

    @classmethod
    def from_dict(cls, d: Dict):
        deps = [DatabaseItem.from_dict(d) for d in d.pop("deps", [])]
        do = DoBlock.from_dict(d.pop("do"))
        return NodeBlock(deps=deps, do=do, **d)


class MakeGISConfig(BaseModel):
    block: LoadBlock | TransformBlock | NodeBlock
    type: Literal["load", "transform", "node"]

    @classmethod
    def from_file(cls, path: Path):
        log.debug(f"reading {path}")
        with open(path) as f:
            d = yaml.load(f, Loader)
        return cls.from_dict(d)

    @classmethod
    def from_yaml(cls, s: str):
        d = yaml.load(s, Loader)
        return cls.from_dict(d)

    @classmethod
    def from_dict(cls, d: Dict):
        expand_dict_strings(d)
        assert len(d) == 1
        key = list(d)[0]
        if key == "load":
            typ = "load"
            block = LoadBlock.from_dict(d["load"])
        elif key == "transform":
            typ = "transform"
            block = TransformBlock.from_sequence(d["transform"])
        elif key == "node":
            typ = "node"
            block = NodeBlock.from_dict(d["node"])
        else:
            raise RuntimeError(
                f"Unknown makegis file key '{key}', should be one of load, transform or node"
            )
        return MakeGISConfig(type=typ, block=block)
makegis/config/root.py
ADDED
@@ -0,0 +1,69 @@
import logging
from pathlib import Path
from typing import Dict

from pydantic import BaseModel
import yaml

try:
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader

from .makegis import LoadDefaults
from .utils import expand_dict_strings

log = logging.getLogger("makegis")


class RootDefaults(BaseModel):
    load: LoadDefaults = LoadDefaults()
    target: str | None = None


class TargetConfig(BaseModel):
    """Describes a target database"""

    # optional description
    description: str | None = None
    host: str = "localhost"
    port: int = 5432
    user: str = "postgres"
    # database name
    db: str

    def conn_uri(self) -> str:
        s = self
        return f"postgresql://{s.user}@{s.host}:{s.port}/{s.db}"

    def conn_str(self) -> str:
        s = self
        return f"host={s.host} port={s.port} dbname={s.db} user={s.user}"


class RootConfig(BaseModel):
    src_dir: Path
    defaults: RootDefaults
    targets: Dict[str, TargetConfig]

    @classmethod
    def from_file(cls, path: Path):
        log.debug(f"reading {path}")
        with open(path) as f:
            d = yaml.load(f, Loader)
        rc = cls.from_dict(d)
        # Resolve path of src dir
        if not rc.src_dir.is_absolute():
            rc.src_dir = (path.parent / rc.src_dir).resolve()
        return rc

    @classmethod
    def from_yaml(cls, s: str):
        d = yaml.load(s, Loader)
        return cls.from_dict(d)

    @classmethod
    def from_dict(cls, d: Dict):
        expand_dict_strings(d)
        defaults = RootDefaults(**d.pop("defaults", {}))
        return RootConfig(defaults=defaults, **d)
makegis/config/utils.py
ADDED
@@ -0,0 +1,24 @@
import os
from typing import Dict
import re


def expand_dict_strings(raw_dict: Dict):
    """
    Replaces {{variables}} found in strings in place.
    """
    pattern = re.compile(r"\{\{\s*(\w+)\s*\}\}")

    def expand_string_values(d: dict):
        for k, v in d.items():
            if isinstance(v, dict):
                expand_string_values(v)
            if isinstance(v, str):
                vars = re.findall(pattern, v)
                for var in vars:
                    if var not in os.environ:
                        raise RuntimeError(f"unmatched env var {var}")
                    v = re.sub(rf"\{{\{{\s*{var}\s*\}}\}}", os.environ[var], v)
                d[k] = v

    expand_string_values(raw_dict)
makegis/core/__init__.py
ADDED
File without changes
makegis/core/commands.py
ADDED
makegis/core/load.py
ADDED
@@ -0,0 +1,77 @@
from pathlib import Path
from dataclasses import dataclass
from typing import Literal


@dataclass(frozen=True)
class BaseSource:
    # Explicit srid of geometry in source dataset
    epsg: int | None
    # Name of column to use as primary key
    pk: str | None


@dataclass(frozen=True)
class CSVSource(BaseSource):
    path: Path


@dataclass(frozen=True)
class EsriSource(BaseSource):
    url: str
    f: Literal["pgeojson", "pjson"]


@dataclass(frozen=True)
class DuckDBSource(BaseSource):
    # Path to database file
    path: Path
    # Fully qualified name of table to import
    table: str


@dataclass(frozen=True)
class FileSource(BaseSource):
    path: Path
    # Optional layer name for file formats supporting it
    layer: str | None


@dataclass(frozen=True)
class RasterSource(BaseSource):
    path: Path


@dataclass(frozen=True)
class WFSSource(BaseSource):
    url: str


type Source = EsriSource | DuckDBSource | FileSource | WFSSource


@dataclass(frozen=True)
class Destination:
    schema: str
    table: str
    # Desired srid of geometry column in destination table
    epsg: int | None
    # Name to assign to geometry column. Keep original if None.
    geom_column: str | None
    # Whether to index geometries or not.
    # Defaults to False to be conservative.
    geom_index: bool
    # Name to assign to raster column.
    raster_column: str
    # Whether to index raster bounds
    raster_index: bool
    # Whether to set the standard set of raster constraints
    raster_constraints: bool
    # Raster tile size
    tile_size: int | None


@dataclass(frozen=True)
class LoadJob:
    src: Source
    dst: Destination
makegis/dag/__init__.py
ADDED
@@ -0,0 +1 @@
from .dag import DAG