makegis 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- makegis-0.1.0/.gitignore +7 -0
- makegis-0.1.0/LICENSE +21 -0
- makegis-0.1.0/PKG-INFO +284 -0
- makegis-0.1.0/README.md +266 -0
- makegis-0.1.0/examples/makegis.load.yml +27 -0
- makegis-0.1.0/examples/makegis.node.yml +22 -0
- makegis-0.1.0/examples/makegis.root1.yml +20 -0
- makegis-0.1.0/examples/makegis.root2.yml +16 -0
- makegis-0.1.0/examples/makegis.transform.yml +4 -0
- makegis-0.1.0/pyproject.toml +34 -0
- makegis-0.1.0/src/makegis/__init__.py +1 -0
- makegis-0.1.0/src/makegis/cli.py +235 -0
- makegis-0.1.0/src/makegis/config/__init__.py +2 -0
- makegis-0.1.0/src/makegis/config/makegis.py +253 -0
- makegis-0.1.0/src/makegis/config/root.py +69 -0
- makegis-0.1.0/src/makegis/config/utils.py +24 -0
- makegis-0.1.0/src/makegis/core/__init__.py +0 -0
- makegis-0.1.0/src/makegis/core/commands.py +7 -0
- makegis-0.1.0/src/makegis/core/load.py +77 -0
- makegis-0.1.0/src/makegis/core/transforms.py +7 -0
- makegis-0.1.0/src/makegis/dag/__init__.py +1 -0
- makegis-0.1.0/src/makegis/dag/builder.py +308 -0
- makegis-0.1.0/src/makegis/dag/dag.py +297 -0
- makegis-0.1.0/src/makegis/dag/sql.py +279 -0
- makegis-0.1.0/src/makegis/errors.py +6 -0
- makegis-0.1.0/src/makegis/journal.py +63 -0
- makegis-0.1.0/src/makegis/targets/__init__.py +1 -0
- makegis-0.1.0/src/makegis/targets/postgis.py +655 -0
- makegis-0.1.0/src/makegis/targets/target.py +47 -0
- makegis-0.1.0/src/makegis/utils.py +16 -0
- makegis-0.1.0/tests/test_dag_generation.py +121 -0
- makegis-0.1.0/tests/test_dag_selection.py +17 -0
- makegis-0.1.0/tests/test_makegis_examples.py +54 -0
- makegis-0.1.0/tests/test_makegis_root_examples.py +16 -0
- makegis-0.1.0/tests/test_makegis_variable_expansion.py +38 -0
- makegis-0.1.0/tests/test_sql_parsing.py +275 -0
makegis-0.1.0/.gitignore
ADDED
makegis-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Christophe Thiange
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
makegis-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: makegis
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A lightweight orchestrator for spatial databases
|
|
5
|
+
Project-URL: Repository, https://github.com/cthiange/makegis
|
|
6
|
+
Author: Christophe Thiange
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Python: >=3.12
|
|
10
|
+
Requires-Dist: duckdb
|
|
11
|
+
Requires-Dist: psycopg[binary]
|
|
12
|
+
Requires-Dist: pydantic
|
|
13
|
+
Requires-Dist: python-dotenv
|
|
14
|
+
Requires-Dist: pyyaml
|
|
15
|
+
Requires-Dist: rich
|
|
16
|
+
Requires-Dist: sqlglot
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# MakeGIS
|
|
21
|
+
|
|
22
|
+
A lightweight orchestrator for spatial databases.
|
|
23
|
+
|
|
24
|
+
MakeGIS uses YAML files to describe DAG nodes that let you achieve one of three things:
|
|
25
|
+
|
|
26
|
+
- load data to a target database
|
|
27
|
+
- transform data in a target database
|
|
28
|
+
- run custom commands
|
|
29
|
+
|
|
30
|
+
MakeGIS comes with a command line tool, `mkgs`, that operates on the resulting DAG.
|
|
31
|
+
|
|
32
|
+
Key features/choices:
|
|
33
|
+
|
|
34
|
+
- Local and standalone: `mkgs` runs locally, no other service involved
|
|
35
|
+
- Supports many data sources: describe where the data is, MakeGIS handles the rest
|
|
36
|
+
- Works for both ETL and ELT workflows
|
|
37
|
+
- Automatic dependency discovery for SQL transforms
|
|
38
|
+
- Flexible: run anything you want
|
|
39
|
+
- Reproducible pipelines
|
|
40
|
+
- Data lineage
|
|
41
|
+
|
|
42
|
+
> [!Note]
|
|
43
|
+
> MakeGIS is under active development, expect breaking changes.
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
`pip install makegis`
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
## Usage
|
|
52
|
+
|
|
53
|
+
MakeGIS provides the `mkgs` CLI utility to operate on the DAG.
|
|
54
|
+
|
|
55
|
+
```
|
|
56
|
+
usage: mkgs [-h] [-v] [--debug] {init,ls,outdated,run} ...
|
|
57
|
+
|
|
58
|
+
positional arguments:
|
|
59
|
+
{init,ls,outdated,run}
|
|
60
|
+
commands
|
|
61
|
+
init initialize journal on target
|
|
62
|
+
ls list nodes
|
|
63
|
+
outdated report outdated nodes
|
|
64
|
+
run run nodes
|
|
65
|
+
|
|
66
|
+
options:
|
|
67
|
+
-h, --help show this help message and exit
|
|
68
|
+
-v, --verbose verbose messages
|
|
69
|
+
--debug debug messages
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### mkgs init
|
|
73
|
+
|
|
74
|
+
The `init` command prepares a target database to work with MakeGIS. It creates a `_makegis_log` journal table that is used to track which nodes have been run, when and at what version.
|
|
75
|
+
It will also create any missing schemas expected by the DAG.
|
|
76
|
+
```
|
|
77
|
+
usage: mkgs init [-h] [-t TARGET]
|
|
78
|
+
|
|
79
|
+
options:
|
|
80
|
+
-h, --help show this help message and exit
|
|
81
|
+
-t, --target TARGET db instance to target
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### mkgs ls
|
|
85
|
+
|
|
86
|
+
The `ls` command shows DAG nodes matching a selection pattern. At this stage only `*` wildcards are supported but additional operators are planned (e.g. `+<pattern>` or `<pattern>+` for upstream/downstream propagation).
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
usage: mkgs ls [-h] pattern
|
|
90
|
+
|
|
91
|
+
positional arguments:
|
|
92
|
+
pattern DAG selection pattern
|
|
93
|
+
|
|
94
|
+
options:
|
|
95
|
+
-h, --help show this help message and exit
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### mkgs outdated
|
|
99
|
+
|
|
100
|
+
The `outdated` command reports outdated nodes for the given target.
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
usage: mkgs outdated [-h] [-t TARGET]
|
|
104
|
+
|
|
105
|
+
options:
|
|
106
|
+
-h, --help show this help message and exit
|
|
107
|
+
-t, --target TARGET db instance to target
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### mkgs run
|
|
111
|
+
|
|
112
|
+
The `run` command will run the nodes matching a selection pattern (same as `mkgs ls`). Nodes that are fresh (i.e. not outdated) will be skipped. This can be overridden by using the `--force` flag.
|
|
113
|
+
|
|
114
|
+
```
|
|
115
|
+
usage: mkgs run [-h] [-t TARGET] [-d] [-f] pattern
|
|
116
|
+
|
|
117
|
+
positional arguments:
|
|
118
|
+
pattern DAG selection pattern
|
|
119
|
+
|
|
120
|
+
options:
|
|
121
|
+
-h, --help show this help message and exit
|
|
122
|
+
-t, --target TARGET db instance to target
|
|
123
|
+
-d, --dry-run process nodes without actually running them
|
|
124
|
+
-f, --force also run fresh nodes
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Configuration
|
|
128
|
+
|
|
129
|
+
MakeGIS is configured through YAML configuration files and environment variables.
|
|
130
|
+
|
|
131
|
+
A `makegis.root.yml` file defines the root of a MakeGIS project, along with project-wide settings.
|
|
132
|
+
MakeGIS will traverse the directory tree and look for any `makegis.yml` files.
|
|
133
|
+
|
|
134
|
+
An example project may look like this:
|
|
135
|
+
|
|
136
|
+
```
|
|
137
|
+
project/
|
|
138
|
+
├─ src/
|
|
139
|
+
| ├─ raw/
|
|
140
|
+
| │ ├─ provider/
|
|
141
|
+
| │ │ └─ makegis.yml
|
|
142
|
+
| | └─ makegis.yml
|
|
143
|
+
| └─ core/
|
|
144
|
+
| ├─ transform_1.sql
|
|
145
|
+
| ├─ transform_2.sql
|
|
146
|
+
| ├─ transform_3.sql
|
|
147
|
+
| └─ makegis.yml
|
|
148
|
+
├─ .env
|
|
149
|
+
├─ .gitignore
|
|
150
|
+
└─ makegis.root.yml
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
> [!Note]
|
|
154
|
+
> **Environment variables** can be used by enclosing them in double curly brackets: `{{ EXAMPLE }}`. MakeGIS will consider any `.env` files in the project tree.
|
|
155
|
+
|
|
156
|
+
### makegis.root.yml
|
|
157
|
+
|
|
158
|
+
A `makegis.root.yml` file defines the root of a MakeGIS project along with project wide settings. Here's an annotated example:
|
|
159
|
+
|
|
160
|
+
```yaml
|
|
161
|
+
# The project's root directory.
|
|
162
|
+
src_dir: ./src
|
|
163
|
+
|
|
164
|
+
# Global defaults
|
|
165
|
+
defaults:
|
|
166
|
+
# Global defaults for `load` nodes
|
|
167
|
+
load:
|
|
168
|
+
epsg: 4326
|
|
169
|
+
geom_index: false
|
|
170
|
+
# Optional default target (to use when running mkgs without a `--target` option)
|
|
171
|
+
target: pg_dev
|
|
172
|
+
|
|
173
|
+
# Databases to target
|
|
174
|
+
targets:
|
|
175
|
+
pg_prod:
|
|
176
|
+
host: prod.example.com
|
|
177
|
+
port: 5432
|
|
178
|
+
user: mkgs
|
|
179
|
+
db: postgres
|
|
180
|
+
pg_dev:
|
|
181
|
+
host: 127.0.0.1
|
|
182
|
+
port: 5432
|
|
183
|
+
user: mkgs
|
|
184
|
+
db: postgres
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### makegis.yml
|
|
188
|
+
|
|
189
|
+
The path of a `makegis.yml` determines the database relations it manages, with top-level directories mapping to schemas.
|
|
190
|
+
|
|
191
|
+
A `makegis.yml` contains one of the following configuration blocks:
|
|
192
|
+
|
|
193
|
+
- load: defines sources to be loaded to a target
|
|
194
|
+
- transform: defines transforms to be applied to a target
|
|
195
|
+
- node: custom node to run bespoke commands
|
|
196
|
+
|
|
197
|
+
#### Load block
|
|
198
|
+
|
|
199
|
+
Maps tables to external data sources.
|
|
200
|
+
Each table becomes a DAG node and can be invoked individually
|
|
201
|
+
|
|
202
|
+
```yaml
|
|
203
|
+
load:
|
|
204
|
+
<table-name>:
|
|
205
|
+
<loader>: <loader-arg>
|
|
206
|
+
<loader-option>: <option-value>
|
|
207
|
+
<loader-option>: <option-value>
|
|
208
|
+
...
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
```yaml
|
|
212
|
+
load:
|
|
213
|
+
countries:
|
|
214
|
+
wfs: https://wfs.example.com/countries?token={{API_KEY}}
|
|
215
|
+
epsg: 4326
|
|
216
|
+
geom_index: true
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
TODO: Document loaders and their options.
|
|
220
|
+
|
|
221
|
+
##### EPSG option
|
|
222
|
+
|
|
223
|
+
###### Single value
|
|
224
|
+
|
|
225
|
+
`epsg: 4326`:
|
|
226
|
+
|
|
227
|
+
Target SRID.
|
|
228
|
+
If source declares a different EPSG, a transformation is applied.
|
|
229
|
+
If source has no SRID, no transformation is applied and the SRID is set to the given value.
|
|
230
|
+
|
|
231
|
+
###### Mapping
|
|
232
|
+
|
|
233
|
+
`epsg: 4326:2193`
|
|
234
|
+
|
|
235
|
+
Convert from source to dest. Warn or abort if source exposes a different SRID
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
#### Transform block
|
|
239
|
+
|
|
240
|
+
Declares sql scripts to be enrolled.
|
|
241
|
+
Each script becomes a DAG node.
|
|
242
|
+
Dependencies with other DAG nodes are resolved automatically.
|
|
243
|
+
The order in which sql scripts are listed does not matter.
|
|
244
|
+
There are no constraints on what is in the sql scripts, as long as MakeGIS is aware of all dependencies.
|
|
245
|
+
|
|
246
|
+
```yaml
|
|
247
|
+
transform:
|
|
248
|
+
- create_view_of_awesome_table.sql
|
|
249
|
+
- create_awesome_table.sql
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
#### Node block
|
|
253
|
+
|
|
254
|
+
A `node` block defines a custom DAG node, for when more flexibility is needed than offered by a `load` or `transform` block.
|
|
255
|
+
|
|
256
|
+
The price to pay for more flexibility is that dependencies need to be documented manually. This goes for upstream dependencies as well as objects created on the target db.
|
|
257
|
+
|
|
258
|
+
```yaml
|
|
259
|
+
node:
|
|
260
|
+
# List any relations needed by this node.
|
|
261
|
+
deps:
|
|
262
|
+
- schema.upstream_table
|
|
263
|
+
# Commands that do not change the target db but need to be run before we proceed.
|
|
264
|
+
# Commands are run sequentially, in listing order.
|
|
265
|
+
prep:
|
|
266
|
+
- before.py
|
|
267
|
+
# Main section
|
|
268
|
+
do:
|
|
269
|
+
# List of commands along with any objects they will create on the target.
|
|
270
|
+
run:
|
|
271
|
+
- cmd: script1.py
|
|
272
|
+
# Declare objects owned by this command
|
|
273
|
+
creates:
|
|
274
|
+
- table: new_table
|
|
275
|
+
- function: helper
|
|
276
|
+
# Can also use a load block here, but it won't spawn new DAG nodes
|
|
277
|
+
<load-block>
|
|
278
|
+
# Like prep, but runs after `do`, and only if `do` runs fine.
|
|
279
|
+
post:
|
|
280
|
+
- after.py
|
|
281
|
+
# Like post but always runs, even if something failed prior.
|
|
282
|
+
finally:
|
|
283
|
+
- teardown.py
|
|
284
|
+
```
|
makegis-0.1.0/README.md
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
|
|
2
|
+
# MakeGIS
|
|
3
|
+
|
|
4
|
+
A lightweight orchestrator for spatial databases.
|
|
5
|
+
|
|
6
|
+
MakeGIS uses YAML files to describe DAG nodes that let you achieve one of three things:
|
|
7
|
+
|
|
8
|
+
- load data to a target database
|
|
9
|
+
- transform data in a target database
|
|
10
|
+
- run custom commands
|
|
11
|
+
|
|
12
|
+
MakeGIS comes with a command line tool, `mkgs`, that operates on the resulting DAG.
|
|
13
|
+
|
|
14
|
+
Key features/choices:
|
|
15
|
+
|
|
16
|
+
- Local and standalone: `mkgs` runs locally, no other service involved
|
|
17
|
+
- Supports many data sources: describe where the data is, MakeGIS handles the rest
|
|
18
|
+
- Works for both ETL and ELT workflows
|
|
19
|
+
- Automatic dependency discovery for SQL transforms
|
|
20
|
+
- Flexible: run anything you want
|
|
21
|
+
- Reproducible pipelines
|
|
22
|
+
- Data lineage
|
|
23
|
+
|
|
24
|
+
> [!Note]
|
|
25
|
+
> MakeGIS is under active development, expect breaking changes.
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
`pip install makegis`
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
MakeGIS provides the `mkgs` CLI utility to operate on the DAG.
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
usage: mkgs [-h] [-v] [--debug] {init,ls,outdated,run} ...
|
|
39
|
+
|
|
40
|
+
positional arguments:
|
|
41
|
+
{init,ls,outdated,run}
|
|
42
|
+
commands
|
|
43
|
+
init initialize journal on target
|
|
44
|
+
ls list nodes
|
|
45
|
+
outdated report outdated nodes
|
|
46
|
+
run run nodes
|
|
47
|
+
|
|
48
|
+
options:
|
|
49
|
+
-h, --help show this help message and exit
|
|
50
|
+
-v, --verbose verbose messages
|
|
51
|
+
--debug debug messages
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### mkgs init
|
|
55
|
+
|
|
56
|
+
The `init` command prepares a target database to work with MakeGIS. It creates a `_makegis_log` journal table that is used to track which nodes have been run, when and at what version.
|
|
57
|
+
It will also create any missing schemas expected by the DAG.
|
|
58
|
+
```
|
|
59
|
+
usage: mkgs init [-h] [-t TARGET]
|
|
60
|
+
|
|
61
|
+
options:
|
|
62
|
+
-h, --help show this help message and exit
|
|
63
|
+
-t, --target TARGET db instance to target
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### mkgs ls
|
|
67
|
+
|
|
68
|
+
The `ls` command shows DAG nodes matching a selection pattern. At this stage only `*` wildcards are supported but additional operators are planned (e.g. `+<pattern>` or `<pattern>+` for upstream/downstream propagation).
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
usage: mkgs ls [-h] pattern
|
|
72
|
+
|
|
73
|
+
positional arguments:
|
|
74
|
+
pattern DAG selection pattern
|
|
75
|
+
|
|
76
|
+
options:
|
|
77
|
+
-h, --help show this help message and exit
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### mkgs outdated
|
|
81
|
+
|
|
82
|
+
The `outdated` command reports outdated nodes for the given target.
|
|
83
|
+
|
|
84
|
+
```
|
|
85
|
+
usage: mkgs outdated [-h] [-t TARGET]
|
|
86
|
+
|
|
87
|
+
options:
|
|
88
|
+
-h, --help show this help message and exit
|
|
89
|
+
-t, --target TARGET db instance to target
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### mkgs run
|
|
93
|
+
|
|
94
|
+
The `run` command will run the nodes matching a selection pattern (same as `mkgs ls`). Nodes that are fresh (i.e. not outdated) will be skipped. This can be overridden by using the `--force` flag.
|
|
95
|
+
|
|
96
|
+
```
|
|
97
|
+
usage: mkgs run [-h] [-t TARGET] [-d] [-f] pattern
|
|
98
|
+
|
|
99
|
+
positional arguments:
|
|
100
|
+
pattern DAG selection pattern
|
|
101
|
+
|
|
102
|
+
options:
|
|
103
|
+
-h, --help show this help message and exit
|
|
104
|
+
-t, --target TARGET db instance to target
|
|
105
|
+
-d, --dry-run process nodes without actually running them
|
|
106
|
+
-f, --force also run fresh nodes
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Configuration
|
|
110
|
+
|
|
111
|
+
MakeGIS is configured through YAML configuration files and environment variables.
|
|
112
|
+
|
|
113
|
+
A `makegis.root.yml` file defines the root of a MakeGIS project, along with project-wide settings.
|
|
114
|
+
MakeGIS will traverse the directory tree and look for any `makegis.yml` files.
|
|
115
|
+
|
|
116
|
+
An example project may look like this:
|
|
117
|
+
|
|
118
|
+
```
|
|
119
|
+
project/
|
|
120
|
+
├─ src/
|
|
121
|
+
| ├─ raw/
|
|
122
|
+
| │ ├─ provider/
|
|
123
|
+
| │ │ └─ makegis.yml
|
|
124
|
+
| | └─ makegis.yml
|
|
125
|
+
| └─ core/
|
|
126
|
+
| ├─ transform_1.sql
|
|
127
|
+
| ├─ transform_2.sql
|
|
128
|
+
| ├─ transform_3.sql
|
|
129
|
+
| └─ makegis.yml
|
|
130
|
+
├─ .env
|
|
131
|
+
├─ .gitignore
|
|
132
|
+
└─ makegis.root.yml
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
> [!Note]
|
|
136
|
+
> **Environment variables** can be used by enclosing them in double curly brackets: `{{ EXAMPLE }}`. MakeGIS will consider any `.env` files in the project tree.
|
|
137
|
+
|
|
138
|
+
### makegis.root.yml
|
|
139
|
+
|
|
140
|
+
A `makegis.root.yml` file defines the root of a MakeGIS project along with project wide settings. Here's an annotated example:
|
|
141
|
+
|
|
142
|
+
```yaml
|
|
143
|
+
# The project's root directory.
|
|
144
|
+
src_dir: ./src
|
|
145
|
+
|
|
146
|
+
# Global defaults
|
|
147
|
+
defaults:
|
|
148
|
+
# Global defaults for `load` nodes
|
|
149
|
+
load:
|
|
150
|
+
epsg: 4326
|
|
151
|
+
geom_index: false
|
|
152
|
+
# Optional default target (to use when running mkgs without a `--target` option)
|
|
153
|
+
target: pg_dev
|
|
154
|
+
|
|
155
|
+
# Databases to target
|
|
156
|
+
targets:
|
|
157
|
+
pg_prod:
|
|
158
|
+
host: prod.example.com
|
|
159
|
+
port: 5432
|
|
160
|
+
user: mkgs
|
|
161
|
+
db: postgres
|
|
162
|
+
pg_dev:
|
|
163
|
+
host: 127.0.0.1
|
|
164
|
+
port: 5432
|
|
165
|
+
user: mkgs
|
|
166
|
+
db: postgres
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### makegis.yml
|
|
170
|
+
|
|
171
|
+
The path of a `makegis.yml` determines the database relations it manages, with top-level directories mapping to schemas.
|
|
172
|
+
|
|
173
|
+
A `makegis.yml` contains one of the following configuration blocks:
|
|
174
|
+
|
|
175
|
+
- load: defines sources to be loaded to a target
|
|
176
|
+
- transform: defines transforms to be applied to a target
|
|
177
|
+
- node: custom node to run bespoke commands
|
|
178
|
+
|
|
179
|
+
#### Load block
|
|
180
|
+
|
|
181
|
+
Maps tables to external data sources.
|
|
182
|
+
Each table becomes a DAG node and can be invoked individually
|
|
183
|
+
|
|
184
|
+
```yaml
|
|
185
|
+
load:
|
|
186
|
+
<table-name>:
|
|
187
|
+
<loader>: <loader-arg>
|
|
188
|
+
<loader-option>: <option-value>
|
|
189
|
+
<loader-option>: <option-value>
|
|
190
|
+
...
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
```yaml
|
|
194
|
+
load:
|
|
195
|
+
countries:
|
|
196
|
+
wfs: https://wfs.example.com/countries?token={{API_KEY}}
|
|
197
|
+
epsg: 4326
|
|
198
|
+
geom_index: true
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
TODO: Document loaders and their options.
|
|
202
|
+
|
|
203
|
+
##### EPSG option
|
|
204
|
+
|
|
205
|
+
###### Single value
|
|
206
|
+
|
|
207
|
+
`epsg: 4326`:
|
|
208
|
+
|
|
209
|
+
Target SRID.
|
|
210
|
+
If source declares a different EPSG, a transformation is applied.
|
|
211
|
+
If source has no SRID, no transformation is applied and the SRID is set to the given value.
|
|
212
|
+
|
|
213
|
+
###### Mapping
|
|
214
|
+
|
|
215
|
+
`epsg: 4326:2193`
|
|
216
|
+
|
|
217
|
+
Convert from source to dest. Warn or abort if source exposes a different SRID
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
#### Transform block
|
|
221
|
+
|
|
222
|
+
Declares sql scripts to be enrolled.
|
|
223
|
+
Each script becomes a DAG node.
|
|
224
|
+
Dependencies with other DAG nodes are resolved automatically.
|
|
225
|
+
The order in which sql scripts are listed does not matter.
|
|
226
|
+
There are no constraints on what is in the sql scripts, as long as MakeGIS is aware of all dependencies.
|
|
227
|
+
|
|
228
|
+
```yaml
|
|
229
|
+
transform:
|
|
230
|
+
- create_view_of_awesome_table.sql
|
|
231
|
+
- create_awesome_table.sql
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
#### Node block
|
|
235
|
+
|
|
236
|
+
A `node` block defines a custom DAG node, for when more flexibility is needed than offered by a `load` or `transform` block.
|
|
237
|
+
|
|
238
|
+
The price to pay for more flexibility is that dependencies need to be documented manually. This goes for upstream dependencies as well as objects created on the target db.
|
|
239
|
+
|
|
240
|
+
```yaml
|
|
241
|
+
node:
|
|
242
|
+
# List any relations needed by this node.
|
|
243
|
+
deps:
|
|
244
|
+
- schema.upstream_table
|
|
245
|
+
# Commands that do not change the target db but need to be run before we proceed.
|
|
246
|
+
# Commands are run sequentially, in listing order.
|
|
247
|
+
prep:
|
|
248
|
+
- before.py
|
|
249
|
+
# Main section
|
|
250
|
+
do:
|
|
251
|
+
# List of commands along with any objects they will create on the target.
|
|
252
|
+
run:
|
|
253
|
+
- cmd: script1.py
|
|
254
|
+
# Declare objects owned by this command
|
|
255
|
+
creates:
|
|
256
|
+
- table: new_table
|
|
257
|
+
- function: helper
|
|
258
|
+
# Can also use a load block here, but it won't spawn new DAG nodes
|
|
259
|
+
<load-block>
|
|
260
|
+
# Like prep, but runs after `do`, and only if `do` runs fine.
|
|
261
|
+
post:
|
|
262
|
+
- after.py
|
|
263
|
+
# Like post but always runs, even if something failed prior.
|
|
264
|
+
finally:
|
|
265
|
+
- teardown.py
|
|
266
|
+
```
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
load:
|
|
2
|
+
defaults:
|
|
3
|
+
epsg: 4326
|
|
4
|
+
geom_index: false
|
|
5
|
+
geom_column: geom
|
|
6
|
+
|
|
7
|
+
table_1:
|
|
8
|
+
duckdb: ~/path/to/some.db
|
|
9
|
+
|
|
10
|
+
table_2:
|
|
11
|
+
duckdb: ~/path/to/some.db
|
|
12
|
+
table: not_table_2
|
|
13
|
+
index: true
|
|
14
|
+
|
|
15
|
+
table_3:
|
|
16
|
+
file: ~/path/to/file.shp
|
|
17
|
+
epsg: 2193
|
|
18
|
+
|
|
19
|
+
table_4:
|
|
20
|
+
wfs: https://wfs.example.com
|
|
21
|
+
|
|
22
|
+
table_5:
|
|
23
|
+
esri: https://services3.arcgis.com/example/dummy/rest/services/test/FeatureServer/0
|
|
24
|
+
|
|
25
|
+
table_raster_512:
|
|
26
|
+
raster: local.tiff
|
|
27
|
+
tile_size: 512
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
node:
|
|
2
|
+
deps:
|
|
3
|
+
- table: raw.upstream_table
|
|
4
|
+
prep:
|
|
5
|
+
- do_this_first.py
|
|
6
|
+
- then_this.py
|
|
7
|
+
do:
|
|
8
|
+
load:
|
|
9
|
+
defaults:
|
|
10
|
+
epsg: 4326
|
|
11
|
+
geom_index: false
|
|
12
|
+
geom_column: geom
|
|
13
|
+
table_1:
|
|
14
|
+
duckdb: ~/path/to/some.db
|
|
15
|
+
run:
|
|
16
|
+
- cmd: script_1.py
|
|
17
|
+
creates:
|
|
18
|
+
- table: table_created_by_script_1
|
|
19
|
+
- table: other_table_created_by_script_1
|
|
20
|
+
- cmd: script_2.py
|
|
21
|
+
creates:
|
|
22
|
+
- table: another_table
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
---
|
|
2
|
+
src_dir: ./src
|
|
3
|
+
|
|
4
|
+
defaults:
|
|
5
|
+
load:
|
|
6
|
+
epsg: 4326
|
|
7
|
+
geom_index: false
|
|
8
|
+
target: pg_dev
|
|
9
|
+
|
|
10
|
+
targets:
|
|
11
|
+
pg_prod:
|
|
12
|
+
host: prod.example.com
|
|
13
|
+
port: 5432
|
|
14
|
+
user: mkgs
|
|
15
|
+
db: postgres
|
|
16
|
+
pg_dev:
|
|
17
|
+
host: 127.0.0.1
|
|
18
|
+
port: 5432
|
|
19
|
+
user: mkgs
|
|
20
|
+
db: postgres
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "makegis"
|
|
7
|
+
description = "A lightweight orchestrator for spatial databases"
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
requires-python = ">=3.12"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
license-files = ["LICENSE"]
|
|
12
|
+
authors = [{ name = "Christophe Thiange" }]
|
|
13
|
+
dependencies = [
|
|
14
|
+
"duckdb",
|
|
15
|
+
"psycopg[binary]",
|
|
16
|
+
"pydantic",
|
|
17
|
+
"python-dotenv",
|
|
18
|
+
"pyyaml",
|
|
19
|
+
"rich",
|
|
20
|
+
"sqlglot",
|
|
21
|
+
]
|
|
22
|
+
dynamic = ["version"]
|
|
23
|
+
|
|
24
|
+
[project.scripts]
|
|
25
|
+
mkgs = "makegis.cli:cli"
|
|
26
|
+
|
|
27
|
+
[project.urls]
|
|
28
|
+
Repository = "https://github.com/cthiange/makegis"
|
|
29
|
+
|
|
30
|
+
[tool.hatch.version]
|
|
31
|
+
path = "src/makegis/__init__.py"
|
|
32
|
+
|
|
33
|
+
[tool.pytest.ini_options]
|
|
34
|
+
addopts = ["--import-mode=importlib"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.1.0"
|