conlink 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +5 -0
- package/Dockerfile +34 -0
- package/LICENSE +373 -0
- package/README.md +485 -0
- package/TODO +34 -0
- package/conlink +11 -0
- package/conlink-start.sh +172 -0
- package/examples/dot.js +36 -0
- package/examples/index.html +11 -0
- package/examples/net2dot.yaml +21 -0
- package/examples/test1-compose.yaml +60 -0
- package/examples/test2-compose.yaml +31 -0
- package/examples/test2-network.yaml +5 -0
- package/examples/test3-network.yaml +5 -0
- package/examples/test4-multiple/all-compose.yaml +5 -0
- package/examples/test4-multiple/base-compose.yaml +25 -0
- package/examples/test4-multiple/node1-compose.yaml +17 -0
- package/examples/test4-multiple/nodes2-compose.yaml +20 -0
- package/examples/test4-multiple/web-network.yaml +2 -0
- package/examples/test5-geneve-compose.yaml +31 -0
- package/examples/test6-cfn.yaml +184 -0
- package/examples/test7-compose.yaml +31 -0
- package/examples/test8-compose.yaml +35 -0
- package/host-build.yaml +1 -0
- package/inspect.json +210 -0
- package/link-add.sh +197 -0
- package/link-del.sh +60 -0
- package/net2dot +11 -0
- package/notes.txt +82 -0
- package/old/Dockerfile.bak +26 -0
- package/old/add-link.sh +82 -0
- package/old/conlink +12 -0
- package/old/conlink.cljs +131 -0
- package/old/dot_gitignore +1 -0
- package/old/examples/test2-compose.yaml +32 -0
- package/old/examples/test2-network.yaml +42 -0
- package/old/move-link.sh +108 -0
- package/old/net2dot.py +122 -0
- package/old/notes-old.txt +97 -0
- package/old/package.json +16 -0
- package/old/schema.yaml +138 -0
- package/old/schema.yaml.bak +76 -0
- package/old/test2b-compose.yaml +18 -0
- package/old/veth-link.sh +96 -0
- package/package.json +15 -0
- package/schema-ish.yaml +29 -0
- package/schema.yaml +71 -0
- package/shadow-cljs.edn +33 -0
- package/src/conlink/addrs.cljc +63 -0
- package/src/conlink/core.cljs +772 -0
- package/src/conlink/net2dot.cljs +158 -0
- package/src/conlink/util.cljs +140 -0
- package/tests/invalid-schema-1.yaml +6 -0
- package/tests/invalid-schema-2.yaml +6 -0
- package/tests/invalid-schema-3.yaml +17 -0
- package/tests/invalid-schema-4.yaml +14 -0
- package/tests/invalid-schema-5.yaml +12 -0
- package/tests/invalid-schema-6.yaml +12 -0
- package/tmp/conlink/.env +1 -0
package/README.md
ADDED
|
@@ -0,0 +1,485 @@
|
|
|
1
|
+
# conlink: Declarative Low-Level Networking for Containers
|
|
2
|
+
|
|
3
|
+
Create (layer 2 and layer 3) networking between containers using
|
|
4
|
+
a declarative configuration.
|
|
5
|
+
|
|
6
|
+
## Prerequisites
|
|
7
|
+
|
|
8
|
+
* docker-compose version 1.25.4 or later.
|
|
9
|
+
* `openvswitch` kernel module loaded on the host
|
|
10
|
+
* `geneve` (and/or `vxlan`) kernel module loaded on the host (only
|
|
11
|
+
needed for `test5-geneve-compose` example)
|
|
12
|
+
|
|
13
|
+
## Usage Notes
|
|
14
|
+
|
|
15
|
+
### Asynchronous startup
|
|
16
|
+
|
|
17
|
+
The conlink managed container links are created after the main process
|
|
18
|
+
in the container starts executing. This is different from normal
|
|
19
|
+
docker behavior where the interfaces are created and configured before
|
|
20
|
+
the main process starts. This means the interfaces for those
|
|
21
|
+
links will not be immediately present and the container process will
|
|
22
|
+
need to account for this asynchronous interface behavior. The `node`
|
|
23
|
+
service in `examples/test2-compose.yaml` shows a simple example of
|
|
24
|
+
a container command that will wait for an interface to appear before
|
|
25
|
+
continuing with another command.
|
|
26
|
+
|
|
27
|
+
### System Capabilities/Permissions
|
|
28
|
+
|
|
29
|
+
The conlink container needs to have a superset of the network related
|
|
30
|
+
system capabilities of the containers that it will connect to. At
|
|
31
|
+
a minimum `SYS_ADMIN` and `NET_ADMIN` are required but depending on
|
|
32
|
+
what the other containers require then those additional capabilities
|
|
33
|
+
will also be required for the conlink container. In particular, if the
|
|
34
|
+
container uses systemd, then it will likely use `SYS_NICE` and
|
|
35
|
+
`NET_BROADCAST` and conlink will likewise need those capabilities.
|
|
36
|
+
|
|
37
|
+
## Network Configuration Syntax
|
|
38
|
+
|
|
39
|
+
Network configuration can either be loaded directly from configuration
|
|
40
|
+
files using the `--network-config` option or it can be loaded from
|
|
41
|
+
`x-network` properties contained in docker-compose files using the
|
|
42
|
+
`--compose-file`. Multiple of each option may be specified and all the
|
|
43
|
+
network configuration will be merged into a final network
|
|
44
|
+
configuration.
|
|
45
|
+
|
|
46
|
+
The network configuration can have three top level keys: `links`,
|
|
47
|
+
`tunnels`, and `commands`.
|
|
48
|
+
|
|
49
|
+
### Links
|
|
50
|
+
|
|
51
|
+
Each link definition specifies an interface that will be configured in
|
|
52
|
+
a container. Most types have some sort of connection to either the
|
|
53
|
+
conlink/network container or the host network namespace. For example,
|
|
54
|
+
"veth" type links always have their peer end connected to a bridge in
|
|
55
|
+
the conlink/network container and vlan types are children of physical
|
|
56
|
+
interfaces in the host.
|
|
57
|
+
|
|
58
|
+
The following table describes the link properties:
|
|
59
|
+
|
|
60
|
+
| property | link types | format | default | description |
|
|
61
|
+
|-----------|------------|------------|---------|--------------------------|
|
|
62
|
+
| type | * | string 1 | veth | link/interface type |
|
|
63
|
+
| service | * | string | 2 | compose service |
|
|
64
|
+
| container | * | string | | container name |
|
|
65
|
+
| bridge | veth | string | | conlink bridge / domain |
|
|
66
|
+
| outer-dev | not dummy | string[15] | | conlink/host intf name |
|
|
67
|
+
| dev | * | string[15] | eth0 | container intf name |
|
|
68
|
+
| ip | * | CIDR | | IP CIDR (index offset) |
|
|
69
|
+
| mac | 3 | MAC | | MAC addr (index offset) |
|
|
70
|
+
| mtu | * | number 4 | 9000 | intf MTU |
|
|
71
|
+
| route | * | string | | ip route add args |
|
|
72
|
+
| nat | * | IP | | DNAT/SNAT to IP |
|
|
73
|
+
| netem | * | string | | tc qdisc NetEm options |
|
|
74
|
+
| mode | 5 | IP | | virt intf mode |
|
|
75
|
+
| vlanid | vlan | IP | | VLAN ID |
|
|
76
|
+
|
|
77
|
+
- 1 - veth, dummy, vlan, ipvlan, macvlan, ipvtap, macvtap
|
|
78
|
+
- 2 - defaults to outer compose service
|
|
79
|
+
- 3 - not ipvlan/ipvtap
|
|
80
|
+
- 4 - max MTU of parent device for \*vlan, \*vtap types
|
|
81
|
+
- 5 - macvlan, macvtap, ipvlan, ipvtap
|
|
82
|
+
|
|
83
|
+
Each link has a 'type' key that defaults to "veth" and each link
|
|
84
|
+
definition must also have either a `service` key or a `container` key.
|
|
85
|
+
If the link is defined in the service of a compose file then the value
|
|
86
|
+
of `service` will default to the name of that service.
|
|
87
|
+
|
|
88
|
+
The `container` key is a fully qualified container name that this link
|
|
89
|
+
will apply to. The `service` key is the name of a docker-compose
|
|
90
|
+
service that this link applies to. In the case of a `service` link, if
|
|
91
|
+
more than one replica is started for that service, then the mac, and
|
|
92
|
+
ip values in the link definition will be incremented by the service
|
|
93
|
+
index - 1.
|
|
94
|
+
|
|
95
|
+
All link definitions support the following optional properties: dev,
|
|
96
|
+
ip, mtu, route, nat, netem. If dev is not specified then it will
|
|
97
|
+
default to "eth0". For `*vlan` type interfaces, mtu cannot be larger
|
|
98
|
+
than the MTU of the parent (outer-dev) device.
|
|
99
|
+
|
|
100
|
+
For the `netem` property, refer to the `netem` man page. The `OPTIONS`
|
|
101
|
+
grammar defines the valid strings for the `netem` property.
|
|
102
|
+
|
|
103
|
+
### Tunnels
|
|
104
|
+
|
|
105
|
+
Tunnels links/interfaces will be created and attached to the specified
|
|
106
|
+
bridge. Any containers with links to the same bridge will share
|
|
107
|
+
a broadcast domain with the tunnel link.
|
|
108
|
+
|
|
109
|
+
The following table describes the tunnel properties:
|
|
110
|
+
|
|
111
|
+
| property | format | description |
|
|
112
|
+
|-----------|---------|----------------------------|
|
|
113
|
+
| type | string | geneve or vxlan |
|
|
114
|
+
| bridge | string | conlink bridge / domain |
|
|
115
|
+
| remote | IP | remote host addr |
|
|
116
|
+
| vni | number | Virtual Network Identifier |
|
|
117
|
+
| netem | string | tc qdisc NetEm options |
|
|
118
|
+
|
|
119
|
+
Each tunnel definition must have the keys: type, bridge, remote, and
|
|
120
|
+
vni. The netem optional property also applies to tunnel interfaces.
|
|
121
|
+
|
|
122
|
+
### Commands
|
|
123
|
+
|
|
124
|
+
Commands will be executed in parallel within the matching container
|
|
125
|
+
once all links are successfully configured for that container.
|
|
126
|
+
|
|
127
|
+
The following table describes the command properties:
|
|
128
|
+
|
|
129
|
+
| property | format | description |
|
|
130
|
+
|-----------|------------------|----------------------------|
|
|
131
|
+
| service | string | compose service |
|
|
132
|
+
| container | string | container name |
|
|
133
|
+
| command | array or string | command or shell string |
|
|
134
|
+
|
|
135
|
+
Each command definition must have a `command` key and either
|
|
136
|
+
a `service` or `container` key. The `service` and `container` keys are
|
|
137
|
+
defined the same as for link properties.
|
|
138
|
+
|
|
139
|
+
If the `command` value is an array then the command and arguments will
|
|
140
|
+
be executed directly. If the `command` is a string then the string
|
|
141
|
+
will be wrapped in `sh -c STRING` for execution.
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
## Examples
|
|
145
|
+
|
|
146
|
+
The examples below require a conlink docker image. Build the image for
|
|
147
|
+
both docker and podman like this:
|
|
148
|
+
|
|
149
|
+
```
|
|
150
|
+
docker build -t conlink .
|
|
151
|
+
podman build -t conlink .
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### test1: compose file with embedded network config
|
|
155
|
+
|
|
156
|
+
Start the test1 compose configuration:
|
|
157
|
+
|
|
158
|
+
```
|
|
159
|
+
docker-compose -f examples/test1-compose.yaml up --build --force-recreate
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
From h1 ping the address of h3 (routed via the r0 container):
|
|
163
|
+
|
|
164
|
+
```
|
|
165
|
+
docker-compose -f examples/test1-compose.yaml exec h1 ping 10.0.0.100
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
### test2: compose file with separate network config and live scaling
|
|
170
|
+
|
|
171
|
+
Start the test2 compose configuration:
|
|
172
|
+
|
|
173
|
+
```
|
|
174
|
+
docker-compose -f examples/test2-compose.yaml up -d --build --force-recreate
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
From the first node ping the second:
|
|
178
|
+
|
|
179
|
+
```
|
|
180
|
+
docker-compose -f examples/test2-compose.yaml exec --index 1 node ping 10.0.1.2
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
From the second node ping an address in the internet service:
|
|
184
|
+
|
|
185
|
+
```
|
|
186
|
+
docker-compose -f examples/test2-compose.yaml exec --index 2 node ping 8.8.8.8
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Scale the nodes from 2 to 5 and then ping the first node from the fifth:
|
|
190
|
+
|
|
191
|
+
```
|
|
192
|
+
docker-compose -f examples/test2-compose.yaml up -d --scale node=5
|
|
193
|
+
docker-compose -f examples/test2-compose.yaml exec --index 5 node ping 10.0.1.1
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
### test3: network config file only (no compose) and variable templating
|
|
198
|
+
|
|
199
|
+
#### test3 with docker
|
|
200
|
+
|
|
201
|
+
Start two containers named `ZZZ_node_1` and `ZZZ_node_2`.
|
|
202
|
+
|
|
203
|
+
```
|
|
204
|
+
docker run --name=ZZZ_node_1 --rm -d --network none alpine sleep 864000
|
|
205
|
+
docker run --name=ZZZ_node_2 --rm -d --network none alpine sleep 864000
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
Start the conlink container `ZZZ_network` that will setup a network
|
|
209
|
+
configuration that is connected to the other containers:
|
|
210
|
+
|
|
211
|
+
```
|
|
212
|
+
./conlink-start.sh -v --mode docker --host-mode docker --network-file examples/test3-network.yaml -- --name ZZZ_network --rm -e NODE_NAME_1=ZZZ_node_1 -e NODE_NAME_2=ZZZ_node_2
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
In a separate terminal, ping the node 2 from node 1.
|
|
216
|
+
|
|
217
|
+
```
|
|
218
|
+
docker exec -it ZZZ_node_1 ping 10.0.1.2
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
#### test3 with rootless podman
|
|
222
|
+
|
|
223
|
+
Same as test3 but using rootless podman instead
|
|
224
|
+
|
|
225
|
+
Start two containers named `ZZZ_node_1` and `ZZZ_node_2`.
|
|
226
|
+
|
|
227
|
+
```
|
|
228
|
+
podman run --name=ZZZ_node_1 --rm -d --network none alpine sleep 864000
|
|
229
|
+
podman run --name=ZZZ_node_2 --rm -d --network none alpine sleep 864000
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
Start the conlink container `ZZZ_network` that will setup a network
|
|
233
|
+
configuration that is connected to the other containers:
|
|
234
|
+
|
|
235
|
+
```
|
|
236
|
+
./conlink-start.sh -v --mode podman --host-mode podman --network-file examples/test3-network.yaml -- --name ZZZ_network --rm -e NODE_NAME_1=ZZZ_node_1 -e NODE_NAME_2=ZZZ_node_2
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
In a separate terminal, ping the node 2 from node 1.
|
|
240
|
+
|
|
241
|
+
```
|
|
242
|
+
podman exec -it ZZZ_node_1 ping 10.0.1.2
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
### test4: multiple compose files and container commands
|
|
246
|
+
|
|
247
|
+
Docker-compose has the ability to specify multiple compose files that
|
|
248
|
+
are merged together into a single runtime configuration. This test
|
|
249
|
+
has conlink configuration spread across multiple compose files and
|
|
250
|
+
a separate network config file. The network configuration appears at the
|
|
251
|
+
top-level of the compose files and also within multiple compose
|
|
252
|
+
service definitions.
|
|
253
|
+
|
|
254
|
+
Run docker-compose using two compose files. The first defines the
|
|
255
|
+
conlink/network container and a basic network configuration that
|
|
256
|
+
includes a router and switch (`s0`). The second defines a single
|
|
257
|
+
container (`node1`) and switch (`s1`) that is connected to the router
|
|
258
|
+
defined in the first compose file.
|
|
259
|
+
|
|
260
|
+
```
|
|
261
|
+
echo "COMPOSE_FILE=examples/test4-multiple/base-compose.yaml:examples/test4-multiple/node1-compose.yaml" > .env
|
|
262
|
+
docker-compose up --build --force-recreate
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
Ping the router host from `node`:
|
|
266
|
+
|
|
267
|
+
```
|
|
268
|
+
docker-compose exec node1 ping 10.0.0.100
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
Restart the compose instance and add another compose file that defines
|
|
272
|
+
two node2 replicas and a switch (`s2`) that is connected to the
|
|
273
|
+
router.
|
|
274
|
+
|
|
275
|
+
```
|
|
276
|
+
echo "COMPOSE_FILE=examples/test4-multiple/base-compose.yaml:examples/test4-multiple/node1-compose.yaml:examples/test4-multiple/nodes2-compose.yaml" > .env
|
|
277
|
+
docker-compose up --build --force-recreate
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
From both `node2` replicas, ping `node1` across the switches and `r0` router:
|
|
281
|
+
|
|
282
|
+
```
|
|
283
|
+
docker-compose exec --index 1 node2 ping 10.1.0.1
|
|
284
|
+
docker-compose exec --index 2 node2 ping 10.1.0.1
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
Restart the compose instance and add another compose file that starts
|
|
288
|
+
conlink using an addition network file `web-network.yaml`. The network
|
|
289
|
+
file starts up a simple web server on the router.
|
|
290
|
+
|
|
291
|
+
```
|
|
292
|
+
echo "COMPOSE_FILE=examples/test4-multiple/base-compose.yaml:examples/test4-multiple/node1-compose.yaml:examples/test4-multiple/nodes2-compose.yaml:examples/test4-multiple/all-compose.yaml" > .env
|
|
293
|
+
docker-compose up --build --force-recreate
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
From the second `node2`, perform a download from the web server running on the
|
|
297
|
+
router host:
|
|
298
|
+
|
|
299
|
+
```
|
|
300
|
+
docker-compose exec --index 2 node2 wget -O- 10.0.0.100
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
Remove the `.env` file as a final cleanup step:
|
|
304
|
+
|
|
305
|
+
```
|
|
306
|
+
rm .env
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
### test5: conlink on two hosts with overlay connectivity via geneve
|
|
311
|
+
|
|
312
|
+
Launch a compose instance on host 1 and point it at host 2:
|
|
313
|
+
|
|
314
|
+
```
|
|
315
|
+
echo "REMOTE=ADDRESS_OF_HOST_2" > .env
|
|
316
|
+
echo "NODE_IP=192.168.100.2" >> .env
|
|
317
|
+
docker-compose --env-file .env -f examples/test5-geneve-compose.yaml up
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
Launch another compose instance on host 2 and point it at host 1:
|
|
321
|
+
On host 2 run conlink like this:
|
|
322
|
+
|
|
323
|
+
```
|
|
324
|
+
echo "REMOTE=ADDRESS_OF_HOST_1" > .env
|
|
325
|
+
echo "NODE_IP=192.168.100.2" >> .env
|
|
326
|
+
docker-compose --env-file .env -f examples/test5-geneve-compose.yaml up
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
On host 1, start a tcpdump on the main interface capturing Geneve
|
|
330
|
+
(encapsulated) traffic:
|
|
331
|
+
|
|
332
|
+
```
|
|
333
|
+
sudo tcpdump -nli eth0 port 6081
|
|
334
|
+
```
|
|
335
|
+
|
|
336
|
+
On host 2, start a ping within the "node1" network namespace created
|
|
337
|
+
by conlink:
|
|
338
|
+
|
|
339
|
+
```
|
|
340
|
+
docker-compose -f examples/test5-geneve-compose.yaml exec node ping 192.168.100.1
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
On host 1 you should see bi-directional encapsulated ping traffic on the host.
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
### test6: conlink on two hosts deployed with CloudFormation
|
|
347
|
+
|
|
348
|
+
This test uses AWS CloudFormation to deploy two AWS EC2 instances that
|
|
349
|
+
automatically install, configure, and start conlink (and dependencies)
|
|
350
|
+
using the `test5-geneve-compose.yaml` compose file.
|
|
351
|
+
|
|
352
|
+
Authenticate with AWS and set the `MY_KEY`, `MY_VPC`, and `MY_SUBNET`
|
|
353
|
+
variables to refer to a preexisting key pair name, VPC ID, and Subnet
|
|
354
|
+
ID respectively. Then use the AWS CLI to deploy the stack:
|
|
355
|
+
|
|
356
|
+
```
|
|
357
|
+
export MY_KEY=... MY_VPC=... MY_SUBNET=...
|
|
358
|
+
aws --region us-west-2 cloudformation deploy --stack-name ${USER}-conlink-test6 --template-file examples/test6-cfn.yaml --parameter-overrides KeyPairName=${MY_KEY} VpcId=${MY_VPC} SubnetId=${MY_SUBNET}
|
|
359
|
+
```
|
|
360
|
+
|
|
361
|
+
The stack will take about 8 minutes to finish deploying. You can
|
|
362
|
+
reduce the time to under a minute if you create your own AMI with the
|
|
363
|
+
pre-signal steps in `BaseUserData` baked in and modify the template to
|
|
364
|
+
use that instead.
|
|
365
|
+
|
|
366
|
+
Once the stack is finished deploying, show the outputs of the stack
|
|
367
|
+
(including instance IP addresses) like this:
|
|
368
|
+
|
|
369
|
+
```
|
|
370
|
+
aws --region us-west-2 cloudformation describe-stacks --stack-name ${USER}-conlink-test6 | jq '.Stacks[0].Outputs'
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
Use ssh to connect to instance 1 and 2 (as the "ubuntu" user), then
|
|
374
|
+
sudo to root and cd into `/root/conlink`. You can now run the tcpdump
|
|
375
|
+
and ping test described for test5.
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
### test7: MAC, MTU, and NetEm settings
|
|
379
|
+
|
|
380
|
+
This example demonstrates using interface MAC, MTU, and NetEm (tc
|
|
381
|
+
qdisc) settings.
|
|
382
|
+
|
|
383
|
+
Start the test7 compose configuration:
|
|
384
|
+
|
|
385
|
+
```
|
|
386
|
+
docker-compose -f examples/test7-compose.yaml up --build --force-recreate
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
Show the links in both node containers to see that the MAC addresses
|
|
390
|
+
are `00:0a:0b:0c:0d:0*` and the MTUs are set to `4111`.
|
|
391
|
+
|
|
392
|
+
```
|
|
393
|
+
docker-compose -f examples/test7-compose.yaml exec --index 1 ip link
|
|
394
|
+
docker-compose -f examples/test7-compose.yaml exec --index 2 ip link
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
Ping the second node from the first to show that the NetEm setting is
|
|
398
|
+
adding 40ms delay in both directions (80ms roundtrip).
|
|
399
|
+
|
|
400
|
+
```
|
|
401
|
+
docker-compose -f examples/test7-compose.yaml exec --index 1 node ping 10.0.1.2
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
### test8: Connections to macvlan/vlan host interfaces
|
|
405
|
+
|
|
406
|
+
This example has two nodes with web servers bound to local addresses.
|
|
407
|
+
The first node is connected to a macvlan sub-interface of a host
|
|
408
|
+
physical interface. The second node is connected to a VLAN
|
|
409
|
+
sub-interface of the same host (using VLAN ID/tag 5). Static NAT
|
|
410
|
+
(SNAT+DNAT) is setup inside each container to map the external
|
|
411
|
+
address/interface to the internal address/interface (dummy) where the
|
|
412
|
+
web server is running.
|
|
413
|
+
|
|
414
|
+
Create an environment file with the name of the parent host interface
|
|
415
|
+
and the external IP addresses to assign to each container:
|
|
416
|
+
|
|
417
|
+
```
|
|
418
|
+
cat << EOF > .env
|
|
419
|
+
HOST_INTERFACE=enp6s0
|
|
420
|
+
NODE1_HOST_ADDRESS=192.168.0.32/24
|
|
421
|
+
NODE2_HOST_ADDRESS=192.168.0.33/24
|
|
422
|
+
EOF
|
|
423
|
+
```
|
|
424
|
+
|
|
425
|
+
Start the test8 compose configuration using the environment file:
|
|
426
|
+
|
|
427
|
+
```
|
|
428
|
+
docker-compose --env-file .env -f examples/test8-compose.yaml up --build --force-recreate
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
Connect to the macvlan node (NODE1_HOST_ADDRESS) from an external host
|
|
432
|
+
on your network (traffic to macvlan interfaces on the same host is
|
|
433
|
+
prevented):
|
|
434
|
+
|
|
435
|
+
```
|
|
436
|
+
ping -c1 192.168.0.32
|
|
437
|
+
curl 192.168.0.32
|
|
438
|
+
```
|
|
439
|
+
|
|
440
|
+
Note: to connect to the vlan node (NODE2_HOST_ADDRESS) you will need
|
|
441
|
+
to configure your physical switch/router with routing/connectivity to
|
|
442
|
+
VLAN 5 on the same physical link to your host.
|
|
443
|
+
|
|
444
|
+
## GraphViz network configuration rendering
|
|
445
|
+
|
|
446
|
+
You can use d3 and GraphViz to create a visual graph rendering of
|
|
447
|
+
a network configuration. First start a simple web server in the
|
|
448
|
+
examples directory:
|
|
449
|
+
|
|
450
|
+
```
|
|
451
|
+
cd examples
|
|
452
|
+
python3 -m http.server 8080
|
|
453
|
+
```
|
|
454
|
+
|
|
455
|
+
Use the `net2dot` script to transform a network
|
|
456
|
+
configuration into a GraphViz data file (dot language). The `net2dot`
|
|
457
|
+
script supports `--compose-file` and `--network-file` command line
|
|
458
|
+
options. To render the network configuration for example test1, run
|
|
459
|
+
the following in another window:
|
|
460
|
+
|
|
461
|
+
```
|
|
462
|
+
./net2dot --compose-file examples/test1-compose.yaml > examples/data.dot
|
|
463
|
+
```
|
|
464
|
+
|
|
465
|
+
Then load `http://localhost:8080` in your browser to see the rendered
|
|
466
|
+
image.
|
|
467
|
+
|
|
468
|
+
The file `examples/net2dot.yaml` contains a configuration that
|
|
469
|
+
combines many different configuration elements (veth links, dummy
|
|
470
|
+
interfaces, vlan type links, tunnels, etc).
|
|
471
|
+
|
|
472
|
+
```
|
|
473
|
+
./net2dot --network-file examples/net2dot.yaml > examples/data.dot
|
|
474
|
+
```
|
|
475
|
+
|
|
476
|
+
Then load `http://localhost:8080` in your browser.
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
## Copyright & License
|
|
480
|
+
|
|
481
|
+
This software is copyright Viasat and subject to the terms of the
|
|
482
|
+
Mozilla Public License version 2.0 (MPL 2.0). A copy of the license is
|
|
483
|
+
located in the LICENSE file at the top of the repository or available
|
|
484
|
+
at https://mozilla.org/MPL/2.0/.
|
|
485
|
+
|
package/TODO
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
- MVP for ViaBox:
|
|
2
|
+
- [x] compose/x-network file loading
|
|
3
|
+
- [x] multiple config sources and merging
|
|
4
|
+
- [x] link route config
|
|
5
|
+
- [x] filtering on project and workdir
|
|
6
|
+
- [x] interface and MAC iteration
|
|
7
|
+
- [x] variable templating
|
|
8
|
+
- [x] *vlan type interfaces
|
|
9
|
+
|
|
10
|
+
- Near term:
|
|
11
|
+
- [x] dummy interfaces
|
|
12
|
+
- [x] arbitrary container commands
|
|
13
|
+
- [x] schema validation
|
|
14
|
+
- [x] code comments/documentation
|
|
15
|
+
- [x] tunnel interfaces
|
|
16
|
+
- [x] tc/qdisc settings
|
|
17
|
+
- [x] fix/test all examples (6 and 9 remaining)
|
|
18
|
+
- [x] add net2dot
|
|
19
|
+
- [ ] add outer-netem (and match all link-add params to link keys)
|
|
20
|
+
|
|
21
|
+
- Further term:
|
|
22
|
+
- [ ] CNI networking support
|
|
23
|
+
- conlink runs in container listening for events on a UDS
|
|
24
|
+
(instead of docker events)
|
|
25
|
+
- an outer conlink command is the CNI client that formats
|
|
26
|
+
events to send over the UDS to the inner conlink
|
|
27
|
+
- [ ] multiple routes
|
|
28
|
+
- [ ] ovs flow config
|
|
29
|
+
- [ ] Multiple bridge-modes
|
|
30
|
+
- bridge-mode as part of the domain definition so that the
|
|
31
|
+
same conlink instances can support multiple bridge modes
|
|
32
|
+
simultaneously (with a default for links that don't
|
|
33
|
+
specify).
|
|
34
|
+
|
package/conlink
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
#!/usr/bin/env bash

# conlink - launcher for the conlink entry point (conlink.core/main).
#
# Resolves the directory this script lives in (following symlinks) and
# runs conlink via the locally installed nbb (node babashka)
# interpreter from node_modules. All arguments are passed through
# unchanged to conlink.

# Fix: quote nested command substitutions so paths with spaces work.
TOP_DIR=$(dirname "$(readlink -f "${0}")")
NBB=${TOP_DIR}/node_modules/.bin/nbb

# Print a message to stderr and exit non-zero.
die() { echo >&2 "${*}"; exit 1; }

[ -e "${NBB}" ] || die "Missing ${NBB}. Maybe run 'npm install' in ${TOP_DIR}?"

# exec replaces this shell so conlink receives signals directly.
exec "${NBB}" -cp "${TOP_DIR}/src" -m conlink.core/main "${@}"
|
package/conlink-start.sh
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
#!/usr/bin/env bash

# conlink-start.sh - launch a conlink container under podman or docker.
#
# Mounts the network/compose config files into the container, adds the
# capabilities conlink needs (or --privileged for rootless), optionally
# connects conlink to the host's podman/docker instance, and then runs
# the conlink image. Arguments after '--' are passed directly to
# '<MODE> run'.

set -e

usage () {
    echo "${0} [CONLINK_OPTIONS] CONFIG_OPTIONS [-- CMD_OPTIONS]"
    echo ""
    echo "Where CONLINK_OPTIONS are:"
    echo "  --verbose              - Enable verbose output"
    echo "  --dry-run              - Show what would be done"
    echo "  --mode MODE            - Conlink launch mode: podman, docker"
    echo "                           (default: podman)"
    echo "  --image IMAGE          - Conlink image to use"
    echo "                           (default: conlink)"
    echo "  --host-mode HOST_MODE  - External container manager mode:"
    echo "                           podman, docker, none"
    echo "                           (default: none)"
    echo ""
    echo "Where CONFIG_OPTIONS are (at least one must be specified):"
    echo "  --network-file NET_CFG - Conlink network config file"
    echo "  --compose-file COM_CFG - Compose file containing conlink service"
    echo "                           with an optional x-network key"
}

# Settings (each overridable from the environment).
VERBOSE=${VERBOSE:-}
DRY_RUN=${DRY_RUN:-}
MODE=${MODE:-podman}
IMAGE="${IMAGE:-conlink}"
HOST_MODE=${HOST_MODE:-}
# Fix: use ':-' like the other settings so the script is safe under 'set -u'.
HOST_NETWORK=${HOST_NETWORK:-}
CMD="${CMD:-/app/build/conlink.js}"
NETWORK_FILE=${NETWORK_FILE:-}
COMPOSE_FILE=${COMPOSE_FILE:-}
COMPOSE_PROJECT=${COMPOSE_PROJECT:-}

# Print a message to stderr and exit non-zero.
die() { echo >&2 "${*}"; exit 1; }
# Echo only when --verbose/-v was given; never fail under 'set -e'.
vecho() { [ "${VERBOSE}" ] && echo "${@}" || true; }

# PID of the background podman API service (if started); used by cleanup.
psvc=
cleanup() {
    trap - EXIT INT TERM
    echo "Cleaning up"
    if [ "${psvc}" ]; then
        kill "${psvc}"
        [ -e "${HOST_SOCK_PATH}" ] && rm -f "${HOST_SOCK_PATH}"
    fi
}

# Parse arguments
CMD_OPTS=
while [ "${*}" ]; do
    param=$1; OPTARG=$2
    case ${param} in
    -v|--verbose) VERBOSE=1 CMD_OPTS="${CMD_OPTS} -v" ;;
    #-vv) VERBOSE=1 CMD_OPTS="${CMD_OPTS} -v -v" ;;
    --dry-run) DRY_RUN=1 ;;
    --mode) MODE="${OPTARG}"; shift ;;
    --image) IMAGE="${OPTARG}"; shift ;;
    --host-mode) HOST_MODE="${OPTARG}"; shift ;;
    --network-file) NETWORK_FILE="${OPTARG}"; shift ;;
    --compose-file) COMPOSE_FILE="${OPTARG}"; shift ;;
    --compose-project) CMD_OPTS="${CMD_OPTS} --compose-project ${OPTARG}"; shift ;;
    # Fix: exit after printing usage instead of falling through and
    # continuing to run with a stray '-h' argument.
    -h|--help) usage; exit 0 ;;
    --) shift; break ;;
    *) CMD_OPTS="${CMD_OPTS} $1" ;;
    esac
    shift
done
# Remaining args ("${@}") are passed to 'podman/docker run'.

### Sanity checks

# TODO: check for openvswitch and geneve

case "${MODE}" in
    podman|docker) ;;
    *) die "Unknown mode '${MODE}'" ;;
esac
[ "${NETWORK_FILE}" -o "${COMPOSE_FILE}" ] || \
    die "One or both required: --network-file or --compose-file"

### Construct command line arguments
vecho "Settings:"
RUN_OPTS="${RUN_OPTS} --security-opt apparmor=unconfined"

vecho " - mount network/compose config files"
if [ "${NETWORK_FILE}" ]; then
    network_path=/root/$(basename "${NETWORK_FILE}")
    RUN_OPTS="${RUN_OPTS} -v $(readlink -f "${NETWORK_FILE}"):${network_path}:ro"
    CMD_OPTS="${CMD_OPTS} --network-file ${network_path}"
fi
if [ "${COMPOSE_FILE}" ]; then
    compose_path=/root/$(basename "${COMPOSE_FILE}")
    RUN_OPTS="${RUN_OPTS} -v $(readlink -f "${COMPOSE_FILE}"):${compose_path}:ro"
    CMD_OPTS="${CMD_OPTS} --compose-file ${compose_path}"
fi

# podman specific settings and shared container storage
case "${MODE}" in
    podman)
        vecho " - support running systemd in containers"
        RUN_OPTS="${RUN_OPTS} --systemd=always"
        vecho " - port forwarding with slirp4netns (for rootless mode)"
        RUN_OPTS="${RUN_OPTS} --network=slirp4netns:port_handler=slirp4netns"
        if [ "$(id -u)" = 0 ]; then
            host_containers=/var/lib/containers
        else
            host_containers=$HOME/.local/share/containers
        fi
        vecho " - mount shared storage from ${host_containers}"
        RUN_OPTS="${RUN_OPTS} -v ${host_containers}:/var/lib/host-containers:ro"
        ;;
esac

# permissions/capabilities
if [ "$(id -u)" = 0 ]; then
    vecho " - adding base capabilities"
    RUN_OPTS="${RUN_OPTS} --cap-add SYS_ADMIN --cap-add NET_ADMIN"
    RUN_OPTS="${RUN_OPTS} --cap-add SYS_NICE --cap-add NET_BROADCAST"
    RUN_OPTS="${RUN_OPTS} --cap-add IPC_LOCK"
else
    vecho " - adding --privileged (for rootless)"
    RUN_OPTS="${RUN_OPTS} --privileged"
fi

# Warning: --pid host (without cgroup v2) can leak conmon processes to
# the outer host if the internal cleanup doesn't complete fully.
case "${HOST_MODE}" in
    podman)
        vecho " - adding connectivity to outer podman"
        RUN_OPTS="${RUN_OPTS} --pid host"
        HOST_SOCK_PATH=$(mktemp -u "$(pwd)/podman.sock.XXXXXXXXXX")
        RUN_OPTS="${RUN_OPTS} -v ${HOST_SOCK_PATH}:/var/run/podman/podman.sock"
        ;;
    docker)
        vecho " - adding connectivity to outer docker"
        RUN_OPTS="${RUN_OPTS} --pid host"
        RUN_OPTS="${RUN_OPTS} -v /var/lib/docker:/var/lib/docker"
        RUN_OPTS="${RUN_OPTS} -v /var/run/docker.sock:/var/run/docker.sock"
        ;;
    none|"")
        ;;
    *) die "Unknown host mode '${HOST_MODE}'" ;;
esac


### Start it up

trap cleanup EXIT INT TERM

### Start podman API service for external podman connectivity

if [ "${HOST_MODE}" = "podman" ]; then
    vecho "Starting outer podman API service"
    rm -f "${HOST_SOCK_PATH}"
    ${DRY_RUN:+echo} podman system service --time=0 "unix://${HOST_SOCK_PATH}" &
    psvc=$! # for cleanup
    for i in $(seq 5); do
        [ -e "${HOST_SOCK_PATH}" ] && break
        echo "Waiting (try $i/5) for podman service to start"
        sleep 1
    done
    [ -e "${HOST_SOCK_PATH}" ] || die "podman service did not start in 5 seconds"
fi

### Run conlink/docker
vecho "Starting conlink"
# RUN_OPTS/CMD_OPTS/IMAGE/CMD are intentionally unquoted: they are
# space-separated option lists that must word-split.
echo ${MODE} run ${RUN_OPTS} "${@}" ${IMAGE} ${CMD} ${CMD_OPTS}
[ "${DRY_RUN}" ] || ${MODE} run ${RUN_OPTS} "${@}" ${IMAGE} ${CMD} ${CMD_OPTS}
|