enroll 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
enroll-0.0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
enroll-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,77 @@
1
+ Metadata-Version: 2.1
2
+ Name: enroll
3
+ Version: 0.0.1
4
+ Summary: Enroll a server's running state retrospectively into Ansible
5
+ Home-page: https://git.mig5.net/mig5/enroll
6
+ License: GPL-3.0-or-later
7
+ Author: Miguel Jacq
8
+ Author-email: mig@mig5.net
9
+ Requires-Python: >=3.10,<4.0
10
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Project-URL: Repository, https://git.mig5.net/mig5/enroll
16
+ Description-Content-Type: text/markdown
17
+
18
+ # Enroll
19
+
20
+ **enroll** inspects a Linux machine (currently Debian-only) and generates Ansible roles for things it finds running on the machine.
21
+
22
+ It aims to be **optimistic and noninteractive**:
23
+ - Detects packages that have been installed
24
+ - Detects Debian package ownership of `/etc` files using dpkg’s local database.
25
+ - Captures config that has **changed from packaged defaults** (dpkg conffile hashes + package md5sums when available).
26
+ - Also captures **service-relevant custom/unowned files** under `/etc/<service>/...` (e.g. drop-in config includes).
27
+ - Defensively excludes likely secrets (path denylist + content sniff + size caps).
28
+ - Captures non-system users that exist on the system, and their SSH public keys
29
+
30
+ ## Install (Poetry)
31
+
32
+ ```bash
33
+ poetry install
34
+ poetry run enroll --help
35
+ ```
36
+
37
+ ## Usage
38
+
39
+ On the host (root recommended):
40
+
41
+ ### 1. Generate a bundle of state/information about the host
42
+
43
+ ```bash
44
+ sudo poetry run enroll harvest --out /tmp/enroll-bundle
45
+ ```
46
+
47
+ ### 2. Generate Ansible manifests (roles/playbook) from that bundle
48
+
49
+ ```bash
50
+ sudo poetry run enroll manifest --bundle /tmp/enroll-bundle --out /tmp/enroll-ansible
51
+ ```
52
+
53
+ ### Alternatively, do both steps in one shot:
54
+
55
+ ```bash
56
+ sudo poetry run enroll export --bundle /tmp/enroll-bundle --out /tmp/enroll-ansible
57
+ ```
58
+
59
+ Then run:
60
+
61
+ ```bash
62
+ ansible-playbook -i "localhost," -c local /tmp/enroll-ansible/playbook.yml
63
+ ```
64
+
65
+
66
+ ## Notes / Safety
67
+
68
+ - enroll **skips** common sensitive locations like `/etc/ssl/private/*`, `/etc/ssh/ssh_host_*`, and files that look like private keys/tokens.
69
+ - It also skips symlinks, binary-ish files, and large files by default.
70
+ - Review each generated role’s README before committing it anywhere.
71
+ - enroll only stores the raw config files. If you want to turn these into Jinja2 templates with dynamic inventory, see my other tool, jinjaturtle: https://git.mig5.net/mig5/jinjaturtle
72
+
73
+
74
+ ## Troubleshooting
75
+
76
+ - Run as root for the most complete harvest (`sudo ...`).
77
+
enroll-0.0.1/README.md ADDED
@@ -0,0 +1,59 @@
1
+ # Enroll
2
+
3
+ **enroll** inspects a Linux machine (currently Debian-only) and generates Ansible roles for things it finds running on the machine.
4
+
5
+ It aims to be **optimistic and noninteractive**:
6
+ - Detects packages that have been installed
7
+ - Detects Debian package ownership of `/etc` files using dpkg’s local database.
8
+ - Captures config that has **changed from packaged defaults** (dpkg conffile hashes + package md5sums when available).
9
+ - Also captures **service-relevant custom/unowned files** under `/etc/<service>/...` (e.g. drop-in config includes).
10
+ - Defensively excludes likely secrets (path denylist + content sniff + size caps).
11
+ - Captures non-system users that exist on the system, and their SSH public keys
12
+
13
+ ## Install (Poetry)
14
+
15
+ ```bash
16
+ poetry install
17
+ poetry run enroll --help
18
+ ```
19
+
20
+ ## Usage
21
+
22
+ On the host (root recommended):
23
+
24
+ ### 1. Generate a bundle of state/information about the host
25
+
26
+ ```bash
27
+ sudo poetry run enroll harvest --out /tmp/enroll-bundle
28
+ ```
29
+
30
+ ### 2. Generate Ansible manifests (roles/playbook) from that bundle
31
+
32
+ ```bash
33
+ sudo poetry run enroll manifest --bundle /tmp/enroll-bundle --out /tmp/enroll-ansible
34
+ ```
35
+
36
+ ### Alternatively, do both steps in one shot:
37
+
38
+ ```bash
39
+ sudo poetry run enroll export --bundle /tmp/enroll-bundle --out /tmp/enroll-ansible
40
+ ```
41
+
42
+ Then run:
43
+
44
+ ```bash
45
+ ansible-playbook -i "localhost," -c local /tmp/enroll-ansible/playbook.yml
46
+ ```
47
+
48
+
49
+ ## Notes / Safety
50
+
51
+ - enroll **skips** common sensitive locations like `/etc/ssl/private/*`, `/etc/ssh/ssh_host_*`, and files that look like private keys/tokens.
52
+ - It also skips symlinks, binary-ish files, and large files by default.
53
+ - Review each generated role’s README before committing it anywhere.
54
+ - enroll only stores the raw config files. If you want to turn these into Jinja2 templates with dynamic inventory, see my other tool, jinjaturtle: https://git.mig5.net/mig5/jinjaturtle
55
+
56
+
57
+ ## Troubleshooting
58
+
59
+ - Run as root for the most complete harvest (`sudo ...`).
@@ -0,0 +1 @@
1
# This module exports no names through ``from ... import *``.
__all__ = []
@@ -0,0 +1,4 @@
1
+ from .cli import main
2
+
3
# Invoke the CLI only when this module is executed directly, not on import.
if __name__ == "__main__":
    main()
@@ -0,0 +1,145 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from dataclasses import dataclass
5
+ from typing import Dict, List, Optional, Set, Tuple
6
+
7
+
8
@dataclass
class UserRecord:
    """One local account as assembled by ``collect_non_system_users``.

    The first six fields are copied from the account's passwd row; the
    remaining fields are derived from the group database and the user's
    home directory.
    """

    name: str  # login name (passwd field 1)
    uid: int  # numeric user id (passwd field 3)
    gid: int  # numeric primary group id (passwd field 4)
    gecos: str  # comment/GECOS text (passwd field 5)
    home: str  # home directory (passwd field 6)
    shell: str  # login shell (passwd field 7)
    primary_group: str  # group name for ``gid``, or the gid as text if unknown
    supplementary_groups: List[str]  # sorted names of other groups listing the user
    ssh_files: List[str]  # paths returned by ``find_user_ssh_files`` for ``home``
19
+
20
+
21
def parse_login_defs(path: str = "/etc/login.defs") -> Dict[str, int]:
    """Extract the UID range settings from a login.defs-style file.

    Only ``UID_MIN``, ``UID_MAX``, ``SYS_UID_MIN`` and ``SYS_UID_MAX`` are
    recognised. A setting whose value is not an integer is skipped, and a
    missing file yields an empty mapping.

    Args:
        path: File to read.

    Returns:
        Mapping of setting name to its integer value.
    """
    wanted = ("UID_MIN", "UID_MAX", "SYS_UID_MIN", "SYS_UID_MAX")
    settings: Dict[str, int] = {}
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as handle:
            for raw in handle:
                tokens = raw.split()
                # Blank lines give no tokens; comment lines start with a
                # "#..." token, which is never one of the wanted names.
                if len(tokens) < 2 or tokens[0] not in wanted:
                    continue
                try:
                    number = int(tokens[1])
                except ValueError:
                    continue
                settings[tokens[0]] = number
    except FileNotFoundError:
        pass
    return settings
38
+
39
+
40
def parse_passwd(path: str = "/etc/passwd") -> List[Tuple[str, int, int, str, str, str]]:
    """Parse a passwd-format file into account tuples.

    Blank lines, lines starting with ``#``, lines with fewer than seven
    colon-separated fields, and lines whose uid or gid is not an integer are
    ignored.

    Args:
        path: File to read.

    Returns:
        ``(name, uid, gid, gecos, home, shell)`` tuples in file order.
    """
    entries: List[Tuple[str, int, int, str, str, str]] = []
    with open(path, "r", encoding="utf-8", errors="replace") as handle:
        for raw in handle:
            record = raw.rstrip("\n")
            if record == "" or record[0] == "#":
                continue
            fields = record.split(":")
            if len(fields) < 7:
                continue
            # The second field (password placeholder) is not used.
            login, _, uid_text, gid_text, gecos, home, shell = fields[:7]
            try:
                entry = (login, int(uid_text), int(gid_text), gecos, home, shell)
            except ValueError:
                continue
            entries.append(entry)
    return entries
61
+
62
+
63
def parse_group(path: str = "/etc/group") -> Tuple[Dict[int, str], Dict[str, int], Dict[str, Set[str]]]:
    """Parse a group-format file into three lookup tables.

    Blank lines, lines starting with ``#``, lines with fewer than four
    colon-separated fields, and lines with a non-integer gid are ignored.

    Args:
        path: File to read.

    Returns:
        A tuple ``(gid_to_name, name_to_gid, members)`` where ``members`` maps
        each group name to the set of user names listed in its member field.
    """
    names_by_gid: Dict[int, str] = {}
    gids_by_name: Dict[str, int] = {}
    roster: Dict[str, Set[str]] = {}
    with open(path, "r", encoding="utf-8", errors="replace") as handle:
        for raw in handle:
            entry = raw.rstrip("\n")
            if entry == "" or entry[0] == "#":
                continue
            fields = entry.split(":")
            if len(fields) < 4:
                continue
            group_name, gid_text, member_csv = fields[0], fields[2], fields[3]
            try:
                gid = int(gid_text)
            except ValueError:
                continue
            names_by_gid[gid] = group_name
            gids_by_name[group_name] = gid
            # Empty items (e.g. from "a,,b" or an empty field) are dropped.
            roster[group_name] = {user for user in member_csv.split(",") if user}
    return names_by_gid, gids_by_name, roster
85
+
86
+
87
def is_human_user(uid: int, shell: str, uid_min: int) -> bool:
    """Decide whether an account looks like a regular (non-system) login.

    An account is rejected when its uid is below ``uid_min`` or when its
    shell is a "no login" program.

    Args:
        uid: Numeric user id of the account.
        shell: Login shell from the passwd entry; may be empty or ``None``.
        uid_min: Lowest uid considered a regular user.

    Returns:
        ``True`` if the account passes both checks, ``False`` otherwise.
    """
    if uid < uid_min:
        return False
    # Compare by program name rather than full path so that every install
    # location of the same binaries (/sbin/nologin, /usr/sbin/nologin,
    # /bin/false, /usr/bin/false, ...) is treated alike.
    shell_name = os.path.basename((shell or "").strip())
    return shell_name not in {"nologin", "false"}
94
+
95
+
96
def find_user_ssh_files(home: str) -> List[str]:
    """List the SSH files worth capturing from a user's home directory.

    Only ``<home>/.ssh/authorized_keys`` is considered, and only when it is a
    regular file that is not a symlink.

    Args:
        home: The user's home directory.

    Returns:
        Sorted list of matching paths; empty when ``<home>/.ssh`` is not a
        directory or nothing qualifies.
    """
    ssh_dir = os.path.join(home, ".ssh")
    if not os.path.isdir(ssh_dir):
        return []

    candidates = (os.path.join(ssh_dir, "authorized_keys"),)
    keep = {
        candidate
        for candidate in candidates
        if os.path.isfile(candidate) and not os.path.islink(candidate)
    }
    return sorted(keep)
107
+
108
+
109
def collect_non_system_users() -> List[UserRecord]:
    """Build a ``UserRecord`` for every regular account on this host.

    Reads the login.defs, passwd and group files at their default paths.
    ``root`` and ``nobody`` are always skipped, as is any account rejected by
    ``is_human_user`` (using ``UID_MIN`` from login.defs, or 1000 when unset).

    Returns:
        Records in passwd-file order.
    """
    uid_floor = parse_login_defs().get("UID_MIN", 1000)
    accounts = parse_passwd()
    group_names, _unused, memberships = parse_group()

    always_skipped = ("root", "nobody")
    records: List[UserRecord] = []
    for login, uid, gid, gecos, home, shell in accounts:
        if login in always_skipped or not is_human_user(uid, shell, uid_floor):
            continue

        # Fall back to the numeric gid when the group file has no such entry.
        main_group = group_names.get(gid, str(gid))

        other_groups = sorted(
            {
                group
                for group, people in memberships.items()
                if login in people and group != main_group
            }
        )

        # Only absolute home directories are searched for SSH files.
        if home and home.startswith("/"):
            key_files = find_user_ssh_files(home)
        else:
            key_files = []

        record = UserRecord(
            name=login,
            uid=uid,
            gid=gid,
            gecos=gecos,
            home=home,
            shell=shell,
            primary_group=main_group,
            supplementary_groups=other_groups,
            ssh_files=key_files,
        )
        records.append(record)

    return records
@@ -0,0 +1,32 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from .harvest import harvest
5
+ from .manifest import manifest
6
+
7
+
8
def main() -> None:
    """Parse the command line and run the selected ``enroll`` subcommand.

    Subcommands:
        harvest:  run ``harvest`` into ``--out`` and print its return value.
        manifest: run ``manifest`` from ``--bundle`` into ``--out``.
        export:   run ``harvest`` into ``--bundle``, then ``manifest`` from
                  that bundle into ``--out``.
    """
    parser = argparse.ArgumentParser(prog="enroll")
    commands = parser.add_subparsers(dest="cmd", required=True)

    harvest_cmd = commands.add_parser(
        "harvest", help="Harvest service/package/config state into a bundle"
    )
    harvest_cmd.add_argument("--out", required=True, help="Bundle output directory")

    manifest_cmd = commands.add_parser(
        "manifest", help="Render Ansible roles from a harvested bundle"
    )
    manifest_cmd.add_argument(
        "--bundle",
        required=True,
        help="Path to the bundle directory created by the harvest command",
    )
    manifest_cmd.add_argument(
        "--out",
        required=True,
        help="Output directory for generated roles/playbook Ansible manifest",
    )

    export_cmd = commands.add_parser("export", help="Harvest then manifest in one shot")
    export_cmd.add_argument(
        "--bundle", required=True, help="Path to the directory to place the bundle in"
    )
    export_cmd.add_argument(
        "--out",
        required=True,
        help="Output directory for generated roles/playbook Ansible manifest",
    )

    opts = parser.parse_args()
    chosen = opts.cmd

    if chosen == "harvest":
        print(harvest(opts.out))
        return
    if chosen == "manifest":
        manifest(opts.bundle, opts.out)
        return
    if chosen == "export":
        harvest(opts.bundle)
        manifest(opts.bundle, opts.out)
@@ -0,0 +1,175 @@
1
+ from __future__ import annotations
2
+
3
+ import glob
4
+ import hashlib
5
+ import os
6
+ import subprocess
7
+ from typing import Dict, List, Optional, Set, Tuple
8
+
9
+
10
+ def _run(cmd: list[str]) -> str:
11
+ p = subprocess.run(cmd, check=False, text=True, capture_output=True)
12
+ if p.returncode != 0:
13
+ raise RuntimeError(f"Command failed: {cmd}\n{p.stderr}")
14
+ return p.stdout
15
+
16
+
17
def dpkg_owner(path: str) -> Optional[str]:
    """Return the name of the package that owns ``path`` according to dpkg.

    Runs ``dpkg -S <path>`` and reads its output line by line. Each useful
    line has the form ``pkg[:arch][, pkg2...]: /path``. Lines whose left-hand
    side is not a package list (such as ``diversion by X from: /path``) are
    skipped, and when several packages share the path the first one listed is
    returned.

    Args:
        path: Filesystem path to look up.

    Returns:
        The owning package name without any architecture qualifier, or
        ``None`` when dpkg reports no owner.
    """
    p = subprocess.run(["dpkg", "-S", path], text=True, capture_output=True)
    if p.returncode != 0:
        return None
    for line in p.stdout.splitlines():
        owners, sep, _ = line.partition(": ")
        if not sep:
            continue
        first = owners.split(",", 1)[0].strip()
        # Package names never contain whitespace, so a left-hand side with
        # spaces is an informational line (e.g. a diversion), not an owner.
        if not first or any(ch.isspace() for ch in first):
            continue
        pkg = first.split(":", 1)[0]  # drop the ":arch" qualifier if present
        if pkg:
            return pkg
    return None
24
+
25
+
26
+
27
def list_manual_packages() -> List[str]:
    """Return packages marked as manually installed (apt-mark showmanual).

    Blank lines and lines starting with ``#`` in the command output are
    ignored. If the command exits non-zero an empty list is returned.

    Returns:
        Sorted, de-duplicated package names.
    """
    proc = subprocess.run(["apt-mark", "showmanual"], text=True, capture_output=True)
    if proc.returncode != 0:
        return []
    cleaned = (entry.strip() for entry in (proc.stdout or "").splitlines())
    names = {entry for entry in cleaned if entry and not entry.startswith("#")}
    return sorted(names)
39
+
40
def build_dpkg_etc_index(
    info_dir: str = "/var/lib/dpkg/info",
) -> Tuple[Set[str], Dict[str, str], Dict[str, Set[str]], Dict[str, List[str]]]:
    """
    Index every /etc path listed in dpkg's per-package ``*.list`` files.

    The list files are processed in sorted filename order so that the result
    is the same on every run: when several packages list the same path,
    ``etc_owner_map`` records the first package in that order.

    Args:
        info_dir: directory containing the ``<pkg>[:arch].list`` files.

    Returns:
        owned_etc_paths: set of /etc paths owned by dpkg
        etc_owner_map: /etc/path -> pkg
        topdir_to_pkgs: "nginx" -> {"nginx-common", ...} based on /etc/<topdir>/...
        pkg_to_etc_paths: pkg -> sorted list of /etc paths it installs
    """
    owned: Set[str] = set()
    owner: Dict[str, str] = {}
    topdir_to_pkgs: Dict[str, Set[str]] = {}
    pkg_to_etc: Dict[str, List[str]] = {}

    # glob.glob() returns entries in arbitrary filesystem order; sort so that
    # owner.setdefault() below picks a stable owner for shared paths.
    for list_path in sorted(glob.glob(os.path.join(info_dir, "*.list"))):
        pkg_raw = os.path.basename(list_path)[: -len(".list")]
        pkg = pkg_raw.split(":", 1)[0]  # drop arch suffix if present

        etc_paths: List[str] = []
        try:
            with open(list_path, "r", encoding="utf-8", errors="replace") as f:
                for line in f:
                    p = line.rstrip("\n")
                    if not p.startswith("/etc/"):
                        continue
                    owned.add(p)
                    owner.setdefault(p, pkg)
                    etc_paths.append(p)

                    # "/etc/<top>/..." splits into ["", "etc", "<top>", ...].
                    top = p.split("/", 3)[2]
                    if top:
                        topdir_to_pkgs.setdefault(top, set()).add(pkg)
        except FileNotFoundError:
            # The list file vanished between globbing and opening; skip it.
            continue

        if etc_paths:
            pkg_to_etc.setdefault(pkg, []).extend(etc_paths)

    pkg_to_etc = {k: sorted(set(v)) for k, v in pkg_to_etc.items()}

    return owned, owner, topdir_to_pkgs, pkg_to_etc
84
+
85
+
86
def parse_status_conffiles(status_path: str = "/var/lib/dpkg/status") -> Dict[str, Dict[str, str]]:
    """
    pkg -> { "/etc/foo": md5hex, ... } based on dpkg status "Conffiles" field.
    This md5 is the packaged baseline for the conffile.

    The file is read as blank-line-separated stanzas of ``Field: value``
    lines, where a line starting with whitespace continues the previous
    field. Field names and values are stored without surrounding whitespace,
    so the returned keys are bare package names. Packages with no usable
    Conffiles entries are omitted. If a package name occurs in more than one
    stanza, its entries are merged.

    Raises:
        OSError: if ``status_path`` cannot be opened.
    """
    out: Dict[str, Dict[str, str]] = {}

    cur: Dict[str, str] = {}
    key: Optional[str] = None

    def flush(stanza: Dict[str, str]) -> None:
        # Record the Conffiles entries of one finished stanza.
        pkg = stanza.get("Package")
        raw = stanza.get("Conffiles")
        if not pkg or not raw:
            return
        m: Dict[str, str] = {}
        for entry in raw.splitlines():
            # Each entry is "<path> <md5> [flag]"; anything else is ignored.
            parts = entry.split()
            if len(parts) >= 2 and parts[0].startswith("/"):
                m[parts[0]] = parts[1]
        if m:
            out.setdefault(pkg, {}).update(m)

    with open(status_path, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            if not line.strip():
                # Blank line: end of the current stanza.
                if cur:
                    flush(cur)
                cur = {}
                key = None
                continue
            if line[0].isspace():
                # Continuation of the previous field; keep it on its own line.
                if key is not None:
                    cur[key] += "\n" + line.rstrip("\n")
                continue
            name, sep, value = line.partition(":")
            if sep:
                key = name.strip()
                # Strip the trailing newline too, otherwise "Package" values
                # would come out as "name\n".
                cur[key] = value.strip()

    if cur:
        flush(cur)
    return out
133
+
134
+
135
def read_pkg_md5sums(pkg: str, info_dir: str = "/var/lib/dpkg/info") -> Dict[str, str]:
    """
    relpath -> md5hex from <info_dir>/<pkg>.md5sums
    relpath has no leading slash, e.g. 'etc/nginx/nginx.conf'

    If ``<pkg>.md5sums`` does not exist, architecture-qualified files named
    ``<pkg>:<arch>.md5sums`` are read instead, so a package name that has had
    its ``:arch`` suffix removed can still be resolved. Lines that do not
    contain both a checksum and a path are skipped.

    Args:
        pkg: package name, with or without an ``:arch`` suffix.
        info_dir: directory holding dpkg's per-package metadata files.

    Returns:
        Mapping of relative path to md5 hex digest; empty when no md5sums
        file is found.
    """
    exact = os.path.join(info_dir, f"{pkg}.md5sums")
    if os.path.exists(exact):
        candidates = [exact]
    else:
        # Escape both parts so only the "*" we add acts as a wildcard.
        pattern = os.path.join(glob.escape(info_dir), glob.escape(pkg) + ":*.md5sums")
        candidates = sorted(glob.glob(pattern))

    m: Dict[str, str] = {}
    for path in candidates:
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                fields = line.strip().split(None, 1)
                if len(fields) != 2:
                    # Blank or malformed line: no "<md5> <path>" pair.
                    continue
                md5, rel = fields
                m[rel.strip()] = md5.strip()
    return m
152
+
153
+
154
def file_md5(path: str) -> str:
    """Return the MD5 hex digest of a file's contents.

    The file is read in 1 MiB chunks so large files are not loaded into
    memory at once.

    Args:
        path: File to hash.

    Returns:
        Lower-case hexadecimal MD5 digest.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The digest is only compared against dpkg's recorded checksums, not used
    # for security, so say so explicitly: this keeps MD5 available on
    # Python builds that restrict insecure hashes.
    h = hashlib.md5(usedforsecurity=False)
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            h.update(chunk)
    return h.hexdigest()
160
+
161
+
162
def stat_triplet(path: str) -> Tuple[str, str, str]:
    """Return ``(owner, group, mode)`` for a path, following symlinks.

    Args:
        path: Filesystem path to inspect.

    Returns:
        owner: user name for the file's uid, or the uid as text if the uid
            has no passwd entry.
        group: group name for the file's gid, or the gid as text if the gid
            has no group entry.
        mode: four-digit octal permission string such as ``"0644"``,
            including the setuid/setgid/sticky digit (e.g. ``"1755"``).

    Raises:
        OSError: If the path cannot be stat'ed.
    """
    import grp
    import pwd
    import stat

    st = os.stat(path, follow_symlinks=True)
    # S_IMODE keeps all twelve permission bits; masking with 0o777 would
    # silently drop setuid, setgid and sticky.
    mode = oct(stat.S_IMODE(st.st_mode))[2:].zfill(4)

    try:
        owner = pwd.getpwuid(st.st_uid).pw_name
    except KeyError:
        owner = str(st.st_uid)
    try:
        group = grp.getgrgid(st.st_gid).gr_name
    except KeyError:
        group = str(st.st_gid)
    return owner, group, mode