backup-docker-to-local 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: backup-docker-to-local
3
- Version: 1.1.1
3
+ Version: 1.2.0
4
4
  Summary: Backup Docker volumes to local with rsync and optional DB dumps.
5
5
  Author: Kevin Veen-Birkenbach
6
6
  License: AGPL-3.0-or-later
@@ -147,7 +147,7 @@ baudolo \
147
147
  | Flag | Description |
148
148
  | --------------- | ------------------------------------------- |
149
149
  | `--everything` | Always stop containers and re-run rsync |
150
- | `--dump-only` | Only create SQL dumps, skip file backups |
150
+ | `--dump-only-sql`| Skip file backups only for DB volumes when dumps succeed; non-DB volumes are still backed up; fallback to files if no dump. |
151
151
  | `--shutdown` | Do not restart containers after backup |
152
152
  | `--backups-dir` | Backup root directory (default: `/Backups`) |
153
153
  | `--repo-name` | Backup namespace under machine hash |
@@ -1,11 +1,11 @@
1
- backup_docker_to_local-1.1.1.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
1
+ backup_docker_to_local-1.2.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
2
2
  baudolo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  baudolo/backup/__init__.py,sha256=5BfF8JBXB2j6sAptcmswtbjlajNWxOho6_CjwIamO7k,30
4
4
  baudolo/backup/__main__.py,sha256=118gZ0wij9_PAtR-jlG7LizrhrxwhHlAcAPW1eFeJtU,140
5
- baudolo/backup/app.py,sha256=UE6i4U5nJJsLFjKsU7qYk52vVIdbNtyrOM04PpZaQA4,6197
6
- baudolo/backup/cli.py,sha256=4SJVYK-HwNrsHtO9crmKXE0XVcjwEu_sIwVXnh8aM8E,2238
5
+ baudolo/backup/app.py,sha256=dCKSbBGWi-Yw8JnrUTBUQLUyLHXxQd1Ebp_eqCD3LWA,6550
6
+ baudolo/backup/cli.py,sha256=KEHgeohlyBxkACQ6yxhRqh9nTbE2B74Q5U50JBJf7SU,2477
7
7
  baudolo/backup/compose.py,sha256=dJiZnHlBZdeXTVqLR5zIbAtwR97YWxbULVn6VL16Hqg,1136
8
- baudolo/backup/db.py,sha256=vHaBrXlxFGGd-NLaunKUZtE7MxVhLj4pYAFAzxs5xKI,2777
8
+ baudolo/backup/db.py,sha256=A6PRcTAL5ryivpAs6Lc9udmSMIOKGpCBdDq5tI_s8mo,4486
9
9
  baudolo/backup/docker.py,sha256=6Sj9fpf1bm-CoqoeerQaq059VyDnF1Pj2ketzIt1Nkk,1364
10
10
  baudolo/backup/shell.py,sha256=guMHWcRb0Qlrz79gqFqmJLXVQK8cJEvNkhkMe5Tpgwc,738
11
11
  baudolo/backup/volume.py,sha256=DWBp_dZUo-a0MfnbwEd8jRAAHvSyN6vGdRSye0iQJbE,1485
@@ -17,9 +17,9 @@ baudolo/restore/run.py,sha256=rai5F27D6F8RRnFMyjcEGiHBAlVXtMOJoazs6zkLSC0,2302
17
17
  baudolo/restore/db/__init__.py,sha256=C4K_YAB0F9p8QhZRZIRyV2naMb54Qf_1O2cDxwwVQAI,59
18
18
  baudolo/restore/db/mariadb.py,sha256=_JEQFuF4GhTl8lKmgk2gsHYcXHcpCKrf32tigMSeTKI,2782
19
19
  baudolo/restore/db/postgres.py,sha256=_NIlcxdpcQIUZ8SNBWfWIaH1oGIkmZy9emFMgjGznhk,1446
20
- baudolo/seed/__main__.py,sha256=Fxto3wfzrA80gTH5p13wH0aqy-1PtN6bwklla9aGDo4,2046
21
- backup_docker_to_local-1.1.1.dist-info/METADATA,sha256=JChpUJ4hPotP_Un3EeTogZfyYmQHXZAPkrBBRonmwGE,7116
22
- backup_docker_to_local-1.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
23
- backup_docker_to_local-1.1.1.dist-info/entry_points.txt,sha256=92f5jPSjW__9-u5gzwmWkdiHKt4ysEFCVmwMtorQCv4,147
24
- backup_docker_to_local-1.1.1.dist-info/top_level.txt,sha256=y_5iNpF7EdLzqWWXIDfcTJpqijyy2hvrYgNiJXrN4r4,8
25
- backup_docker_to_local-1.1.1.dist-info/RECORD,,
20
+ baudolo/seed/__main__.py,sha256=bw4IaWVhhqYF5toOmtziHRDWbYgTvqHqqRV4KnHKyf4,3088
21
+ backup_docker_to_local-1.2.0.dist-info/METADATA,sha256=Mg-_GQiTyZG1VdS_PiwAFjAKC8V0lrHml_gC60dcN7I,7200
22
+ backup_docker_to_local-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
23
+ backup_docker_to_local-1.2.0.dist-info/entry_points.txt,sha256=92f5jPSjW__9-u5gzwmWkdiHKt4ysEFCVmwMtorQCv4,147
24
+ backup_docker_to_local-1.2.0.dist-info/top_level.txt,sha256=y_5iNpF7EdLzqWWXIDfcTJpqijyy2hvrYgNiJXrN4r4,8
25
+ backup_docker_to_local-1.2.0.dist-info/RECORD,,
baudolo/backup/app.py CHANGED
@@ -132,7 +132,12 @@ def main() -> int:
132
132
  versions_dir = os.path.join(args.backups_dir, machine_id, args.repo_name)
133
133
  version_dir = create_version_directory(versions_dir, backup_time)
134
134
 
135
- databases_df = pandas.read_csv(args.databases_csv, sep=";")
135
+ # IMPORTANT:
136
+ # - keep_default_na=False prevents empty fields from turning into NaN
137
+ # - dtype=str keeps all columns stable for comparisons/validation
138
+ databases_df = pandas.read_csv(
139
+ args.databases_csv, sep=";", keep_default_na=False, dtype=str
140
+ )
136
141
 
137
142
  print("💾 Start volume backups...", flush=True)
138
143
 
@@ -140,8 +145,16 @@ def main() -> int:
140
145
  print(f"Start backup routine for volume: {volume_name}", flush=True)
141
146
  containers = containers_using_volume(volume_name)
142
147
 
148
+ # EARLY SKIP: if all linked containers are ignored, do not create any dirs
149
+ if volume_is_fully_ignored(containers, args.images_no_backup_required):
150
+ print(
151
+ f"Skipping volume '{volume_name}' entirely (all linked containers are ignored).",
152
+ flush=True,
153
+ )
154
+ continue
155
+
143
156
  vol_dir = create_volume_directory(version_dir, volume_name)
144
-
157
+
145
158
  found_db, dumped_any = _backup_dumps_for_volume(
146
159
  containers=containers,
147
160
  vol_dir=vol_dir,
@@ -149,26 +162,19 @@ def main() -> int:
149
162
  database_containers=args.database_containers,
150
163
  )
151
164
 
152
- # dump-only logic:
153
- if args.dump_only:
154
- if found_db and not dumped_any:
155
- print(
156
- f"WARNING: dump-only requested but no DB dump was produced for DB volume '{volume_name}'. Falling back to file backup.",
157
- flush=True,
158
- )
159
- # continue to file backup below
160
- else:
161
- # keep old behavior: skip file backups
162
- continue
163
-
164
-
165
- # skip file backup if all linked containers are ignored
166
- if volume_is_fully_ignored(containers, args.images_no_backup_required):
167
- print(
168
- f"Skipping file backup for volume '{volume_name}' (all linked containers are ignored).",
169
- flush=True,
170
- )
171
- continue
165
+ # dump-only-sql logic:
166
+ if args.dump_only_sql:
167
+ if found_db:
168
+ if not dumped_any:
169
+ print(
170
+ f"WARNING: dump-only-sql requested but no DB dump was produced for DB volume '{volume_name}'. Falling back to file backup.",
171
+ flush=True,
172
+ )
173
+ # fall through to file backup below
174
+ else:
175
+ # DB volume successfully dumped -> skip file backup
176
+ continue
177
+ # Non-DB volume -> always do file backup (fall through)
172
178
 
173
179
  if args.everything:
174
180
  # "everything": always do pre-rsync, then stop + rsync again
baudolo/backup/cli.py CHANGED
@@ -68,10 +68,15 @@ def parse_args() -> argparse.Namespace:
68
68
  action="store_true",
69
69
  help="Do not restart containers after backup",
70
70
  )
71
+
71
72
  p.add_argument(
72
- "--dump-only",
73
+ "--dump-only-sql",
73
74
  action="store_true",
74
- help="Only create DB dumps (skip ALL file rsync backups)",
75
+ help=(
76
+ "Create database dumps only for DB volumes. "
77
+ "File backups are skipped for DB volumes if a dump succeeds, "
78
+ "but non-DB volumes are still backed up. "
79
+ "If a DB dump cannot be produced, baudolo falls back to a file backup."
80
+ ),
75
81
  )
76
-
77
82
  return p.parse_args()
baudolo/backup/db.py CHANGED
@@ -4,6 +4,8 @@ import os
4
4
  import pathlib
5
5
  import re
6
6
  import logging
7
+ from typing import Optional
8
+
7
9
  import pandas
8
10
 
9
11
  from .shell import BackupException, execute_shell_command
@@ -12,17 +14,53 @@ log = logging.getLogger(__name__)
12
14
 
13
15
 
14
16
  def get_instance(container: str, database_containers: list[str]) -> str:
17
+ """
18
+ Derive a stable instance name from the container name.
19
+ """
15
20
  if container in database_containers:
16
21
  return container
17
22
  return re.split(r"(_|-)(database|db|postgres)", container)[0]
18
23
 
19
24
 
25
+ def _validate_database_value(value: Optional[str], *, instance: str) -> str:
26
+ """
27
+ Enforce explicit database semantics:
28
+
29
+ - "*" => dump ALL databases (cluster dump for Postgres)
30
+ - "<name>" => dump exactly this database
31
+ - "" => invalid configuration (would previously result in NaN / nan.backup.sql)
32
+ """
33
+ v = (value or "").strip()
34
+ if v == "":
35
+ raise ValueError(
36
+ f"Invalid databases.csv entry for instance '{instance}': "
37
+ "column 'database' must be '*' or a concrete database name (not empty)."
38
+ )
39
+ return v
40
+
41
+
42
+ def _atomic_write_cmd(cmd: str, out_file: str) -> None:
43
+ """
44
+ Write dump output atomically:
45
+ - write to <file>.tmp
46
+ - rename to <file> only on success
47
+
48
+ This prevents empty or partial dump files from being treated as valid backups.
49
+ """
50
+ tmp = f"{out_file}.tmp"
51
+ execute_shell_command(f"{cmd} > {tmp}")
52
+ execute_shell_command(f"mv {tmp} {out_file}")
53
+
54
+
20
55
  def fallback_pg_dumpall(container: str, username: str, password: str, out_file: str) -> None:
56
+ """
57
+ Perform a full Postgres cluster dump using pg_dumpall.
58
+ """
21
59
  cmd = (
22
60
  f"PGPASSWORD={password} docker exec -i {container} "
23
- f"pg_dumpall -U {username} -h localhost > {out_file}"
61
+ f"pg_dumpall -U {username} -h localhost"
24
62
  )
25
- execute_shell_command(cmd)
63
+ _atomic_write_cmd(cmd, out_file)
26
64
 
27
65
 
28
66
  def backup_database(
@@ -34,12 +72,15 @@ def backup_database(
34
72
  database_containers: list[str],
35
73
  ) -> bool:
36
74
  """
37
- Returns True if at least one dump file was produced, else False.
75
+ Backup databases for a given DB container.
76
+
77
+ Returns True if at least one dump was produced.
38
78
  """
39
79
  instance_name = get_instance(container, database_containers)
40
- entries = databases_df.loc[databases_df["instance"] == instance_name]
80
+
81
+ entries = databases_df[databases_df["instance"] == instance_name]
41
82
  if entries.empty:
42
- log.warning("No entry found for instance '%s' (skipping DB dump)", instance_name)
83
+ log.debug("No database entries for instance '%s'", instance_name)
43
84
  return False
44
85
 
45
86
  out_dir = os.path.join(volume_dir, "sql")
@@ -48,43 +89,56 @@ def backup_database(
48
89
  produced = False
49
90
 
50
91
  for row in entries.itertuples(index=False):
51
- db_name = row.database
52
- user = row.username
53
- password = row.password
92
+ raw_db = getattr(row, "database", "")
93
+ user = (getattr(row, "username", "") or "").strip()
94
+ password = (getattr(row, "password", "") or "").strip()
95
+
96
+ db_value = _validate_database_value(raw_db, instance=instance_name)
97
+
98
+ # Explicit: dump ALL databases
99
+ if db_value == "*":
100
+ if db_type != "postgres":
101
+ raise ValueError(
102
+ f"databases.csv entry for instance '{instance_name}': "
103
+ "'*' is currently only supported for Postgres."
104
+ )
105
+
106
+ cluster_file = os.path.join(
107
+ out_dir, f"{instance_name}.cluster.backup.sql"
108
+ )
109
+ fallback_pg_dumpall(container, user, password, cluster_file)
110
+ produced = True
111
+ continue
54
112
 
113
+ # Concrete database dump
114
+ db_name = db_value
55
115
  dump_file = os.path.join(out_dir, f"{db_name}.backup.sql")
56
116
 
57
117
  if db_type == "mariadb":
58
118
  cmd = (
59
119
  f"docker exec {container} /usr/bin/mariadb-dump "
60
- f"-u {user} -p{password} {db_name} > {dump_file}"
120
+ f"-u {user} -p{password} {db_name}"
61
121
  )
62
- execute_shell_command(cmd)
122
+ _atomic_write_cmd(cmd, dump_file)
63
123
  produced = True
64
124
  continue
65
125
 
66
126
  if db_type == "postgres":
67
- cluster_file = os.path.join(out_dir, f"{instance_name}.cluster.backup.sql")
68
-
69
- if not db_name:
70
- fallback_pg_dumpall(container, user, password, cluster_file)
71
- return True
72
-
73
127
  try:
74
128
  cmd = (
75
129
  f"PGPASSWORD={password} docker exec -i {container} "
76
- f"pg_dump -U {user} -d {db_name} -h localhost > {dump_file}"
130
+ f"pg_dump -U {user} -d {db_name} -h localhost"
77
131
  )
78
- execute_shell_command(cmd)
132
+ _atomic_write_cmd(cmd, dump_file)
79
133
  produced = True
80
134
  except BackupException as e:
81
- print(f"pg_dump failed: {e}", flush=True)
82
- print(
83
- f"Falling back to pg_dumpall for instance '{instance_name}'",
84
- flush=True,
135
+ # Explicit DB dump failed -> hard error
136
+ raise BackupException(
137
+ f"Postgres dump failed for instance '{instance_name}', "
138
+ f"database '{db_name}'. This database was explicitly configured "
139
+ "and therefore must succeed.\n"
140
+ f"{e}"
85
141
  )
86
- fallback_pg_dumpall(container, user, password, cluster_file)
87
- produced = True
88
142
  continue
89
143
 
90
144
  return produced
baudolo/seed/__main__.py CHANGED
@@ -1,67 +1,106 @@
1
- import pandas as pd
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
2
4
  import argparse
3
5
  import os
6
+ import re
7
+ import sys
8
+ import pandas as pd
9
+ from typing import Optional
4
10
 
5
11
 
6
- def check_and_add_entry(file_path, instance, database, username, password):
7
- # Check if the file exists and is not empty
8
- if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
9
- # Read the existing CSV file with header
10
- df = pd.read_csv(file_path, sep=";")
11
- else:
12
- # Create a new DataFrame with columns if file does not exist
13
- df = pd.DataFrame(columns=["instance", "database", "username", "password"])
14
-
15
- # Check if the entry exists and remove it
16
- mask = (
17
- (df["instance"] == instance)
18
- & (
19
- (df["database"] == database)
20
- | (((df["database"].isna()) | (df["database"] == "")) & (database == ""))
12
+ DB_NAME_RE = re.compile(r"^[a-zA-Z0-9_][a-zA-Z0-9_-]*$")
13
+
14
+ def _validate_database_value(value: Optional[str], *, instance: str) -> str:
15
+ v = (value or "").strip()
16
+ if v == "":
17
+ raise ValueError(
18
+ f"Invalid databases.csv entry for instance '{instance}': "
19
+ "column 'database' must be '*' or a concrete database name (not empty)."
21
20
  )
22
- & (df["username"] == username)
23
- )
21
+ if v == "*":
22
+ return "*"
23
+ if v.lower() == "nan":
24
+ raise ValueError(
25
+ f"Invalid databases.csv entry for instance '{instance}': database must not be 'nan'."
26
+ )
27
+ if not DB_NAME_RE.match(v):
28
+ raise ValueError(
29
+ f"Invalid databases.csv entry for instance '{instance}': "
30
+ f"invalid database name '{v}'. Allowed: letters, numbers, '_' and '-'."
31
+ )
32
+ return v
33
+
34
+ def check_and_add_entry(
35
+ file_path: str,
36
+ instance: str,
37
+ database: Optional[str],
38
+ username: str,
39
+ password: str,
40
+ ) -> None:
41
+ """
42
+ Add or update an entry in databases.csv.
43
+
44
+ The function enforces strict validation:
45
+ - database MUST be set
46
+ - database MUST be '*' or a valid database name
47
+ """
48
+ database = _validate_database_value(database, instance=instance)
24
49
 
25
- if not df[mask].empty:
26
- print("Replacing existing entry.")
27
- df = df[~mask]
50
+ if os.path.exists(file_path):
51
+ df = pd.read_csv(
52
+ file_path,
53
+ sep=";",
54
+ dtype=str,
55
+ keep_default_na=False,
56
+ )
28
57
  else:
29
- print("Adding new entry.")
58
+ df = pd.DataFrame(
59
+ columns=["instance", "database", "username", "password"]
60
+ )
30
61
 
31
- # Create a new DataFrame for the new entry
32
- new_entry = pd.DataFrame(
33
- [
34
- {
35
- "instance": instance,
36
- "database": database,
37
- "username": username,
38
- "password": password,
39
- }
40
- ]
41
- )
62
+ mask = (df["instance"] == instance) & (df["database"] == database)
42
63
 
43
- # Add (or replace) the entry using concat
44
- df = pd.concat([df, new_entry], ignore_index=True)
64
+ if mask.any():
65
+ print("Updating existing entry.")
66
+ df.loc[mask, ["username", "password"]] = [username, password]
67
+ else:
68
+ print("Adding new entry.")
69
+ new_entry = pd.DataFrame(
70
+ [[instance, database, username, password]],
71
+ columns=["instance", "database", "username", "password"],
72
+ )
73
+ df = pd.concat([df, new_entry], ignore_index=True)
45
74
 
46
- # Save the updated CSV file
47
75
  df.to_csv(file_path, sep=";", index=False)
48
76
 
49
77
 
50
- def main():
78
+ def main() -> None:
51
79
  parser = argparse.ArgumentParser(
52
- description="Check and replace (or add) a database entry in a CSV file."
80
+ description="Seed or update databases.csv for backup configuration."
81
+ )
82
+ parser.add_argument("file", help="Path to databases.csv")
83
+ parser.add_argument("instance", help="Instance name (e.g. bigbluebutton)")
84
+ parser.add_argument(
85
+ "database",
86
+ help="Database name or '*' to dump all databases",
53
87
  )
54
- parser.add_argument("file_path", help="Path to the CSV file")
55
- parser.add_argument("instance", help="Database instance")
56
- parser.add_argument("database", help="Database name")
57
- parser.add_argument("username", help="Username")
58
- parser.add_argument("password", nargs="?", default="", help="Password (optional)")
88
+ parser.add_argument("username", help="Database username")
89
+ parser.add_argument("password", help="Database password")
59
90
 
60
91
  args = parser.parse_args()
61
92
 
62
- check_and_add_entry(
63
- args.file_path, args.instance, args.database, args.username, args.password
64
- )
93
+ try:
94
+ check_and_add_entry(
95
+ file_path=args.file,
96
+ instance=args.instance,
97
+ database=args.database,
98
+ username=args.username,
99
+ password=args.password,
100
+ )
101
+ except Exception as exc:
102
+ print(f"ERROR: {exc}", file=sys.stderr)
103
+ sys.exit(1)
65
104
 
66
105
 
67
106
  if __name__ == "__main__":