linkarchivetools 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkarchivetools/LICENSE +674 -0
- linkarchivetools/README.md +3 -0
- linkarchivetools/__init__.py +8 -0
- linkarchivetools/backup.py +764 -0
- linkarchivetools/db2feeds.py +263 -0
- linkarchivetools/db2json.py +188 -0
- linkarchivetools/dbanalyzer.py +356 -0
- linkarchivetools/dbfilter.py +154 -0
- linkarchivetools/dbmerge.py +82 -0
- linkarchivetools/json2db.py +237 -0
- linkarchivetools/tableconfig.py +66 -0
- linkarchivetools/utils/alchemysearch.py +177 -0
- linkarchivetools/utils/omnisearch.py +335 -0
- linkarchivetools/utils/reflected.py +501 -0
- linkarchivetools-0.1.10.dist-info/LICENSE +674 -0
- linkarchivetools-0.1.10.dist-info/METADATA +38 -0
- linkarchivetools-0.1.10.dist-info/RECORD +18 -0
- linkarchivetools-0.1.10.dist-info/WHEEL +4 -0
@@ -0,0 +1,764 @@
"""
PostgreSQL backup script.

- Creates and restores backups (custom format, SQLite format)
- Checks for data corruption with analyze, vacuum and reindex

What to do:
- run vacuum, which reveals problems with the data
- if any table is affected, run reindex on it
- run reindex from time to time
"""
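
# Illustrative invocations (a sketch only; the exact flags are defined in
# parse_backup_commandline() below, and "myworkspace" is just a placeholder):
#
#   python -m linkarchivetools.backup --backup -w myworkspace -U user -d db
#   python -m linkarchivetools.backup --vacuum --reindex -w myworkspace
#
# pg_dump/psql/pg_restore read the password from ~/.pgpass, as noted in the
# argument parser description.
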
import sys
import os
import subprocess
import argparse
from pathlib import Path
import time
from datetime import datetime

from sqlalchemy import create_engine, Column, String, Integer, MetaData, Table, text, LargeBinary, DateTime, select
from sqlalchemy.dialects.postgresql.types import BYTEA
from sqlalchemy.orm import sessionmaker

from linkarchivetools.utils.reflected import *
from linkarchivetools.tableconfig import get_backup_tables


output_directory = Path(__file__).parents[1]


tables_to_backup = get_backup_tables()


all_tables = list(tables_to_backup)
all_tables.append("blockentry")
all_tables.append("apikeys")
all_tables.append("applogging")
all_tables.append("backgroundjob")
all_tables.append("backgroundjobhistory")
all_tables.append("keywords")


def get_backup_directory(export_type):
    return output_directory / "data" / ("backup_" + export_type)


def get_workspace_backup_directory(export_type, workspace):
    return get_backup_directory(export_type) / workspace


def run_pg_dump_backup(run_info):
    workspace = run_info["workspace"]
    tables = run_info["tables"]
    output_file = run_info["output_file"]
    user = run_info["user"]
    database = run_info["database"]
    host = run_info["host"]

    if "format" not in run_info:
        run_info["format"] = "custom"

    if run_info["format"] == "custom":
        format_args = "c"
    elif run_info["format"] == "plain" or run_info["format"] == "sql":
        format_args = "p"

    command_input = [
        "pg_dump",
        "-h", host,
        "-U", user,
        "-d", database,
        "-F", format_args,
        "-f", output_file,
        "--data-only",
    ]

    if "format" in run_info and run_info["format"] == "sql":
        command_input.append("--inserts")

    for table in tables:
        command_input.append("-t")
        command_input.append(table)

    operating_dir = get_workspace_backup_directory(run_info["format"], workspace)
    operating_dir.mkdir(parents=True, exist_ok=True)

    print("Running: {} @ {}".format(command_input, operating_dir))

    try:
        result = subprocess.run(command_input, cwd=str(operating_dir), check=True,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        print("Backup completed successfully.")
    except subprocess.CalledProcessError as e:
        print("An error occurred:", e)
        print("Standard Output:", e.stdout)
        print("Standard Error:", e.stderr)
        return False

    return True
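
# For orientation, the pg_dump command assembled above for the default "custom"
# format looks roughly like this (all values below are placeholders, not output
# captured from a real run):
#
#   pg_dump -h 127.0.0.1 -U user -d db -F c -f instance_linkdatamodel --data-only -t myworkspace_linkdatamodel
#
# The dump lands inside data/backup_custom/<workspace>/ because the command is
# executed with that directory as its working directory.
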

def truncate_table(run_info, table):
    user = run_info["user"]
    database = run_info["database"]
    host = run_info["host"]
    tables = run_info["tables"]

    print("Truncating {}".format(table))

    sql = f'TRUNCATE TABLE {table} CASCADE;'

    command = [
        'psql',
        "-h", host,
        "-U", user,
        "-d", database,
        '-c', sql,
    ]

    try:
        subprocess.run(command, check=True)
        print("Table truncated successfully.")
    except subprocess.CalledProcessError as e:
        print("An error occurred:", e)
        return False

    return True


def reset_table_index_sequence(run_info, table):
    user = run_info["user"]
    database = run_info["database"]
    host = run_info["host"]
    tables = run_info["tables"]

    print("Resetting sequence for table {}".format(table))

    sql = f"SELECT setval('{table}_id_seq', COALESCE((SELECT MAX(id) FROM {table}), 1));"

    command = [
        'psql',
        "-h", host,
        "-U", user,
        "-d", database,
        '-c', sql,
    ]

    try:
        subprocess.run(command, check=True)
        print("Reset index sequence successfully.")
    except subprocess.CalledProcessError as e:
        print("An error occurred:", e)
        return False

    return True


def reset_tables_index_sequence(tablemapping, run_info):
    workspace = run_info["workspace"]

    # after restore we need to reset sequences
    for item in tablemapping:
        key = item[0]
        tables = item[1]

        run_info["output_file"] = key
        run_info["tables"] = tables

        for table in tables:
            table = table.replace("instance_", workspace + "_")

            if not reset_table_index_sequence(run_info, table):
                print("Could not reset index in table")
                return False

    return True


def run_table_sql(run_info, table, sql):
    user = run_info["user"]
    database = run_info["database"]
    host = run_info["host"]

    print("Call {} initiating".format(table))

    command = [
        'psql',
        "-h", host,
        "-U", user,
        "-d", database,
        '-c', sql,
    ]

    try:
        subprocess.run(command, check=True)
        print("Call {} successful.".format(sql))
    except subprocess.CalledProcessError as e:
        print("An error occurred:", e)
        return False

    return True


def truncate_all(run_info):
    workspace = run_info["workspace"]
    user = run_info["user"]
    database = run_info["database"]
    host = run_info["host"]
    tables = run_info["tables"]

    for table in tables:
        table = table.replace("instance_", workspace + "_")

        if not truncate_table(run_info, table):
            return False

    return True


def run_pg_restore(run_info):
    workspace = run_info["workspace"]
    tables = run_info["tables"]
    output_file = run_info["output_file"]
    user = run_info["user"]
    database = run_info["database"]
    host = run_info["host"]

    if "format" not in run_info:
        run_info["format"] = "custom"

    if run_info["format"] == "custom":
        format_args = "c"
    elif run_info["format"] == "plain":
        format_args = "p"

    command_input = [
        "pg_restore",
        "-h", host,
        "-U", user,
        "-d", database,
        "-F", format_args,
        "--data-only",
        output_file,
    ]

    for table in tables:
        command_input.append("-t")
        command_input.append(table)

    operating_dir = get_workspace_backup_directory(run_info["format"], workspace)
    operating_dir.mkdir(parents=True, exist_ok=True)

    print("Running: {} @ {}".format(command_input, operating_dir))

    try:
        subprocess.run(command_input, cwd=str(operating_dir), check=True)
        print("Restore completed successfully.")
    except subprocess.CalledProcessError as e:
        print("An error occurred:", e)
        return False

    return True


### SQLite code

def create_destionation_table(table_name, source_table, destination_engine):
    """
    Copy columns from postgres to sqlite
    BYTEA is not represented in sqlite
    """
    with destination_engine.connect() as connection:
        if not destination_engine.dialect.has_table(connection, table_name):
            columns = []
            for column in source_table.columns:
                col_type = column.type
                if isinstance(col_type, BYTEA):
                    col_type = LargeBinary

                # Handle 'id' as primary key and autoincrement in SQLite
                if column.name == "id":
                    columns.append(Column(
                        "id",
                        Integer,
                        primary_key=True,
                        autoincrement=True,
                        nullable=False
                    ))
                else:
                    columns.append(Column(
                        column.name,
                        col_type,
                        nullable=column.nullable
                    ))

                # For debugging purposes, you can print column details
                # print(column.name)
                # print(column.type.__class__)
                # print(f"Nullable: {column.nullable}")

            destination_metadata = MetaData()
            destination_table = Table(table_name, destination_metadata, *columns)
            destination_table.create(destination_engine)


def get_engine_table(workspace, table_name, engine, with_workspace=True):
    if with_workspace:
        this_tables_name = "{}_{}".format(workspace, table_name)
    else:
        this_tables_name = table_name

    engine_metadata = MetaData()
    engine_table = Table(this_tables_name, engine_metadata, autoload_with=engine)
    return engine_table


def get_table_row_values(row, source_table):
    data = {}

    for column in source_table.columns:
        value = getattr(row, column.name)
        data[column.name] = value

    return data


def is_row_with_id(connection, table, id_value):
    try:
        existing = connection.execute(
            select(table.c.id).where(table.c.id == id_value)
        ).first()

        return existing
    except Exception as E:
        connection.rollback()
        return False


def copy_table(instance, table_name, source_engine, destination_engine, override=False, to_sqlite=True, commit_every_row=False):
    """
    Copies a table from postgres to the destination
    @param override If True, works faster, since it does not check whether a row with the id already exists
    @param to_sqlite If copying to SQLite, destination table names will not include the workspace prefix.
                     If copying from SQLite, source table names will not include the workspace prefix.
    """
    Session = sessionmaker(bind=source_engine)
    session = Session()

    source_table = get_engine_table(instance, table_name, source_engine, with_workspace=to_sqlite)
    destination_table = get_engine_table(instance, table_name, destination_engine, with_workspace=not to_sqlite)

    print(f"Copying from {source_table} to {destination_table}")

    with destination_engine.connect() as destination_connection:
        with source_engine.connect() as connection:
            result = connection.execute(source_table.select())

            index = 0
            for row in result:
                index += 1
                sys.stdout.write("{}\r".format(index))

                data = get_table_row_values(row, source_table)

                # Check if the ID already exists
                if not override:
                    id_value = data.get("id")
                    if id_value is not None:
                        existing = is_row_with_id(destination_connection, destination_table, id_value)

                        if existing:
                            continue

                try:
                    destination_connection.execute(destination_table.insert(), data)
                except Exception as e:
                    print(f"Skipping row {index} due to insert error {e}")
                    destination_connection.rollback()
                    continue

                if commit_every_row:
                    try:
                        destination_connection.commit()
                    except Exception as e:
                        print(f"Skipping row {index} due to insert error {e}")
                        destination_connection.rollback()
                        continue

            if not commit_every_row:
                destination_connection.commit()

    session.close()
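
# A minimal sketch of how the helpers above chain together for a single table
# (assumes a populated run_info dict as built in Backup.process(); the table
# name "linkdatamodel" is only an example, and the engine helpers are defined
# a little further below):
#
#   source_engine = get_local_engine(run_info)
#   destination_engine = get_sqlite_engine(run_info)
#   source_table = get_engine_table(run_info["workspace"], "linkdatamodel", source_engine)
#   create_destionation_table("linkdatamodel", source_table, destination_engine)
#   copy_table(run_info["workspace"], "linkdatamodel", source_engine, destination_engine, to_sqlite=True)
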

def obfuscate_user_table(table_name, destination_engine):
    """
    Remove passwords from the database
    """
    destination_metadata = MetaData()
    destination_table = Table(table_name, destination_metadata, autoload_with=destination_engine)

    columns = destination_table.columns.keys()
    is_superuser_index = columns.index('is_superuser')

    with destination_engine.connect() as destination_connection:
        result = destination_connection.execute(destination_table.select())

        for row in result:
            update_stmt = destination_table.update().where(destination_table.c.id == row[0]).values(password='')
            destination_connection.execute(update_stmt)

            # The superuser account gets a generic name in the exported copy
            if is_superuser_index and row[is_superuser_index]:
                update_stmt = destination_table.update().where(destination_table.c.id == row[0]).values(username='admin')
                destination_connection.execute(update_stmt)

        destination_connection.commit()


def create_indexes(destination_engine, table_name, column_name):
    destination_metadata = MetaData()
    destination_table = Table(table_name, destination_metadata, autoload_with=destination_engine)

    #r = ReflectedTable(destination_engine)
    #r.create_index(destination_table, "link")
    #r.create_index(destination_table, "title")
    #r.create_index(destination_table, "date_published")


def obfuscate_all(destination_engine):
    obfuscate_user_table("user", destination_engine)

    with destination_engine.connect() as connection:
        r = ReflectedTable(engine=destination_engine, connection=connection)
        r.truncate_table("dataexport")
        r.truncate_table("usersearchhistory")
        r.truncate_table("credentials")


#### SQLite


def get_local_engine(run_info):
    workspace = run_info["workspace"]
    user = run_info["user"]
    database = run_info["database"]
    host = run_info["host"]
    password = run_info["password"]

    # Create the database engine
    SOURCE_DATABASE_URL = f"postgresql://{user}:{password}@{host}/{database}"
    source_engine = create_engine(SOURCE_DATABASE_URL)

    return source_engine


def get_sqlite_engine(run_info):
    workspace = run_info["workspace"]

    file_name = workspace + ".db"
    DESTINATION_DATABASE_URL = "sqlite:///" + file_name
    destination_engine = create_engine(DESTINATION_DATABASE_URL)

    return destination_engine


def run_db_copy_backup(run_info):
    workspace = run_info["workspace"]
    tables = run_info["tables"]
    empty = run_info["empty"]

    # Create the database engine
    source_engine = get_local_engine(run_info)

    operating_dir = get_workspace_backup_directory(run_info["format"], workspace)
    operating_dir.mkdir(parents=True, exist_ok=True)
    os.chdir(operating_dir)

    destination_engine = get_sqlite_engine(run_info)

    for table in tables:
        table = table.replace(workspace + "_", "")

        source_table = get_engine_table(workspace, table, source_engine)
        create_destionation_table(table, source_table, destination_engine)

        if not empty:
            copy_table(workspace, table, source_engine, destination_engine, override=True, to_sqlite=True)

    return True


def run_db_copy_restore(run_info):
    workspace = run_info["workspace"]
    tables = run_info["tables"]
    empty = run_info["empty"]
    append = run_info["append"]

    destination_engine = get_local_engine(run_info)

    operating_dir = get_workspace_backup_directory(run_info["format"], workspace)
    os.chdir(operating_dir)

    source_engine = get_sqlite_engine(run_info)

    for table in tables:
        table = table.replace(workspace + "_", "")
        copy_table(workspace, table, source_engine, destination_engine, override=False, to_sqlite=False, commit_every_row=True)

    return True


def run_db_copy_backup_auth(run_info):
    workspace = run_info["workspace"]

    # Create the database engine
    source_engine = get_local_engine(run_info)

    operating_dir = get_workspace_backup_directory(run_info["format"], workspace)
    operating_dir.mkdir(parents=True, exist_ok=True)
    os.chdir(operating_dir)

    destination_engine = get_sqlite_engine(run_info)

    source_table = get_engine_table("auth", "user", source_engine)
    create_destionation_table("user", source_table, destination_engine)

    copy_table("auth", "user", source_engine, destination_engine, override=True, to_sqlite=True)

    return True


def backup_workspace(run_info):
    """
    @note table order is important

    mapping:
    file : tables
    """
    print("--------------------")
    print(run_info["workspace"])
    print("--------------------")

    workspace = run_info["workspace"]

    for table in tables_to_backup:
        new_run_info = dict(run_info)
        new_key = "instance_" + table

        new_run_info["tables"] = []
        new_run_info["output_file"] = new_key

        table_name = workspace + "_" + table
        new_run_info["tables"].append(table_name)

        if new_run_info["format"] == "sqlite":
            if not run_db_copy_backup(new_run_info):
                return False
        else:
            if not run_pg_dump_backup(new_run_info):
                return False

    if run_info["format"] == "custom":
        new_run_info = dict(run_info)
        new_run_info["tables"] = ['auth_user']
        new_run_info["workspace"] = 'auth'
        new_run_info["output_file"] = "auth_user"
        if not run_pg_dump_backup(new_run_info):
            return False

    if run_info["format"] == "sqlite":
        run_db_copy_backup_auth(run_info)

        destination_engine = get_sqlite_engine(run_info)

        create_indexes(destination_engine, "linkdatamodel", "link")
        create_indexes(destination_engine, "linkdatamodel", "title")
        create_indexes(destination_engine, "linkdatamodel", "date_published")

        obfuscate_all(destination_engine)

    return True
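
# For a "custom" format backup of workspace "myworkspace" (a placeholder name),
# the loop above is expected to leave a layout roughly like:
#
#   <output_directory>/data/backup_custom/myworkspace/instance_<table>   (one pg_dump file per table)
#   <output_directory>/data/backup_custom/auth/auth_user                 (the shared auth_user dump)
#
# restore_workspace() below reads the same per-table files back.
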

def restore_workspace(run_info):
    """
    @note table order is important
    """
    workspace = run_info["workspace"]

    print("--------------------")
    print(run_info["workspace"])
    print("--------------------")

    if not run_info["append"]:
        for table in tables_to_backup:
            key = "./instance_" + table
            tables = [workspace + "_" + table]

            run_info["output_file"] = key
            run_info["tables"] = tables

            if not truncate_all(run_info):
                print("Could not truncate table")
                return

    for table in tables_to_backup:
        key = "./instance_" + table
        tables = [workspace + "_" + table]

        new_run_info = dict(run_info)

        new_key = key.replace("instance", workspace)
        new_run_info["output_file"] = new_key
        new_run_info["tables"] = []

        for item in tables:
            table_name = item.replace("instance", workspace)
            new_run_info["tables"].append(table_name)

        if new_run_info["format"] == "sqlite":
            if not run_db_copy_restore(new_run_info):
                return False
        else:
            if not run_pg_restore(new_run_info):
                return False

    # Rebuild the file -> tables mapping (the same "instance_<table>" naming used
    # during backup) so the id sequences can be reset after the data is loaded.
    tablemapping = [("instance_" + table, ["instance_" + table]) for table in tables_to_backup]
    reset_tables_index_sequence(tablemapping, run_info)

    return True


def run_sql_for_workspaces(run_info, sql_command):
    print("--------------------")
    print(run_info["workspace"])
    print("--------------------")

    workspace = run_info["workspace"]

    for table in all_tables:
        call_table = workspace + "_" + table
        call_sql_command = sql_command.replace("{table}", call_table)

        if not run_table_sql(run_info, call_table, call_sql_command):
            return False

    return True
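
# The {table} placeholder in the maintenance commands is substituted per table,
# so for a workspace called "myworkspace" (a placeholder) the statements issued
# through psql look roughly like:
#
#   ANALYZE myworkspace_linkdatamodel;
#   VACUUM myworkspace_linkdatamodel;
#   REINDEX TABLE myworkspace_linkdatamodel;
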

def parse_backup_commandline():
    parser = argparse.ArgumentParser(prog="Backup", description="Backup manager. Please provide a .pgpass file and define your password there.")

    parser.add_argument("-b", "--backup", action="store_true", help="Perform a backup")
    parser.add_argument("-r", "--restore", action="store_true", help="Restore from a backup")
    parser.add_argument("-o", "--output-dir", help="Output directory")
    parser.add_argument("-a", "--analyze", action="store_true", help="Analyze the database")
    parser.add_argument("--vacuum", action="store_true", help="Vacuum the database")
    parser.add_argument("--reindex", action="store_true", help="Reindex the database. Useful to detect errors in consistency")
    parser.add_argument("-s", "--sequence-update", action="store_true", help="Updates sequence numbers")
    parser.add_argument("-U", "--user", default="user", help="Username for the database (default: 'user')")
    parser.add_argument("-d", "--database", default="db", help="Database name (default: 'db')")
    parser.add_argument("-p", "--password", default="", help="Password. Necessary for the sqlite format")
    parser.add_argument("-w", "--workspaces", help="Workspaces for which to perform backup/restore, comma separated")
    parser.add_argument("-D", "--debug", help="Enable debug output")  # TODO implement debug
    parser.add_argument("-i", "--ignore-errors", action="store_true", help="Ignore errors during the operation")
    parser.add_argument("--empty", action="store_true", help="Creates an empty table version during backup")
    parser.add_argument("--append", action="store_true", help="Appends data during restore, does not clear tables")
    parser.add_argument("-f", "--format", default="custom", choices=["custom", "plain", "sql", "sqlite"],
                        help="Format of the backup (default: 'custom'). Choices: 'custom', 'plain', 'sql', or 'sqlite'.")

    parser.add_argument("--host", default="127.0.0.1", help="Host address for the database (default: 127.0.0.1)")

    return parser, parser.parse_args()


class Backup(object):
    def __init__(self, args):
        self.args = args

    def process(self):
        workspaces = []

        if self.args.workspaces:
            if self.args.workspaces.find(",") >= 0:
                workspaces = self.args.workspaces.split(",")
            else:
                workspaces = [self.args.workspaces]
        else:
            print("Provide workspace")
            sys.exit(1)

        if self.args.output_dir:
            # Override the module-level directory used by get_backup_directory()
            global output_directory
            output_directory = Path(self.args.output_dir)
        start_time = time.time()

        errors = False

        for workspace in workspaces:
            run_info = {}
            run_info["workspace"] = workspace
            run_info["user"] = self.args.user
            run_info["database"] = self.args.database
            run_info["host"] = self.args.host
            run_info["format"] = self.args.format
            run_info["password"] = self.args.password
            run_info["empty"] = self.args.empty
            run_info["append"] = self.args.append

            if self.args.ignore_errors:
                run_info["ignore_errors"] = True

            if self.args.backup and not backup_workspace(run_info):
                print("Leaving because of errors")
                errors = True
                break

            if self.args.restore and not restore_workspace(run_info):
                print("Leaving because of errors")
                errors = True
                break

            if self.args.analyze and not run_sql_for_workspaces(run_info, "ANALYZE {table};"):
                print("Leaving because of errors")
                errors = True
                break

            if self.args.vacuum and not run_sql_for_workspaces(run_info, "VACUUM {table};"):
                print("Leaving because of errors")
                errors = True
                break

            if self.args.reindex and not run_sql_for_workspaces(run_info, "REINDEX TABLE {table};"):
                print("Leaving because of errors")
                errors = True
                break

            sql_text = "SELECT setval('{table}_id_seq', COALESCE((SELECT MAX(id) FROM {table}), 1));"
            if self.args.sequence_update and not run_sql_for_workspaces(run_info, sql_text):
                print("Leaving because of errors")
                errors = True
                break

        if errors:
            print("There were errors")
        else:
            print("All calls were successful")

        elapsed_time_seconds = time.time() - start_time
        elapsed_minutes = int(elapsed_time_seconds // 60)
        elapsed_seconds = int(elapsed_time_seconds % 60)
        print(f"Time: {elapsed_minutes}:{elapsed_seconds:02d}")


def main():
    parser, args = parse_backup_commandline()

    if not args.backup and not args.restore and not args.analyze and not args.vacuum and not args.reindex and not args.sequence_update:
        parser.print_help()

    else:
        b = Backup(args)
        b.process()


if __name__ == "__main__":
    main()