linkarchivetools 0.1.7__tar.gz → 0.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of linkarchivetools might be problematic.

Files changed (18)
  1. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/PKG-INFO +2 -3
  2. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/linkarchivetools/utils/reflected.py +150 -166
  3. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/pyproject.toml +1 -1
  4. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/LICENSE +0 -0
  5. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/README.md +0 -0
  6. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/linkarchivetools/LICENSE +0 -0
  7. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/linkarchivetools/README.md +0 -0
  8. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/linkarchivetools/__init__.py +0 -0
  9. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/linkarchivetools/backup.py +0 -0
  10. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/linkarchivetools/db2feeds.py +0 -0
  11. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/linkarchivetools/db2json.py +0 -0
  12. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/linkarchivetools/dbanalyzer.py +0 -0
  13. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/linkarchivetools/dbfilter.py +0 -0
  14. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/linkarchivetools/dbmerge.py +0 -0
  15. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/linkarchivetools/json2db.py +0 -0
  16. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/linkarchivetools/tableconfig.py +0 -0
  17. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/linkarchivetools/utils/alchemysearch.py +0 -0
  18. {linkarchivetools-0.1.7 → linkarchivetools-0.1.10}/linkarchivetools/utils/omnisearch.py +0 -0
@@ -1,6 +1,6 @@
- Metadata-Version: 2.3
+ Metadata-Version: 2.1
  Name: linkarchivetools
- Version: 0.1.7
+ Version: 0.1.10
  Summary: Link Archive Tools
  License: GPL3
  Author: Iwan Grozny
@@ -11,7 +11,6 @@ Classifier: Programming Language :: Python :: 3
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
- Classifier: Programming Language :: Python :: 3.13
  Requires-Dist: psycopg2-binary
  Requires-Dist: python-dateutil (>=2.8.2,<3.0.0)
  Requires-Dist: requests (>=2.32.5,<3.0.0)
@@ -17,19 +17,12 @@ from sqlalchemy import (
  class ReflectedTable(object):
      def __init__(self, engine, connection):
          self.engine = engine
-         self.connection = connection
-
-     def get_table(self, table_name):
-         destination_metadata = MetaData()
-         destination_table = Table(
-             table_name, destination_metadata, autoload_with=self.engine
-         )
-         return destination_table

      def truncate_table(self, table_name):
          sql_text = f"DELETE FROM {table_name};"
-         self.connection.execute(text(sql_text))
-         self.connection.commit()
+
+         with self.engine.begin() as connection:
+             connection.execute(text(sql_text))

      def create_index(self, table, column_name):
          index_name = f"idx_{table.name}_{column_name}"
@@ -38,30 +31,17 @@ class ReflectedTable(object):
          index.create(bind=self.engine)

      def vacuum(self):
-         self.connection.execute(text("VACUUM"))
+         with self.engine.connect() as connection:
+             connection.execution_options(isolation_level="AUTOCOMMIT")
+             connection.execute(text("VACUUM"))

      def close(self):
          pass

-     def insert_json_data(self, table_name, json_data: dict):
-         table = self.get_table(table_name)
-
-         stmt = (
-             insert(table)
-             .values(**json_data)
-             .returning(table.c.id)
-         )
-
-         result = self.connection.execute(stmt)
-         inserted_id = result.scalar_one()
-         self.connection.commit()
-
-         return inserted_id
-
      def count(self, table_name):
-         row_count = self.connection.execute(
-             text(f"SELECT COUNT(*) FROM {table_name}")
-         ).scalar()
+         sql_text = text(f"SELECT COUNT(*) FROM {table_name}")
+         with self.engine.connect() as connection:
+             row_count = connection.execute(sql_text).scalar_one()
          return row_count

      def print_summary(self, print_columns=False):
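
The recurring change in reflected.py is visible in this hunk: instead of issuing statements on a shared, long-lived connection and calling commit() by hand, each method now opens a short-lived connection of its own, using engine.begin() for writes (the transaction commits when the block exits) and engine.connect() for reads, while VACUUM runs on a connection switched to AUTOCOMMIT because it cannot run inside a transaction. A minimal standalone sketch of that pattern, assuming SQLAlchemy 2.x and an SQLite database (the engine URL and the items table are made up for illustration, not taken from the package):

from sqlalchemy import create_engine, text

engine = create_engine("sqlite:///demo.db")  # hypothetical database

# Write path: engine.begin() opens a connection plus a transaction and
# commits automatically when the block exits without an exception.
with engine.begin() as connection:
    connection.execute(text(
        "CREATE TABLE IF NOT EXISTS items (id INTEGER PRIMARY KEY, name TEXT)"
    ))
    connection.execute(text("INSERT INTO items (name) VALUES ('example')"))

# Read path: engine.connect() is enough when nothing has to be committed.
with engine.connect() as connection:
    count = connection.execute(text("SELECT COUNT(*) FROM items")).scalar_one()
    print(count)

# Maintenance path: VACUUM refuses to run inside a transaction, so the
# connection is switched to AUTOCOMMIT before executing it.
with engine.connect() as connection:
    connection = connection.execution_options(isolation_level="AUTOCOMMIT")
    connection.execute(text("VACUUM"))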
@@ -92,32 +72,34 @@ class ReflectedTable(object):
          return data

      def run_sql(self, sql_text):
-         self.connection.execute(text(sql_text))
-         self.connection.commit()
+         with self.engine.begin() as connection:
+             connection.execute(text(sql_text))


  class ReflectedGenericTable(object):
      def __init__(self, engine, connection, table_name=None):
          self.engine = engine
-         self.connection = connection
          self.table_name = table_name
          if self.table_name is None:
              self.table_name = self.get_table_name()
+         self.table = None

      def get_table_name():
          return self.table_name

      def get_table(self):
-         destination_metadata = MetaData()
-         destination_table = Table(
-             self.table_name, destination_metadata, autoload_with=self.engine
-         )
-         return destination_table
+         if self.table is None:
+             destination_metadata = MetaData()
+             self.table = Table(
+                 self.table_name, destination_metadata, autoload_with=self.engine
+             )
+             return self.table
+         return self.table

      def truncate(self):
          sql_text = f"DELETE FROM {self.table_name};"
-         self.connection.execute(text(sql_text))
-         self.connection.commit()
+         with self.engine.begin() as connection:
+             result = connection.execute(text(sql_text))

      def create_index(self, column_name):
          index_name = f"idx_{self.table.name}_{column_name}"
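
This hunk also memoizes table reflection: get_table() now builds the reflected Table object once and returns the cached instance on later calls, instead of re-running reflection queries every time. A small illustrative sketch of that caching idea with SQLAlchemy reflection (the class name and the cache dict are hypothetical, not from the package):

from sqlalchemy import MetaData, Table


class TableCache:
    """Reflect each table once and return the cached Table afterwards."""

    def __init__(self, engine):
        self.engine = engine
        self._tables = {}

    def get_table(self, table_name):
        # Reflection runs queries against the database, so do it only once
        # per table name and remember the result.
        if table_name not in self._tables:
            metadata = MetaData()
            self._tables[table_name] = Table(
                table_name, metadata, autoload_with=self.engine
            )
        return self._tables[table_name]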
@@ -134,9 +116,9 @@ class ReflectedGenericTable(object):
              .returning(table.c.id)
          )

-         result = self.connection.execute(stmt)
-         inserted_id = result.scalar_one()
-         self.connection.commit()
+         with self.engine.begin() as connection:
+             result = connection.execute(stmt)
+             inserted_id = result.scalar_one()

          return inserted_id

@@ -149,21 +131,22 @@ class ReflectedGenericTable(object):
              .values(**json_data)
          )

-         self.connection.execute(stmt)
+         with self.engine.begin() as connection:
+             connection.execute(stmt)

      def count(self):
-         row_count = self.connection.execute(
-             text(f"SELECT COUNT(*) FROM {self.table_name}")
-         ).scalar()
+         sql = text(f"SELECT COUNT(*) FROM {self.table_name}")
+         with self.engine.connect() as connection:
+             row_count = connection.execute(sql).scalar_one()
          return row_count

      def get(self, id):
-         destination_table = self.get_table()
-
-         stmt = select(destination_table).where(destination_table.c.id == id)
+         table = self.get_table()
+         stmt = select(table).where(table.c.id == id)

-         result = self.connection.execute(stmt)
-         return result.first()
+         with self.engine.connect() as connection:
+             result = connection.execute(stmt)
+             return result.first()

      def get_where(self,
                    conditions_map: dict=None,
@@ -199,36 +182,38 @@ class ReflectedGenericTable(object):
          if limit is not None:
              stmt = stmt.limit(limit)

-         result = self.connection.execute(stmt)
-         for row in result:
-             yield row
+         with self.engine.connect() as connection:
+             result = connection.execute(stmt)
+             rows = result.fetchall()  # fetch all rows immediately

-     def delete(self, id):
-         destination_table = self.get_table()
+         return rows

-         stmt = delete(destination_table).where(destination_table.c.id == id)
+     def delete(self, id):
+         table = self.get_table()
+         stmt = delete(table).where(table.c.id == id)

-         result = self.connection.execute(stmt)
-         self.connection.commit()
+         with self.engine.begin() as connection:
+             result = connection.execute(stmt)
+             rowcount = result.rowcount  # number of rows deleted

-         return result.rowcount  # number of rows deleted
+         return rowcount

      def delete_where(self, conditions: dict):
-         destination_table = self.get_table()
+         table = self.get_table()

          filters = []
          for column_name, value in conditions.items():
-             if not hasattr(destination_table.c, column_name):
+             if not hasattr(table.c, column_name):
                  raise ValueError(f"Unknown column: {column_name}")
+             filters.append(getattr(table.c, column_name) == value)

-             filters.append(getattr(destination_table.c, column_name) == value)
-
-         stmt = delete(destination_table).where(and_(*filters))
+         stmt = delete(table).where(and_(*filters))

-         result = self.connection.execute(stmt)
-         self.connection.commit()
+         with self.engine.begin() as connection:
+             result = connection.execute(stmt)
+             rowcount = result.rowcount  # number of rows deleted

-         return result.rowcount
+         return rowcount

      def print_summary(self, print_columns=False):
          row_count = self.count()
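
The query helpers in this and the following hunks stop yielding rows and instead return result.fetchall(). With a per-call connection block, a generator would keep the connection checked out until the caller finished iterating; fetching eagerly materializes the rows while the connection is open and releases it as soon as the method returns, at the cost of holding the full result in memory. A short sketch of the two variants, assuming SQLAlchemy 2.x (the engine URL and the items table are illustrative, not from the package):

from sqlalchemy import MetaData, Table, create_engine, select

engine = create_engine("sqlite:///demo.db")  # hypothetical database
items = Table("items", MetaData(), autoload_with=engine)  # hypothetical table


def iter_items_lazy():
    # Lazy variant: the connection stays checked out until the caller
    # exhausts (or abandons) the generator, which can hold pool
    # connections open for an unpredictable amount of time.
    with engine.connect() as connection:
        result = connection.execute(select(items))
        for row in result:
            yield row


def list_items_eager():
    # Eager variant, matching the pattern the diff adopts: fetch every
    # row while the connection is open, then return a plain list so the
    # connection goes back to the pool immediately.
    with engine.connect() as connection:
        result = connection.execute(select(items))
        return result.fetchall()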
@@ -240,21 +225,23 @@ class ReflectedGenericTable(object):

      def get_column_names(self):
          inspector = inspect(self.engine)
-         row_count = self.connection.execute(
-             text(f"SELECT COUNT(*) FROM {self.table_name}")
-         ).scalar()
+
+         with self.engine.connect() as connection:
+             row_count = connection.execute(text(f"SELECT COUNT(*) FROM {self.table_name}")).scalar_one()

          columns = inspector.get_columns(self.table_name)
          column_names = [column["name"] for column in columns]
          return column_names

      def row_to_json_data(self, row):
-         data = dict(row._mapping)
-         return data
+         """
+         Convert SQLAlchemy row to a dict
+         """
+         return dict(row._mapping)

      def run_sql(self, sql_text):
-         self.connection.execute(text(sql_text))
-         self.connection.commit()
+         with self.engine.begin() as connection:
+             connection.execute(text(sql_text))


  class ReflectedEntryTable(ReflectedGenericTable):
@@ -286,35 +273,38 @@ class ReflectedEntryTable(ReflectedGenericTable):

          return self.insert_json_data(entry_json)

-     def get_entries(self, limit:int|None=None, offset:int=0):
-         destination_table = self.get_table()
-
-         entries_select = select(destination_table)
+     def get_entries(self, limit: int | None = None, offset: int = 0):
+         """
+         TODO remove use get_where
+         """
+         table = self.get_table()
+         stmt = select(table)

          if offset:
-             entries_select = entries_select.offset(offset)
+             stmt = stmt.offset(offset)
          if limit is not None:
-             entries_select = entries_select.limit(limit)
-
-         result = self.connection.execute(entries_select)
+             stmt = stmt.limit(limit)

-         for entry in result:
-             yield entry
+         with self.engine.connect() as connection:
+             result = connection.execute(stmt)
+             rows = result.fetchall()  # fetch all rows immediately
+             return rows

      def get_entries_good(self):
-         destination_table = self.get_table()
-
+         """
+         TODO remove use get_where
+         """
+         table = self.get_table()
          stmt = (
-             select(destination_table)
-             .where(destination_table.c.page_rating_votes > 0)
-             .order_by(destination_table.c.page_rating_votes.desc())
+             select(table)
+             .where(table.c.page_rating_votes > 0)
+             .order_by(table.c.page_rating_votes.desc())
          )

-         result = self.connection.execute(stmt)
-         entries = result.fetchall()
-
-         for entry in entries:
-             yield entry
+         with self.engine.connect() as connection:
+             result = connection.execute(stmt)
+             rows = result.fetchall()  # fetch all rows immediately
+             return rows

      def exists(self, *, id=None, link=None):
          table = self.get_table()
@@ -329,7 +319,9 @@ class ReflectedEntryTable(ReflectedGenericTable):
              return False

          stmt = select(exists().where(or_(*conditions)))
-         return self.connection.execute(stmt).scalar()
+
+         with self.engine.connect() as connection:
+             return connection.execute(stmt).scalar()


  class ReflectedUserTags(ReflectedGenericTable):
@@ -337,33 +329,27 @@ class ReflectedUserTags(ReflectedGenericTable):
          return "usertags"

      def get_tags_string(self, entry_id):
-         destination_table = self.get_table()
-
-         stmt = select(destination_table).where(destination_table.c.entry_id == entry_id)
+         table = self.get_table()
+         stmt = select(table).where(table.c.entry_id == entry_id)

-         tags = ""
+         tags_list = []

-         result = self.connection.execute(stmt)
-         rows = result.fetchall()
-         for row in rows:
-             if tags:
-                 tags += ", "
+         with self.engine.connect() as connection:
+             result = connection.execute(stmt)
+             for row in result:
+                 tags_list.append(f"#{row.tag}")

-             tags += "#" + row.tag
-
-         return tags
+         return ", ".join(tags_list)

      def get_tags(self, entry_id):
-         destination_table = self.get_table()
-
-         stmt = select(destination_table).where(destination_table.c.entry_id == entry_id)
+         table = self.get_table()
+         stmt = select(table).where(table.c.entry_id == entry_id)

          tags = []
-
-         result = self.connection.execute(stmt)
-         rows = result.fetchall()
-         for row in rows:
-             tags.append(row.tag)
+         with self.engine.connect() as connection:
+             result = connection.execute(stmt)
+             for row in result:
+                 tags.append(row.tag)

          return tags

@@ -372,36 +358,32 @@ class ReflectedEntryCompactedTags(ReflectedGenericTable):
      def get_table_name(self):
          return "entrycompactedtags"

-     def get_tags_string(self, entry_id):
-         destination_table = self.get_table()
-
-         stmt = select(destination_table).where(destination_table.c.entry_id == entry_id)
-
-         tags = ""
-
-         result = self.connection.execute(stmt)
-         rows = result.fetchall()
-         for row in rows:
-             if tags:
-                 tags += ", "
+     def get_tags(self, entry_id):
+         """Return a list of tag strings for the given entry_id."""
+         table = self.get_table()
+         stmt = select(table).where(table.c.entry_id == entry_id)

-             tags += "#" + row.tag
+         tags = []
+         with self.engine.connect() as connection:
+             result = connection.execute(stmt)
+             for row in result:
+                 tags.append(row.tag)

          return tags

-     def get_tags(self, entry_id):
-         destination_table = self.get_table()

-         stmt = select(destination_table).where(destination_table.c.entry_id == entry_id)
-
-         tags = []
+     def get_tags_string(self, entry_id):
+         """Return tags for the given entry_id as a single string formatted as '#tag1, #tag2'."""
+         table = self.get_table()
+         stmt = select(table).where(table.c.entry_id == entry_id)

-         result = self.connection.execute(stmt)
-         rows = result.fetchall()
-         for row in rows:
-             tags.append(row.tag)
+         tags_list = []
+         with self.engine.connect() as connection:
+             result = connection.execute(stmt)
+             for row in result:
+                 tags_list.append(f"#{row.tag}")

-         return tags
+         return ", ".join(tags_list)


  class ReflectedSourceTable(ReflectedGenericTable):
@@ -409,35 +391,35 @@ class ReflectedSourceTable(ReflectedGenericTable):
          return "sourcedatamodel"

      def get_source(self, source_id):
-         destination_table = self.get_table()
-
-         stmt = select(destination_table).where(destination_table.c.id == source_id)
-
-         result = self.connection.execute(stmt)
-         return result.first()
+         """Return a single source row by ID, or None if not found."""
+         table = self.get_table()
+         stmt = select(table).where(table.c.id == source_id)

-     def get_sources(self, limit:int|None=None, offset:int=0):
-         destination_table = self.get_table()
+         with self.engine.connect() as connection:
+             return connection.execute(stmt).first()

-         sources_select = select(destination_table)
+     def get_sources(self, limit: int | None = None, offset: int = 0):
+         """Yield sources with optional offset and limit."""
+         table = self.get_table()
+         stmt = select(table)

          if offset:
-             sources_select = sources_select.offset(offset)
+             stmt = stmt.offset(offset)
          if limit is not None:
-             sources_select = sources_select.limit(limit)
-
-         result = self.connection.execute(sources_select)
-
-         for source in result:
-             yield source
+             stmt = stmt.limit(limit)

-     def insert_json(self, source_json):
-         if "url" not in source_json:
-             source_json["url"] = ""
+         with self.engine.connect() as connection:
+             result = connection.execute(stmt)
+             sources = result.fetchall()
+             return sources

+     def insert_json(self, source_json: dict):
+         """Insert a source JSON dict, ensuring 'url' key exists."""
+         source_json.setdefault("url", "")
          return self.insert_json_data(source_json)

      def exists(self, *, id=None, url=None):
+         """Return True if a source with given ID or URL exists."""
          table = self.get_table()

          conditions = []
@@ -450,7 +432,9 @@ class ReflectedSourceTable(ReflectedGenericTable):
              return False

          stmt = select(exists().where(or_(*conditions)))
-         return self.connection.execute(stmt).scalar()
+
+         with self.engine.connect() as connection:
+             return connection.execute(stmt).scalar()


  class ReflectedSocialData(ReflectedGenericTable):
@@ -458,20 +442,20 @@ class ReflectedSocialData(ReflectedGenericTable):
          return "socialdata"

      def get(self, entry_id):
-         destination_table = self.get_table()
+         """Return a single row matching entry_id, or None if not found."""
+         table = self.get_table()
+         stmt = select(table).where(table.c.entry_id == entry_id)

-         stmt = select(destination_table).where(destination_table.c.entry_id == entry_id)
+         with self.engine.connect() as connection:
+             return connection.execute(stmt).first()

-         result = self.connection.execute(stmt)
-         return result.first()

      def get_json(self, entry_id):
+         """Return the row as a dict (JSON-style), or None if not found."""
          row = self.get(entry_id)
          if row is None:
              return None
-
-         data = self.row_to_json_data(row)
-         return data
+         return self.row_to_json_data(row)


  class EntryCopier(object):
@@ -3,7 +3,7 @@

  [tool.poetry]
  name = "linkarchivetools"
- version = "0.1.7"
+ version = "0.1.10"
  description = "Link Archive Tools"
  authors = ["Iwan Grozny <renegat@renegat0x0.ddns.net>"]
  license = "GPL3"