linkarchivetools 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,7 +17,6 @@ from sqlalchemy import (
17
17
  class ReflectedTable(object):
18
18
  def __init__(self, engine, connection):
19
19
  self.engine = engine
20
- self.connection = connection
21
20
 
22
21
  def get_table(self, table_name):
23
22
  destination_metadata = MetaData()
@@ -28,8 +27,9 @@ class ReflectedTable(object):
28
27
 
29
28
  def truncate_table(self, table_name):
30
29
  sql_text = f"DELETE FROM {table_name};"
31
- self.connection.execute(text(sql_text))
32
- self.connection.commit()
30
+
31
+ with self.engine.begin() as connection:
32
+ connection.execute(text(sql_text))
33
33
 
34
34
  def create_index(self, table, column_name):
35
35
  index_name = f"idx_{table.name}_{column_name}"
@@ -38,7 +38,9 @@ class ReflectedTable(object):
38
38
  index.create(bind=self.engine)
39
39
 
40
40
  def vacuum(self):
41
- self.connection.execute(text("VACUUM"))
41
+ with self.engine.connect() as connection:
42
+ connection.execution_options(isolation_level="AUTOCOMMIT")
43
+ connection.execute(text("VACUUM"))
42
44
 
43
45
  def close(self):
44
46
  pass
@@ -52,16 +54,16 @@ class ReflectedTable(object):
52
54
  .returning(table.c.id)
53
55
  )
54
56
 
55
- result = self.connection.execute(stmt)
56
- inserted_id = result.scalar_one()
57
- self.connection.commit()
57
+ with self.engine.begin() as connection:
58
+ result = connection.execute(stmt)
59
+ inserted_id = result.scalar_one()
58
60
 
59
61
  return inserted_id
60
62
 
61
63
  def count(self, table_name):
62
- row_count = self.connection.execute(
63
- text(f"SELECT COUNT(*) FROM {table_name}")
64
- ).scalar()
64
+ sql_text = text(f"SELECT COUNT(*) FROM {table_name}")
65
+ with self.engine.connect() as connection:
66
+ row_count = connection.execute(sql_text).scalar_one()
65
67
  return row_count
66
68
 
67
69
  def print_summary(self, print_columns=False):
@@ -92,14 +94,13 @@ class ReflectedTable(object):
92
94
  return data
93
95
 
94
96
  def run_sql(self, sql_text):
95
- self.connection.execute(text(sql_text))
96
- self.connection.commit()
97
+ with self.engine.begin() as connection:
98
+ connection.execute(text(sql_text))
97
99
 
98
100
 
99
101
  class ReflectedGenericTable(object):
100
102
  def __init__(self, engine, connection, table_name=None):
101
103
  self.engine = engine
102
- self.connection = connection
103
104
  self.table_name = table_name
104
105
  if self.table_name is None:
105
106
  self.table_name = self.get_table_name()
@@ -116,8 +117,8 @@ class ReflectedGenericTable(object):
116
117
 
117
118
  def truncate(self):
118
119
  sql_text = f"DELETE FROM {self.table_name};"
119
- self.connection.execute(text(sql_text))
120
- self.connection.commit()
120
+ with self.engine.begin() as connection:
121
+ result = connection.execute(text(sql_text))
121
122
 
122
123
  def create_index(self, column_name):
123
124
  index_name = f"idx_{self.table.name}_{column_name}"
@@ -134,9 +135,9 @@ class ReflectedGenericTable(object):
134
135
  .returning(table.c.id)
135
136
  )
136
137
 
137
- result = self.connection.execute(stmt)
138
- inserted_id = result.scalar_one()
139
- self.connection.commit()
138
+ with self.engine.begin() as connection:
139
+ result = connection.execute(stmt)
140
+ inserted_id = result.scalar_one()
140
141
 
141
142
  return inserted_id
142
143
 
@@ -149,21 +150,22 @@ class ReflectedGenericTable(object):
149
150
  .values(**json_data)
150
151
  )
151
152
 
152
- self.connection.execute(stmt)
153
+ with self.engine.begin() as connection:
154
+ connection.execute(stmt)
153
155
 
154
156
  def count(self):
155
- row_count = self.connection.execute(
156
- text(f"SELECT COUNT(*) FROM {self.table_name}")
157
- ).scalar()
157
+ sql = text(f"SELECT COUNT(*) FROM {self.table_name}")
158
+ with self.engine.connect() as connection:
159
+ row_count = connection.execute(sql).scalar_one()
158
160
  return row_count
159
161
 
160
162
  def get(self, id):
161
- destination_table = self.get_table()
162
-
163
- stmt = select(destination_table).where(destination_table.c.id == id)
163
+ table = self.get_table()
164
+ stmt = select(table).where(table.c.id == id)
164
165
 
165
- result = self.connection.execute(stmt)
166
- return result.first()
166
+ with self.engine.connect() as connection:
167
+ result = connection.execute(stmt)
168
+ return result.first()
167
169
 
168
170
  def get_where(self,
169
171
  conditions_map: dict=None,
@@ -199,36 +201,36 @@ class ReflectedGenericTable(object):
199
201
  if limit is not None:
200
202
  stmt = stmt.limit(limit)
201
203
 
202
- result = self.connection.execute(stmt)
203
- for row in result:
204
- yield row
204
+ with self.engine.connect() as connection:
205
+ result = connection.execute(stmt)
206
+ yield from result
205
207
 
206
208
  def delete(self, id):
207
- destination_table = self.get_table()
208
-
209
- stmt = delete(destination_table).where(destination_table.c.id == id)
209
+ table = self.get_table()
210
+ stmt = delete(table).where(table.c.id == id)
210
211
 
211
- result = self.connection.execute(stmt)
212
- self.connection.commit()
212
+ with self.engine.begin() as connection:
213
+ result = connection.execute(stmt)
214
+ rowcount = result.rowcount # number of rows deleted
213
215
 
214
- return result.rowcount # number of rows deleted
216
+ return rowcount
215
217
 
216
218
  def delete_where(self, conditions: dict):
217
- destination_table = self.get_table()
219
+ table = self.get_table()
218
220
 
219
221
  filters = []
220
222
  for column_name, value in conditions.items():
221
- if not hasattr(destination_table.c, column_name):
223
+ if not hasattr(table.c, column_name):
222
224
  raise ValueError(f"Unknown column: {column_name}")
225
+ filters.append(getattr(table.c, column_name) == value)
223
226
 
224
- filters.append(getattr(destination_table.c, column_name) == value)
227
+ stmt = delete(table).where(and_(*filters))
225
228
 
226
- stmt = delete(destination_table).where(and_(*filters))
229
+ with self.engine.begin() as connection:
230
+ result = connection.execute(stmt)
231
+ rowcount = result.rowcount # number of rows deleted
227
232
 
228
- result = self.connection.execute(stmt)
229
- self.connection.commit()
230
-
231
- return result.rowcount
233
+ return rowcount
232
234
 
233
235
  def print_summary(self, print_columns=False):
234
236
  row_count = self.count()
@@ -240,21 +242,23 @@ class ReflectedGenericTable(object):
240
242
 
241
243
  def get_column_names(self):
242
244
  inspector = inspect(self.engine)
243
- row_count = self.connection.execute(
244
- text(f"SELECT COUNT(*) FROM {self.table_name}")
245
- ).scalar()
245
+
246
+ with self.engine.connect() as connection:
247
+ row_count = connection.execute(text(f"SELECT COUNT(*) FROM {self.table_name}")).scalar_one()
246
248
 
247
249
  columns = inspector.get_columns(self.table_name)
248
250
  column_names = [column["name"] for column in columns]
249
251
  return column_names
250
252
 
251
253
  def row_to_json_data(self, row):
252
- data = dict(row._mapping)
253
- return data
254
+ """
255
+ Convert SQLAlchemy row to a dict
256
+ """
257
+ return dict(row._mapping)
254
258
 
255
259
  def run_sql(self, sql_text):
256
- self.connection.execute(text(sql_text))
257
- self.connection.commit()
260
+ with self.engine.begin() as connection:
261
+ connection.execute(text(sql_text))
258
262
 
259
263
 
260
264
  class ReflectedEntryTable(ReflectedGenericTable):
@@ -286,35 +290,36 @@ class ReflectedEntryTable(ReflectedGenericTable):
286
290
 
287
291
  return self.insert_json_data(entry_json)
288
292
 
289
- def get_entries(self, limit:int|None=None, offset:int=0):
290
- destination_table = self.get_table()
291
-
292
- entries_select = select(destination_table)
293
+ def get_entries(self, limit: int | None = None, offset: int = 0):
294
+ """
295
+ TODO remove use get_where
296
+ """
297
+ table = self.get_table()
298
+ stmt = select(table)
293
299
 
294
300
  if offset:
295
- entries_select = entries_select.offset(offset)
301
+ stmt = stmt.offset(offset)
296
302
  if limit is not None:
297
- entries_select = entries_select.limit(limit)
298
-
299
- result = self.connection.execute(entries_select)
303
+ stmt = stmt.limit(limit)
300
304
 
301
- for entry in result:
302
- yield entry
305
+ with self.engine.connect() as connection:
306
+ result = connection.execute(stmt)
307
+ yield from result # keep generator semantics
303
308
 
304
309
  def get_entries_good(self):
305
- destination_table = self.get_table()
306
-
310
+ """
311
+ TODO remove use get_where
312
+ """
313
+ table = self.get_table()
307
314
  stmt = (
308
- select(destination_table)
309
- .where(destination_table.c.page_rating_votes > 0)
310
- .order_by(destination_table.c.page_rating_votes.desc())
315
+ select(table)
316
+ .where(table.c.page_rating_votes > 0)
317
+ .order_by(table.c.page_rating_votes.desc())
311
318
  )
312
319
 
313
- result = self.connection.execute(stmt)
314
- entries = result.fetchall()
315
-
316
- for entry in entries:
317
- yield entry
320
+ with self.engine.connect() as connection:
321
+ result = connection.execute(stmt)
322
+ yield from result
318
323
 
319
324
  def exists(self, *, id=None, link=None):
320
325
  table = self.get_table()
@@ -329,7 +334,9 @@ class ReflectedEntryTable(ReflectedGenericTable):
329
334
  return False
330
335
 
331
336
  stmt = select(exists().where(or_(*conditions)))
332
- return self.connection.execute(stmt).scalar()
337
+
338
+ with self.engine.connect() as connection:
339
+ return connection.execute(stmt).scalar()
333
340
 
334
341
 
335
342
  class ReflectedUserTags(ReflectedGenericTable):
@@ -337,33 +344,27 @@ class ReflectedUserTags(ReflectedGenericTable):
337
344
  return "usertags"
338
345
 
339
346
  def get_tags_string(self, entry_id):
340
- destination_table = self.get_table()
341
-
342
- stmt = select(destination_table).where(destination_table.c.entry_id == entry_id)
343
-
344
- tags = ""
347
+ table = self.get_table()
348
+ stmt = select(table).where(table.c.entry_id == entry_id)
345
349
 
346
- result = self.connection.execute(stmt)
347
- rows = result.fetchall()
348
- for row in rows:
349
- if tags:
350
- tags += ", "
350
+ tags_list = []
351
351
 
352
- tags += "#" + row.tag
352
+ with self.engine.connect() as connection:
353
+ result = connection.execute(stmt)
354
+ for row in result:
355
+ tags_list.append(f"#{row.tag}")
353
356
 
354
- return tags
357
+ return ", ".join(tags_list)
355
358
 
356
359
  def get_tags(self, entry_id):
357
- destination_table = self.get_table()
358
-
359
- stmt = select(destination_table).where(destination_table.c.entry_id == entry_id)
360
+ table = self.get_table()
361
+ stmt = select(table).where(table.c.entry_id == entry_id)
360
362
 
361
363
  tags = []
362
-
363
- result = self.connection.execute(stmt)
364
- rows = result.fetchall()
365
- for row in rows:
366
- tags.append(row.tag)
364
+ with self.engine.connect() as connection:
365
+ result = connection.execute(stmt)
366
+ for row in result:
367
+ tags.append(row.tag)
367
368
 
368
369
  return tags
369
370
 
@@ -372,36 +373,32 @@ class ReflectedEntryCompactedTags(ReflectedGenericTable):
372
373
  def get_table_name(self):
373
374
  return "entrycompactedtags"
374
375
 
375
- def get_tags_string(self, entry_id):
376
- destination_table = self.get_table()
377
-
378
- stmt = select(destination_table).where(destination_table.c.entry_id == entry_id)
379
-
380
- tags = ""
381
-
382
- result = self.connection.execute(stmt)
383
- rows = result.fetchall()
384
- for row in rows:
385
- if tags:
386
- tags += ", "
376
+ def get_tags(self, entry_id):
377
+ """Return a list of tag strings for the given entry_id."""
378
+ table = self.get_table()
379
+ stmt = select(table).where(table.c.entry_id == entry_id)
387
380
 
388
- tags += "#" + row.tag
381
+ tags = []
382
+ with self.engine.connect() as connection:
383
+ result = connection.execute(stmt)
384
+ for row in result:
385
+ tags.append(row.tag)
389
386
 
390
387
  return tags
391
388
 
392
- def get_tags(self, entry_id):
393
- destination_table = self.get_table()
394
-
395
- stmt = select(destination_table).where(destination_table.c.entry_id == entry_id)
396
389
 
397
- tags = []
390
+ def get_tags_string(self, entry_id):
391
+ """Return tags for the given entry_id as a single string formatted as '#tag1, #tag2'."""
392
+ table = self.get_table()
393
+ stmt = select(table).where(table.c.entry_id == entry_id)
398
394
 
399
- result = self.connection.execute(stmt)
400
- rows = result.fetchall()
401
- for row in rows:
402
- tags.append(row.tag)
395
+ tags_list = []
396
+ with self.engine.connect() as connection:
397
+ result = connection.execute(stmt)
398
+ for row in result:
399
+ tags_list.append(f"#{row.tag}")
403
400
 
404
- return tags
401
+ return ", ".join(tags_list)
405
402
 
406
403
 
407
404
  class ReflectedSourceTable(ReflectedGenericTable):
@@ -409,35 +406,34 @@ class ReflectedSourceTable(ReflectedGenericTable):
409
406
  return "sourcedatamodel"
410
407
 
411
408
  def get_source(self, source_id):
412
- destination_table = self.get_table()
413
-
414
- stmt = select(destination_table).where(destination_table.c.id == source_id)
415
-
416
- result = self.connection.execute(stmt)
417
- return result.first()
409
+ """Return a single source row by ID, or None if not found."""
410
+ table = self.get_table()
411
+ stmt = select(table).where(table.c.id == source_id)
418
412
 
419
- def get_sources(self, limit:int|None=None, offset:int=0):
420
- destination_table = self.get_table()
413
+ with self.engine.connect() as connection:
414
+ return connection.execute(stmt).first()
421
415
 
422
- sources_select = select(destination_table)
416
+ def get_sources(self, limit: int | None = None, offset: int = 0):
417
+ """Yield sources with optional offset and limit."""
418
+ table = self.get_table()
419
+ stmt = select(table)
423
420
 
424
421
  if offset:
425
- sources_select = sources_select.offset(offset)
422
+ stmt = stmt.offset(offset)
426
423
  if limit is not None:
427
- sources_select = sources_select.limit(limit)
428
-
429
- result = self.connection.execute(sources_select)
430
-
431
- for source in result:
432
- yield source
424
+ stmt = stmt.limit(limit)
433
425
 
434
- def insert_json(self, source_json):
435
- if "url" not in source_json:
436
- source_json["url"] = ""
426
+ with self.engine.connect() as connection:
427
+ result = connection.execute(stmt)
428
+ yield from result
437
429
 
430
+ def insert_json(self, source_json: dict):
431
+ """Insert a source JSON dict, ensuring 'url' key exists."""
432
+ source_json.setdefault("url", "")
438
433
  return self.insert_json_data(source_json)
439
434
 
440
435
  def exists(self, *, id=None, url=None):
436
+ """Return True if a source with given ID or URL exists."""
441
437
  table = self.get_table()
442
438
 
443
439
  conditions = []
@@ -450,7 +446,9 @@ class ReflectedSourceTable(ReflectedGenericTable):
450
446
  return False
451
447
 
452
448
  stmt = select(exists().where(or_(*conditions)))
453
- return self.connection.execute(stmt).scalar()
449
+
450
+ with self.engine.connect() as connection:
451
+ return connection.execute(stmt).scalar()
454
452
 
455
453
 
456
454
  class ReflectedSocialData(ReflectedGenericTable):
@@ -458,20 +456,20 @@ class ReflectedSocialData(ReflectedGenericTable):
458
456
  return "socialdata"
459
457
 
460
458
  def get(self, entry_id):
461
- destination_table = self.get_table()
459
+ """Return a single row matching entry_id, or None if not found."""
460
+ table = self.get_table()
461
+ stmt = select(table).where(table.c.entry_id == entry_id)
462
462
 
463
- stmt = select(destination_table).where(destination_table.c.entry_id == entry_id)
463
+ with self.engine.connect() as connection:
464
+ return connection.execute(stmt).first()
464
465
 
465
- result = self.connection.execute(stmt)
466
- return result.first()
467
466
 
468
467
  def get_json(self, entry_id):
468
+ """Return the row as a dict (JSON-style), or None if not found."""
469
469
  row = self.get(entry_id)
470
470
  if row is None:
471
471
  return None
472
-
473
- data = self.row_to_json_data(row)
474
- return data
472
+ return self.row_to_json_data(row)
475
473
 
476
474
 
477
475
  class EntryCopier(object):
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.1
2
2
  Name: linkarchivetools
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: Link Archive Tools
5
5
  License: GPL3
6
6
  Author: Iwan Grozny
@@ -11,7 +11,6 @@ Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
13
  Classifier: Programming Language :: Python :: 3.12
14
- Classifier: Programming Language :: Python :: 3.13
15
14
  Requires-Dist: psycopg2-binary
16
15
  Requires-Dist: python-dateutil (>=2.8.2,<3.0.0)
17
16
  Requires-Dist: requests (>=2.32.5,<3.0.0)
@@ -11,8 +11,8 @@ linkarchivetools/json2db.py,sha256=IF01_KhMmHcdm4jfagU_kuJGCEtRwgwjZDuxLtlT7EQ,7
11
11
  linkarchivetools/tableconfig.py,sha256=yPhxrU0ioo_uhzr03JTPi8xkPV5sdpCtCgXxyAP8EKE,1585
12
12
  linkarchivetools/utils/alchemysearch.py,sha256=voUDEBP4RShlGj8E0iHg83jP8JYJQKLOaJjCZNi4LMY,5689
13
13
  linkarchivetools/utils/omnisearch.py,sha256=7HVzz8LesS3Yo6exdlPf4WdPbi2DQekPR2CmkJcfBbI,10115
14
- linkarchivetools/utils/reflected.py,sha256=Ik50nN_BvMT01nf6Rw_-elyAd3UC4e0nP_hkgi27nFY,15280
15
- linkarchivetools-0.1.7.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
16
- linkarchivetools-0.1.7.dist-info/METADATA,sha256=Mwa5ieYGVr0d5FtJGlYiUvZGegauR0OM368Ik4pLfTU,1253
17
- linkarchivetools-0.1.7.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
18
- linkarchivetools-0.1.7.dist-info/RECORD,,
14
+ linkarchivetools/utils/reflected.py,sha256=gLRYCHP4lUXOz4bl9iJ2H5XYe2XS9P5vr9K4TRA2ACs,16238
15
+ linkarchivetools-0.1.8.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
16
+ linkarchivetools-0.1.8.dist-info/METADATA,sha256=m9fVKm5lC7evvAwyC_aoLzDX8DqXnR44HW1h04UCSMY,1202
17
+ linkarchivetools-0.1.8.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
18
+ linkarchivetools-0.1.8.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.2
2
+ Generator: poetry-core 1.9.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any