linkarchivetools 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,8 @@
1
+ """
2
+ This file is mainly for SQLite.
3
+ It will not open several connections; it will use one.
4
+ This allows us to handle nested calls of generators without any problems.
5
+ """
1
6
  from sqlalchemy import (
2
7
  MetaData,
3
8
  Table,
@@ -17,12 +22,19 @@ from sqlalchemy import (
17
22
  class ReflectedTable(object):
18
23
  def __init__(self, engine, connection):
19
24
  self.engine = engine
25
+ self.connection = connection
26
+
27
+ def get_table(self, table_name):
28
+ destination_metadata = MetaData()
29
+ destination_table = Table(
30
+ table_name, destination_metadata, autoload_with=self.engine
31
+ )
32
+ return destination_table
20
33
 
21
34
  def truncate_table(self, table_name):
22
35
  sql_text = f"DELETE FROM {table_name};"
23
-
24
- with self.engine.begin() as connection:
25
- connection.execute(text(sql_text))
36
+ self.connection.execute(text(sql_text))
37
+ self.connection.commit()
26
38
 
27
39
  def create_index(self, table, column_name):
28
40
  index_name = f"idx_{table.name}_{column_name}"
@@ -31,17 +43,30 @@ class ReflectedTable(object):
31
43
  index.create(bind=self.engine)
32
44
 
33
45
  def vacuum(self):
34
- with self.engine.connect() as connection:
35
- connection.execution_options(isolation_level="AUTOCOMMIT")
36
- connection.execute(text("VACUUM"))
46
+ self.connection.execute(text("VACUUM"))
37
47
 
38
48
  def close(self):
39
49
  pass
40
50
 
51
+ def insert_json_data(self, table_name, json_data: dict):
52
+ table = self.get_table(table_name)
53
+
54
+ stmt = (
55
+ insert(table)
56
+ .values(**json_data)
57
+ .returning(table.c.id)
58
+ )
59
+
60
+ result = self.connection.execute(stmt)
61
+ inserted_id = result.scalar_one()
62
+ self.connection.commit()
63
+
64
+ return inserted_id
65
+
41
66
  def count(self, table_name):
42
- sql_text = text(f"SELECT COUNT(*) FROM {table_name}")
43
- with self.engine.connect() as connection:
44
- row_count = connection.execute(sql_text).scalar_one()
67
+ row_count = self.connection.execute(
68
+ text(f"SELECT COUNT(*) FROM {table_name}")
69
+ ).scalar()
45
70
  return row_count
46
71
 
47
72
  def print_summary(self, print_columns=False):
@@ -72,34 +97,32 @@ class ReflectedTable(object):
72
97
  return data
73
98
 
74
99
  def run_sql(self, sql_text):
75
- with self.engine.begin() as connection:
76
- connection.execute(text(sql_text))
100
+ self.connection.execute(text(sql_text))
101
+ self.connection.commit()
77
102
 
78
103
 
79
104
  class ReflectedGenericTable(object):
80
105
  def __init__(self, engine, connection, table_name=None):
81
106
  self.engine = engine
107
+ self.connection = connection
82
108
  self.table_name = table_name
83
109
  if self.table_name is None:
84
110
  self.table_name = self.get_table_name()
85
- self.table = None
86
111
 
87
112
  def get_table_name():
88
113
  return self.table_name
89
114
 
90
115
  def get_table(self):
91
- if self.table is None:
92
- destination_metadata = MetaData()
93
- self.table = Table(
94
- self.table_name, destination_metadata, autoload_with=self.engine
95
- )
96
- return self.table
97
- return self.table
116
+ destination_metadata = MetaData()
117
+ destination_table = Table(
118
+ self.table_name, destination_metadata, autoload_with=self.engine
119
+ )
120
+ return destination_table
98
121
 
99
122
  def truncate(self):
100
123
  sql_text = f"DELETE FROM {self.table_name};"
101
- with self.engine.begin() as connection:
102
- result = connection.execute(text(sql_text))
124
+ self.connection.execute(text(sql_text))
125
+ self.connection.commit()
103
126
 
104
127
  def create_index(self, column_name):
105
128
  index_name = f"idx_{self.table.name}_{column_name}"
@@ -116,9 +139,9 @@ class ReflectedGenericTable(object):
116
139
  .returning(table.c.id)
117
140
  )
118
141
 
119
- with self.engine.begin() as connection:
120
- result = connection.execute(stmt)
121
- inserted_id = result.scalar_one()
142
+ result = self.connection.execute(stmt)
143
+ inserted_id = result.scalar_one()
144
+ self.connection.commit()
122
145
 
123
146
  return inserted_id
124
147
 
@@ -131,22 +154,21 @@ class ReflectedGenericTable(object):
131
154
  .values(**json_data)
132
155
  )
133
156
 
134
- with self.engine.begin() as connection:
135
- connection.execute(stmt)
157
+ self.connection.execute(stmt)
136
158
 
137
159
  def count(self):
138
- sql = text(f"SELECT COUNT(*) FROM {self.table_name}")
139
- with self.engine.connect() as connection:
140
- row_count = connection.execute(sql).scalar_one()
160
+ row_count = self.connection.execute(
161
+ text(f"SELECT COUNT(*) FROM {self.table_name}")
162
+ ).scalar()
141
163
  return row_count
142
164
 
143
165
  def get(self, id):
144
- table = self.get_table()
145
- stmt = select(table).where(table.c.id == id)
166
+ destination_table = self.get_table()
146
167
 
147
- with self.engine.connect() as connection:
148
- result = connection.execute(stmt)
149
- return result.first()
168
+ stmt = select(destination_table).where(destination_table.c.id == id)
169
+
170
+ result = self.connection.execute(stmt)
171
+ return result.first()
150
172
 
151
173
  def get_where(self,
152
174
  conditions_map: dict=None,
@@ -182,38 +204,36 @@ class ReflectedGenericTable(object):
182
204
  if limit is not None:
183
205
  stmt = stmt.limit(limit)
184
206
 
185
- with self.engine.connect() as connection:
186
- result = connection.execute(stmt)
187
- rows = result.fetchall() # fetch all rows immediately
188
-
189
- return rows
207
+ result = self.connection.execute(stmt)
208
+ for row in result:
209
+ yield row
190
210
 
191
211
  def delete(self, id):
192
- table = self.get_table()
193
- stmt = delete(table).where(table.c.id == id)
212
+ destination_table = self.get_table()
213
+
214
+ stmt = delete(destination_table).where(destination_table.c.id == id)
194
215
 
195
- with self.engine.begin() as connection:
196
- result = connection.execute(stmt)
197
- rowcount = result.rowcount # number of rows deleted
216
+ result = self.connection.execute(stmt)
217
+ self.connection.commit()
198
218
 
199
- return rowcount
219
+ return result.rowcount # number of rows deleted
200
220
 
201
221
  def delete_where(self, conditions: dict):
202
- table = self.get_table()
222
+ destination_table = self.get_table()
203
223
 
204
224
  filters = []
205
225
  for column_name, value in conditions.items():
206
- if not hasattr(table.c, column_name):
226
+ if not hasattr(destination_table.c, column_name):
207
227
  raise ValueError(f"Unknown column: {column_name}")
208
- filters.append(getattr(table.c, column_name) == value)
209
228
 
210
- stmt = delete(table).where(and_(*filters))
229
+ filters.append(getattr(destination_table.c, column_name) == value)
230
+
231
+ stmt = delete(destination_table).where(and_(*filters))
211
232
 
212
- with self.engine.begin() as connection:
213
- result = connection.execute(stmt)
214
- rowcount = result.rowcount # number of rows deleted
233
+ result = self.connection.execute(stmt)
234
+ self.connection.commit()
215
235
 
216
- return rowcount
236
+ return result.rowcount
217
237
 
218
238
  def print_summary(self, print_columns=False):
219
239
  row_count = self.count()
@@ -225,23 +245,21 @@ class ReflectedGenericTable(object):
225
245
 
226
246
  def get_column_names(self):
227
247
  inspector = inspect(self.engine)
228
-
229
- with self.engine.connect() as connection:
230
- row_count = connection.execute(text(f"SELECT COUNT(*) FROM {self.table_name}")).scalar_one()
248
+ row_count = self.connection.execute(
249
+ text(f"SELECT COUNT(*) FROM {self.table_name}")
250
+ ).scalar()
231
251
 
232
252
  columns = inspector.get_columns(self.table_name)
233
253
  column_names = [column["name"] for column in columns]
234
254
  return column_names
235
255
 
236
256
  def row_to_json_data(self, row):
237
- """
238
- Convert SQLAlchemy row to a dict
239
- """
240
- return dict(row._mapping)
257
+ data = dict(row._mapping)
258
+ return data
241
259
 
242
260
  def run_sql(self, sql_text):
243
- with self.engine.begin() as connection:
244
- connection.execute(text(sql_text))
261
+ self.connection.execute(text(sql_text))
262
+ self.connection.commit()
245
263
 
246
264
 
247
265
  class ReflectedEntryTable(ReflectedGenericTable):
@@ -273,38 +291,35 @@ class ReflectedEntryTable(ReflectedGenericTable):
273
291
 
274
292
  return self.insert_json_data(entry_json)
275
293
 
276
- def get_entries(self, limit: int | None = None, offset: int = 0):
277
- """
278
- TODO remove use get_where
279
- """
280
- table = self.get_table()
281
- stmt = select(table)
294
+ def get_entries(self, limit:int|None=None, offset:int=0):
295
+ destination_table = self.get_table()
296
+
297
+ entries_select = select(destination_table)
282
298
 
283
299
  if offset:
284
- stmt = stmt.offset(offset)
300
+ entries_select = entries_select.offset(offset)
285
301
  if limit is not None:
286
- stmt = stmt.limit(limit)
302
+ entries_select = entries_select.limit(limit)
303
+
304
+ result = self.connection.execute(entries_select)
287
305
 
288
- with self.engine.connect() as connection:
289
- result = connection.execute(stmt)
290
- rows = result.fetchall() # fetch all rows immediately
291
- return rows
306
+ for entry in result:
307
+ yield entry
292
308
 
293
309
  def get_entries_good(self):
294
- """
295
- TODO remove use get_where
296
- """
297
- table = self.get_table()
310
+ destination_table = self.get_table()
311
+
298
312
  stmt = (
299
- select(table)
300
- .where(table.c.page_rating_votes > 0)
301
- .order_by(table.c.page_rating_votes.desc())
313
+ select(destination_table)
314
+ .where(destination_table.c.page_rating_votes > 0)
315
+ .order_by(destination_table.c.page_rating_votes.desc())
302
316
  )
303
317
 
304
- with self.engine.connect() as connection:
305
- result = connection.execute(stmt)
306
- rows = result.fetchall() # fetch all rows immediately
307
- return rows
318
+ result = self.connection.execute(stmt)
319
+ entries = result.fetchall()
320
+
321
+ for entry in entries:
322
+ yield entry
308
323
 
309
324
  def exists(self, *, id=None, link=None):
310
325
  table = self.get_table()
@@ -319,9 +334,7 @@ class ReflectedEntryTable(ReflectedGenericTable):
319
334
  return False
320
335
 
321
336
  stmt = select(exists().where(or_(*conditions)))
322
-
323
- with self.engine.connect() as connection:
324
- return connection.execute(stmt).scalar()
337
+ return self.connection.execute(stmt).scalar()
325
338
 
326
339
 
327
340
  class ReflectedUserTags(ReflectedGenericTable):
@@ -329,27 +342,33 @@ class ReflectedUserTags(ReflectedGenericTable):
329
342
  return "usertags"
330
343
 
331
344
  def get_tags_string(self, entry_id):
332
- table = self.get_table()
333
- stmt = select(table).where(table.c.entry_id == entry_id)
345
+ destination_table = self.get_table()
346
+
347
+ stmt = select(destination_table).where(destination_table.c.entry_id == entry_id)
334
348
 
335
- tags_list = []
349
+ tags = ""
336
350
 
337
- with self.engine.connect() as connection:
338
- result = connection.execute(stmt)
339
- for row in result:
340
- tags_list.append(f"#{row.tag}")
351
+ result = self.connection.execute(stmt)
352
+ rows = result.fetchall()
353
+ for row in rows:
354
+ if tags:
355
+ tags += ", "
341
356
 
342
- return ", ".join(tags_list)
357
+ tags += "#" + row.tag
358
+
359
+ return tags
343
360
 
344
361
  def get_tags(self, entry_id):
345
- table = self.get_table()
346
- stmt = select(table).where(table.c.entry_id == entry_id)
362
+ destination_table = self.get_table()
363
+
364
+ stmt = select(destination_table).where(destination_table.c.entry_id == entry_id)
347
365
 
348
366
  tags = []
349
- with self.engine.connect() as connection:
350
- result = connection.execute(stmt)
351
- for row in result:
352
- tags.append(row.tag)
367
+
368
+ result = self.connection.execute(stmt)
369
+ rows = result.fetchall()
370
+ for row in rows:
371
+ tags.append(row.tag)
353
372
 
354
373
  return tags
355
374
 
@@ -358,32 +377,36 @@ class ReflectedEntryCompactedTags(ReflectedGenericTable):
358
377
  def get_table_name(self):
359
378
  return "entrycompactedtags"
360
379
 
361
- def get_tags(self, entry_id):
362
- """Return a list of tag strings for the given entry_id."""
363
- table = self.get_table()
364
- stmt = select(table).where(table.c.entry_id == entry_id)
380
+ def get_tags_string(self, entry_id):
381
+ destination_table = self.get_table()
365
382
 
366
- tags = []
367
- with self.engine.connect() as connection:
368
- result = connection.execute(stmt)
369
- for row in result:
370
- tags.append(row.tag)
383
+ stmt = select(destination_table).where(destination_table.c.entry_id == entry_id)
384
+
385
+ tags = ""
386
+
387
+ result = self.connection.execute(stmt)
388
+ rows = result.fetchall()
389
+ for row in rows:
390
+ if tags:
391
+ tags += ", "
392
+
393
+ tags += "#" + row.tag
371
394
 
372
395
  return tags
373
396
 
397
+ def get_tags(self, entry_id):
398
+ destination_table = self.get_table()
374
399
 
375
- def get_tags_string(self, entry_id):
376
- """Return tags for the given entry_id as a single string formatted as '#tag1, #tag2'."""
377
- table = self.get_table()
378
- stmt = select(table).where(table.c.entry_id == entry_id)
400
+ stmt = select(destination_table).where(destination_table.c.entry_id == entry_id)
379
401
 
380
- tags_list = []
381
- with self.engine.connect() as connection:
382
- result = connection.execute(stmt)
383
- for row in result:
384
- tags_list.append(f"#{row.tag}")
402
+ tags = []
385
403
 
386
- return ", ".join(tags_list)
404
+ result = self.connection.execute(stmt)
405
+ rows = result.fetchall()
406
+ for row in rows:
407
+ tags.append(row.tag)
408
+
409
+ return tags
387
410
 
388
411
 
389
412
  class ReflectedSourceTable(ReflectedGenericTable):
@@ -391,35 +414,35 @@ class ReflectedSourceTable(ReflectedGenericTable):
391
414
  return "sourcedatamodel"
392
415
 
393
416
  def get_source(self, source_id):
394
- """Return a single source row by ID, or None if not found."""
395
- table = self.get_table()
396
- stmt = select(table).where(table.c.id == source_id)
417
+ destination_table = self.get_table()
397
418
 
398
- with self.engine.connect() as connection:
399
- return connection.execute(stmt).first()
419
+ stmt = select(destination_table).where(destination_table.c.id == source_id)
400
420
 
401
- def get_sources(self, limit: int | None = None, offset: int = 0):
402
- """Yield sources with optional offset and limit."""
403
- table = self.get_table()
404
- stmt = select(table)
421
+ result = self.connection.execute(stmt)
422
+ return result.first()
423
+
424
+ def get_sources(self, limit:int|None=None, offset:int=0):
425
+ destination_table = self.get_table()
426
+
427
+ sources_select = select(destination_table)
405
428
 
406
429
  if offset:
407
- stmt = stmt.offset(offset)
430
+ sources_select = sources_select.offset(offset)
408
431
  if limit is not None:
409
- stmt = stmt.limit(limit)
432
+ sources_select = sources_select.limit(limit)
433
+
434
+ result = self.connection.execute(sources_select)
435
+
436
+ for source in result:
437
+ yield source
410
438
 
411
- with self.engine.connect() as connection:
412
- result = connection.execute(stmt)
413
- sources = result.fetchall()
414
- return sources
439
+ def insert_json(self, source_json):
440
+ if "url" not in source_json:
441
+ source_json["url"] = ""
415
442
 
416
- def insert_json(self, source_json: dict):
417
- """Insert a source JSON dict, ensuring 'url' key exists."""
418
- source_json.setdefault("url", "")
419
443
  return self.insert_json_data(source_json)
420
444
 
421
445
  def exists(self, *, id=None, url=None):
422
- """Return True if a source with given ID or URL exists."""
423
446
  table = self.get_table()
424
447
 
425
448
  conditions = []
@@ -432,9 +455,7 @@ class ReflectedSourceTable(ReflectedGenericTable):
432
455
  return False
433
456
 
434
457
  stmt = select(exists().where(or_(*conditions)))
435
-
436
- with self.engine.connect() as connection:
437
- return connection.execute(stmt).scalar()
458
+ return self.connection.execute(stmt).scalar()
438
459
 
439
460
 
440
461
  class ReflectedSocialData(ReflectedGenericTable):
@@ -442,20 +463,20 @@ class ReflectedSocialData(ReflectedGenericTable):
442
463
  return "socialdata"
443
464
 
444
465
  def get(self, entry_id):
445
- """Return a single row matching entry_id, or None if not found."""
446
- table = self.get_table()
447
- stmt = select(table).where(table.c.entry_id == entry_id)
466
+ destination_table = self.get_table()
448
467
 
449
- with self.engine.connect() as connection:
450
- return connection.execute(stmt).first()
468
+ stmt = select(destination_table).where(destination_table.c.entry_id == entry_id)
451
469
 
470
+ result = self.connection.execute(stmt)
471
+ return result.first()
452
472
 
453
473
  def get_json(self, entry_id):
454
- """Return the row as a dict (JSON-style), or None if not found."""
455
474
  row = self.get(entry_id)
456
475
  if row is None:
457
476
  return None
458
- return self.row_to_json_data(row)
477
+
478
+ data = self.row_to_json_data(row)
479
+ return data
459
480
 
460
481
 
461
482
  class EntryCopier(object):
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: linkarchivetools
3
- Version: 0.1.10
3
+ Version: 0.1.11
4
4
  Summary: Link Archive Tools
5
5
  License: GPL3
6
6
  Author: Iwan Grozny
@@ -11,6 +11,7 @@ Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.10
12
12
  Classifier: Programming Language :: Python :: 3.11
13
13
  Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
14
15
  Requires-Dist: psycopg2-binary
15
16
  Requires-Dist: python-dateutil (>=2.8.2,<3.0.0)
16
17
  Requires-Dist: requests (>=2.32.5,<3.0.0)
@@ -11,8 +11,8 @@ linkarchivetools/json2db.py,sha256=IF01_KhMmHcdm4jfagU_kuJGCEtRwgwjZDuxLtlT7EQ,7
11
11
  linkarchivetools/tableconfig.py,sha256=yPhxrU0ioo_uhzr03JTPi8xkPV5sdpCtCgXxyAP8EKE,1585
12
12
  linkarchivetools/utils/alchemysearch.py,sha256=voUDEBP4RShlGj8E0iHg83jP8JYJQKLOaJjCZNi4LMY,5689
13
13
  linkarchivetools/utils/omnisearch.py,sha256=7HVzz8LesS3Yo6exdlPf4WdPbi2DQekPR2CmkJcfBbI,10115
14
- linkarchivetools/utils/reflected.py,sha256=J1pYN8ekZC1pkhCRPzXy_ca2iVp2aED_5ZloVTnedzM,15878
15
- linkarchivetools-0.1.10.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
16
- linkarchivetools-0.1.10.dist-info/METADATA,sha256=QKonY966i81YzUDw1PMS_AmHjpINrm1IL8dyr6FPORo,1203
17
- linkarchivetools-0.1.10.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
18
- linkarchivetools-0.1.10.dist-info/RECORD,,
14
+ linkarchivetools/utils/reflected.py,sha256=rpwCHX4rUvvyK_MqwmF8Z7eIqKqBK91xtcVbiqgVh1U,15445
15
+ linkarchivetools-0.1.11.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
16
+ linkarchivetools-0.1.11.dist-info/METADATA,sha256=lewUvXEdmdQXDlad0t02cPy5PXv1oKItFKUUWNF7nhA,1254
17
+ linkarchivetools-0.1.11.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
18
+ linkarchivetools-0.1.11.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.0
2
+ Generator: poetry-core 2.1.2
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any