rcdl 2.2.2__py3-none-any.whl → 3.0.0b13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rcdl/core/db.py CHANGED
@@ -1,235 +1,283 @@
1
1
  # core/db.py
2
2
 
3
- """Handle SQL DB and DB Parsing"""
3
+ """
4
+ Handle SQL Database
5
+ """
4
6
 
5
7
  import sqlite3
6
- import logging
7
- from typing import Iterable
8
8
 
9
+ from rcdl.core import adapters
10
+ from rcdl.core import db_queries as queries
11
+ from rcdl.core.config import Config
12
+ from rcdl.core.models import Post, Media, Status, FusedMedia, FusedStatus
13
+ from rcdl.utils import get_date_now
9
14
 
10
- import rcdl.core.db_queries as queries
11
- from .config import Config
12
- from .models import Video, VideoStatus
15
+ from rcdl.interface.ui import UI
13
16
 
14
17
 
15
18
  class DB:
19
+ """Handle all sqlite database command"""
20
+
16
21
  def __init__(self):
17
22
  self.conn = sqlite3.connect(Config.DB_PATH)
18
23
  self.conn.row_factory = sqlite3.Row
19
- self.conn.execute("PRAGMA foreign_keys = ON") # check input into db
20
- self.conn.execute("PRAGMA journal_mode = WAL") #
21
- self.conn.execute("PRAGMA synchronous = NORMAL") # faster write speed
22
24
 
23
25
  def __enter__(self):
26
+ """necessary to use with openDB()"""
24
27
  return self
25
28
 
26
29
  def __exit__(self, exc_type, exc_value, traceback):
30
+ """necessary to use with openDB()"""
27
31
  self.close()
28
32
 
29
- def init_table(self):
30
- # init table for videos to DL
31
- self.conn.execute(queries.CREATE_VIDEOS_TABLE)
32
- self.conn.execute(queries.CREATE_IDX_VIDEOS_STATUS)
33
- self.conn.execute(queries.CREATE_IDX_VIDEOS_CREATOR)
34
- self.conn.execute(queries.CREATE_IDX_VIDEOS_FAIL_COUNT)
33
+ def close(self):
34
+ """Properly close database"""
35
+ self.conn.close()
35
36
 
36
- # init table for version schem, easy migration if necessary
37
- self.conn.execute(queries.CREATE_SCHEMA_VERSION_TABLE)
37
+ def init_database(self):
38
+ """Create tables (posts, fuses, medias) if they dont exist"""
39
+ self.conn.execute(queries.CREATE_POSTS_TABLE)
40
+ self.conn.execute(queries.CREATE_MEDIAS_TABLE)
41
+ self.conn.execute(queries.CREATE_FUSE_TABLE)
38
42
 
39
- # Initialize version if empty
40
- cur = self.conn.cursor()
41
- cur.execute("SELECT COUNT(*) as cnt FROM schema_version")
43
+ self.conn.commit()
44
+
45
+ def get_nb_per_status(self):
46
+ """Return an info dict per tables with number of entry per status
47
+ info['tables1']['status1'] = X
48
+ ...
49
+ """
50
+ info = {}
51
+ info["medias"] = {}
52
+ info["fuses"] = {}
53
+ info["posts"] = 0
54
+ for status in Status:
55
+ info["medias"][status] = len(self.query_media_by_status(status))
56
+ for status in FusedStatus:
57
+ info["fuses"][status] = len(self.query_fuses_by_status(status))
58
+
59
+ cur = self.conn.execute(("SELECT COUNT(*) AS count FROM posts"))
42
60
  row = cur.fetchone()
43
- if row["cnt"] == 0:
44
- cur.execute("INSERT INTO schema_version (version) VALUES (?)", (1,))
61
+ info["posts"] = row["count"] if row else 0
62
+ return info
45
63
 
46
- self.conn.commit()
64
+ def query_post_by_id(self, _id: str) -> Post | None:
65
+ """Get a post from his post id"""
66
+ row = self.conn.execute(queries.QUERY_POST_ID, (_id,)).fetchone()
67
+
68
+ UI.debug(f"{queries.QUERY_POST_ID} {_id} returned {row}")
47
69
 
48
- def get_schema_version(self) -> int:
49
- # get current db version -> for future update to db
70
+ if row is None:
71
+ return None
72
+
73
+ return adapters.row_to_post(row)
74
+
75
+ def query_post_by_user(self, user: str) -> list[Post]:
76
+ """Get all posts of a user"""
50
77
  cur = self.conn.cursor()
51
- cur.execute("SELECT version FROM schema_version")
52
- row = cur.fetchone()
53
- return row["version"] if row else 0
78
+ cur.execute(queries.QUERY_POST_USER, (user,))
79
+ rows = cur.fetchall()
54
80
 
55
- def set_schema_version(self, version: int):
56
- # set current db version
57
- self.conn.execute("UPDATE schema_version SET version = ?", (version,))
58
- self.conn.commit()
81
+ UI.debug(f"{queries.QUERY_POST_USER} {user} returned {len(rows)} results")
59
82
 
60
- def _row_to_video(self, row: sqlite3.Row) -> Video:
61
- # helper function to retunr Video model from a sql result
62
- return Video(
63
- post_id=row["post_id"],
64
- creator_id=row["creator_id"],
65
- service=row["service"],
66
- domain=row["domain"],
67
- relative_path=row["relative_path"],
68
- url=row["url"],
69
- part=row["part"],
70
- status=VideoStatus(row["status"]),
71
- fail_count=row["fail_count"],
72
- published=row["published"],
73
- title=row["title"],
74
- substring=row["substring"],
75
- downloaded_at=row["downloaded_at"],
76
- file_size=row["file_size"],
83
+ return adapters.rows_to_posts(rows)
84
+
85
+ def query_media_by_status(self, status: Status) -> list[Media]:
86
+ """Get all medias with specified status"""
87
+ rows = self.conn.execute(queries.QUERY_MEDIA_STATUS, (status.value,)).fetchall()
88
+ UI.debug(
89
+ f"{queries.QUERY_MEDIA_STATUS} {status.value} returned {len(rows)} result"
77
90
  )
78
91
 
79
- def query_videos(
92
+ return adapters.rows_to_medias(rows)
93
+
94
+ def query_medias_by_status_sorted(
80
95
  self,
81
- *,
82
- status: VideoStatus | Iterable[VideoStatus] | None = None,
83
- creator_id: str | None = None,
84
- post_id: str | None = None,
85
- max_fail_count: int | None = None,
86
- min_part_number: int | None = None,
87
- pending: bool = False,
88
- ) -> list[Video]:
89
- """
90
- Query the DB.
91
- Parametes are 'AND' so if video status and creator_id are sepcified,
92
- it will look for a match between the two
96
+ status: Status | list[Status],
97
+ sort_by: str | None = None,
98
+ ascending: bool = True,
99
+ ) -> list[Media]:
100
+ """Get all medias with specified status (one or multiple)
101
+ Return them sorted by column and asc or desc"""
102
+
103
+ # validate sort column
104
+ valid_columns = {
105
+ "id",
106
+ "post_id",
107
+ "service",
108
+ "url",
109
+ "duration",
110
+ "sequence",
111
+ "status",
112
+ "checksum",
113
+ "file_path",
114
+ "created_at",
115
+ "updated_at",
116
+ "file_size",
117
+ "fail_count",
118
+ }
119
+ order_clause = ""
120
+ if sort_by:
121
+ if sort_by not in valid_columns:
122
+ UI.error(f"Invalid sort column: {sort_by}")
123
+ order_clause = f"ORDER BY {sort_by} {'ASC' if ascending else 'DESC'}"
93
124
 
94
- pending:
95
- No parameters are taken into account if pending True.
96
- Look for videos with: NOT DOWNLOADED status OR (FAILED & fail_count < Config.max_fail_count)
97
- """
98
- sql = "SELECT * FROM videos"
99
- conditions = []
100
- params = []
101
-
102
- if pending:
103
- sql += " WHERE status = ? OR (status = ? AND fail_count < ?)"
104
- params.extend(
105
- [
106
- VideoStatus.NOT_DOWNLOADED.value,
107
- VideoStatus.FAILED.value,
108
- max_fail_count or Config.MAX_FAIL_COUNT,
109
- ]
125
+ # status filter
126
+ if isinstance(status, Status):
127
+ status = [status]
128
+
129
+ status_values = [s.value if isinstance(s, Status) else s for s in status]
130
+ placeholders = ", ".join("?" for _ in status_values)
131
+
132
+ sql = f"SELECT * FROM medias WHERE status IN ({placeholders}) {order_clause}"
133
+ rows = self.conn.execute(sql, status_values).fetchall()
134
+
135
+ UI.debug(
136
+ f"Queried medias with status={status_values}, sorted by {sort_by}, ascending={ascending}, {len(rows)} results"
137
+ )
138
+
139
+ return adapters.rows_to_medias(rows)
140
+
141
+ def query_media_by_post_id(self, _id: str) -> list[Media]:
142
+ """Get all medias from the same post by post id"""
143
+ rows = self.conn.execute(queries.QUERY_MEDIA_ID, (_id,)).fetchall()
144
+ UI.debug(f"{queries.QUERY_MEDIA_ID} {_id} returned {len(rows)} result")
145
+ return adapters.rows_to_medias(rows)
146
+
147
+ def query_fuses_by_status(self, status: FusedStatus) -> list[FusedMedia]:
148
+ """Get all fused_media with specified status"""
149
+ rows = self.conn.execute(queries.QUERY_FUSES_STATUS, (status.value,)).fetchall()
150
+ UI.debug(
151
+ f"{queries.QUERY_FUSES_STATUS} {status.value} returned {len(rows)} result"
152
+ )
153
+
154
+ return adapters.rows_to_fuses(rows)
155
+
156
+ def query_fuses_by_id(self, _id: str) -> FusedMedia | None:
157
+ """Get a fuse group by its unique post id"""
158
+ row = self.conn.execute(queries.QUERY_FUSES_ID, (_id,)).fetchone()
159
+ UI.debug(f"{queries.QUERY_FUSES_ID} {_id} returned {row} result")
160
+ return adapters.row_to_fused_media(row)
161
+
162
+ def insert_posts(self, posts: list[Post] | Post):
163
+ """Add post to DB if it does not already exist (UNIQUE post_id)"""
164
+ if isinstance(posts, Post):
165
+ posts = [posts]
166
+
167
+ values = []
168
+ for post in posts:
169
+ values.append(
170
+ (
171
+ post.id,
172
+ post.user,
173
+ post.service,
174
+ post.domain,
175
+ post.published,
176
+ post.json_hash,
177
+ post.raw_json,
178
+ post.fetched_at,
179
+ )
110
180
  )
111
- else:
112
- if status is not None:
113
- if isinstance(status, VideoStatus):
114
- conditions.append("status = ?")
115
- params.append(status.value)
116
- else:
117
- statuses = list(status)
118
- placeholders = ",".join("?" for _ in statuses)
119
- conditions.append(f"status IN ({placeholders})")
120
- params.extend(s.value for s in statuses)
121
-
122
- if creator_id is not None:
123
- conditions.append("creator_id = ?")
124
- params.append(creator_id)
125
-
126
- if post_id is not None:
127
- conditions.append("post_id = ?")
128
- params.append(post_id)
129
-
130
- if max_fail_count is not None:
131
- conditions.append("fail_count < ?")
132
- params.append(max_fail_count)
133
-
134
- if min_part_number is not None:
135
- conditions.append("part > ?")
136
- params.append(min_part_number)
137
-
138
- if conditions:
139
- sql += " WHERE " + " AND ".join(conditions)
140
-
141
- logging.debug(f"SQL CMD: {sql} with params: {params}")
142
- cur = self.conn.cursor()
143
- cur.execute(sql, params)
144
- rows = cur.fetchall()
145
- if Config.DEBUG:
146
- logging.debug(f"DB query returned {len(rows)} result")
147
181
 
148
- return [self._row_to_video(r) for r in rows]
182
+ with self.conn:
183
+ self.conn.executemany(queries.INSERT_POST, values)
149
184
 
150
- def get_db_videos_info(self):
151
- """Return number of videos per status
152
- return info: dict {
153
- "not_downloaded": int,
154
- "failed": int,
155
- etc...
156
- }
157
- """
158
- info = {}
159
- for status in VideoStatus:
160
- vids = self.query_videos(status=status)
161
- info[status.value] = len(vids)
162
- return info
185
+ inserted = self.conn.total_changes
186
+ UI.debug(f"Inserted {inserted} new posts out of {len(posts)} total posts")
163
187
 
164
- def set_status(
165
- self, video: Video, status: VideoStatus, *, fail_count: int | None = None
166
- ):
167
- """Set video status to specified status"""
168
- video.status = status
169
- if fail_count is not None:
170
- video.fail_count = fail_count
171
- self._upsert_video(video)
188
+ def insert_medias(self, medias: list[Media] | Media):
189
+ """Insert media into the db if it does not already exist (UNIQUE post_id, url)"""
190
+ if isinstance(medias, Media):
191
+ medias = [medias]
172
192
 
173
- def insert_videos(self, videos: list[Video]):
174
- """
175
- Insert a video if not already present. Else ignore.
176
- Does not modify any values
177
- """
178
- if not videos:
179
- return
193
+ values = []
194
+ for media in medias:
195
+ values.append(
196
+ (
197
+ media.post_id,
198
+ media.service,
199
+ media.url,
200
+ media.duration,
201
+ media.sequence,
202
+ media.status.value,
203
+ media.checksum,
204
+ media.file_path,
205
+ media.created_at,
206
+ get_date_now(),
207
+ media.file_size,
208
+ media.fail_count,
209
+ )
210
+ )
180
211
 
181
- rows = []
182
- for video in videos:
183
- rows.append(
212
+ with self.conn:
213
+ self.conn.executemany(queries.INSERT_MEDIA, values)
214
+
215
+ inserted = self.conn.total_changes
216
+ UI.debug(f"Inserted {inserted} new media out of {len(medias)} total medias")
217
+
218
+ def update_media(self, media: Media):
219
+ """Update media entry in the db. Found it by post_id & url, and update:
220
+ - duration, file_size, checksum, status, create_at, updated_at, fail_count"""
221
+ params = (
222
+ media.duration,
223
+ media.file_size,
224
+ media.checksum,
225
+ media.status.value,
226
+ media.created_at,
227
+ get_date_now(),
228
+ media.fail_count,
229
+ media.post_id,
230
+ media.url,
231
+ )
232
+ with self.conn:
233
+ self.conn.execute(queries.UPDATE_MEDIA, params)
234
+ UI.debug(f"Updated media {media.post_id} / {media.url}")
235
+
236
+ def insert_fused_media(self, fuses: list[FusedMedia] | FusedMedia):
237
+ """Insert fused_media into the db if it does not already exist (UNIQUE post_id)"""
238
+ if isinstance(fuses, FusedMedia):
239
+ fuses = [fuses]
240
+
241
+ values = []
242
+ for fuse in fuses:
243
+ values.append(
184
244
  (
185
- video.post_id,
186
- video.creator_id,
187
- video.service,
188
- video.domain,
189
- video.relative_path,
190
- video.url,
191
- video.part,
192
- VideoStatus.NOT_DOWNLOADED.value,
193
- 0,
194
- video.published,
195
- video.title,
196
- video.substring,
197
- None,
198
- None,
245
+ fuse.id,
246
+ fuse.duration,
247
+ fuse.total_parts,
248
+ fuse.status.value,
249
+ fuse.checksum,
250
+ fuse.file_path,
251
+ fuse.created_at,
252
+ get_date_now(),
253
+ fuse.file_size,
254
+ fuse.fail_count,
199
255
  )
200
256
  )
201
257
 
202
- self.conn.executemany(queries.INSERT_IGNORE_VIDEO_UPSERT, rows)
203
- self.conn.commit()
258
+ with self.conn:
259
+ self.conn.executemany(queries.INSERT_FUSED_MEDIA, values)
204
260
 
205
- def _upsert_video(self, video: Video):
206
- """Upsert a video.
207
- If video already in DB, update specifics fields:
208
- status, fail_count, relative path, file_size, downloaded_at
209
- """
210
- if video.status is None:
211
- video.status = VideoStatus.NOT_DOWNLOADED
212
-
213
- self.conn.execute(
214
- queries.INSERT_VIDEO_UPSERT,
215
- (
216
- video.post_id,
217
- video.creator_id,
218
- video.service,
219
- video.domain,
220
- video.relative_path,
221
- video.url,
222
- video.part,
223
- video.status.value,
224
- video.fail_count,
225
- video.published,
226
- video.title,
227
- video.substring,
228
- video.downloaded_at,
229
- video.file_size,
230
- ),
261
+ inserted = self.conn.total_changes
262
+ UI.debug(
263
+ f"Inserted {inserted} new fused_media out of {len(fuses)} total fused_media"
231
264
  )
232
- self.conn.commit()
233
265
 
234
- def close(self):
235
- self.conn.close()
266
+ def update_fuse(self, fuse: FusedMedia):
267
+ """Update fuse group: duration, status, checksum,
268
+ created_at, updated_at, file_size, fail_count
269
+
270
+ """
271
+ params = (
272
+ fuse.duration,
273
+ fuse.status.value,
274
+ fuse.checksum,
275
+ fuse.created_at,
276
+ get_date_now(),
277
+ fuse.file_size,
278
+ fuse.fail_count,
279
+ fuse.id,
280
+ )
281
+ with self.conn:
282
+ self.conn.execute(queries.UPDATE_FUSE, params)
283
+ UI.debug(f"Updated fuse {fuse.id} / {fuse.file_path}")
rcdl/core/db_queries.py CHANGED
@@ -1,66 +1,97 @@
1
1
  # core/db_queries.py
2
-
3
2
  """
4
- Hold all the SQL commands strings
3
+ Hold SQL STRING
5
4
  """
6
5
 
7
- CREATE_VIDEOS_TABLE = """
8
- CREATE TABLE IF NOT EXISTS videos (
6
+ CREATE_MEDIAS_TABLE = """
7
+ CREATE TABLE IF NOT EXISTS medias (
9
8
  id INTEGER PRIMARY KEY AUTOINCREMENT,
10
9
  post_id TEXT,
11
- creator_id TEXT,
12
10
  service TEXT,
13
- domain TEXT,
14
- relative_path TEXT,
15
11
  url TEXT,
16
- part TEXT,
17
- status TEXT DEFAULT 'not_downloaded',
18
- fail_count INTEGER DEFAULT 0,
19
- published TEXT,
20
- title TEXT,
21
- substring TEXT,
22
- downloaded_at TEXT,
23
- file_size REAL,
24
- UNIQUE (service, url)
12
+ duration REAL,
13
+ sequence INTEGER,
14
+ status TEXT,
15
+ checksum TEXT,
16
+ file_path TEXT,
17
+ created_at DATETIME,
18
+ updated_at DATETIME,
19
+ file_size INTEGER,
20
+ fail_count INTEGER,
21
+ UNIQUE(post_id, url)
25
22
  )
26
23
  """
27
24
 
28
- CREATE_SCHEMA_VERSION_TABLE = """
29
- CREATE TABLE IF NOT EXISTS schema_version (
30
- version INTEGER NOT NULL
25
+ CREATE_POSTS_TABLE = """
26
+ CREATE TABLE IF NOT EXISTS posts (
27
+ id TEXT PRIMARY KEY,
28
+ user TEXT,
29
+ service TEXT,
30
+ domain TEXT,
31
+ published DATETIME,
32
+ json_hash TEXT,
33
+ raw_json JSON,
34
+ fetched_at DATETIME
31
35
  )
32
36
  """
33
37
 
34
- INSERT_VIDEO_UPSERT = """
35
- INSERT INTO videos (
36
- post_id, creator_id, service, domain, relative_path, url, part,
37
- status, fail_count, published, title, substring,
38
- downloaded_at, file_size
38
+ CREATE_FUSE_TABLE = """
39
+ CREATE TABLE IF NOT EXISTS fuses (
40
+ id TEXT PRIMARY KEY,
41
+ duration INTEGER,
42
+ total_parts INTEGER,
43
+ status TEXT,
44
+ checksum TEXT,
45
+ file_path TEXT,
46
+ created_at DATETIME,
47
+ updated_at DATETIME,
48
+ file_size INTEGER,
49
+ fail_count INTEGER
39
50
  )
40
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
41
- ON CONFLICT(service, url) DO UPDATE SET
42
- status = excluded.status,
43
- fail_count = excluded.fail_count,
44
- relative_path = excluded.relative_path,
45
- downloaded_at = excluded.downloaded_at,
46
- file_size = excluded.file_size
47
51
  """
48
52
 
49
- INSERT_IGNORE_VIDEO_UPSERT = """
50
- INSERT OR IGNORE INTO videos (
51
- post_id, creator_id, service, domain, relative_path, url, part,
52
- status, fail_count, published, title, substring,
53
- downloaded_at, file_size
53
+ INSERT_POST = """
54
+ INSERT OR IGNORE INTO posts (
55
+ id, user, service, domain, published,
56
+ json_hash, raw_json, fetched_at
54
57
  )
55
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
58
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
56
59
  """
57
60
 
58
- CREATE_IDX_VIDEOS_STATUS = (
59
- "CREATE INDEX IF NOT EXISTS idx_videos_status ON videos(status)"
61
+ INSERT_FUSED_MEDIA = """
62
+ INSERT OR IGNORE INTO fuses (
63
+ id, duration, total_parts, status, checksum,
64
+ file_path, created_at, updated_at, file_size, fail_count
60
65
  )
61
- CREATE_IDX_VIDEOS_CREATOR = (
62
- "CREATE INDEX IF NOT EXISTS idx_videos_creator ON videos(creator_id)"
63
- )
64
- CREATE_IDX_VIDEOS_FAIL_COUNT = (
65
- "CREATE INDEX IF NOT EXISTS idx_videos_fail_count ON videos(fail_count)"
66
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
67
+ """
68
+
69
+ INSERT_MEDIA = """
70
+ INSERT OR IGNORE INTO medias (
71
+ post_id, service, url, duration, sequence, status,
72
+ checksum, file_path, created_at, updated_at, file_size, fail_count
66
73
  )
74
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
75
+ """
76
+
77
+ UPDATE_MEDIA = """
78
+ UPDATE medias
79
+ SET duration = ?, file_size = ?, checksum = ?, status = ?,
80
+ created_at = ?, updated_at = ?, fail_count = ?
81
+ WHERE post_id = ? AND url = ?
82
+ """
83
+
84
+ UPDATE_FUSE = """
85
+ UPDATE fuses
86
+ SET duration = ?, status = ?, checksum = ?,
87
+ created_at = ?, updated_at = ?, file_size = ?,
88
+ fail_count = ?
89
+ WHERE id = ?
90
+ """
91
+
92
+ QUERY_POST_ID = "SELECT * FROM posts WHERE id = ?"
93
+ QUERY_POST_USER = "SELECT * FROM posts WHERE user = ?"
94
+ QUERY_MEDIA_STATUS = "SELECT * FROM medias WHERE status = ?"
95
+ QUERY_MEDIA_ID = "SELECT * FROM medias WHERE post_id = ?"
96
+ QUERY_FUSES_STATUS = "SELECT * FROM fuses WHERE status = ?"
97
+ QUERY_FUSES_ID = "SELECT * FROM fuses WHERE id = ?"