agent-data-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. agent_data_cli/PYPI_README.md +72 -0
  2. agent_data_cli/__init__.py +2 -0
  3. agent_data_cli/__main__.py +7 -0
  4. agent_data_cli/cli/__init__.py +2 -0
  5. agent_data_cli/cli/__main__.py +7 -0
  6. agent_data_cli/cli/commands/__init__.py +85 -0
  7. agent_data_cli/cli/commands/channel.py +78 -0
  8. agent_data_cli/cli/commands/common.py +97 -0
  9. agent_data_cli/cli/commands/config.py +270 -0
  10. agent_data_cli/cli/commands/content/__init__.py +29 -0
  11. agent_data_cli/cli/commands/content/common.py +172 -0
  12. agent_data_cli/cli/commands/content/interact.py +74 -0
  13. agent_data_cli/cli/commands/content/query.py +111 -0
  14. agent_data_cli/cli/commands/content/search.py +75 -0
  15. agent_data_cli/cli/commands/content/update.py +198 -0
  16. agent_data_cli/cli/commands/dashboard.py +87 -0
  17. agent_data_cli/cli/commands/group.py +128 -0
  18. agent_data_cli/cli/commands/help.py +44 -0
  19. agent_data_cli/cli/commands/hub.py +107 -0
  20. agent_data_cli/cli/commands/init.py +29 -0
  21. agent_data_cli/cli/commands/source.py +41 -0
  22. agent_data_cli/cli/commands/specs.py +241 -0
  23. agent_data_cli/cli/commands/sub.py +60 -0
  24. agent_data_cli/cli/formatters.py +537 -0
  25. agent_data_cli/cli/help.py +149 -0
  26. agent_data_cli/cli/main.py +46 -0
  27. agent_data_cli/core/__init__.py +2 -0
  28. agent_data_cli/core/base.py +222 -0
  29. agent_data_cli/core/capabilities.py +105 -0
  30. agent_data_cli/core/config.py +236 -0
  31. agent_data_cli/core/discovery.py +158 -0
  32. agent_data_cli/core/help.py +16 -0
  33. agent_data_cli/core/manifest.py +329 -0
  34. agent_data_cli/core/models.py +296 -0
  35. agent_data_cli/core/protocol.py +135 -0
  36. agent_data_cli/core/registry.py +353 -0
  37. agent_data_cli/core/source_defaults.py +24 -0
  38. agent_data_cli/dashboard/__init__.py +2 -0
  39. agent_data_cli/dashboard/adapters/__init__.py +2 -0
  40. agent_data_cli/dashboard/adapters/channel.py +73 -0
  41. agent_data_cli/dashboard/adapters/config.py +153 -0
  42. agent_data_cli/dashboard/adapters/content.py +350 -0
  43. agent_data_cli/dashboard/adapters/group.py +47 -0
  44. agent_data_cli/dashboard/adapters/help.py +32 -0
  45. agent_data_cli/dashboard/adapters/source.py +61 -0
  46. agent_data_cli/dashboard/adapters/sub.py +28 -0
  47. agent_data_cli/dashboard/context.py +29 -0
  48. agent_data_cli/dashboard/index.py +30 -0
  49. agent_data_cli/dashboard/pages/01_Source.py +57 -0
  50. agent_data_cli/dashboard/pages/02_Channel.py +99 -0
  51. agent_data_cli/dashboard/pages/03_Content_Search.py +64 -0
  52. agent_data_cli/dashboard/pages/04_Content_Query.py +79 -0
  53. agent_data_cli/dashboard/pages/05_Content_Update.py +103 -0
  54. agent_data_cli/dashboard/pages/06_Sub.py +51 -0
  55. agent_data_cli/dashboard/pages/07_Group.py +116 -0
  56. agent_data_cli/dashboard/pages/08_Config.py +114 -0
  57. agent_data_cli/dashboard/pages/09_Help.py +48 -0
  58. agent_data_cli/dashboard/pages/__init__.py +2 -0
  59. agent_data_cli/dashboard/runtime.py +208 -0
  60. agent_data_cli/dashboard/state.py +60 -0
  61. agent_data_cli/dashboard/widgets/__init__.py +2 -0
  62. agent_data_cli/dashboard/widgets/common.py +90 -0
  63. agent_data_cli/dashboard/widgets/forms.py +29 -0
  64. agent_data_cli/dashboard/widgets/tables.py +10 -0
  65. agent_data_cli/fetchers/__init__.py +2 -0
  66. agent_data_cli/fetchers/base.py +61 -0
  67. agent_data_cli/fetchers/browser.py +44 -0
  68. agent_data_cli/fetchers/http.py +313 -0
  69. agent_data_cli/fetchers/jina.py +44 -0
  70. agent_data_cli/hub/__init__.py +6 -0
  71. agent_data_cli/hub/models.py +20 -0
  72. agent_data_cli/hub/service.py +210 -0
  73. agent_data_cli/init_service.py +29 -0
  74. agent_data_cli/main.py +72 -0
  75. agent_data_cli/migration.py +53 -0
  76. agent_data_cli/runtime_paths.py +90 -0
  77. agent_data_cli/store/__init__.py +2 -0
  78. agent_data_cli/store/audit.py +42 -0
  79. agent_data_cli/store/channels.py +80 -0
  80. agent_data_cli/store/configs.py +134 -0
  81. agent_data_cli/store/content.py +770 -0
  82. agent_data_cli/store/db.py +298 -0
  83. agent_data_cli/store/groups.py +120 -0
  84. agent_data_cli/store/health.py +53 -0
  85. agent_data_cli/store/migrations.py +176 -0
  86. agent_data_cli/store/repositories.py +136 -0
  87. agent_data_cli/store/subscriptions.py +119 -0
  88. agent_data_cli/utils/__init__.py +2 -0
  89. agent_data_cli/utils/text.py +21 -0
  90. agent_data_cli/utils/time.py +63 -0
  91. agent_data_cli/utils/urls.py +8 -0
  92. agent_data_cli-0.1.0.dist-info/METADATA +104 -0
  93. agent_data_cli-0.1.0.dist-info/RECORD +97 -0
  94. agent_data_cli-0.1.0.dist-info/WHEEL +5 -0
  95. agent_data_cli-0.1.0.dist-info/entry_points.txt +2 -0
  96. agent_data_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
  97. agent_data_cli-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,770 @@
1
+ from __future__ import annotations
2
+
3
+ import sqlite3
4
+
5
+ from agent_data_cli.core.models import (
6
+ ContentBatchWriteResult,
7
+ ContentChannelLink,
8
+ ContentNode,
9
+ ContentQueryRow,
10
+ ContentRecord,
11
+ ContentRelation,
12
+ ContentSyncBatch,
13
+ SourceStorageSpec,
14
+ parse_content_ref,
15
+ )
16
+ from agent_data_cli.utils.time import utc_now_iso
17
+
18
+ from .repositories import row_to_content, row_to_content_query_row
19
+
20
+
21
def write_content_batch(
    connection: sqlite3.Connection,
    source: str,
    channel_key: str,
    batch: ContentSyncBatch,
) -> ContentBatchWriteResult:
    """Validate ``batch`` and insert its nodes, channel links and relations.

    Validation runs first, so nothing is written when it raises. Nodes are
    written before links and relations.

    Args:
        connection: Open SQLite connection to write through.
        source: Source that every item in the batch must belong to.
        channel_key: Channel that every channel link must point at.
        batch: Nodes, channel links and relations to persist.

    Returns:
        Counts of rows actually inserted. ``skipped_nodes`` is the number of
        batch nodes that did not produce a new row.

    Raises:
        RuntimeError: If validation of the batch fails.
    """
    _validate_batch(connection, source=source, channel_key=channel_key, batch=batch)

    node_rows = 0
    for node in batch.nodes:
        node_rows += _upsert_content_node(connection, node)

    link_rows = 0
    for link in batch.channel_links:
        link_rows += _upsert_content_channel_link(connection, link)

    relation_rows = 0
    for relation in batch.relations:
        relation_rows += _upsert_content_relation(connection, relation)

    return ContentBatchWriteResult(
        saved_nodes=node_rows,
        skipped_nodes=len(batch.nodes) - node_rows,
        saved_links=link_rows,
        saved_relations=relation_rows,
    )
37
+
38
+
39
def upsert_content(connection: sqlite3.Connection, table_name: str, record: ContentRecord) -> bool:
    """Store a single flat ``ContentRecord`` as a one-node batch.

    The record becomes one content node plus one ``"direct"`` channel link,
    written through ``write_content_batch``.

    Args:
        connection: Open SQLite connection to write through.
        table_name: Accepted but not used by this function.
        record: Record to persist; its ``dedup_key`` is used as the content key.

    Returns:
        ``True`` when exactly one new node row was saved, ``False`` otherwise.
    """
    del table_name  # accepted for the caller's signature only; never read

    node = ContentNode(
        source=record.source,
        content_key=record.dedup_key,
        content_type=record.record_type or "content",
        external_id=record.external_id,
        title=record.title,
        url=record.url,
        snippet=record.snippet,
        author=record.author,
        published_at=record.published_at,
        fetched_at=record.fetched_at,
        raw_payload=record.raw_payload,
        content_ref=record.content_ref,
    )
    link = ContentChannelLink(
        source=record.source,
        channel_key=record.channel_key,
        content_key=record.dedup_key,
        membership_kind="direct",
    )
    outcome = write_content_batch(
        connection,
        source=record.source,
        channel_key=record.channel_key,
        batch=ContentSyncBatch(nodes=[node], channel_links=[link], relations=[]),
    )
    return outcome.saved_nodes == 1
74
+
75
+
76
def set_sync_state(connection: sqlite3.Connection, source: str, channel_key: str, cursor: str) -> None:
    """Record the sync cursor for a ``(source, channel_key)`` pair.

    Inserts a ``sync_state`` row, or overwrites the cursor and timestamp of
    the existing row for the same pair. ``updated_at`` is taken from
    ``utc_now_iso()`` at call time.
    """
    statement = """
        INSERT INTO sync_state (source, channel_key, cursor, updated_at)
        VALUES (?, ?, ?, ?)
        ON CONFLICT(source, channel_key) DO UPDATE SET
            cursor = excluded.cursor,
            updated_at = excluded.updated_at
        """
    updated_at = utc_now_iso()
    connection.execute(statement, (source, channel_key, cursor, updated_at))
87
+
88
+
89
def list_content(
    connection: sqlite3.Connection,
    table_name: str,
    source: str,
    channel_key: str,
    *,
    record_type: str | None = None,
    limit: int = 10,
    since: str | None = None,
    fetch_all: bool = False,
) -> list[ContentRecord]:
    """List the content linked to one channel, newest ``published_at`` first.

    Args:
        connection: Open SQLite connection to read from.
        table_name: Accepted but not used by this function.
        source: Source whose nodes are listed.
        channel_key: Channel the nodes must be linked to; also echoed back as
            the ``channel_key`` column of every row.
        record_type: When given, only nodes with this ``content_type``.
        limit: Maximum number of rows; ignored when negative or when
            ``fetch_all`` is set.
        since: When given, only nodes published at or after this value
            (compared with SQLite ``julianday``).
        fetch_all: Return every matching row regardless of ``limit``.

    Returns:
        One ``ContentRecord`` per matching row.
    """
    del table_name  # accepted for the caller's signature only; never read

    select_sql = """
        SELECT
            n.source,
            ? AS channel_key,
            n.content_type AS record_type,
            n.external_id,
            n.title,
            n.url,
            n.snippet,
            n.author,
            n.published_at,
            n.fetched_at,
            n.raw_payload,
            n.content_key AS dedup_key,
            n.content_ref
        FROM content_nodes AS n
        JOIN content_channel_links AS l
            ON l.source = n.source
            AND l.content_key = n.content_key
        WHERE n.source = ? AND l.channel_key = ?
        """
    clauses = [select_sql]
    # Bind order follows placeholder order: projected channel, then the WHERE pair.
    bindings: list[str | int] = [channel_key, source, channel_key]

    if record_type is not None:
        clauses.append("AND n.content_type = ?")
        bindings.append(record_type)

    if since is not None:
        clauses.append("AND julianday(n.published_at) >= julianday(?)")
        bindings.append(_normalize_since_value(since))

    clauses.append("ORDER BY n.published_at DESC, n.node_id DESC")

    apply_limit = limit >= 0 and not fetch_all
    if apply_limit:
        clauses.append("LIMIT ?")
        bindings.append(limit)

    fetched = connection.execute(" ".join(clauses), bindings).fetchall()
    records = []
    for row in fetched:
        records.append(row_to_content(row))
    return records
143
+
144
+
145
def query_content(
    connection: sqlite3.Connection,
    storage_specs: dict[str, SourceStorageSpec],
    *,
    source: str | None = None,
    channel_key: str | None = None,
    group_name: str | None = None,
    record_type: str | None = None,
    parent_ref: str | None = None,
    children_ref: str | None = None,
    depth: int = 1,
    since: str | None = None,
    keywords: str | None = None,
    limit: int = 10,
    fetch_all: bool = False,
) -> list[ContentQueryRow]:
    """Query stored content across every source selected by the filters.

    Target sources (and their channel filters) come from
    ``resolve_query_targets``. Each target is queried through
    ``query_source_content`` and the rows are concatenated.

    Args:
        connection: Open SQLite connection to read from.
        storage_specs: Registered sources, keyed by source name.
        source: Restrict the query to this source.
        channel_key: Restrict the query to this channel.
        group_name: Restrict the query to the members of this group.
        record_type: Only rows with this content type.
        parent_ref: Content ref whose ancestors are requested.
        children_ref: Content ref whose descendants are requested.
        depth: Relation walk depth, forwarded to the per-source query.
        since: Only rows published at or after this value.
        keywords: Substring matched against title, snippet, url and key.
        limit: Maximum number of rows; ignored when negative or when
            ``fetch_all`` is set.
        fetch_all: Return every row regardless of ``limit``.

    Returns:
        Matching rows. Plain queries are sorted newest first by
        ``(published_at, fetched_at, content_key)``; relation queries keep
        the order produced by the per-source walk.

    Raises:
        RuntimeError: If a ref cannot be parsed, or a ref names a different
            source than an explicitly requested ``source``.
    """
    targets = resolve_query_targets(
        connection,
        storage_specs=storage_specs,
        source=source,
        channel_key=channel_key,
        group_name=group_name,
    )
    if not targets:
        return []

    parent_source, parent_content_key = _resolve_parent_ref(parent_ref)
    try:
        children_source, children_content_key = _resolve_parent_ref(children_ref)
    except RuntimeError as exc:
        # The shared helper words its message for ``parent_ref``; report the
        # argument that was actually malformed.
        raise RuntimeError(f"invalid children_ref: {children_ref}") from exc

    if parent_source is not None and source is not None and parent_source != source:
        raise RuntimeError(f"parent_ref source mismatch: expected {source}, got {parent_source}")
    if children_source is not None and source is not None and children_source != source:
        raise RuntimeError(f"children_ref source mismatch: expected {source}, got {children_source}")

    all_records: list[ContentQueryRow] = []
    relation_query = parent_content_key is not None or children_content_key is not None
    for source_name, channel_filter in targets.items():
        # A ref only applies to the source it names.
        source_parent_key = parent_content_key if parent_source in (None, source_name) else None
        source_children_key = children_content_key if children_source in (None, source_name) else None
        if relation_query and source_parent_key is None and source_children_key is None:
            # Without this guard a source the ref does not belong to would be
            # queried with no relation filter at all and return all its rows.
            continue
        all_records.extend(
            query_source_content(
                connection,
                source=source_name,
                channel_filter=channel_filter,
                record_type=record_type,
                parent_content_key=source_parent_key,
                children_content_key=source_children_key,
                depth=depth,
                since=since,
                keywords=keywords,
            )
        )

    if not relation_query:
        all_records.sort(
            key=lambda node: (
                node.published_at or "",
                node.fetched_at or "",
                node.content_key,
            ),
            reverse=True,
        )
    if fetch_all or limit < 0:
        return all_records
    return all_records[:limit]
204
+
205
+
206
def query_source_content(
    connection: sqlite3.Connection,
    *,
    source: str,
    channel_filter: set[str] | None,
    record_type: str | None,
    parent_content_key: str | None,
    children_content_key: str | None,
    depth: int,
    since: str | None,
    keywords: str | None,
) -> list[ContentQueryRow]:
    """Query the content nodes of a single source.

    When ``parent_content_key`` is given the call is delegated to
    ``query_ancestor_nodes``; otherwise, when ``children_content_key`` is
    given, to ``query_descendant_nodes``. With neither, nodes are read
    directly from ``content_nodes`` with the remaining filters applied.

    Args:
        connection: Open SQLite connection to read from.
        source: Source whose nodes are queried.
        channel_filter: ``None`` for every channel, otherwise the set of
            channel keys a node must be linked to. An empty set matches
            nothing.
        record_type: Only nodes with this content type.
        parent_content_key: Origin key for an ancestor walk.
        children_content_key: Origin key for a descendant walk; ignored when
            ``parent_content_key`` is also given.
        depth: Walk depth; only used by the relation walks.
        since: Only nodes published at or after this value.
        keywords: Substring matched (SQL ``LIKE``) against title, snippet,
            url and content key.

    Returns:
        Matching rows. For the direct query they are ordered by
        ``published_at`` then ``node_id``, both descending, with
        ``relation_depth`` and ``relation_semantic`` set to NULL.
    """
    shared_filters = {
        "channel_filter": channel_filter,
        "record_type": record_type,
        "since": since,
        "keywords": keywords,
    }
    if parent_content_key is not None:
        return query_ancestor_nodes(
            connection,
            source=source,
            origin_content_key=parent_content_key,
            depth=depth,
            **shared_filters,
        )
    if children_content_key is not None:
        return query_descendant_nodes(
            connection,
            source=source,
            origin_content_key=children_content_key,
            depth=depth,
            **shared_filters,
        )

    # Checked before any SQL is built: an empty filter can never match a row.
    if channel_filter is not None and not channel_filter:
        return []

    query = [
        """
        SELECT
            n.source,
            n.content_key,
            n.content_type,
            n.external_id,
            n.title,
            n.url,
            n.snippet,
            n.author,
            n.published_at,
            n.fetched_at,
            n.raw_payload,
            NULL AS relation_depth,
            NULL AS relation_semantic,
            n.content_ref
        FROM content_nodes AS n
        WHERE n.source = ?
        """
    ]
    params: list[str | int] = [source]

    if channel_filter is not None:
        placeholders = ", ".join("?" for _ in channel_filter)
        query.append(
            f"""
            AND EXISTS (
                SELECT 1
                FROM content_channel_links AS l
                WHERE l.source = n.source
                    AND l.content_key = n.content_key
                    AND l.channel_key IN ({placeholders})
            )
            """
        )
        params.extend(sorted(channel_filter))
    if record_type is not None:
        query.append("AND n.content_type = ?")
        params.append(record_type)
    if since is not None:
        query.append("AND julianday(n.published_at) >= julianday(?)")
        params.append(_normalize_since_value(since))
    if keywords is not None:
        keyword = f"%{keywords}%"
        query.append(
            """
            AND (
                n.title LIKE ?
                OR n.snippet LIKE ?
                OR n.url LIKE ?
                OR n.content_key LIKE ?
            )
            """
        )
        params.extend([keyword] * 4)

    query.append("ORDER BY n.published_at DESC, n.node_id DESC")
    rows = connection.execute(" ".join(query), params).fetchall()
    return [row_to_content_query_row(row) for row in rows]
330
+
331
+
332
def query_ancestor_nodes(
    connection: sqlite3.Connection,
    *,
    source: str,
    origin_content_key: str,
    depth: int,
    channel_filter: set[str] | None,
    record_type: str | None,
    since: str | None,
    keywords: str | None,
) -> list[ContentQueryRow]:
    """Return the ancestors of ``origin_content_key`` within ``source``.

    Thin wrapper that forwards every argument to ``_query_related_nodes``
    with ``direction="ancestor"``.
    """
    row_filters = {
        "channel_filter": channel_filter,
        "record_type": record_type,
        "since": since,
        "keywords": keywords,
    }
    return _query_related_nodes(
        connection,
        source=source,
        origin_content_key=origin_content_key,
        depth=depth,
        direction="ancestor",
        **row_filters,
    )
354
+
355
+
356
def query_descendant_nodes(
    connection: sqlite3.Connection,
    *,
    source: str,
    origin_content_key: str,
    depth: int,
    channel_filter: set[str] | None,
    record_type: str | None,
    since: str | None,
    keywords: str | None,
) -> list[ContentQueryRow]:
    """Return the descendants of ``origin_content_key`` within ``source``.

    Thin wrapper that forwards every argument to ``_query_related_nodes``
    with ``direction="descendant"``.
    """
    row_filters = {
        "channel_filter": channel_filter,
        "record_type": record_type,
        "since": since,
        "keywords": keywords,
    }
    return _query_related_nodes(
        connection,
        source=source,
        origin_content_key=origin_content_key,
        depth=depth,
        direction="descendant",
        **row_filters,
    )
378
+
379
+
380
+ def _query_related_nodes(
381
+ connection: sqlite3.Connection,
382
+ *,
383
+ source: str,
384
+ origin_content_key: str,
385
+ depth: int,
386
+ channel_filter: set[str] | None,
387
+ record_type: str | None,
388
+ since: str | None,
389
+ keywords: str | None,
390
+ direction: str,
391
+ ) -> list[ContentQueryRow]:
392
+ if direction == "ancestor":
393
+ anchor_key = "r.to_content_key"
394
+ join_condition = "r.from_content_key = walk.content_key"
395
+ else:
396
+ anchor_key = "r.from_content_key"
397
+ join_condition = "r.to_content_key = walk.content_key"
398
+
399
+ query = [
400
+ f"""
401
+ WITH RECURSIVE walk(content_key, relation_depth, relation_semantic) AS (
402
+ SELECT {anchor_key}, 1, r.relation_semantic
403
+ FROM content_relations AS r
404
+ WHERE r.source = ?
405
+ AND r.relation_type = 'parent'
406
+ AND {'r.from_content_key' if direction == 'ancestor' else 'r.to_content_key'} = ?
407
+ UNION ALL
408
+ SELECT {'r.to_content_key' if direction == 'ancestor' else 'r.from_content_key'}, walk.relation_depth + 1, r.relation_semantic
409
+ FROM content_relations AS r
410
+ JOIN walk ON {join_condition}
411
+ WHERE r.source = ?
412
+ AND r.relation_type = 'parent'
413
+ AND (? = -1 OR walk.relation_depth < ?)
414
+ ),
415
+ ranked AS (
416
+ SELECT
417
+ content_key,
418
+ relation_depth,
419
+ relation_semantic,
420
+ ROW_NUMBER() OVER (
421
+ PARTITION BY content_key
422
+ ORDER BY relation_depth ASC
423
+ ) AS row_number
424
+ FROM walk
425
+ )
426
+ SELECT
427
+ n.source,
428
+ n.content_key,
429
+ n.content_type,
430
+ n.external_id,
431
+ n.title,
432
+ n.url,
433
+ n.snippet,
434
+ n.author,
435
+ n.published_at,
436
+ n.fetched_at,
437
+ n.raw_payload,
438
+ ranked.relation_depth,
439
+ ranked.relation_semantic,
440
+ n.content_ref
441
+ FROM ranked
442
+ JOIN content_nodes AS n
443
+ ON n.source = ? AND n.content_key = ranked.content_key
444
+ WHERE n.source = ?
445
+ AND ranked.row_number = 1
446
+ """
447
+ ]
448
+ params: list[str | int] = [source, origin_content_key, source, depth, depth, source, source]
449
+ if channel_filter is not None:
450
+ if not channel_filter:
451
+ return []
452
+ placeholders = ", ".join("?" for _ in channel_filter)
453
+ query.append(
454
+ f"""
455
+ AND EXISTS (
456
+ SELECT 1
457
+ FROM content_channel_links AS l
458
+ WHERE l.source = n.source
459
+ AND l.content_key = n.content_key
460
+ AND l.channel_key IN ({placeholders})
461
+ )
462
+ """
463
+ )
464
+ params.extend(sorted(channel_filter))
465
+ if record_type is not None:
466
+ query.append("AND n.content_type = ?")
467
+ params.append(record_type)
468
+ if since is not None:
469
+ normalized_since = _normalize_since_value(since)
470
+ query.append("AND julianday(n.published_at) >= julianday(?)")
471
+ params.append(normalized_since)
472
+ if keywords is not None:
473
+ keyword = f"%{keywords}%"
474
+ query.append(
475
+ """
476
+ AND (
477
+ n.title LIKE ?
478
+ OR n.snippet LIKE ?
479
+ OR n.url LIKE ?
480
+ OR n.content_key LIKE ?
481
+ )
482
+ """
483
+ )
484
+ params.extend([keyword, keyword, keyword, keyword])
485
+ if direction == "ancestor":
486
+ query.append("ORDER BY ranked.relation_depth ASC, n.published_at DESC, n.node_id DESC")
487
+ else:
488
+ query.append("ORDER BY ranked.relation_depth ASC, n.published_at DESC, n.content_key")
489
+ rows = connection.execute(" ".join(query), params).fetchall()
490
+ return [row_to_content_query_row(row) for row in rows]
491
+
492
+
493
def resolve_query_targets(
    connection: sqlite3.Connection,
    *,
    storage_specs: dict[str, SourceStorageSpec],
    source: str | None,
    channel_key: str | None,
    group_name: str | None,
) -> dict[str, set[str] | None]:
    """Work out which sources, and which of their channels, a query covers.

    Args:
        connection: Open SQLite connection; only read when ``group_name`` is
            given.
        storage_specs: Registered sources, keyed by source name.
        source: Restrict the result to this source.
        channel_key: Restrict the result to this channel.
        group_name: Restrict the result to the members of this group.

    Returns:
        A mapping from source name to either ``None`` (every channel of that
        source) or the set of channel keys to include. An empty mapping
        means nothing matches.

    Raises:
        RuntimeError: If ``source`` or a group member's source has no entry
            in ``storage_specs``.
    """
    if source is not None:
        _require_storage_spec(storage_specs, source)

    if group_name is None:
        # No group: either the one requested source or every registered one.
        source_names = list(storage_specs) if source is None else [source]
        if channel_key is None:
            return {name: None for name in source_names}
        return {name: {channel_key} for name in source_names}

    member_rows = connection.execute(
        """
        SELECT group_name, member_type, source, channel_key
        FROM group_members
        WHERE group_name = ?
        ORDER BY member_type, source, channel_key
        """,
        (group_name,),
    ).fetchall()
    if not member_rows:
        return {}

    targets: dict[str, set[str] | None] = {}
    for row in member_rows:
        name = row["source"]
        _require_storage_spec(storage_specs, name)
        if row["member_type"] == "source":
            # A source-level member selects every channel of that source.
            targets[name] = None
            continue
        if name in targets and targets[name] is None:
            # Whole source already selected; a channel member adds nothing.
            continue
        channels = targets.setdefault(name, set())
        key = row["channel_key"]
        if key is not None:
            channels.add(key)

    if source is not None:
        if source not in targets:
            return {}
        targets = {source: targets[source]}

    if channel_key is None:
        return targets

    # Keep only sources that include the requested channel, narrowed to it.
    return {
        name: {channel_key}
        for name, channels in targets.items()
        if channels is None or channel_key in channels
    }
557
+
558
+
559
def ensure_content_table_columns(connection: sqlite3.Connection, table_name: str) -> None:
    """Add a ``content_ref TEXT`` column to ``table_name`` if it is missing.

    ``table_name`` is interpolated into the SQL text as-is, and rows are read
    by column name, so the connection must yield mapping-style rows.
    """
    column_names = set()
    for column in connection.execute(f"PRAGMA table_info({table_name})").fetchall():
        column_names.add(column["name"])
    if "content_ref" in column_names:
        return
    connection.execute(f"ALTER TABLE {table_name} ADD COLUMN content_ref TEXT")
564
+
565
+
566
+ def _normalize_since_value(since: str) -> str:
567
+ if len(since) == 8 and since.isdigit():
568
+ return f"{since[:4]}-{since[4:6]}-{since[6:8]}"
569
+ return since
570
+
571
+
572
+ def _require_storage_spec(storage_specs: dict[str, SourceStorageSpec], source: str) -> SourceStorageSpec:
573
+ try:
574
+ return storage_specs[source]
575
+ except KeyError as exc:
576
+ raise RuntimeError(f"no storage spec registered for source: {source}") from exc
577
+
578
+
579
+ def _resolve_parent_ref(parent_ref: str | None) -> tuple[str | None, str | None]:
580
+ if parent_ref is None:
581
+ return None, None
582
+ try:
583
+ parsed = parse_content_ref(parent_ref)
584
+ except ValueError as exc:
585
+ raise RuntimeError(f"invalid parent_ref: {parent_ref}") from exc
586
+ return parsed.source, parsed.opaque_id
587
+
588
+
589
+ def _upsert_content_node(connection: sqlite3.Connection, node: ContentNode) -> int:
590
+ fetched_at = node.fetched_at or utc_now_iso()
591
+ cursor = connection.execute(
592
+ """
593
+ INSERT OR IGNORE INTO content_nodes (
594
+ source,
595
+ content_key,
596
+ content_type,
597
+ external_id,
598
+ title,
599
+ url,
600
+ snippet,
601
+ author,
602
+ published_at,
603
+ fetched_at,
604
+ raw_payload,
605
+ content_ref
606
+ )
607
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
608
+ """,
609
+ (
610
+ node.source,
611
+ node.content_key,
612
+ node.content_type,
613
+ node.external_id,
614
+ node.title,
615
+ node.url,
616
+ node.snippet,
617
+ node.author,
618
+ node.published_at,
619
+ fetched_at,
620
+ node.raw_payload,
621
+ node.content_ref,
622
+ ),
623
+ )
624
+ return cursor.rowcount
625
+
626
+
627
+ def _upsert_content_channel_link(connection: sqlite3.Connection, link: ContentChannelLink) -> int:
628
+ cursor = connection.execute(
629
+ """
630
+ INSERT OR IGNORE INTO content_channel_links (
631
+ source,
632
+ channel_key,
633
+ content_key,
634
+ membership_kind,
635
+ linked_at
636
+ )
637
+ VALUES (?, ?, ?, ?, ?)
638
+ """,
639
+ (
640
+ link.source,
641
+ link.channel_key,
642
+ link.content_key,
643
+ link.membership_kind,
644
+ link.linked_at or utc_now_iso(),
645
+ ),
646
+ )
647
+ return cursor.rowcount
648
+
649
+
650
+ def _upsert_content_relation(connection: sqlite3.Connection, relation: ContentRelation) -> int:
651
+ cursor = connection.execute(
652
+ """
653
+ INSERT OR IGNORE INTO content_relations (
654
+ source,
655
+ from_content_key,
656
+ relation_type,
657
+ to_content_key,
658
+ relation_semantic,
659
+ position,
660
+ metadata_json
661
+ )
662
+ VALUES (?, ?, ?, ?, ?, ?, ?)
663
+ """,
664
+ (
665
+ relation.source,
666
+ relation.from_content_key,
667
+ relation.relation_type,
668
+ relation.to_content_key,
669
+ relation.relation_semantic,
670
+ relation.position,
671
+ relation.metadata_json,
672
+ ),
673
+ )
674
+ return cursor.rowcount
675
+
676
+
677
def _validate_batch(
    connection: sqlite3.Connection,
    *,
    source: str,
    channel_key: str,
    batch: ContentSyncBatch,
) -> None:
    """Check that every item in ``batch`` is consistent before it is written.

    Nodes are checked first, then channel links, then relations. Links and
    relations may reference a node from the batch itself or one already
    stored for ``source``.

    Raises:
        RuntimeError: From the first check that fails.
    """
    # Keys introduced by this batch count as existing for reference checks.
    keys_in_batch = set()
    for node in batch.nodes:
        keys_in_batch.add(node.content_key)

    _validate_node_sources(source, batch.nodes)
    _validate_link_sources(source, channel_key, batch.channel_links, keys_in_batch, connection)
    _validate_relation_sources(source, batch.relations, keys_in_batch, connection)
688
+
689
+
690
+ def _validate_node_sources(source: str, nodes: list[ContentNode]) -> None:
691
+ for node in nodes:
692
+ if node.source != source:
693
+ raise RuntimeError(f"content node source mismatch: expected {source}, got {node.source}")
694
+
695
+
696
+ def _validate_link_sources(
697
+ source: str,
698
+ channel_key: str,
699
+ links: list[ContentChannelLink],
700
+ batch_keys: set[str],
701
+ connection: sqlite3.Connection,
702
+ ) -> None:
703
+ for link in links:
704
+ if link.source != source:
705
+ raise RuntimeError(f"content channel link source mismatch: expected {source}, got {link.source}")
706
+ if link.channel_key != channel_key:
707
+ raise RuntimeError(f"content channel link channel mismatch: expected {channel_key}, got {link.channel_key}")
708
+ if not _content_key_exists(connection, source, link.content_key, batch_keys):
709
+ raise RuntimeError(f"content channel link references missing content node: {link.content_key}")
710
+
711
+
712
+ def _validate_relation_sources(
713
+ source: str,
714
+ relations: list[ContentRelation],
715
+ batch_keys: set[str],
716
+ connection: sqlite3.Connection,
717
+ ) -> None:
718
+ for relation in relations:
719
+ if relation.source != source:
720
+ raise RuntimeError(f"content relation source mismatch: expected {source}, got {relation.source}")
721
+ if relation.relation_type != "parent":
722
+ raise RuntimeError(f"unsupported content relation type: {relation.relation_type}")
723
+ if relation.from_content_key == relation.to_content_key:
724
+ raise RuntimeError(f"content relation cannot self-reference: {relation.from_content_key}")
725
+ if not _content_key_exists(connection, source, relation.from_content_key, batch_keys):
726
+ raise RuntimeError(f"content relation references missing content node: {relation.from_content_key}")
727
+ if not _content_key_exists(connection, source, relation.to_content_key, batch_keys):
728
+ raise RuntimeError(f"content relation references missing content node: {relation.to_content_key}")
729
+
730
+
731
+ def _content_key_exists(
732
+ connection: sqlite3.Connection,
733
+ source: str,
734
+ content_key: str,
735
+ batch_keys: set[str],
736
+ ) -> bool:
737
+ if content_key in batch_keys:
738
+ return True
739
+ row = connection.execute(
740
+ """
741
+ SELECT 1
742
+ FROM content_nodes
743
+ WHERE source = ? AND content_key = ?
744
+ """,
745
+ (source, content_key),
746
+ ).fetchone()
747
+ return row is not None
748
+
749
+
750
def list_content_channels(connection: sqlite3.Connection, source: str, content_key: str) -> tuple[str, ...]:
    """Return the channel keys linked to one content node, sorted by key.

    Rows are read by column name, so the connection must yield mapping-style
    rows.
    """
    lookup = """
        SELECT channel_key
        FROM content_channel_links
        WHERE source = ? AND content_key = ?
        ORDER BY channel_key
        """
    channel_keys = []
    for link_row in connection.execute(lookup, (source, content_key)).fetchall():
        channel_keys.append(link_row["channel_key"])
    return tuple(channel_keys)
761
+
762
+
763
def delete_source_sync_state(connection: sqlite3.Connection, source: str) -> None:
    """Delete every ``sync_state`` row that belongs to ``source``."""
    statement = "DELETE FROM sync_state WHERE source = ?"
    bindings = (source,)
    connection.execute(statement, bindings)
765
+
766
+
767
def delete_source_content(connection: sqlite3.Connection, source: str) -> None:
    """Delete all stored content that belongs to ``source``.

    Relations are removed first, then channel links, then the nodes
    themselves.
    """
    for table in ("content_relations", "content_channel_links", "content_nodes"):
        connection.execute(f"DELETE FROM {table} WHERE source = ?", (source,))