agent-data-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_data_cli/PYPI_README.md +72 -0
- agent_data_cli/__init__.py +2 -0
- agent_data_cli/__main__.py +7 -0
- agent_data_cli/cli/__init__.py +2 -0
- agent_data_cli/cli/__main__.py +7 -0
- agent_data_cli/cli/commands/__init__.py +85 -0
- agent_data_cli/cli/commands/channel.py +78 -0
- agent_data_cli/cli/commands/common.py +97 -0
- agent_data_cli/cli/commands/config.py +270 -0
- agent_data_cli/cli/commands/content/__init__.py +29 -0
- agent_data_cli/cli/commands/content/common.py +172 -0
- agent_data_cli/cli/commands/content/interact.py +74 -0
- agent_data_cli/cli/commands/content/query.py +111 -0
- agent_data_cli/cli/commands/content/search.py +75 -0
- agent_data_cli/cli/commands/content/update.py +198 -0
- agent_data_cli/cli/commands/dashboard.py +87 -0
- agent_data_cli/cli/commands/group.py +128 -0
- agent_data_cli/cli/commands/help.py +44 -0
- agent_data_cli/cli/commands/hub.py +107 -0
- agent_data_cli/cli/commands/init.py +29 -0
- agent_data_cli/cli/commands/source.py +41 -0
- agent_data_cli/cli/commands/specs.py +241 -0
- agent_data_cli/cli/commands/sub.py +60 -0
- agent_data_cli/cli/formatters.py +537 -0
- agent_data_cli/cli/help.py +149 -0
- agent_data_cli/cli/main.py +46 -0
- agent_data_cli/core/__init__.py +2 -0
- agent_data_cli/core/base.py +222 -0
- agent_data_cli/core/capabilities.py +105 -0
- agent_data_cli/core/config.py +236 -0
- agent_data_cli/core/discovery.py +158 -0
- agent_data_cli/core/help.py +16 -0
- agent_data_cli/core/manifest.py +329 -0
- agent_data_cli/core/models.py +296 -0
- agent_data_cli/core/protocol.py +135 -0
- agent_data_cli/core/registry.py +353 -0
- agent_data_cli/core/source_defaults.py +24 -0
- agent_data_cli/dashboard/__init__.py +2 -0
- agent_data_cli/dashboard/adapters/__init__.py +2 -0
- agent_data_cli/dashboard/adapters/channel.py +73 -0
- agent_data_cli/dashboard/adapters/config.py +153 -0
- agent_data_cli/dashboard/adapters/content.py +350 -0
- agent_data_cli/dashboard/adapters/group.py +47 -0
- agent_data_cli/dashboard/adapters/help.py +32 -0
- agent_data_cli/dashboard/adapters/source.py +61 -0
- agent_data_cli/dashboard/adapters/sub.py +28 -0
- agent_data_cli/dashboard/context.py +29 -0
- agent_data_cli/dashboard/index.py +30 -0
- agent_data_cli/dashboard/pages/01_Source.py +57 -0
- agent_data_cli/dashboard/pages/02_Channel.py +99 -0
- agent_data_cli/dashboard/pages/03_Content_Search.py +64 -0
- agent_data_cli/dashboard/pages/04_Content_Query.py +79 -0
- agent_data_cli/dashboard/pages/05_Content_Update.py +103 -0
- agent_data_cli/dashboard/pages/06_Sub.py +51 -0
- agent_data_cli/dashboard/pages/07_Group.py +116 -0
- agent_data_cli/dashboard/pages/08_Config.py +114 -0
- agent_data_cli/dashboard/pages/09_Help.py +48 -0
- agent_data_cli/dashboard/pages/__init__.py +2 -0
- agent_data_cli/dashboard/runtime.py +208 -0
- agent_data_cli/dashboard/state.py +60 -0
- agent_data_cli/dashboard/widgets/__init__.py +2 -0
- agent_data_cli/dashboard/widgets/common.py +90 -0
- agent_data_cli/dashboard/widgets/forms.py +29 -0
- agent_data_cli/dashboard/widgets/tables.py +10 -0
- agent_data_cli/fetchers/__init__.py +2 -0
- agent_data_cli/fetchers/base.py +61 -0
- agent_data_cli/fetchers/browser.py +44 -0
- agent_data_cli/fetchers/http.py +313 -0
- agent_data_cli/fetchers/jina.py +44 -0
- agent_data_cli/hub/__init__.py +6 -0
- agent_data_cli/hub/models.py +20 -0
- agent_data_cli/hub/service.py +210 -0
- agent_data_cli/init_service.py +29 -0
- agent_data_cli/main.py +72 -0
- agent_data_cli/migration.py +53 -0
- agent_data_cli/runtime_paths.py +90 -0
- agent_data_cli/store/__init__.py +2 -0
- agent_data_cli/store/audit.py +42 -0
- agent_data_cli/store/channels.py +80 -0
- agent_data_cli/store/configs.py +134 -0
- agent_data_cli/store/content.py +770 -0
- agent_data_cli/store/db.py +298 -0
- agent_data_cli/store/groups.py +120 -0
- agent_data_cli/store/health.py +53 -0
- agent_data_cli/store/migrations.py +176 -0
- agent_data_cli/store/repositories.py +136 -0
- agent_data_cli/store/subscriptions.py +119 -0
- agent_data_cli/utils/__init__.py +2 -0
- agent_data_cli/utils/text.py +21 -0
- agent_data_cli/utils/time.py +63 -0
- agent_data_cli/utils/urls.py +8 -0
- agent_data_cli-0.1.0.dist-info/METADATA +104 -0
- agent_data_cli-0.1.0.dist-info/RECORD +97 -0
- agent_data_cli-0.1.0.dist-info/WHEEL +5 -0
- agent_data_cli-0.1.0.dist-info/entry_points.txt +2 -0
- agent_data_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
- agent_data_cli-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,770 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
|
|
5
|
+
from agent_data_cli.core.models import (
|
|
6
|
+
ContentBatchWriteResult,
|
|
7
|
+
ContentChannelLink,
|
|
8
|
+
ContentNode,
|
|
9
|
+
ContentQueryRow,
|
|
10
|
+
ContentRecord,
|
|
11
|
+
ContentRelation,
|
|
12
|
+
ContentSyncBatch,
|
|
13
|
+
SourceStorageSpec,
|
|
14
|
+
parse_content_ref,
|
|
15
|
+
)
|
|
16
|
+
from agent_data_cli.utils.time import utc_now_iso
|
|
17
|
+
|
|
18
|
+
from .repositories import row_to_content, row_to_content_query_row
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def write_content_batch(
    connection: sqlite3.Connection,
    source: str,
    channel_key: str,
    batch: ContentSyncBatch,
) -> ContentBatchWriteResult:
    """Validate and persist one sync batch of nodes, channel links, and relations.

    Returns a ContentBatchWriteResult with counts of newly inserted rows;
    nodes already present in the store are reported as skipped.
    """
    _validate_batch(connection, source=source, channel_key=channel_key, batch=batch)

    node_count = 0
    for node in batch.nodes:
        node_count += _upsert_content_node(connection, node)

    link_count = 0
    for link in batch.channel_links:
        link_count += _upsert_content_channel_link(connection, link)

    relation_count = 0
    for relation in batch.relations:
        relation_count += _upsert_content_relation(connection, relation)

    return ContentBatchWriteResult(
        saved_nodes=node_count,
        skipped_nodes=len(batch.nodes) - node_count,
        saved_links=link_count,
        saved_relations=relation_count,
    )
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def upsert_content(connection: sqlite3.Connection, table_name: str, record: ContentRecord) -> bool:
    """Persist a single ContentRecord through the batch writer.

    ``table_name`` is accepted for interface compatibility but unused.
    Returns True when the node was newly inserted (i.e. not a duplicate).
    """
    _ = table_name
    node = ContentNode(
        source=record.source,
        content_key=record.dedup_key,
        content_type=record.record_type or "content",
        external_id=record.external_id,
        title=record.title,
        url=record.url,
        snippet=record.snippet,
        author=record.author,
        published_at=record.published_at,
        fetched_at=record.fetched_at,
        raw_payload=record.raw_payload,
        content_ref=record.content_ref,
    )
    link = ContentChannelLink(
        source=record.source,
        channel_key=record.channel_key,
        content_key=record.dedup_key,
        membership_kind="direct",
    )
    batch = ContentSyncBatch(nodes=[node], channel_links=[link], relations=[])
    result = write_content_batch(
        connection,
        source=record.source,
        channel_key=record.channel_key,
        batch=batch,
    )
    return result.saved_nodes == 1
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def set_sync_state(connection: sqlite3.Connection, source: str, channel_key: str, cursor: str) -> None:
    """Record the latest sync cursor for a (source, channel) pair, updating in place."""
    row_values = (source, channel_key, cursor, utc_now_iso())
    connection.execute(
        """
        INSERT INTO sync_state (source, channel_key, cursor, updated_at)
        VALUES (?, ?, ?, ?)
        ON CONFLICT(source, channel_key) DO UPDATE SET
            cursor = excluded.cursor,
            updated_at = excluded.updated_at
        """,
        row_values,
    )
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def list_content(
    connection: sqlite3.Connection,
    table_name: str,
    source: str,
    channel_key: str,
    *,
    record_type: str | None = None,
    limit: int = 10,
    since: str | None = None,
    fetch_all: bool = False,
) -> list[ContentRecord]:
    """Return the content linked to one channel, newest first.

    ``table_name`` is accepted for interface compatibility but unused.
    ``since`` accepts ISO dates or compact ``YYYYMMDD``; ``fetch_all``
    (or a negative ``limit``) disables the row limit.
    """
    _ = table_name
    sql_parts: list[str] = [
        """
        SELECT
            n.source,
            ? AS channel_key,
            n.content_type AS record_type,
            n.external_id,
            n.title,
            n.url,
            n.snippet,
            n.author,
            n.published_at,
            n.fetched_at,
            n.raw_payload,
            n.content_key AS dedup_key,
            n.content_ref
        FROM content_nodes AS n
        JOIN content_channel_links AS l
            ON l.source = n.source
            AND l.content_key = n.content_key
        WHERE n.source = ? AND l.channel_key = ?
        """
    ]
    bind_values: list[str | int] = [channel_key, source, channel_key]

    if record_type is not None:
        sql_parts.append("AND n.content_type = ?")
        bind_values.append(record_type)

    if since is not None:
        # julianday comparison tolerates both date-only and full timestamps.
        sql_parts.append("AND julianday(n.published_at) >= julianday(?)")
        bind_values.append(_normalize_since_value(since))

    sql_parts.append("ORDER BY n.published_at DESC, n.node_id DESC")

    if not fetch_all and limit >= 0:
        sql_parts.append("LIMIT ?")
        bind_values.append(limit)

    result_rows = connection.execute(" ".join(sql_parts), bind_values).fetchall()
    return [row_to_content(row) for row in result_rows]
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def query_content(
    connection: sqlite3.Connection,
    storage_specs: dict[str, SourceStorageSpec],
    *,
    source: str | None = None,
    channel_key: str | None = None,
    group_name: str | None = None,
    record_type: str | None = None,
    parent_ref: str | None = None,
    children_ref: str | None = None,
    depth: int = 1,
    since: str | None = None,
    keywords: str | None = None,
    limit: int = 10,
    fetch_all: bool = False,
) -> list[ContentQueryRow]:
    """Query content across the selected sources, optionally walking relations.

    ``source``/``channel_key``/``group_name`` narrow which stores are scanned
    (see resolve_query_targets). ``parent_ref``/``children_ref`` switch the
    query into a relation walk of up to ``depth`` levels. Flat (non-relation)
    results are merged across sources and re-sorted newest first before the
    limit is applied; ``fetch_all`` or a negative ``limit`` disables the cap.

    Raises RuntimeError when a ref names a source conflicting with ``source``
    or cannot be parsed.
    """
    # Expand the source/channel/group filters into {source: channel set or None}.
    targets = resolve_query_targets(
        connection,
        storage_specs=storage_specs,
        source=source,
        channel_key=channel_key,
        group_name=group_name,
    )
    if not targets:
        return []
    parent_source, parent_content_key = _resolve_parent_ref(parent_ref)
    children_source, children_content_key = _resolve_parent_ref(children_ref)
    # A ref that names a different source than the explicit filter is an error.
    if parent_source is not None and source is not None and parent_source != source:
        raise RuntimeError(f"parent_ref source mismatch: expected {source}, got {parent_source}")
    if children_source is not None and source is not None and children_source != source:
        raise RuntimeError(f"children_ref source mismatch: expected {source}, got {children_source}")
    all_records: list[ContentQueryRow] = []
    relation_query = parent_content_key is not None or children_content_key is not None
    for source_name, channel_filter in targets.items():
        all_records.extend(
            query_source_content(
                connection,
                source=source_name,
                channel_filter=channel_filter,
                record_type=record_type,
                # Apply a ref only to the source it belongs to (or to every
                # source when the ref does not name one).
                parent_content_key=parent_content_key if parent_source in (None, source_name) else None,
                children_content_key=children_content_key if children_source in (None, source_name) else None,
                depth=depth,
                since=since,
                keywords=keywords,
            )
        )
    if not relation_query:
        # Flat results were concatenated per-source, so re-sort globally;
        # relation walks keep their per-source depth ordering instead.
        all_records.sort(
            key=lambda node: (
                node.published_at or "",
                node.fetched_at or "",
                node.content_key,
            ),
            reverse=True,
        )
    if fetch_all or limit < 0:
        return all_records
    return all_records[:limit]
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def query_source_content(
    connection: sqlite3.Connection,
    *,
    source: str,
    channel_filter: set[str] | None,
    record_type: str | None,
    parent_content_key: str | None,
    children_content_key: str | None,
    depth: int,
    since: str | None,
    keywords: str | None,
) -> list[ContentQueryRow]:
    """Query one source's content rows.

    When ``parent_content_key`` or ``children_content_key`` is given, the call
    is delegated to the recursive relation walkers; otherwise a flat filtered
    SELECT over content_nodes is run (``depth`` is unused on that path).

    Fix: removed the ``content_relations`` EXISTS clauses that used to follow
    the flat-query construction — they were unreachable dead code, because
    both relation arguments are guaranteed None past the early returns below.
    """
    if parent_content_key is not None:
        return query_ancestor_nodes(
            connection,
            source=source,
            origin_content_key=parent_content_key,
            depth=depth,
            channel_filter=channel_filter,
            record_type=record_type,
            since=since,
            keywords=keywords,
        )
    if children_content_key is not None:
        return query_descendant_nodes(
            connection,
            source=source,
            origin_content_key=children_content_key,
            depth=depth,
            channel_filter=channel_filter,
            record_type=record_type,
            since=since,
            keywords=keywords,
        )
    query = [
        """
        SELECT
            n.source,
            n.content_key,
            n.content_type,
            n.external_id,
            n.title,
            n.url,
            n.snippet,
            n.author,
            n.published_at,
            n.fetched_at,
            n.raw_payload,
            NULL AS relation_depth,
            NULL AS relation_semantic,
            n.content_ref
        FROM content_nodes AS n
        WHERE n.source = ?
        """
    ]
    params: list[str | int] = [source]
    if channel_filter is not None:
        if not channel_filter:
            # An explicit empty channel filter can never match anything.
            return []
        placeholders = ", ".join("?" for _ in channel_filter)
        query.append(
            f"""
            AND EXISTS (
                SELECT 1
                FROM content_channel_links AS l
                WHERE l.source = n.source
                AND l.content_key = n.content_key
                AND l.channel_key IN ({placeholders})
            )
            """
        )
        params.extend(sorted(channel_filter))
    if record_type is not None:
        query.append("AND n.content_type = ?")
        params.append(record_type)
    if since is not None:
        normalized_since = _normalize_since_value(since)
        query.append("AND julianday(n.published_at) >= julianday(?)")
        params.append(normalized_since)
    if keywords is not None:
        keyword = f"%{keywords}%"
        query.append(
            """
            AND (
                n.title LIKE ?
                OR n.snippet LIKE ?
                OR n.url LIKE ?
                OR n.content_key LIKE ?
            )
            """
        )
        params.extend([keyword, keyword, keyword, keyword])
    _ = depth  # depth only applies to relation walks
    query.append("ORDER BY n.published_at DESC, n.node_id DESC")
    rows = connection.execute(" ".join(query), params).fetchall()
    return [row_to_content_query_row(row) for row in rows]
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def query_ancestor_nodes(
    connection: sqlite3.Connection,
    *,
    source: str,
    origin_content_key: str,
    depth: int,
    channel_filter: set[str] | None,
    record_type: str | None,
    since: str | None,
    keywords: str | None,
) -> list[ContentQueryRow]:
    """Walk 'parent' relations upward from *origin_content_key*.

    Thin wrapper over _query_related_nodes with direction="ancestor".
    """
    return _query_related_nodes(
        connection,
        direction="ancestor",
        source=source,
        origin_content_key=origin_content_key,
        depth=depth,
        channel_filter=channel_filter,
        record_type=record_type,
        since=since,
        keywords=keywords,
    )
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def query_descendant_nodes(
    connection: sqlite3.Connection,
    *,
    source: str,
    origin_content_key: str,
    depth: int,
    channel_filter: set[str] | None,
    record_type: str | None,
    since: str | None,
    keywords: str | None,
) -> list[ContentQueryRow]:
    """Walk 'parent' relations downward from *origin_content_key*.

    Thin wrapper over _query_related_nodes with direction="descendant".
    """
    return _query_related_nodes(
        connection,
        direction="descendant",
        source=source,
        origin_content_key=origin_content_key,
        depth=depth,
        channel_filter=channel_filter,
        record_type=record_type,
        since=since,
        keywords=keywords,
    )
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def _query_related_nodes(
    connection: sqlite3.Connection,
    *,
    source: str,
    origin_content_key: str,
    depth: int,
    channel_filter: set[str] | None,
    record_type: str | None,
    since: str | None,
    keywords: str | None,
    direction: str,
) -> list[ContentQueryRow]:
    """Walk 'parent' relations from an origin node via a recursive CTE.

    ``direction="ancestor"`` follows edges child-to-parent starting at the
    origin; any other value walks parent-to-child. ``depth`` caps the walk
    (-1 means unlimited). Each reachable node is returned once, tagged with
    its minimum relation depth.

    NOTE(review): with depth == -1 a multi-edge relation cycle would make the
    recursive CTE loop without bound — assumes stored relations form a DAG;
    confirm upstream validation guarantees this.
    """
    if direction == "ancestor":
        # Anchor on the parent end of each edge; extend by matching child keys.
        anchor_key = "r.to_content_key"
        join_condition = "r.from_content_key = walk.content_key"
    else:
        anchor_key = "r.from_content_key"
        join_condition = "r.to_content_key = walk.content_key"

    query = [
        f"""
        WITH RECURSIVE walk(content_key, relation_depth, relation_semantic) AS (
            SELECT {anchor_key}, 1, r.relation_semantic
            FROM content_relations AS r
            WHERE r.source = ?
            AND r.relation_type = 'parent'
            AND {'r.from_content_key' if direction == 'ancestor' else 'r.to_content_key'} = ?
            UNION ALL
            SELECT {'r.to_content_key' if direction == 'ancestor' else 'r.from_content_key'}, walk.relation_depth + 1, r.relation_semantic
            FROM content_relations AS r
            JOIN walk ON {join_condition}
            WHERE r.source = ?
            AND r.relation_type = 'parent'
            AND (? = -1 OR walk.relation_depth < ?)
        ),
        ranked AS (
            SELECT
                content_key,
                relation_depth,
                relation_semantic,
                ROW_NUMBER() OVER (
                    PARTITION BY content_key
                    ORDER BY relation_depth ASC
                ) AS row_number
            FROM walk
        )
        SELECT
            n.source,
            n.content_key,
            n.content_type,
            n.external_id,
            n.title,
            n.url,
            n.snippet,
            n.author,
            n.published_at,
            n.fetched_at,
            n.raw_payload,
            ranked.relation_depth,
            ranked.relation_semantic,
            n.content_ref
        FROM ranked
        JOIN content_nodes AS n
            ON n.source = ? AND n.content_key = ranked.content_key
        WHERE n.source = ?
        AND ranked.row_number = 1
        """
    ]
    # Placeholder order: CTE anchor (source, origin), recursive step
    # (source, depth, depth), final node join (source, source).
    params: list[str | int] = [source, origin_content_key, source, depth, depth, source, source]
    if channel_filter is not None:
        if not channel_filter:
            # An explicit empty channel filter can never match anything.
            return []
        placeholders = ", ".join("?" for _ in channel_filter)
        query.append(
            f"""
            AND EXISTS (
                SELECT 1
                FROM content_channel_links AS l
                WHERE l.source = n.source
                AND l.content_key = n.content_key
                AND l.channel_key IN ({placeholders})
            )
            """
        )
        params.extend(sorted(channel_filter))
    if record_type is not None:
        query.append("AND n.content_type = ?")
        params.append(record_type)
    if since is not None:
        normalized_since = _normalize_since_value(since)
        query.append("AND julianday(n.published_at) >= julianday(?)")
        params.append(normalized_since)
    if keywords is not None:
        # Substring match over the human-readable fields plus the key itself.
        keyword = f"%{keywords}%"
        query.append(
            """
            AND (
                n.title LIKE ?
                OR n.snippet LIKE ?
                OR n.url LIKE ?
                OR n.content_key LIKE ?
            )
            """
        )
        params.extend([keyword, keyword, keyword, keyword])
    if direction == "ancestor":
        query.append("ORDER BY ranked.relation_depth ASC, n.published_at DESC, n.node_id DESC")
    else:
        # NOTE(review): the descendant branch ties on content_key rather than
        # node_id, unlike the ancestor branch — confirm this is intentional.
        query.append("ORDER BY ranked.relation_depth ASC, n.published_at DESC, n.content_key")
    rows = connection.execute(" ".join(query), params).fetchall()
    return [row_to_content_query_row(row) for row in rows]
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def resolve_query_targets(
    connection: sqlite3.Connection,
    *,
    storage_specs: dict[str, SourceStorageSpec],
    source: str | None,
    channel_key: str | None,
    group_name: str | None,
) -> dict[str, set[str] | None]:
    """Resolve which (source -> channel set) pairs a query should scan.

    A value of ``None`` for a source means "all channels of that source".
    Without a group the source/channel filters apply directly; with a group
    the group's members are expanded first and then narrowed by ``source``
    and ``channel_key``. Returns {} when nothing matches.
    """
    if source is not None:
        _require_storage_spec(storage_specs, source)

    if group_name is None:
        if source is not None:
            if channel_key is None:
                return {source: None}
            return {source: {channel_key}}
        if channel_key is None:
            return {name: None for name in storage_specs}
        return {name: {channel_key} for name in storage_specs}

    member_rows = connection.execute(
        """
        SELECT group_name, member_type, source, channel_key
        FROM group_members
        WHERE group_name = ?
        ORDER BY member_type, source, channel_key
        """,
        (group_name,),
    ).fetchall()
    if not member_rows:
        return {}

    targets: dict[str, set[str] | None] = {}
    for row in member_rows:
        row_source = row["source"]
        _require_storage_spec(storage_specs, row_source)
        if row["member_type"] == "source":
            # A whole-source membership supersedes any channel-level entries.
            targets[row_source] = None
            continue
        if row_source in targets and targets[row_source] is None:
            # Already covered by a whole-source membership; nothing to add.
            continue
        channels = targets.setdefault(row_source, set())
        row_channel = row["channel_key"]
        if row_channel is not None:
            channels.add(row_channel)

    if source is not None:
        if source not in targets:
            return {}
        targets = {source: targets[source]}

    if channel_key is None:
        return targets

    narrowed: dict[str, set[str] | None] = {}
    for name, channels in targets.items():
        if channels is None or channel_key in channels:
            narrowed[name] = {channel_key}
    return narrowed
|
|
557
|
+
|
|
558
|
+
|
|
559
|
+
def ensure_content_table_columns(connection: sqlite3.Connection, table_name: str) -> None:
    """Add the ``content_ref`` column to *table_name* when it is missing.

    NOTE(review): ``table_name`` is interpolated directly into SQL; callers
    are expected to pass trusted, internally derived table names only.
    """
    column_rows = connection.execute(f"PRAGMA table_info({table_name})").fetchall()
    has_content_ref = any(row["name"] == "content_ref" for row in column_rows)
    if not has_content_ref:
        connection.execute(f"ALTER TABLE {table_name} ADD COLUMN content_ref TEXT")
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
def _normalize_since_value(since: str) -> str:
|
|
567
|
+
if len(since) == 8 and since.isdigit():
|
|
568
|
+
return f"{since[:4]}-{since[4:6]}-{since[6:8]}"
|
|
569
|
+
return since
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def _require_storage_spec(storage_specs: dict[str, SourceStorageSpec], source: str) -> SourceStorageSpec:
|
|
573
|
+
try:
|
|
574
|
+
return storage_specs[source]
|
|
575
|
+
except KeyError as exc:
|
|
576
|
+
raise RuntimeError(f"no storage spec registered for source: {source}") from exc
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
def _resolve_parent_ref(parent_ref: str | None) -> tuple[str | None, str | None]:
|
|
580
|
+
if parent_ref is None:
|
|
581
|
+
return None, None
|
|
582
|
+
try:
|
|
583
|
+
parsed = parse_content_ref(parent_ref)
|
|
584
|
+
except ValueError as exc:
|
|
585
|
+
raise RuntimeError(f"invalid parent_ref: {parent_ref}") from exc
|
|
586
|
+
return parsed.source, parsed.opaque_id
|
|
587
|
+
|
|
588
|
+
|
|
589
|
+
def _upsert_content_node(connection: sqlite3.Connection, node: ContentNode) -> int:
    """Insert a content node if absent; returns 1 on insert, 0 when it already existed."""
    row = (
        node.source,
        node.content_key,
        node.content_type,
        node.external_id,
        node.title,
        node.url,
        node.snippet,
        node.author,
        node.published_at,
        node.fetched_at or utc_now_iso(),  # default the fetch timestamp to "now"
        node.raw_payload,
        node.content_ref,
    )
    result = connection.execute(
        """
        INSERT OR IGNORE INTO content_nodes (
            source,
            content_key,
            content_type,
            external_id,
            title,
            url,
            snippet,
            author,
            published_at,
            fetched_at,
            raw_payload,
            content_ref
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        row,
    )
    return result.rowcount
|
|
625
|
+
|
|
626
|
+
|
|
627
|
+
def _upsert_content_channel_link(connection: sqlite3.Connection, link: ContentChannelLink) -> int:
    """Insert a channel link if absent; returns 1 on insert, 0 for a duplicate."""
    row = (
        link.source,
        link.channel_key,
        link.content_key,
        link.membership_kind,
        link.linked_at or utc_now_iso(),  # default the link timestamp to "now"
    )
    result = connection.execute(
        """
        INSERT OR IGNORE INTO content_channel_links (
            source,
            channel_key,
            content_key,
            membership_kind,
            linked_at
        )
        VALUES (?, ?, ?, ?, ?)
        """,
        row,
    )
    return result.rowcount
|
|
648
|
+
|
|
649
|
+
|
|
650
|
+
def _upsert_content_relation(connection: sqlite3.Connection, relation: ContentRelation) -> int:
|
|
651
|
+
cursor = connection.execute(
|
|
652
|
+
"""
|
|
653
|
+
INSERT OR IGNORE INTO content_relations (
|
|
654
|
+
source,
|
|
655
|
+
from_content_key,
|
|
656
|
+
relation_type,
|
|
657
|
+
to_content_key,
|
|
658
|
+
relation_semantic,
|
|
659
|
+
position,
|
|
660
|
+
metadata_json
|
|
661
|
+
)
|
|
662
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
663
|
+
""",
|
|
664
|
+
(
|
|
665
|
+
relation.source,
|
|
666
|
+
relation.from_content_key,
|
|
667
|
+
relation.relation_type,
|
|
668
|
+
relation.to_content_key,
|
|
669
|
+
relation.relation_semantic,
|
|
670
|
+
relation.position,
|
|
671
|
+
relation.metadata_json,
|
|
672
|
+
),
|
|
673
|
+
)
|
|
674
|
+
return cursor.rowcount
|
|
675
|
+
|
|
676
|
+
|
|
677
|
+
def _validate_batch(
    connection: sqlite3.Connection,
    *,
    source: str,
    channel_key: str,
    batch: ContentSyncBatch,
) -> None:
    """Reject a sync batch whose members disagree with *source*/*channel_key*
    or reference content nodes that exist neither in the batch nor the store."""
    keys_in_batch = {node.content_key for node in batch.nodes}
    _validate_node_sources(source, batch.nodes)
    _validate_link_sources(source, channel_key, batch.channel_links, keys_in_batch, connection)
    _validate_relation_sources(source, batch.relations, keys_in_batch, connection)
|
|
688
|
+
|
|
689
|
+
|
|
690
|
+
def _validate_node_sources(source: str, nodes: list[ContentNode]) -> None:
|
|
691
|
+
for node in nodes:
|
|
692
|
+
if node.source != source:
|
|
693
|
+
raise RuntimeError(f"content node source mismatch: expected {source}, got {node.source}")
|
|
694
|
+
|
|
695
|
+
|
|
696
|
+
def _validate_link_sources(
|
|
697
|
+
source: str,
|
|
698
|
+
channel_key: str,
|
|
699
|
+
links: list[ContentChannelLink],
|
|
700
|
+
batch_keys: set[str],
|
|
701
|
+
connection: sqlite3.Connection,
|
|
702
|
+
) -> None:
|
|
703
|
+
for link in links:
|
|
704
|
+
if link.source != source:
|
|
705
|
+
raise RuntimeError(f"content channel link source mismatch: expected {source}, got {link.source}")
|
|
706
|
+
if link.channel_key != channel_key:
|
|
707
|
+
raise RuntimeError(f"content channel link channel mismatch: expected {channel_key}, got {link.channel_key}")
|
|
708
|
+
if not _content_key_exists(connection, source, link.content_key, batch_keys):
|
|
709
|
+
raise RuntimeError(f"content channel link references missing content node: {link.content_key}")
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
def _validate_relation_sources(
|
|
713
|
+
source: str,
|
|
714
|
+
relations: list[ContentRelation],
|
|
715
|
+
batch_keys: set[str],
|
|
716
|
+
connection: sqlite3.Connection,
|
|
717
|
+
) -> None:
|
|
718
|
+
for relation in relations:
|
|
719
|
+
if relation.source != source:
|
|
720
|
+
raise RuntimeError(f"content relation source mismatch: expected {source}, got {relation.source}")
|
|
721
|
+
if relation.relation_type != "parent":
|
|
722
|
+
raise RuntimeError(f"unsupported content relation type: {relation.relation_type}")
|
|
723
|
+
if relation.from_content_key == relation.to_content_key:
|
|
724
|
+
raise RuntimeError(f"content relation cannot self-reference: {relation.from_content_key}")
|
|
725
|
+
if not _content_key_exists(connection, source, relation.from_content_key, batch_keys):
|
|
726
|
+
raise RuntimeError(f"content relation references missing content node: {relation.from_content_key}")
|
|
727
|
+
if not _content_key_exists(connection, source, relation.to_content_key, batch_keys):
|
|
728
|
+
raise RuntimeError(f"content relation references missing content node: {relation.to_content_key}")
|
|
729
|
+
|
|
730
|
+
|
|
731
|
+
def _content_key_exists(
|
|
732
|
+
connection: sqlite3.Connection,
|
|
733
|
+
source: str,
|
|
734
|
+
content_key: str,
|
|
735
|
+
batch_keys: set[str],
|
|
736
|
+
) -> bool:
|
|
737
|
+
if content_key in batch_keys:
|
|
738
|
+
return True
|
|
739
|
+
row = connection.execute(
|
|
740
|
+
"""
|
|
741
|
+
SELECT 1
|
|
742
|
+
FROM content_nodes
|
|
743
|
+
WHERE source = ? AND content_key = ?
|
|
744
|
+
""",
|
|
745
|
+
(source, content_key),
|
|
746
|
+
).fetchone()
|
|
747
|
+
return row is not None
|
|
748
|
+
|
|
749
|
+
|
|
750
|
+
def list_content_channels(connection: sqlite3.Connection, source: str, content_key: str) -> tuple[str, ...]:
    """Return the channel keys a content node is linked to, sorted ascending."""
    linked = connection.execute(
        """
        SELECT channel_key
        FROM content_channel_links
        WHERE source = ? AND content_key = ?
        ORDER BY channel_key
        """,
        (source, content_key),
    )
    return tuple(row["channel_key"] for row in linked.fetchall())
|
|
761
|
+
|
|
762
|
+
|
|
763
|
+
def delete_source_sync_state(connection: sqlite3.Connection, source: str) -> None:
    """Delete every stored sync cursor belonging to *source*."""
    connection.execute("DELETE FROM sync_state WHERE source = ?", (source,))
|
|
765
|
+
|
|
766
|
+
|
|
767
|
+
def delete_source_content(connection: sqlite3.Connection, source: str) -> None:
    """Delete all content rows for *source*.

    Relations and channel links are removed before the nodes they reference,
    so referencing rows never outlive their nodes mid-delete.
    """
    connection.execute("DELETE FROM content_relations WHERE source = ?", (source,))
    connection.execute("DELETE FROM content_channel_links WHERE source = ?", (source,))
    connection.execute("DELETE FROM content_nodes WHERE source = ?", (source,))
|