nonebot-plugin-git-poller 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,485 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from dataclasses import dataclass
5
+ from datetime import datetime, timezone
6
+ import shutil
7
+ from threading import RLock
8
+ from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
9
+
10
+ from nonebot import logger
11
+
12
+ from .archive import ArchiveBuilder, ArchiveFile
13
+ from .config import Config
14
+ from .git import GitRepositoryCache
15
+ from .models import PollResult, RepositoryIdentity, Subscription, UpdatePayload
16
+ from .repository import (
17
+ build_compare_url,
18
+ build_identity,
19
+ build_identity_from_normalized_url,
20
+ normalize_branch,
21
+ normalize_repo_url,
22
+ )
23
+ from .state import StateStore
24
+
25
+
26
@dataclass(frozen=True)
class FollowResult:
    """Immutable outcome of a follow request for one group and repository."""

    # Identity of the repository/branch the request resolved to.
    identity: RepositoryIdentity
    # The stored subscription: the pre-existing one, or the one just created.
    subscription: Subscription
    # True when the group was already following this repository key.
    already_following: bool
31
+
32
+
33
@dataclass(frozen=True)
class PullResult:
    """Immutable outcome of an on-demand pull of a followed repository."""

    # Identity of the repository/branch that was pulled.
    identity: RepositoryIdentity
    # Subscription the pull was performed for.
    subscription: Subscription
    # SHA stored as the last success before this pull, if any.
    previous_sha: str | None
    # Head SHA observed by the fetch.
    target_sha: str
    # Update description built for this pull.
    payload: UpdatePayload
    # Archive built from the fetched head tree.
    archive: ArchiveFile
41
+
42
+
43
@dataclass(frozen=True)
class SummaryResult:
    """Immutable outcome of summarizing a followed repository."""

    # Poll result carrying the subscription and the update payload.
    result: PollResult
    # Value reported by the fetch for commits since the last recorded SHA.
    behind_count: int | None
47
+
48
+
49
@dataclass(frozen=True)
class DeliveryResult:
    """Immutable pairing of a scheduled poll result with its built archive."""

    # Poll result describing the detected update.
    result: PollResult
    # Archive built for that update.
    archive: ArchiveFile
53
+
54
+
55
class GitPollerService:
    """Coordinate subscriptions, git fetches and archive builds for the poller.

    The async entry points (`follow_repo`, `pull_repo`, `summarize_repo`,
    `poll_schedule`) take a single `asyncio.Lock` and run the blocking work in
    a worker thread via `asyncio.to_thread`, so only one of them executes its
    blocking part at a time. The synchronous public methods only guard their
    `StateStore` access with a re-entrant thread lock.
    """

    def __init__(
        self,
        config: Config,
        *,
        state: StateStore | None = None,
        git_cache: GitRepositoryCache | None = None,
        archive_builder: ArchiveBuilder | None = None,
    ) -> None:
        """Wire up the service, creating default collaborators where omitted.

        Args:
            config: Plugin configuration.
            state: Subscription store; a new `StateStore` when None.
            git_cache: Repository cache; a new `GitRepositoryCache(config)`
                when None.
            archive_builder: Archive builder; a new `ArchiveBuilder` using
                `config.git_poller_archive_password` when None.
        """
        self.config = config
        # Compare against None rather than relying on truthiness: an injected
        # collaborator that happens to be falsy (e.g. an empty container-like
        # store) must not be silently replaced by a default instance.
        self.state = state if state is not None else StateStore()
        self.git_cache = git_cache if git_cache is not None else GitRepositoryCache(config)
        self.archive_builder = (
            archive_builder
            if archive_builder is not None
            else ArchiveBuilder(config.git_poller_archive_password)
        )
        self._lock = asyncio.Lock()
        # Protect StateStore's read-modify-write JSON operations only.
        self._state_lock = RLock()

    async def follow_repo(self, group_id: int, url: str, branch: str | None = None) -> FollowResult:
        """Follow a repository for a group; runs `_follow_repo_sync` in a thread."""
        async with self._lock:
            return await asyncio.to_thread(self._follow_repo_sync, group_id, url, branch)

    async def pull_repo(self, group_id: int, url: str, branch: str | None = None) -> PullResult:
        """Fetch a followed repository and build its archive in a thread."""
        async with self._lock:
            return await asyncio.to_thread(self._pull_repo_sync, group_id, url, branch)

    async def summarize_repo(self, group_id: int, url: str, branch: str | None = None) -> SummaryResult:
        """Fetch a followed repository and summarize pending commits in a thread."""
        async with self._lock:
            return await asyncio.to_thread(self._summarize_repo_sync, group_id, url, branch)

    async def poll_schedule(self, schedule: str):
        """Poll every subscription the store returns for `schedule`.

        Async generator. Subscriptions are processed one at a time; a failure
        on one is logged and skipped so the remaining ones are still polled.

        Yields:
            A `DeliveryResult` for each subscription whose poll produced one.
        """
        subscriptions = await asyncio.to_thread(self._subscriptions_for_schedule_sync, schedule)
        for group_id, repo_key, subscription in subscriptions:
            try:
                # The lock is taken per subscription, not around the whole
                # loop, so other operations can interleave between polls.
                async with self._lock:
                    result = await asyncio.to_thread(
                        self._poll_scheduled_subscription_sync,
                        group_id,
                        repo_key,
                        subscription,
                    )
            except Exception:
                logger.exception(
                    f"git poller scheduled subscription failed: "
                    f"group={group_id}, repo={repo_key}"
                )
                continue
            if result is not None:
                yield result

    def update_repo_schedule(
        self,
        group_id: int,
        url: str,
        branch: str | None,
        schedule: str,
    ) -> tuple[RepositoryIdentity, Subscription]:
        """Store a new schedule for a followed repository.

        Raises:
            KeyError: If the group does not follow the repository.
        """
        with self._state_lock:
            identity, _ = self._get_subscription(group_id, url, branch)
            subscription = self.state.update_schedule(
                group_id,
                identity.key,
                schedule,
                _now_iso(self.config.git_poller_timezone),
            )
            return identity, subscription

    def update_repo_archive_password(
        self,
        group_id: int,
        url: str,
        branch: str | None,
        password: str | None,
    ) -> tuple[RepositoryIdentity, Subscription]:
        """Store a new archive password (or None) for a followed repository.

        Raises:
            KeyError: If the group does not follow the repository.
        """
        with self._state_lock:
            identity, _ = self._get_subscription(group_id, url, branch)
            subscription = self.state.update_archive_password(
                group_id,
                identity.key,
                password,
                _now_iso(self.config.git_poller_timezone),
            )
            return identity, subscription

    def unfollow_repo(
        self,
        group_id: int,
        url: str,
        branch: str | None = None,
    ) -> tuple[RepositoryIdentity, bool]:
        """Remove a group's subscription and its last recorded archive.

        Returns:
            The resolved identity and the store's removal result; the second
            element is False when no matching subscription exists.
        """
        archive_path: str | None = None
        with self._state_lock:
            identity, existing = self._find_subscription(group_id, url, branch)
            if existing is None:
                return identity, False
            removed = self.state.remove_subscription(group_id, identity.key)
            if removed:
                archive_path = existing.last_archive_path
        # The archive is deleted after the state lock has been released.
        if archive_path:
            self.archive_builder.remove_archive(archive_path)
        return identity, removed

    def get_repo_subscription(
        self,
        group_id: int,
        url: str,
        branch: str | None = None,
    ) -> tuple[RepositoryIdentity, Subscription]:
        """Return the identity and subscription for a followed repository.

        Raises:
            KeyError: If the group does not follow the repository.
        """
        with self._state_lock:
            return self._get_subscription(group_id, url, branch)

    def list_group_subscriptions(self, group_id: int) -> dict[str, Subscription]:
        """Return the store's subscriptions for `group_id`, keyed by repo key."""
        with self._state_lock:
            return self.state.list_group_subscriptions(group_id)

    def scheduled_rules(self) -> set[str]:
        """Return every distinct non-blank schedule in use, whitespace-stripped.

        Includes the configured default schedule when it is non-blank.
        """
        with self._state_lock:
            rules = {self.config.git_poller_default_schedule.strip()}
            for subscriptions in self.state.list_all_subscriptions().values():
                for subscription in subscriptions.values():
                    if subscription.schedule.strip():
                        rules.add(subscription.schedule.strip())
            # Drops the default schedule if it stripped down to an empty string.
            return {rule for rule in rules if rule}

    def mark_success(self, result: PollResult) -> None:
        """Record the payload's target SHA as the last success for `result`."""
        self.mark_pull_success(result.group_id, result.repo_key, result.payload.target_sha)

    def mark_pull_success(self, group_id: int, repo_key: str, target_sha: str) -> None:
        """Record `target_sha` as the last successful SHA for a subscription."""
        with self._state_lock:
            self.state.update_last_success(
                group_id,
                repo_key,
                target_sha,
                _now_iso(self.config.git_poller_timezone),
            )

    def cleanup_unsubscribed_repo(self, repo_key: str) -> bool:
        """Delete cache and archives of `repo_key` unless it is still subscribed.

        Returns:
            False when cleanup was skipped; otherwise a truthy value if a cache
            or any archive was removed.
        """
        with self._state_lock:
            is_subscribed = self.state.is_repo_key_subscribed(repo_key)
        if is_subscribed:
            logger.info(f"git poller cleanup skipped for subscribed repo: {repo_key}")
            return False
        removed_cache = self.git_cache.remove_cache(repo_key)
        removed_archives = self.archive_builder.remove_archives_for_repo(repo_key)
        logger.info(
            f"git poller cleanup finished: repo={repo_key}, "
            f"cache={removed_cache}, archives={removed_archives}"
        )
        return removed_cache or bool(removed_archives)

    def cleanup_orphaned_storage(self) -> tuple[int, int]:
        """Remove caches and archives whose repo key has no subscription.

        Returns:
            `(removed_caches, removed_archives)`; the second element is
            whatever `remove_archives_except` reports.
        """
        with self._state_lock:
            active_repo_keys = {
                repo_key
                for subscriptions in self.state.list_all_subscriptions().values()
                for repo_key in subscriptions
            }
        removed_caches = 0
        for repo_key in sorted(self.git_cache.cached_repo_keys() - active_repo_keys):
            if self.git_cache.remove_cache(repo_key):
                removed_caches += 1
        removed_archives = self.archive_builder.remove_archives_except(active_repo_keys)
        logger.info(
            f"git poller orphan cleanup finished: "
            f"cache={removed_caches}, archives={removed_archives}"
        )
        return removed_caches, removed_archives

    def _follow_repo_sync(self, group_id: int, url: str, branch: str | None) -> FollowResult:
        """Resolve the remote head and create the subscription if it is new.

        A new subscription uses the configured default schedule and starts with
        the resolved remote head SHA as its last successful SHA.
        """
        normalized_url = normalize_repo_url(url)
        remote_head = self.git_cache.resolve_remote_head(
            normalized_url,
            self._explicit_branch(branch),
        )
        target_branch = remote_head.branch
        identity = build_identity_from_normalized_url(normalized_url, target_branch)
        # The existence check and the insert share one lock acquisition.
        with self._state_lock:
            existing = self.state.get_subscription(group_id, identity.key)
            if existing is not None:
                return FollowResult(
                    identity=identity,
                    subscription=existing,
                    already_following=True,
                )

            now = _now_iso(self.config.git_poller_timezone)
            subscription = Subscription(
                url=identity.url,
                branch=target_branch,
                schedule=self.config.git_poller_default_schedule,
                last_success_sha=remote_head.sha,
                enabled=True,
                created_at=now,
                updated_at=now,
            )
            self.state.upsert_subscription(group_id, identity.key, subscription)
            return FollowResult(
                identity=identity,
                subscription=subscription,
                already_following=False,
            )

    def _pull_repo_sync(self, group_id: int, url: str, branch: str | None) -> PullResult:
        """Fetch a followed repository, then build its payload and archive.

        Raises:
            KeyError: If the group does not follow the repository.
        """
        with self._state_lock:
            identity, subscription = self._get_subscription(group_id, url, branch)
        fetched = self.git_cache.fetch(identity.key, subscription.url, subscription.branch)
        try:
            previous_sha = subscription.last_success_sha
            payload = self._build_payload(identity, subscription, fetched, previous_sha)
            archive = self._build_archive(
                payload,
                subscription,
                fetched,
                group_id=group_id,
            )
            return PullResult(
                identity=identity,
                subscription=subscription,
                previous_sha=previous_sha,
                target_sha=fetched.head_sha,
                payload=payload,
                archive=archive,
            )
        finally:
            fetched.close()

    def _summarize_repo_sync(self, group_id: int, url: str, branch: str | None) -> SummaryResult:
        """Look up a followed repository and summarize it without archiving.

        Raises:
            KeyError: If the group does not follow the repository.
        """
        with self._state_lock:
            identity, subscription = self._get_subscription(group_id, url, branch)
        return self._poll_subscription(group_id, identity.key, identity, subscription)

    def _get_subscription(
        self,
        group_id: int,
        url: str,
        branch: str | None,
    ) -> tuple[RepositoryIdentity, Subscription]:
        """Like `_find_subscription`, but a missing subscription is an error.

        Raises:
            KeyError: If the group does not follow the repository.
        """
        identity, subscription = self._find_subscription(group_id, url, branch)
        if subscription is None:
            raise KeyError("本群尚未关注这个仓库。")
        return identity, subscription

    def _poll_subscription(
        self,
        group_id: int,
        repo_key: str,
        identity: RepositoryIdentity,
        subscription: Subscription,
    ) -> SummaryResult:
        """Fetch the repository and describe commits since the last success."""
        fetched = self.git_cache.fetch(repo_key, subscription.url, subscription.branch)
        try:
            behind_count = fetched.count_commits_since(subscription.last_success_sha)
            payload = self._build_payload(
                identity,
                subscription,
                fetched,
                subscription.last_success_sha,
            )
            return SummaryResult(
                result=PollResult(
                    group_id=group_id,
                    repo_key=repo_key,
                    subscription=subscription,
                    payload=payload,
                ),
                behind_count=behind_count,
            )
        finally:
            fetched.close()

    def _subscriptions_for_schedule_sync(
        self,
        schedule: str,
    ) -> list[tuple[int, str, Subscription]]:
        """Return the store's `(group_id, repo_key, subscription)` rows for `schedule`."""
        with self._state_lock:
            return self.state.subscriptions_for_schedule(schedule)

    def _poll_scheduled_subscription_sync(
        self,
        group_id: int,
        repo_key: str,
        subscription: Subscription,
    ) -> DeliveryResult | None:
        """Poll one scheduled subscription and build a delivery if it changed.

        Returns:
            None when the subscription no longer exists, when it had no
            recorded SHA yet (the fetched head is then recorded as the
            baseline), or when the head is unchanged. Otherwise a
            `DeliveryResult` with the payload and a freshly built archive.
        """
        # Re-read the subscription: the copy passed in was listed earlier and
        # may have been removed or modified since.
        with self._state_lock:
            fresh = self.state.get_subscription(group_id, repo_key)
        if fresh is None:
            return None
        subscription = fresh
        identity = build_identity(subscription.url, subscription.branch)
        fetched = self.git_cache.fetch(repo_key, subscription.url, subscription.branch)
        try:
            if not subscription.last_success_sha:
                with self._state_lock:
                    self.state.update_last_success(
                        group_id,
                        repo_key,
                        fetched.head_sha,
                        _now_iso(self.config.git_poller_timezone),
                    )
                return None

            previous_sha = subscription.last_success_sha
            if previous_sha == fetched.head_sha:
                logger.debug(
                    f"git poller no update: group={group_id}, repo={repo_key}, "
                    f"head={fetched.head_sha[:8]}"
                )
                return None

            payload = self._build_payload(
                identity,
                subscription,
                fetched,
                previous_sha,
            )
            result = PollResult(
                group_id=group_id,
                repo_key=repo_key,
                subscription=subscription,
                payload=payload,
            )
            return DeliveryResult(
                result=result,
                archive=self._build_archive(
                    payload,
                    subscription,
                    fetched,
                    group_id=group_id,
                ),
            )
        finally:
            fetched.close()

    def _build_payload(
        self,
        identity: RepositoryIdentity,
        subscription: Subscription,
        fetched,
        previous_sha: str | None,
    ) -> UpdatePayload:
        """Build the update payload from `previous_sha` to the fetched head.

        `fetched` is the object returned by `self.git_cache.fetch`. The commit
        list is empty when `previous_sha` already equals the fetched head.
        """
        if previous_sha == fetched.head_sha:
            commits = []
        else:
            commits = fetched.commits_since(previous_sha)
        return UpdatePayload(
            repo_key=identity.key,
            repo_url=subscription.url,
            repo_name=identity.display_name,
            branch=subscription.branch,
            previous_sha=previous_sha,
            target_sha=fetched.head_sha,
            target_short_sha=fetched.head_sha[:8],
            generated_at=_now_iso(self.config.git_poller_timezone),
            commits=commits,
            compare_url=build_compare_url(subscription.url, previous_sha, fetched.head_sha),
        )

    def _build_archive(
        self,
        payload: UpdatePayload,
        subscription: Subscription,
        fetched,
        *,
        group_id: int,
    ) -> ArchiveFile:
        """Replace the subscription's archive with one built from the head tree.

        Any previously recorded archive is removed and its stored path cleared
        first. The head tree is then exported to a scratch directory, the
        archive is built and its path stored. The parent of the scratch
        directory is always deleted afterwards, even when the build fails.
        """
        if subscription.last_archive_path:
            self.archive_builder.remove_archive(subscription.last_archive_path)
            with self._state_lock:
                self.state.update_last_archive_path(
                    group_id,
                    payload.repo_key,
                    None,
                    _now_iso(self.config.git_poller_timezone),
                )
        source_dir = self.archive_builder.source_root(payload)
        try:
            fetched.export_head_tree(source_dir)
            archive = self.archive_builder.build(payload, subscription, source_dir)
            with self._state_lock:
                self.state.update_last_archive_path(
                    group_id,
                    payload.repo_key,
                    str(archive.path),
                    _now_iso(self.config.git_poller_timezone),
                )
            return archive
        finally:
            shutil.rmtree(source_dir.parent, ignore_errors=True)

    def _find_subscription(
        self,
        group_id: int,
        url: str,
        branch: str | None,
    ) -> tuple[RepositoryIdentity, Subscription | None]:
        """Locate a group's subscription for `url`, optionally by branch.

        With an explicit branch, the lookup is by the exact repository key.
        Without one, the group's subscriptions are matched by URL.

        Returns:
            The identity and the subscription, or the requested identity and
            None when nothing matches.

        Raises:
            ValueError: If no branch was given and the group follows several
                branches of the same URL.
        """
        explicit = self._explicit_branch(branch)
        if explicit is not None:
            identity = build_identity(url, explicit)
            return identity, self.state.get_subscription(group_id, identity.key)

        requested = build_identity(url)
        matches = [
            (repo_key, subscription)
            for repo_key, subscription in self.state.list_group_subscriptions(group_id).items()
            if subscription.url == requested.url
        ]
        if not matches:
            return requested, None
        if len(matches) > 1:
            raise ValueError("本群关注了这个仓库的多个分支,请使用 --分支名 指定。")
        _, subscription = matches[0]
        return build_identity(subscription.url, subscription.branch), subscription

    @staticmethod
    def _explicit_branch(branch: str | None) -> str | None:
        """Return the normalized branch, or None when no branch was given."""
        if branch is None:
            return None
        return normalize_branch(branch)
477
+
478
+
479
def _now_iso(timezone_name: str | None = None) -> str:
    """Return the current time as a timezone-aware ISO-8601 string.

    The timestamp has second precision. When `timezone_name` is a usable IANA
    key, the time is expressed in that zone; otherwise it is expressed in the
    system's local timezone.

    Args:
        timezone_name: IANA timezone key such as "Asia/Shanghai", or a falsy
            value to use the local timezone.

    Returns:
        The formatted timestamp, including its UTC offset.
    """
    if timezone_name:
        try:
            zone = ZoneInfo(timezone_name)
        except (ZoneInfoNotFoundError, ValueError):
            # ZoneInfo raises ValueError (not ZoneInfoNotFoundError) for keys
            # that are absolute or non-normalized paths; a bad configured name
            # should degrade to local time either way, not break timestamps.
            logger.warning(f"git poller fell back to local timezone for timestamps: {timezone_name!r}")
        else:
            return datetime.now(zone).isoformat(timespec="seconds")
    return datetime.now(timezone.utc).astimezone().isoformat(timespec="seconds")
@@ -0,0 +1,153 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+
6
+
7
@dataclass(frozen=True)
class RepositoryIdentity:
    """Immutable description of one repository/branch target."""

    # Key under which subscriptions for this repository are stored.
    key: str
    # Repository URL.
    url: str
    # Human-readable name used when presenting the repository.
    display_name: str
    # Optional web URL; None when there is none.
    web_url: str | None
13
+
14
+
15
@dataclass
class Subscription:
    """Mutable record of one subscription, convertible to and from JSON data."""

    url: str
    branch: str
    schedule: str
    # The optional fields below are None when no value is recorded.
    last_success_sha: str | None = None
    last_archive_path: str | None = None
    archive_password: str | None = None
    enabled: bool = True
    # Timestamps are kept as strings; empty when unknown.
    created_at: str = ""
    updated_at: str = ""

    def to_json(self) -> dict[str, Any]:
        """Return the subscription as a plain dict with one key per field."""
        return dict(
            url=self.url,
            branch=self.branch,
            schedule=self.schedule,
            last_success_sha=self.last_success_sha,
            last_archive_path=self.last_archive_path,
            archive_password=self.archive_password,
            enabled=self.enabled,
            created_at=self.created_at,
            updated_at=self.updated_at,
        )

    @classmethod
    def from_json(cls, data: dict[str, Any]) -> "Subscription":
        """Build a subscription from a decoded JSON object.

        Only "url" is required. A missing or falsy "branch" becomes "main",
        other missing or falsy text fields become "" or None, and "enabled"
        defaults to True when the key is absent.

        Raises:
            KeyError: If "url" is missing.
        """

        def text_or_empty(key: str) -> str:
            return str(data.get(key) or "")

        def text_or_none(key: str) -> str | None:
            raw = data.get(key)
            if raw:
                return str(raw)
            return None

        return cls(
            url=str(data["url"]),
            branch=str(data.get("branch") or "main"),
            schedule=text_or_empty("schedule"),
            last_success_sha=text_or_none("last_success_sha"),
            last_archive_path=text_or_none("last_archive_path"),
            archive_password=text_or_none("archive_password"),
            enabled=bool(data.get("enabled", True)),
            created_at=text_or_empty("created_at"),
            updated_at=text_or_empty("updated_at"),
        )
65
+
66
+
67
@dataclass(frozen=True)
class CommitInfo:
    """Immutable description of a single commit, convertible to and from JSON data."""

    sha: str
    short_sha: str
    title: str
    committed_at: str
    author: str
    # Optional link to the commit; None when there is none.
    url: str | None = None

    def to_json(self) -> dict[str, Any]:
        """Return the commit as a plain dict with one key per field."""
        return {
            "sha": self.sha,
            "short_sha": self.short_sha,
            "title": self.title,
            "committed_at": self.committed_at,
            "author": self.author,
            "url": self.url,
        }

    @classmethod
    def from_json(cls, data: dict[str, Any]) -> "CommitInfo":
        """Build a commit from a decoded JSON object.

        Only "sha" is required. A missing or falsy "short_sha" falls back to
        the first eight characters of the SHA, and so does a title that is
        missing, falsy or blank after stripping.

        Raises:
            KeyError: If "sha" is missing.
        """
        sha = str(data["sha"])
        short = sha[:8]
        # Strip before applying the fallback so that a whitespace-only title
        # does not end up as an empty string.
        title = str(data.get("title") or "").strip() or short
        return cls(
            sha=sha,
            short_sha=str(data.get("short_sha") or short),
            title=title,
            committed_at=str(data.get("committed_at") or ""),
            author=str(data.get("author") or ""),
            url=str(data["url"]) if data.get("url") else None,
        )
97
+
98
+
99
@dataclass(frozen=True)
class UpdatePayload:
    """Immutable description of one repository update, convertible to and from JSON data."""

    repo_key: str
    repo_url: str
    repo_name: str
    branch: str
    # SHA the update starts from; None when there is none.
    previous_sha: str | None
    target_sha: str
    target_short_sha: str
    # Timestamp kept as a string; empty when unknown.
    generated_at: str
    commits: list[CommitInfo]
    # Optional comparison link; None when there is none.
    compare_url: str | None = None

    def to_json(self) -> dict[str, Any]:
        """Return the payload as a plain dict, with commits serialized too."""
        return {
            "repo_key": self.repo_key,
            "repo_url": self.repo_url,
            "repo_name": self.repo_name,
            "branch": self.branch,
            "previous_sha": self.previous_sha,
            "target_sha": self.target_sha,
            "target_short_sha": self.target_short_sha,
            "generated_at": self.generated_at,
            "commits": [commit.to_json() for commit in self.commits],
            "compare_url": self.compare_url,
        }

    @classmethod
    def from_json(cls, data: dict[str, Any]) -> "UpdatePayload":
        """Build a payload from a decoded JSON object.

        "repo_key", "repo_url", "repo_name" and "target_sha" are required. A
        missing or falsy "branch" becomes "main", a missing or falsy
        "target_short_sha" becomes the first eight characters of the target
        SHA, and entries of "commits" that are not dicts are skipped.

        Raises:
            KeyError: If a required key is missing.
        """
        target_sha = str(data["target_sha"])
        # `or []` also covers an explicit null, which would otherwise fail
        # when iterated; the other optional fields tolerate null the same way.
        raw_commits = data.get("commits") or []
        return cls(
            repo_key=str(data["repo_key"]),
            repo_url=str(data["repo_url"]),
            repo_name=str(data["repo_name"]),
            branch=str(data.get("branch") or "main"),
            previous_sha=(
                str(data["previous_sha"]) if data.get("previous_sha") else None
            ),
            target_sha=target_sha,
            target_short_sha=str(data.get("target_short_sha") or target_sha[:8]),
            generated_at=str(data.get("generated_at") or ""),
            commits=[
                CommitInfo.from_json(item)
                for item in raw_commits
                if isinstance(item, dict)
            ],
            compare_url=str(data["compare_url"]) if data.get("compare_url") else None,
        )
146
+
147
+
148
@dataclass(frozen=True)
class PollResult:
    """Immutable result of polling one group's subscription."""

    # Group the subscription belongs to.
    group_id: int
    # Key of the polled repository.
    repo_key: str
    # Subscription that was polled.
    subscription: Subscription
    # Update description produced by the poll.
    payload: UpdatePayload