skycoll 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skycoll-0.1.0/LICENSE +21 -0
- skycoll-0.1.0/PKG-INFO +298 -0
- skycoll-0.1.0/README.md +270 -0
- skycoll-0.1.0/pyproject.toml +39 -0
- skycoll-0.1.0/setup.cfg +4 -0
- skycoll-0.1.0/skycoll/__init__.py +3 -0
- skycoll-0.1.0/skycoll/__main__.py +154 -0
- skycoll-0.1.0/skycoll/api.py +544 -0
- skycoll-0.1.0/skycoll/appview.py +52 -0
- skycoll-0.1.0/skycoll/auth.py +675 -0
- skycoll-0.1.0/skycoll/commands/__init__.py +0 -0
- skycoll-0.1.0/skycoll/commands/appviews.py +16 -0
- skycoll-0.1.0/skycoll/commands/backlinks.py +36 -0
- skycoll-0.1.0/skycoll/commands/edgelist.py +188 -0
- skycoll-0.1.0/skycoll/commands/fetch.py +56 -0
- skycoll-0.1.0/skycoll/commands/firehose.py +88 -0
- skycoll-0.1.0/skycoll/commands/init.py +107 -0
- skycoll-0.1.0/skycoll/commands/likes.py +48 -0
- skycoll-0.1.0/skycoll/commands/plc.py +104 -0
- skycoll-0.1.0/skycoll/commands/posts.py +82 -0
- skycoll-0.1.0/skycoll/commands/resolve.py +17 -0
- skycoll-0.1.0/skycoll/commands/sync.py +28 -0
- skycoll-0.1.0/skycoll/commands/threads.py +84 -0
- skycoll-0.1.0/skycoll/constellation.py +67 -0
- skycoll-0.1.0/skycoll/resolve.py +232 -0
- skycoll-0.1.0/skycoll/storage.py +492 -0
- skycoll-0.1.0/skycoll.egg-info/PKG-INFO +298 -0
- skycoll-0.1.0/skycoll.egg-info/SOURCES.txt +39 -0
- skycoll-0.1.0/skycoll.egg-info/dependency_links.txt +1 -0
- skycoll-0.1.0/skycoll.egg-info/entry_points.txt +2 -0
- skycoll-0.1.0/skycoll.egg-info/requires.txt +11 -0
- skycoll-0.1.0/skycoll.egg-info/top_level.txt +1 -0
- skycoll-0.1.0/tests/test_api.py +153 -0
- skycoll-0.1.0/tests/test_appview.py +38 -0
- skycoll-0.1.0/tests/test_auth.py +209 -0
- skycoll-0.1.0/tests/test_car.py +120 -0
- skycoll-0.1.0/tests/test_constellation.py +68 -0
- skycoll-0.1.0/tests/test_firehose.py +39 -0
- skycoll-0.1.0/tests/test_plc.py +64 -0
- skycoll-0.1.0/tests/test_resolve.py +270 -0
- skycoll-0.1.0/tests/test_storage.py +280 -0
skycoll-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 skycoll contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
skycoll-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: skycoll
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Bluesky/AT Protocol social-graph CLI — twecoll equivalent
|
|
5
|
+
Author-email: j4ckxyz <j4ckxyz@users.noreply.github.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/j4ckxyz/skycoll
|
|
8
|
+
Project-URL: Repository, https://github.com/j4ckxyz/skycoll
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Utilities
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: httpx<1,>=0.27
|
|
19
|
+
Requires-Dist: atproto<1,>=0.0.50
|
|
20
|
+
Requires-Dist: cryptography<45,>=43.0
|
|
21
|
+
Requires-Dist: cbor2<6,>=5.6
|
|
22
|
+
Provides-Extra: graph
|
|
23
|
+
Requires-Dist: python-igraph>=0.11; extra == "graph"
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
26
|
+
Requires-Dist: pytest-httpx>=0.30; extra == "dev"
|
|
27
|
+
Dynamic: license-file
|
|
28
|
+
|
|
29
|
+
# skycoll
|
|
30
|
+
|
|
31
|
+
**skycoll** is a Bluesky/AT Protocol social-graph CLI tool — the equivalent of [twecoll](https://github.com/nickvdp/twecoll) for the ATmosphere.
|
|
32
|
+
|
|
33
|
+
It resolves identities, fetches social graphs, downloads posts (via paginated feed requests or an optional full CAR repo sync) and likes, reconstructs reply threads, and produces GML graph files (with optional PNG visualisations).
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
### From source
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
git clone https://github.com/j4ckxyz/skycoll.git
|
|
41
|
+
cd skycoll
|
|
42
|
+
pip install -e .
|
|
43
|
+
|
|
44
|
+
# Optional: graph visualisation support
|
|
45
|
+
pip install -e ".[graph]"
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Dependencies
|
|
49
|
+
|
|
50
|
+
Core: `httpx`, `atproto`, `cryptography`, `cbor2`
|
|
51
|
+
|
|
52
|
+
Optional: `python-igraph` (for PNG graph rendering)
|
|
53
|
+
|
|
54
|
+
Dev: `pytest`, `pytest-httpx`
|
|
55
|
+
|
|
56
|
+
## First-run OAuth flow
|
|
57
|
+
|
|
58
|
+
The first time you run a command that requires authentication (`init`, `fetch`, `posts`, `likes`), skycoll will:
|
|
59
|
+
|
|
60
|
+
1. Resolve your handle to a DID and PDS endpoint.
|
|
61
|
+
2. Discover the OAuth 2.0 authorisation server from your PDS.
|
|
62
|
+
3. Start a temporary HTTP server on `127.0.0.1:<random-port>` to serve the client metadata document and receive the callback.
|
|
63
|
+
4. Open your browser so you can authorise the request (scopes: `atproto transition:generic`).
|
|
64
|
+
5. Exchange the authorisation code using PKCE (S256) and bind it with DPoP (ES256).
|
|
65
|
+
6. Save the session to `~/.skycoll/sessions/<did>.json` (mode `0600`).
|
|
66
|
+
|
|
67
|
+
On subsequent runs, the saved session is reused and refreshed automatically when within 60 seconds of token expiry.
|
|
68
|
+
|
|
69
|
+
> **NOTE:** The `transition:generic` scope provides read access and like-deletion. When AT Protocol proposal 0011 (granular scopes) stabilises, this should be narrowed to only `app.bsky.feed.*` reads and `app.bsky.feed.like` delete.
|
|
70
|
+
|
|
71
|
+
## Commands
|
|
72
|
+
|
|
73
|
+
### `resolve`
|
|
74
|
+
|
|
75
|
+
Resolve a handle to a DID (or a DID to a handle + PDS endpoint).
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
skycoll resolve j4ck.xyz
|
|
79
|
+
skycoll resolve did:plc:z72i7hdynmk6r22z27h6tvae
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### `init`
|
|
83
|
+
|
|
84
|
+
Fetch your profile, follows, and followers. Writes `<handle>.dat` and downloads avatars to `img/`.
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
skycoll init j4ck.xyz
|
|
88
|
+
|
|
89
|
+
# Also fetch lists you've created
|
|
90
|
+
skycoll init j4ck.xyz --lists
|
|
91
|
+
|
|
92
|
+
# Include self-labels and server-assigned labels
|
|
93
|
+
skycoll init j4ck.xyz --labels
|
|
94
|
+
|
|
95
|
+
# Route through the Blacksky AppView
|
|
96
|
+
skycoll init j4ck.xyz --appview blacksky
|
|
97
|
+
|
|
98
|
+
# Query a Constellation backlinks index
|
|
99
|
+
skycoll init j4ck.xyz --constellation https://constellation.example.com
|
|
100
|
+
|
|
101
|
+
# All flags combined
|
|
102
|
+
skycoll init j4ck.xyz --lists --labels --appview blacksky --constellation https://constellation.example.com
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
The `.dat` file includes:
|
|
106
|
+
- Profile header row with labels column
|
|
107
|
+
- `F` rows for follows
|
|
108
|
+
- `B` rows for followers
|
|
109
|
+
- `L` rows for lists (with `--lists`)
|
|
110
|
+
- `S` rows for starter packs
|
|
111
|
+
- `K` rows for Constellation backlink counts (with `--constellation`)
|
|
112
|
+
|
|
113
|
+
### `appview` flag
|
|
114
|
+
|
|
115
|
+
Several commands accept `--appview` to route API requests through a specific Bluesky-compatible AppView. This sets the `atproto-proxy` HTTP header to a service DID, rather than hardcoding a base URL.
|
|
116
|
+
|
|
117
|
+
Built-in names:
|
|
118
|
+
| Name | Service DID | Description |
|
|
119
|
+
|---|---|---|
|
|
120
|
+
| `bluesky` | `did:web:api.bsky.app#bsky_appview` | Bluesky official AppView (default) |
|
|
121
|
+
| `blacksky` | `did:web:api.blacksky.community#bsky_appview` | Blacksky community AppView |
|
|
122
|
+
|
|
123
|
+
You can also pass a raw DID+fragment string for custom AppViews:
|
|
124
|
+
```bash
|
|
125
|
+
skycoll init j4ck.xyz --appview did:web:custom.example#bsky_appview
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### `appviews`
|
|
129
|
+
|
|
130
|
+
List the built-in AppView names and their service DIDs:
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
skycoll appviews
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### `fetch`
|
|
137
|
+
|
|
138
|
+
Fetch the follows of every person listed in `<handle>.dat`. Writes one `fdat/<friend>.dat` per followed user.
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
skycoll fetch j4ck.xyz
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### `posts`
|
|
145
|
+
|
|
146
|
+
Download posts using paginated `getAuthorFeed` (default, no cap — pages until cursor is exhausted):
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
skycoll posts j4ck.xyz
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Use `--car` for full CAR repo sync (slower but gives a complete archive including all record types):
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
skycoll posts j4ck.xyz --car
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Rich `.twt` format columns: `type uri timestamp reply_to_uri root_uri text`
|
|
159
|
+
|
|
160
|
+
Where `type` is `post`, `repost`, or `quote`.
|
|
161
|
+
|
|
162
|
+
Route through an alternative AppView:
|
|
163
|
+
```bash
|
|
164
|
+
skycoll posts j4ck.xyz --appview blacksky
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### `likes`
|
|
168
|
+
|
|
169
|
+
Download all likes. Writes `<handle>.fav` (tab-separated: `uri timestamp author_did author_handle text`).
|
|
170
|
+
|
|
171
|
+
```bash
|
|
172
|
+
skycoll likes j4ck.xyz
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
Purge (delete all likes — the only write operation):
|
|
176
|
+
```bash
|
|
177
|
+
skycoll likes j4ck.xyz --purge
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### `threads`
|
|
181
|
+
|
|
182
|
+
Reconstruct reply threads from an existing `<handle>.twt` file. Uses the `reply_to_uri` and `root_uri` fields to build thread trees. Outputs `<handle>.threads` as JSON.
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
skycoll threads j4ck.xyz
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### `edgelist`
|
|
189
|
+
|
|
190
|
+
Generate `<handle>.gml` from `.dat` and `fdat/` data. If `python-igraph` is installed, also renders a `<handle>.png` visualisation.
|
|
191
|
+
|
|
192
|
+
The GML includes bidirectional edges, `mutual_only` attributes, and `node_type` attributes.
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
skycoll edgelist j4ck.xyz
|
|
196
|
+
|
|
197
|
+
# Enrich edges with likes counts from Constellation
|
|
198
|
+
skycoll edgelist j4ck.xyz --constellation https://constellation.example.com
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
### `sync`
|
|
202
|
+
|
|
203
|
+
Download the full repo CAR and write it to `<handle>.car` for archival. No parsing.
|
|
204
|
+
|
|
205
|
+
```bash
|
|
206
|
+
skycoll sync j4ck.xyz
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### `backlinks`
|
|
210
|
+
|
|
211
|
+
Query a [Constellation](https://github.com/at-microcosm/microcosm-rs/tree/main/constellation) backlinks index and pretty-print the full backlink breakdown for a handle.
|
|
212
|
+
|
|
213
|
+
Constellation is a self-hostable AT Protocol backlinks index. A public instance may be available; this feature is opt-in and the host must be provided explicitly.
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
skycoll backlinks j4ck.xyz --constellation https://constellation.example.com
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
### `plc`
|
|
220
|
+
|
|
221
|
+
Fetch the full PLC directory operation log for a DID and write it to `<did>.plc` as JSON. This gives the complete identity history — handle changes, PDS migrations, key rotations.
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
skycoll plc did:plc:z72i7hdynmk6r22z27h6tvae
|
|
225
|
+
|
|
226
|
+
# Also print a human-readable summary
|
|
227
|
+
skycoll plc did:plc:z72i7hdynmk6r22z27h6tvae --audit
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
### `firehose`
|
|
231
|
+
|
|
232
|
+
Connect to an AT Protocol relay WebSocket and stream repo events in real time. Filter by handle or DID, and optionally stop after N events.
|
|
233
|
+
|
|
234
|
+
```bash
|
|
235
|
+
# Stream all events from the default relay (wss://bsky.network)
|
|
236
|
+
skycoll firehose
|
|
237
|
+
|
|
238
|
+
# Filter by DID
|
|
239
|
+
skycoll firehose --did did:plc:abc123
|
|
240
|
+
|
|
241
|
+
# Filter by handle (resolved to DID automatically)
|
|
242
|
+
skycoll firehose --handle j4ck.xyz
|
|
243
|
+
|
|
244
|
+
# Use the Blacksky/atproto.africa relay
|
|
245
|
+
skycoll firehose --relay wss://atproto.africa
|
|
246
|
+
|
|
247
|
+
# Stop after 100 matching events
|
|
248
|
+
skycoll firehose --handle j4ck.xyz --limit 100
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## File formats
|
|
252
|
+
|
|
253
|
+
| File | Format |
|
|
254
|
+
|------|--------|
|
|
255
|
+
| `<handle>.dat` | Tab-separated: profile header + `F`/`B`/`L`/`S`/`K` prefixed rows |
|
|
256
|
+
| `fdat/<handle>.dat` | Same format as `.dat`, one file per followed user |
|
|
257
|
+
| `<handle>.twt` | Tab-separated: `type uri timestamp reply_to_uri root_uri text` |
|
|
258
|
+
| `<handle>.fav` | Tab-separated: `uri timestamp author_did author_handle text` |
|
|
259
|
+
| `<handle>.threads` | JSON array of thread trees (root + nested replies) |
|
|
260
|
+
| `<handle>.gml` | Graph Modeling Language file with `mutual_only` and `node_type` |
|
|
261
|
+
| `<handle>.car` | Raw CAR archive (binary) |
|
|
262
|
+
| `<did>.plc` | PLC directory operation log (JSON) |
|
|
263
|
+
| `img/<handle>` | Avatar image |
|
|
264
|
+
|
|
265
|
+
## Authentication details
|
|
266
|
+
|
|
267
|
+
- **PKCE**: S256 code challenge method (mandatory)
|
|
268
|
+
- **DPoP**: ES256 keypair; separate nonces for auth server vs PDS
|
|
269
|
+
- **Scopes**: `atproto transition:generic`
|
|
270
|
+
- **Client type**: Public/native — loopback redirect URI on a random port
|
|
271
|
+
- **Client metadata**: Served from the loopback server at `/client-metadata.json`
|
|
272
|
+
- **Session storage**: `~/.skycoll/sessions/<did>.json` (mode `0600`)
|
|
273
|
+
- **`sub` verification**: Token exchange verifies the `sub` claim matches the expected DID
|
|
274
|
+
- **`atproto-proxy` header**: Routes requests through a specified AppView service DID
|
|
275
|
+
|
|
276
|
+
## PDS resolution
|
|
277
|
+
|
|
278
|
+
skycoll **never hardcodes bsky.social as the PDS**; the PDS is always discovered from the DID document. For every handle:
|
|
279
|
+
|
|
280
|
+
1. Resolve handle → DID via DNS `_atproto` TXT or `https://bsky.social/xrpc/com.atproto.identity.resolveHandle`
|
|
281
|
+
2. Fetch the DID document (`plc.directory` for `did:plc`, HTTPS well-known for `did:web`)
|
|
282
|
+
3. Extract the `#atproto_pds` service endpoint
|
|
283
|
+
4. Make all authenticated API calls against that PDS
|
|
284
|
+
|
|
285
|
+
## Pagination & rate limits
|
|
286
|
+
|
|
287
|
+
All AT Protocol list endpoints are cursor-based. skycoll loops until no cursor is returned. On HTTP 429, it retries with exponential backoff (max 3 attempts).
|
|
288
|
+
|
|
289
|
+
## Running tests
|
|
290
|
+
|
|
291
|
+
```bash
|
|
292
|
+
pip install -e ".[dev]"
|
|
293
|
+
pytest tests/ -v
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
## License
|
|
297
|
+
|
|
298
|
+
MIT
|
skycoll-0.1.0/README.md
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
# skycoll
|
|
2
|
+
|
|
3
|
+
**skycoll** is a Bluesky/AT Protocol social-graph CLI tool — the equivalent of [twecoll](https://github.com/nickvdp/twecoll) for the ATmosphere.
|
|
4
|
+
|
|
5
|
+
It resolves identities, fetches social graphs, downloads posts (via paginated feed requests or an optional full CAR repo sync) and likes, reconstructs reply threads, and produces GML graph files (with optional PNG visualisations).
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
### From source
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
git clone https://github.com/j4ckxyz/skycoll.git
|
|
13
|
+
cd skycoll
|
|
14
|
+
pip install -e .
|
|
15
|
+
|
|
16
|
+
# Optional: graph visualisation support
|
|
17
|
+
pip install -e ".[graph]"
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
### Dependencies
|
|
21
|
+
|
|
22
|
+
Core: `httpx`, `atproto`, `cryptography`, `cbor2`
|
|
23
|
+
|
|
24
|
+
Optional: `python-igraph` (for PNG graph rendering)
|
|
25
|
+
|
|
26
|
+
Dev: `pytest`, `pytest-httpx`
|
|
27
|
+
|
|
28
|
+
## First-run OAuth flow
|
|
29
|
+
|
|
30
|
+
The first time you run a command that requires authentication (`init`, `fetch`, `posts`, `likes`), skycoll will:
|
|
31
|
+
|
|
32
|
+
1. Resolve your handle to a DID and PDS endpoint.
|
|
33
|
+
2. Discover the OAuth 2.0 authorisation server from your PDS.
|
|
34
|
+
3. Start a temporary HTTP server on `127.0.0.1:<random-port>` to serve the client metadata document and receive the callback.
|
|
35
|
+
4. Open your browser so you can authorise the request (scopes: `atproto transition:generic`).
|
|
36
|
+
5. Exchange the authorisation code using PKCE (S256) and bind it with DPoP (ES256).
|
|
37
|
+
6. Save the session to `~/.skycoll/sessions/<did>.json` (mode `0600`).
|
|
38
|
+
|
|
39
|
+
On subsequent runs, the saved session is reused and refreshed automatically when within 60 seconds of token expiry.
|
|
40
|
+
|
|
41
|
+
> **NOTE:** The `transition:generic` scope provides read access and like-deletion. When AT Protocol proposal 0011 (granular scopes) stabilises, this should be narrowed to only `app.bsky.feed.*` reads and `app.bsky.feed.like` delete.
|
|
42
|
+
|
|
43
|
+
## Commands
|
|
44
|
+
|
|
45
|
+
### `resolve`
|
|
46
|
+
|
|
47
|
+
Resolve a handle to a DID (or a DID to a handle + PDS endpoint).
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
skycoll resolve j4ck.xyz
|
|
51
|
+
skycoll resolve did:plc:z72i7hdynmk6r22z27h6tvae
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### `init`
|
|
55
|
+
|
|
56
|
+
Fetch your profile, follows, and followers. Writes `<handle>.dat` and downloads avatars to `img/`.
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
skycoll init j4ck.xyz
|
|
60
|
+
|
|
61
|
+
# Also fetch lists you've created
|
|
62
|
+
skycoll init j4ck.xyz --lists
|
|
63
|
+
|
|
64
|
+
# Include self-labels and server-assigned labels
|
|
65
|
+
skycoll init j4ck.xyz --labels
|
|
66
|
+
|
|
67
|
+
# Route through the Blacksky AppView
|
|
68
|
+
skycoll init j4ck.xyz --appview blacksky
|
|
69
|
+
|
|
70
|
+
# Query a Constellation backlinks index
|
|
71
|
+
skycoll init j4ck.xyz --constellation https://constellation.example.com
|
|
72
|
+
|
|
73
|
+
# All flags combined
|
|
74
|
+
skycoll init j4ck.xyz --lists --labels --appview blacksky --constellation https://constellation.example.com
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
The `.dat` file includes:
|
|
78
|
+
- Profile header row with labels column
|
|
79
|
+
- `F` rows for follows
|
|
80
|
+
- `B` rows for followers
|
|
81
|
+
- `L` rows for lists (with `--lists`)
|
|
82
|
+
- `S` rows for starter packs
|
|
83
|
+
- `K` rows for Constellation backlink counts (with `--constellation`)
|
|
84
|
+
|
|
85
|
+
### `appview` flag
|
|
86
|
+
|
|
87
|
+
Several commands accept `--appview` to route API requests through a specific Bluesky-compatible AppView. This sets the `atproto-proxy` HTTP header to a service DID, rather than hardcoding a base URL.
|
|
88
|
+
|
|
89
|
+
Built-in names:
|
|
90
|
+
| Name | Service DID | Description |
|
|
91
|
+
|---|---|---|
|
|
92
|
+
| `bluesky` | `did:web:api.bsky.app#bsky_appview` | Bluesky official AppView (default) |
|
|
93
|
+
| `blacksky` | `did:web:api.blacksky.community#bsky_appview` | Blacksky community AppView |
|
|
94
|
+
|
|
95
|
+
You can also pass a raw DID+fragment string for custom AppViews:
|
|
96
|
+
```bash
|
|
97
|
+
skycoll init j4ck.xyz --appview did:web:custom.example#bsky_appview
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### `appviews`
|
|
101
|
+
|
|
102
|
+
List the built-in AppView names and their service DIDs:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
skycoll appviews
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### `fetch`
|
|
109
|
+
|
|
110
|
+
Fetch the follows of every person listed in `<handle>.dat`. Writes one `fdat/<friend>.dat` per followed user.
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
skycoll fetch j4ck.xyz
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### `posts`
|
|
117
|
+
|
|
118
|
+
Download posts using paginated `getAuthorFeed` (default, no cap — pages until cursor is exhausted):
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
skycoll posts j4ck.xyz
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Use `--car` for full CAR repo sync (slower but gives a complete archive including all record types):
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
skycoll posts j4ck.xyz --car
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Rich `.twt` format columns: `type uri timestamp reply_to_uri root_uri text`
|
|
131
|
+
|
|
132
|
+
Where `type` is `post`, `repost`, or `quote`.
|
|
133
|
+
|
|
134
|
+
Route through an alternative AppView:
|
|
135
|
+
```bash
|
|
136
|
+
skycoll posts j4ck.xyz --appview blacksky
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### `likes`
|
|
140
|
+
|
|
141
|
+
Download all likes. Writes `<handle>.fav` (tab-separated: `uri timestamp author_did author_handle text`).
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
skycoll likes j4ck.xyz
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Purge (delete all likes — the only write operation):
|
|
148
|
+
```bash
|
|
149
|
+
skycoll likes j4ck.xyz --purge
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### `threads`
|
|
153
|
+
|
|
154
|
+
Reconstruct reply threads from an existing `<handle>.twt` file. Uses the `reply_to_uri` and `root_uri` fields to build thread trees. Outputs `<handle>.threads` as JSON.
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
skycoll threads j4ck.xyz
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### `edgelist`
|
|
161
|
+
|
|
162
|
+
Generate `<handle>.gml` from `.dat` and `fdat/` data. If `python-igraph` is installed, also renders a `<handle>.png` visualisation.
|
|
163
|
+
|
|
164
|
+
The GML includes bidirectional edges, `mutual_only` attributes, and `node_type` attributes.
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
skycoll edgelist j4ck.xyz
|
|
168
|
+
|
|
169
|
+
# Enrich edges with likes counts from Constellation
|
|
170
|
+
skycoll edgelist j4ck.xyz --constellation https://constellation.example.com
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### `sync`
|
|
174
|
+
|
|
175
|
+
Download the full repo CAR and write it to `<handle>.car` for archival. No parsing.
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
skycoll sync j4ck.xyz
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### `backlinks`
|
|
182
|
+
|
|
183
|
+
Query a [Constellation](https://github.com/at-microcosm/microcosm-rs/tree/main/constellation) backlinks index and pretty-print the full backlink breakdown for a handle.
|
|
184
|
+
|
|
185
|
+
Constellation is a self-hostable AT Protocol backlinks index. A public instance may be available; this feature is opt-in and the host must be provided explicitly.
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
skycoll backlinks j4ck.xyz --constellation https://constellation.example.com
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### `plc`
|
|
192
|
+
|
|
193
|
+
Fetch the full PLC directory operation log for a DID and write it to `<did>.plc` as JSON. This gives the complete identity history — handle changes, PDS migrations, key rotations.
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
skycoll plc did:plc:z72i7hdynmk6r22z27h6tvae
|
|
197
|
+
|
|
198
|
+
# Also print a human-readable summary
|
|
199
|
+
skycoll plc did:plc:z72i7hdynmk6r22z27h6tvae --audit
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### `firehose`
|
|
203
|
+
|
|
204
|
+
Connect to an AT Protocol relay WebSocket and stream repo events in real time. Filter by handle or DID, and optionally stop after N events.
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
# Stream all events from the default relay (wss://bsky.network)
|
|
208
|
+
skycoll firehose
|
|
209
|
+
|
|
210
|
+
# Filter by DID
|
|
211
|
+
skycoll firehose --did did:plc:abc123
|
|
212
|
+
|
|
213
|
+
# Filter by handle (resolved to DID automatically)
|
|
214
|
+
skycoll firehose --handle j4ck.xyz
|
|
215
|
+
|
|
216
|
+
# Use the Blacksky/atproto.africa relay
|
|
217
|
+
skycoll firehose --relay wss://atproto.africa
|
|
218
|
+
|
|
219
|
+
# Stop after 100 matching events
|
|
220
|
+
skycoll firehose --handle j4ck.xyz --limit 100
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
## File formats
|
|
224
|
+
|
|
225
|
+
| File | Format |
|
|
226
|
+
|------|--------|
|
|
227
|
+
| `<handle>.dat` | Tab-separated: profile header + `F`/`B`/`L`/`S`/`K` prefixed rows |
|
|
228
|
+
| `fdat/<handle>.dat` | Same format as `.dat`, one file per followed user |
|
|
229
|
+
| `<handle>.twt` | Tab-separated: `type uri timestamp reply_to_uri root_uri text` |
|
|
230
|
+
| `<handle>.fav` | Tab-separated: `uri timestamp author_did author_handle text` |
|
|
231
|
+
| `<handle>.threads` | JSON array of thread trees (root + nested replies) |
|
|
232
|
+
| `<handle>.gml` | Graph Modeling Language file with `mutual_only` and `node_type` |
|
|
233
|
+
| `<handle>.car` | Raw CAR archive (binary) |
|
|
234
|
+
| `<did>.plc` | PLC directory operation log (JSON) |
|
|
235
|
+
| `img/<handle>` | Avatar image |
|
|
236
|
+
|
|
237
|
+
## Authentication details
|
|
238
|
+
|
|
239
|
+
- **PKCE**: S256 code challenge method (mandatory)
|
|
240
|
+
- **DPoP**: ES256 keypair; separate nonces for auth server vs PDS
|
|
241
|
+
- **Scopes**: `atproto transition:generic`
|
|
242
|
+
- **Client type**: Public/native — loopback redirect URI on a random port
|
|
243
|
+
- **Client metadata**: Served from the loopback server at `/client-metadata.json`
|
|
244
|
+
- **Session storage**: `~/.skycoll/sessions/<did>.json` (mode `0600`)
|
|
245
|
+
- **`sub` verification**: Token exchange verifies the `sub` claim matches the expected DID
|
|
246
|
+
- **`atproto-proxy` header**: Routes requests through a specified AppView service DID
|
|
247
|
+
|
|
248
|
+
## PDS resolution
|
|
249
|
+
|
|
250
|
+
skycoll **never hardcodes bsky.social as the PDS**; the PDS is always discovered from the DID document. For every handle:
|
|
251
|
+
|
|
252
|
+
1. Resolve handle → DID via DNS `_atproto` TXT or `https://bsky.social/xrpc/com.atproto.identity.resolveHandle`
|
|
253
|
+
2. Fetch the DID document (`plc.directory` for `did:plc`, HTTPS well-known for `did:web`)
|
|
254
|
+
3. Extract the `#atproto_pds` service endpoint
|
|
255
|
+
4. Make all authenticated API calls against that PDS
|
|
256
|
+
|
|
257
|
+
## Pagination & rate limits
|
|
258
|
+
|
|
259
|
+
All AT Protocol list endpoints are cursor-based. skycoll loops until no cursor is returned. On HTTP 429, it retries with exponential backoff (max 3 attempts).
|
|
260
|
+
|
|
261
|
+
## Running tests
|
|
262
|
+
|
|
263
|
+
```bash
|
|
264
|
+
pip install -e ".[dev]"
|
|
265
|
+
pytest tests/ -v
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
## License
|
|
269
|
+
|
|
270
|
+
MIT
|
skycoll-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "skycoll"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Bluesky/AT Protocol social-graph CLI — twecoll equivalent"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "j4ckxyz", email = "j4ckxyz@users.noreply.github.com"},
|
|
14
|
+
]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 4 - Beta",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Topic :: Utilities",
|
|
22
|
+
]
|
|
23
|
+
dependencies = [
|
|
24
|
+
"httpx>=0.27,<1",
|
|
25
|
+
"atproto>=0.0.50,<1",
|
|
26
|
+
"cryptography>=43.0,<45",
|
|
27
|
+
"cbor2>=5.6,<6",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
graph = ["python-igraph>=0.11"]
|
|
32
|
+
dev = ["pytest>=8.0", "pytest-httpx>=0.30"]
|
|
33
|
+
|
|
34
|
+
[project.urls]
|
|
35
|
+
Homepage = "https://github.com/j4ckxyz/skycoll"
|
|
36
|
+
Repository = "https://github.com/j4ckxyz/skycoll"
|
|
37
|
+
|
|
38
|
+
[project.scripts]
|
|
39
|
+
skycoll = "skycoll.__main__:main"
|
skycoll-0.1.0/setup.cfg
ADDED