caption-flow 0.2.3__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {caption_flow-0.2.3/src/caption_flow.egg-info → caption_flow-0.2.4}/PKG-INFO +44 -177
  2. {caption_flow-0.2.3 → caption_flow-0.2.4}/README.md +41 -176
  3. {caption_flow-0.2.3 → caption_flow-0.2.4}/pyproject.toml +3 -1
  4. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/cli.py +307 -0
  5. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/models.py +26 -0
  6. caption_flow-0.2.4/src/caption_flow/storage/__init__.py +1 -0
  7. caption_flow-0.2.4/src/caption_flow/storage/exporter.py +550 -0
  8. caption_flow-0.2.3/src/caption_flow/storage.py → caption_flow-0.2.4/src/caption_flow/storage/manager.py +85 -1
  9. caption_flow-0.2.4/src/caption_flow/viewer.py +594 -0
  10. {caption_flow-0.2.3 → caption_flow-0.2.4/src/caption_flow.egg-info}/PKG-INFO +44 -177
  11. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow.egg-info/SOURCES.txt +4 -1
  12. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow.egg-info/requires.txt +2 -0
  13. {caption_flow-0.2.3 → caption_flow-0.2.4}/LICENSE +0 -0
  14. {caption_flow-0.2.3 → caption_flow-0.2.4}/setup.cfg +0 -0
  15. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/__init__.py +0 -0
  16. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/monitor.py +0 -0
  17. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/orchestrator.py +0 -0
  18. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/processors/__init__.py +0 -0
  19. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/processors/base.py +0 -0
  20. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/processors/huggingface.py +0 -0
  21. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/processors/local_filesystem.py +0 -0
  22. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/processors/webdataset.py +0 -0
  23. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/utils/__init__.py +0 -0
  24. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/utils/auth.py +0 -0
  25. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/utils/caption_utils.py +0 -0
  26. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/utils/certificates.py +0 -0
  27. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/utils/checkpoint_tracker.py +0 -0
  28. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/utils/chunk_tracker.py +0 -0
  29. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/utils/dataset_loader.py +0 -0
  30. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/utils/dataset_metadata_cache.py +0 -0
  31. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/utils/image_processor.py +0 -0
  32. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/utils/job_queue.py +0 -0
  33. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/utils/json_utils.py +0 -0
  34. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/utils/prompt_template.py +0 -0
  35. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/utils/shard_processor.py +0 -0
  36. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/utils/shard_tracker.py +0 -0
  37. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/utils/vllm_config.py +0 -0
  38. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/workers/base.py +0 -0
  39. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/workers/caption.py +0 -0
  40. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow/workers/data.py +0 -0
  41. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow.egg-info/dependency_links.txt +0 -0
  42. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow.egg-info/entry_points.txt +0 -0
  43. {caption_flow-0.2.3 → caption_flow-0.2.4}/src/caption_flow.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: caption-flow
- Version: 0.2.3
+ Version: 0.2.4
  Summary: Self-contained distributed community captioning system
  Author-email: bghira <bghira@users.github.com>
  License: MIT
@@ -33,6 +33,8 @@ Requires-Dist: arrow<2.0.0,>=1.3.0
  Requires-Dist: datasets<5.0.0,>=4.0.0
  Requires-Dist: boto3<2.0.0,>=1.40.11
  Requires-Dist: torchdata<0.12.0,>=0.11.0
+ Requires-Dist: textual<6.0.0,>=5.3.0
+ Requires-Dist: urwid<4.0.0,>=3.0.2
  Provides-Extra: dev
  Requires-Dist: pytest>=7.4.0; extra == "dev"
  Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
@@ -44,12 +46,13 @@ Dynamic: license-file

  # CaptionFlow

- scalable, fault-tolerant **vLLM-powered image captioning**. this "first round" focuses on a fast websocket orchestrator plus lightweight gpu workers that batch requests through vLLM.
+ scalable, fault-tolerant **vLLM-powered image captioning**.
+
+ a fast websocket-based orchestrator paired with lightweight gpu workers achieves exceptional performance for batched requests through vLLM.

  * **orchestrator**: hands out work in chunked shards, collects captions, checkpoints progress, and keeps simple stats.
  * **workers (vLLM)**: connect to the orchestrator, stream in image samples, batch them, and generate 1..N captions per image using prompts supplied by the orchestrator.
  * **config-driven**: all components read YAML config; flags can override.
- * **tui monitor (optional)**: a monitor client is wired into the CLI; ship a `monitor` module to enable it.

  > no conda. just `venv` + `pip`.

@@ -103,6 +106,25 @@ caption-flow worker --config my-worker.yaml --server ws://your.hostname.address:
  caption-flow monitor --config my-monitor.yaml
  ```

+ 5. export the data
+
+ ```bash
+ % caption-flow export --help
+ Usage: caption-flow export [OPTIONS]
+
+   Export caption data to various formats.
+
+ Options:
+   --format [jsonl|json|csv|txt|huggingface_hub|all]  Export format (default: jsonl)
+ ```
+
+ * **jsonl**: creates a JSON Lines file at the specified `--output` path
+ * **csv**: exports CSV-compatible data columns to the `--output` path (metadata is incomplete)
+ * **json**: creates a `.json` file for each sample inside the `--output` subdirectory containing **complete** metadata; useful for webdatasets
+ * **txt**: creates a `.txt` file for each sample inside the `--output` subdirectory containing ONLY captions
+ * **huggingface_hub**: creates a dataset on Hugging Face Hub, optionally marked `--private` and `--nsfw`
+ * **all**: creates all export formats in a specified `--output` directory
+
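the JSONL export is the default and the easiest to post-process. a minimal sketch of consuming it, assuming a hypothetical output file `captions.jsonl` and a `caption` field per record (inspect one line to confirm the real schema):

```python
import json

# stream the exported JSON Lines file one record at a time
with open("captions.jsonl", encoding="utf-8") as fh:
    for line in fh:
        record = json.loads(line)
        print(record.get("caption"))  # field name is an assumption
```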
  ---

  ## how it’s wired
@@ -111,20 +133,11 @@ caption-flow monitor --config my-monitor.yaml

  * **websocket server** (default `0.0.0.0:8765`) with three client roles: workers, data-feeders, and admin.
  * **dataset control**: the orchestrator centrally defines the dataset (`huggingface` or `local`) and version/name. it chunk-slices shards and assigns work.
+ * **data serving to remote workers**: local files can be captioned by remote workers that don't have access to the same files, automatically.
  * **vLLM config broadcast**: model, tp size, dtype, max seq len, memory targets, batching, sampling params, and **inference prompts** are all pushed to workers; workers can apply many changes without a model reload.
  * **storage + checkpoints**: captions buffer to disk with periodic checkpoints. chunk state is tracked so restarts don’t double-work.
  * **auth**: token lists for `worker`, `monitor`, and `admin` roles.

- start flags you’ll likely use:
-
- ```text
- --config PATH            # yaml config for the orchestrator
- --port INT, --host STR   # bind controls
- --data-dir PATH          # overrides storage.data_dir
- --cert PATH, --key PATH  # enable TLS (or use --no-ssl for ws:// in dev)
- --vllm                   # use the vLLM-style orchestrator (webdataset/hf)
- ```
-
  ### vLLM worker

  * **one process per gpu**. select the device with `--gpu-id` (or `worker.gpu_id` in YAML).
@@ -132,27 +145,15 @@ start flags you’ll likely use:
  * **resilient**: detects disconnects, abandons the current chunk cleanly, clears queues, reconnects, and resumes.
  * **batched generate()**: images are resized down for consistent batching; each image can get multiple captions (one per prompt).

- start flags you’ll likely use:
-
- ```text
- --config PATH                 # yaml for the worker
- --server URL                  # ws(s)://host:port
- --token STR                   # must match an allowed worker token on the orchestrator
- --name STR                    # display name
- --batch-size INT              # override vLLM batch size
- --vllm                        # use the vLLM worker implementation
- --gpu-id INT                  # which gpu to use
- --precision STR, --model STR  # optional overrides for dtype/model
- --no-verify-ssl               # accept self-signed certs in dev
- ```
-
- ### (optional) monitor
+ ---

- * a CLI entry exists for a TUI monitor; wire in a `monitor` module to enable it. config lives in `monitor.yaml` or inside `orchestrator.yaml` under `monitor:`.
+ ## dataset formats

- ---
+ * huggingface hub or local URL-list datasets compatible with the `datasets` library
+ * webdataset shards containing full image data; these can also be hosted on the hub
+ * a local folder of images; the orchestrator will serve the data to workers

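a minimal sketch of the first format above (URL-list datasets), assuming a hypothetical hub dataset id and a `url` column:

```python
from datasets import load_dataset

# the dataset id and column name below are placeholders
ds = load_dataset("user/url-list-dataset", split="train")
print(ds[0]["url"])  # the orchestrator chunk-slices rows like this and assigns them to workers
```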
- ## configuration
156
+ ## configuration path
156
157
 
157
158
  ### config discovery order
158
159
 
@@ -166,98 +167,6 @@ for any component, the CLI looks for config in this order (first match wins):
  6. any `$XDG_CONFIG_DIRS` entries under `caption-flow/`
  7. `./examples/<component>.yaml` (fallback)

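a minimal sketch of the first-match-wins lookup, covering only the tail of the list (steps 6 and 7 above); the helper name and component value are illustrative:

```python
import os
from pathlib import Path

def first_existing(candidates):
    """Return the first candidate path that exists (first match wins), else None."""
    return next((p for p in candidates if p.is_file()), None)

component = "orchestrator"  # or "worker" / "monitor"
tail = [
    *(Path(d) / "caption-flow" / f"{component}.yaml"
      for d in os.environ.get("XDG_CONFIG_DIRS", "").split(":") if d),
    Path("examples") / f"{component}.yaml",  # step 7 fallback
]
print(first_existing(tail))
```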
- ### orchestrator.yaml (highlights)
-
- ```yaml
- orchestrator:
-   host: 0.0.0.0
-   port: 8765
-   # ssl:
-   #   cert: /path/fullchain.pem
-   #   key: /path/privkey.pem
-
- dataset:
-   type: huggingface
-   path: <hf-dataset-or-local-path>
-   name: <logical-name>
-   version: "1.0"
-
- vllm:
-   model: Qwen/Qwen2.5-VL-3B-Instruct
-   tensor_parallel_size: 1
-   max_model_len: 16384
-   dtype: float16
-   gpu_memory_utilization: 0.92
-   enforce_eager: true
-   disable_mm_preprocessor_cache: true
-   limit_mm_per_prompt: { image: 1 }
-
-   batch_size: 8
-
-   sampling:
-     temperature: 0.7
-     top_p: 0.95
-     max_tokens: 256
-     repetition_penalty: 1.05
-     skip_special_tokens: true
-     stop: ["<|end|>", "<|endoftext|>", "<|im_end|>"]
-
-   inference_prompts:
-     - "describe this image in detail"
-     - "provide a comprehensive description of the visual content"
-     - "what are the key elements in this image?"
-
- storage:
-   data_dir: ./caption_data
-   checkpoint_dir: ./checkpoints
-   caption_buffer_size: 100
-   checkpoint_interval: 1000
-
- # chunking/queueing
- chunk_size: 1000
- chunks_per_request: 2
- chunk_buffer_multiplier: 3
- min_chunk_buffer: 10
-
- auth:
-   worker_tokens:
-     - { token: "example-worker-token", name: "Example Worker" }
-   monitor_tokens:
-     - { token: "letmein", name: "Default monitor" }
-   admin_tokens:
-     - { token: "admin-secret-2024", name: "Admin" }
- ```
-
- ### worker.yaml (highlights)
-
- ```yaml
- worker:
-   server: ws://localhost:8765   # use wss:// in prod
-   token: example-worker-token
-   name: local-gpu
-   gpu_id: 0
-   vllm: true
-
-   # local queues
-   readahead_size: 256
-   inference_queue_size: 128
- ```
-
- ### monitor.yaml (optional)
-
- ```yaml
- monitor:
-   server: ws://localhost:8765
-   token: letmein
-   refresh_rate: 1.0
-   show_contributors: true
-   show_quality_metrics: true
-   max_activity_items: 20
-   show_chunk_progress: true
-   show_worker_queues: true
-   show_throughput_graph: true
- ```
-
  ---

  ## tls / certificates
@@ -300,66 +209,24 @@ PRs welcome. keep it simple and fast.
  ```
  ┌─────────────┐     WebSocket      ┌─────────────┐
  │   Worker    │◄──────────────────►│             │
- └─────────────┘                    │             │     ┌──────────────┐
-                                    │ Orchestrator│────►│Arrow/Parquet │
- ┌─────────────┐                    │             │     │   Storage    │
- │   Worker    │◄──────────────────►│             │     └──────────────┘
- └─────────────┘                    └─────────────┘
+ │             │                    │             │     ┌──────────────┐
+ │             │◄───────────────────│             │────►│Arrow/Parquet │
+ └─────────────┘  HTTP (img data)   │ Orchestrator│     │   Storage    │
+                                    │             │     └──────────────┘
+ ┌─────────────┐                    │             │
+ │   Worker    │◄──────────────────►│             │
+ │             │                    │             │
+ │             │◄───────────────────│             │
+ └─────────────┘  HTTP (img data)   └─────────────┘

  ┌─────────────┐                           │
  │   Monitor   │◄──────────────────────────┘
  └─────────────┘
  ```

- ## Storage Schema
-
- ### captions.parquet
-
- - `job_id`: Unique job identifier
- - `dataset`: Dataset name
- - `shard`: Shard identifier
- - `item_key`: Item within shard
- - `caption`: Generated caption text
- - `contributor_id`: Worker who generated it
- - `timestamp`: Generation time
- - `quality_score`: Optional quality metric
-
- ### jobs.parquet
-
- - `job_id`: Unique identifier
- - `dataset`: Dataset name
- - `shard`: Shard identifier
- - `status`: pending/processing/completed/failed
- - `assigned_to`: Worker ID
- - `timestamp`: Status change time
-
- ### contributors.parquet
-
- - `contributor_id`: Unique identifier
- - `name`: Display name
- - `total_captions`: Lifetime count
- - `trust_level`: Quality tier (0-5)
-
- ## Development
-
- ```bash
- # Install with dev dependencies
- pip install -e ".[dev]"
-
- # Run tests
- pytest
-
- # Format code
- black src/
- ruff --fix src/
-
- # Type checking
- mypy src/
- ```
-
- ## Community Contribution
+ ## Community Clusters

- To contribute compute:
+ To contribute compute to a cluster:

  1. Install caption-flow: `pip install caption-flow`
  2. Get a worker token from the project maintainer
@@ -369,4 +236,4 @@ Your contributions will be tracked and attributed in the final dataset!

  ## License

- MIT
+ AGPLv3
@@ -1,11 +1,12 @@
  # CaptionFlow

- scalable, fault-tolerant **vLLM-powered image captioning**. this "first round" focuses on a fast websocket orchestrator plus lightweight gpu workers that batch requests through vLLM.
+ scalable, fault-tolerant **vLLM-powered image captioning**.
+
+ a fast websocket-based orchestrator paired with lightweight gpu workers achieves exceptional performance for batched requests through vLLM.

  * **orchestrator**: hands out work in chunked shards, collects captions, checkpoints progress, and keeps simple stats.
  * **workers (vLLM)**: connect to the orchestrator, stream in image samples, batch them, and generate 1..N captions per image using prompts supplied by the orchestrator.
  * **config-driven**: all components read YAML config; flags can override.
- * **tui monitor (optional)**: a monitor client is wired into the CLI; ship a `monitor` module to enable it.

  > no conda. just `venv` + `pip`.

@@ -59,6 +60,25 @@ caption-flow worker --config my-worker.yaml --server ws://your.hostname.address:
  caption-flow monitor --config my-monitor.yaml
  ```

+ 5. export the data
+
+ ```bash
+ % caption-flow export --help
+ Usage: caption-flow export [OPTIONS]
+
+   Export caption data to various formats.
+
+ Options:
+   --format [jsonl|json|csv|txt|huggingface_hub|all]  Export format (default: jsonl)
+ ```
+
+ * **jsonl**: creates a JSON Lines file at the specified `--output` path
+ * **csv**: exports CSV-compatible data columns to the `--output` path (metadata is incomplete)
+ * **json**: creates a `.json` file for each sample inside the `--output` subdirectory containing **complete** metadata; useful for webdatasets
+ * **txt**: creates a `.txt` file for each sample inside the `--output` subdirectory containing ONLY captions
+ * **huggingface_hub**: creates a dataset on Hugging Face Hub, optionally marked `--private` and `--nsfw`
+ * **all**: creates all export formats in a specified `--output` directory
+
  ---

  ## how it’s wired
@@ -67,20 +87,11 @@ caption-flow monitor --config my-monitor.yaml

  * **websocket server** (default `0.0.0.0:8765`) with three client roles: workers, data-feeders, and admin.
  * **dataset control**: the orchestrator centrally defines the dataset (`huggingface` or `local`) and version/name. it chunk-slices shards and assigns work.
+ * **data serving to remote workers**: local files can be captioned by remote workers that don't have access to the same files, automatically.
  * **vLLM config broadcast**: model, tp size, dtype, max seq len, memory targets, batching, sampling params, and **inference prompts** are all pushed to workers; workers can apply many changes without a model reload.
  * **storage + checkpoints**: captions buffer to disk with periodic checkpoints. chunk state is tracked so restarts don’t double-work.
  * **auth**: token lists for `worker`, `monitor`, and `admin` roles.

- start flags you’ll likely use:
-
- ```text
- --config PATH            # yaml config for the orchestrator
- --port INT, --host STR   # bind controls
- --data-dir PATH          # overrides storage.data_dir
- --cert PATH, --key PATH  # enable TLS (or use --no-ssl for ws:// in dev)
- --vllm                   # use the vLLM-style orchestrator (webdataset/hf)
- ```
-
  ### vLLM worker

  * **one process per gpu**. select the device with `--gpu-id` (or `worker.gpu_id` in YAML).
@@ -88,27 +99,15 @@ start flags you’ll likely use:
  * **resilient**: detects disconnects, abandons the current chunk cleanly, clears queues, reconnects, and resumes.
  * **batched generate()**: images are resized down for consistent batching; each image can get multiple captions (one per prompt).

- start flags you’ll likely use:
-
- ```text
- --config PATH                 # yaml for the worker
- --server URL                  # ws(s)://host:port
- --token STR                   # must match an allowed worker token on the orchestrator
- --name STR                    # display name
- --batch-size INT              # override vLLM batch size
- --vllm                        # use the vLLM worker implementation
- --gpu-id INT                  # which gpu to use
- --precision STR, --model STR  # optional overrides for dtype/model
- --no-verify-ssl               # accept self-signed certs in dev
- ```
-
- ### (optional) monitor
+ ---

- * a CLI entry exists for a TUI monitor; wire in a `monitor` module to enable it. config lives in `monitor.yaml` or inside `orchestrator.yaml` under `monitor:`.
+ ## dataset formats

- ---
+ * huggingface hub or local URL-list datasets compatible with the `datasets` library
+ * webdataset shards containing full image data; these can also be hosted on the hub
+ * a local folder of images; the orchestrator will serve the data to workers

- ## configuration
+ ## configuration path

  ### config discovery order

@@ -122,98 +121,6 @@ for any component, the CLI looks for config in this order (first match wins):
  6. any `$XDG_CONFIG_DIRS` entries under `caption-flow/`
  7. `./examples/<component>.yaml` (fallback)

- ### orchestrator.yaml (highlights)
-
- ```yaml
- orchestrator:
-   host: 0.0.0.0
-   port: 8765
-   # ssl:
-   #   cert: /path/fullchain.pem
-   #   key: /path/privkey.pem
-
- dataset:
-   type: huggingface
-   path: <hf-dataset-or-local-path>
-   name: <logical-name>
-   version: "1.0"
-
- vllm:
-   model: Qwen/Qwen2.5-VL-3B-Instruct
-   tensor_parallel_size: 1
-   max_model_len: 16384
-   dtype: float16
-   gpu_memory_utilization: 0.92
-   enforce_eager: true
-   disable_mm_preprocessor_cache: true
-   limit_mm_per_prompt: { image: 1 }
-
-   batch_size: 8
-
-   sampling:
-     temperature: 0.7
-     top_p: 0.95
-     max_tokens: 256
-     repetition_penalty: 1.05
-     skip_special_tokens: true
-     stop: ["<|end|>", "<|endoftext|>", "<|im_end|>"]
-
-   inference_prompts:
-     - "describe this image in detail"
-     - "provide a comprehensive description of the visual content"
-     - "what are the key elements in this image?"
-
- storage:
-   data_dir: ./caption_data
-   checkpoint_dir: ./checkpoints
-   caption_buffer_size: 100
-   checkpoint_interval: 1000
-
- # chunking/queueing
- chunk_size: 1000
- chunks_per_request: 2
- chunk_buffer_multiplier: 3
- min_chunk_buffer: 10
-
- auth:
-   worker_tokens:
-     - { token: "example-worker-token", name: "Example Worker" }
-   monitor_tokens:
-     - { token: "letmein", name: "Default monitor" }
-   admin_tokens:
-     - { token: "admin-secret-2024", name: "Admin" }
- ```
-
- ### worker.yaml (highlights)
-
- ```yaml
- worker:
-   server: ws://localhost:8765   # use wss:// in prod
-   token: example-worker-token
-   name: local-gpu
-   gpu_id: 0
-   vllm: true
-
-   # local queues
-   readahead_size: 256
-   inference_queue_size: 128
- ```
-
- ### monitor.yaml (optional)
-
- ```yaml
- monitor:
-   server: ws://localhost:8765
-   token: letmein
-   refresh_rate: 1.0
-   show_contributors: true
-   show_quality_metrics: true
-   max_activity_items: 20
-   show_chunk_progress: true
-   show_worker_queues: true
-   show_throughput_graph: true
- ```
-
  ---

  ## tls / certificates
@@ -256,66 +163,24 @@ PRs welcome. keep it simple and fast.
  ```
  ┌─────────────┐     WebSocket      ┌─────────────┐
  │   Worker    │◄──────────────────►│             │
- └─────────────┘                    │             │     ┌──────────────┐
-                                    │ Orchestrator│────►│Arrow/Parquet │
- ┌─────────────┐                    │             │     │   Storage    │
- │   Worker    │◄──────────────────►│             │     └──────────────┘
- └─────────────┘                    └─────────────┘
+ │             │                    │             │     ┌──────────────┐
+ │             │◄───────────────────│             │────►│Arrow/Parquet │
+ └─────────────┘  HTTP (img data)   │ Orchestrator│     │   Storage    │
+                                    │             │     └──────────────┘
+ ┌─────────────┐                    │             │
+ │   Worker    │◄──────────────────►│             │
+ │             │                    │             │
+ │             │◄───────────────────│             │
+ └─────────────┘  HTTP (img data)   └─────────────┘

  ┌─────────────┐                           │
  │   Monitor   │◄──────────────────────────┘
  └─────────────┘
  ```

- ## Storage Schema
-
- ### captions.parquet
-
- - `job_id`: Unique job identifier
- - `dataset`: Dataset name
- - `shard`: Shard identifier
- - `item_key`: Item within shard
- - `caption`: Generated caption text
- - `contributor_id`: Worker who generated it
- - `timestamp`: Generation time
- - `quality_score`: Optional quality metric
-
- ### jobs.parquet
-
- - `job_id`: Unique identifier
- - `dataset`: Dataset name
- - `shard`: Shard identifier
- - `status`: pending/processing/completed/failed
- - `assigned_to`: Worker ID
- - `timestamp`: Status change time
-
- ### contributors.parquet
-
- - `contributor_id`: Unique identifier
- - `name`: Display name
- - `total_captions`: Lifetime count
- - `trust_level`: Quality tier (0-5)
-
- ## Development
-
- ```bash
- # Install with dev dependencies
- pip install -e ".[dev]"
-
- # Run tests
- pytest
-
- # Format code
- black src/
- ruff --fix src/
-
- # Type checking
- mypy src/
- ```
-
- ## Community Contribution
+ ## Community Clusters

- To contribute compute:
+ To contribute compute to a cluster:

  1. Install caption-flow: `pip install caption-flow`
  2. Get a worker token from the project maintainer
@@ -325,4 +190,4 @@ Your contributions will be tracked and attributed in the final dataset!

  ## License

- MIT
+ AGPLv3
@@ -1,6 +1,6 @@
  [project]
  name = "caption-flow"
- version = "0.2.3"
+ version = "0.2.4"
  description = "Self-contained distributed community captioning system"
  readme = "README.md"
  requires-python = ">=3.10,<3.13"
@@ -38,6 +38,8 @@ dependencies = [
  "datasets (>=4.0.0,<5.0.0)",
  "boto3 (>=1.40.11,<2.0.0)",
  "torchdata (>=0.11.0,<0.12.0)",
+ "textual (>=5.3.0,<6.0.0)",
+ "urwid (>=3.0.2,<4.0.0)",
  ]

  [project.optional-dependencies]
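the two new runtime dependencies are TUI libraries, presumably backing the new `viewer.py` in the file list above (an inference, not stated in the diff). a quick check that an installed environment satisfies the new pins:

```python
from importlib.metadata import version

# the pins come from the diff above; this only prints what is installed
for pkg in ("textual", "urwid"):
    print(pkg, version(pkg))  # expect textual >=5.3.0,<6.0.0 and urwid >=3.0.2,<4.0.0
```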