caption-flow 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. {caption_flow-0.2.2/src/caption_flow.egg-info → caption_flow-0.2.4}/PKG-INFO +49 -180
  2. caption_flow-0.2.4/README.md +193 -0
  3. {caption_flow-0.2.2 → caption_flow-0.2.4}/pyproject.toml +3 -1
  4. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/cli.py +308 -0
  5. caption_flow-0.2.4/src/caption_flow/models.py +217 -0
  6. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/monitor.py +1 -1
  7. caption_flow-0.2.4/src/caption_flow/orchestrator.py +914 -0
  8. caption_flow-0.2.4/src/caption_flow/processors/__init__.py +11 -0
  9. caption_flow-0.2.4/src/caption_flow/processors/base.py +219 -0
  10. caption_flow-0.2.4/src/caption_flow/processors/huggingface.py +832 -0
  11. caption_flow-0.2.4/src/caption_flow/processors/local_filesystem.py +683 -0
  12. caption_flow-0.2.4/src/caption_flow/processors/webdataset.py +782 -0
  13. caption_flow-0.2.4/src/caption_flow/storage/__init__.py +1 -0
  14. caption_flow-0.2.4/src/caption_flow/storage/exporter.py +550 -0
  15. caption_flow-0.2.2/src/caption_flow/storage.py → caption_flow-0.2.4/src/caption_flow/storage/manager.py +489 -401
  16. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/utils/checkpoint_tracker.py +2 -2
  17. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/utils/chunk_tracker.py +73 -32
  18. caption_flow-0.2.4/src/caption_flow/utils/dataset_loader.py +222 -0
  19. caption_flow-0.2.4/src/caption_flow/utils/dataset_metadata_cache.py +67 -0
  20. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/utils/image_processor.py +1 -4
  21. caption_flow-0.2.4/src/caption_flow/utils/shard_processor.py +119 -0
  22. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/utils/shard_tracker.py +1 -5
  23. caption_flow-0.2.4/src/caption_flow/viewer.py +594 -0
  24. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/workers/base.py +3 -3
  25. caption_flow-0.2.4/src/caption_flow/workers/caption.py +945 -0
  26. {caption_flow-0.2.2 → caption_flow-0.2.4/src/caption_flow.egg-info}/PKG-INFO +49 -180
  27. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow.egg-info/SOURCES.txt +10 -1
  28. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow.egg-info/requires.txt +2 -0
  29. caption_flow-0.2.2/README.md +0 -326
  30. caption_flow-0.2.2/src/caption_flow/models.py +0 -84
  31. caption_flow-0.2.2/src/caption_flow/orchestrator.py +0 -2206
  32. caption_flow-0.2.2/src/caption_flow/utils/dataset_loader.py +0 -462
  33. caption_flow-0.2.2/src/caption_flow/utils/shard_processor.py +0 -379
  34. caption_flow-0.2.2/src/caption_flow/workers/caption.py +0 -1321
  35. {caption_flow-0.2.2 → caption_flow-0.2.4}/LICENSE +0 -0
  36. {caption_flow-0.2.2 → caption_flow-0.2.4}/setup.cfg +0 -0
  37. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/__init__.py +0 -0
  38. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/utils/__init__.py +0 -0
  39. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/utils/auth.py +0 -0
  40. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/utils/caption_utils.py +0 -0
  41. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/utils/certificates.py +0 -0
  42. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/utils/job_queue.py +0 -0
  43. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/utils/json_utils.py +0 -0
  44. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/utils/prompt_template.py +0 -0
  45. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/utils/vllm_config.py +0 -0
  46. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow/workers/data.py +0 -0
  47. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow.egg-info/dependency_links.txt +0 -0
  48. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow.egg-info/entry_points.txt +0 -0
  49. {caption_flow-0.2.2 → caption_flow-0.2.4}/src/caption_flow.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: caption-flow
- Version: 0.2.2
+ Version: 0.2.4
  Summary: Self-contained distributed community captioning system
  Author-email: bghira <bghira@users.github.com>
  License: MIT
@@ -33,6 +33,8 @@ Requires-Dist: arrow<2.0.0,>=1.3.0
  Requires-Dist: datasets<5.0.0,>=4.0.0
  Requires-Dist: boto3<2.0.0,>=1.40.11
  Requires-Dist: torchdata<0.12.0,>=0.11.0
+ Requires-Dist: textual<6.0.0,>=5.3.0
+ Requires-Dist: urwid<4.0.0,>=3.0.2
  Provides-Extra: dev
  Requires-Dist: pytest>=7.4.0; extra == "dev"
  Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
@@ -44,12 +46,13 @@ Dynamic: license-file

  # CaptionFlow

- scalable, fault-tolerant **vLLM-powered image captioning**. this "first round" focuses on a fast websocket orchestrator plus lightweight gpu workers that batch requests through vLLM.
+ scalable, fault-tolerant **vLLM-powered image captioning**.
+
+ a fast websocket-based orchestrator paired with lightweight gpu workers achieves exceptional performance for batched requests through vLLM.

  * **orchestrator**: hands out work in chunked shards, collects captions, checkpoints progress, and keeps simple stats.
  * **workers (vLLM)**: connect to the orchestrator, stream in image samples, batch them, and generate 1..N captions per image using prompts supplied by the orchestrator.
  * **config-driven**: all components read YAML config; flags can override.
- * **tui monitor (optional)**: a monitor client is wired into the CLI; ship a `monitor` module to enable it.

  > no conda. just `venv` + `pip`.

@@ -69,12 +72,14 @@ pip install -e . # installs the `caption-flow` command
  1. copy + edit the sample configs

  ```bash
- cp orchestrator.yaml my-orchestrator.yaml
- cp worker.yaml my-worker.yaml
- cp monitor.yaml my-monitor.yaml       # optional; requires a monitor module
+ cp examples/orchestrator/local_image_files.yaml my-orchestrator.yaml
+ cp examples/worker.yaml my-worker.yaml
+ cp examples/monitor.yaml my-monitor.yaml   # optional terminal interface
  ```

- set a unique shared token in both `my-orchestrator.yaml` and `my-worker.yaml` (see `auth.worker_tokens` in the orchestrator config and `worker.token` in the worker config). if you use private hugging face datasets/models, export `HUGGINGFACE_HUB_TOKEN` before starting workers.
+ set a unique shared token in both `my-orchestrator.yaml` and `my-worker.yaml` (see `auth.worker_tokens` in the orchestrator config and `worker.token` in the worker config).
+
+ if you use private hugging face datasets/models, export `HUGGINGFACE_HUB_TOKEN` before starting anything.

  2. start the orchestrator

@@ -90,6 +95,9 @@ caption-flow worker --config my-worker.yaml --gpu-id 0

  # your second GPU
  caption-flow worker --config my-worker.yaml --gpu-id 1
+
+ # on a remote host
+ caption-flow worker --config my-worker.yaml --server ws://your.hostname.address:8765
  ```

  4. (optional) start the monitor
@@ -98,12 +106,25 @@ caption-flow worker --config my-worker.yaml --gpu-id 1
  caption-flow monitor --config my-monitor.yaml
  ```

- 5. (optional) scan/fix chunks on disk if you had crashes
+ 5. export the data

  ```bash
- caption-flow scan_chunks --data-dir ./caption_data --checkpoint-dir ./checkpoints --fix
+ % caption-flow export --help
+ Usage: caption-flow export [OPTIONS]
+
+   Export caption data to various formats.
+
+ Options:
+   --format [jsonl|json|csv|txt|huggingface_hub|all]  Export format (default: jsonl)
  ```

+ * **jsonl**: creates a JSON Lines file at the specified `--output` path
+ * **csv**: exports CSV-compatible data columns to the `--output` path; metadata is incomplete
+ * **json**: creates a `.json` file for each sample inside the `--output` subdirectory containing **complete** metadata; useful for webdatasets
+ * **txt**: creates a `.txt` file for each sample inside the `--output` subdirectory containing ONLY captions
+ * **huggingface_hub**: creates a dataset on the Hugging Face Hub; pass `--private` and `--nsfw` where necessary
+ * **all**: creates all export formats in a specified `--output` directory
+
  ---

  ## how it’s wired
@@ -112,20 +133,11 @@ caption-flow scan_chunks --data-dir ./caption_data --checkpoint-dir ./checkpoint

  * **websocket server** (default `0.0.0.0:8765`) with three client roles: workers, data-feeders, and admin.
  * **dataset control**: the orchestrator centrally defines the dataset (`huggingface` or `local`) and version/name. it chunk-slices shards and assigns work.
+ * **data serving to remote workers**: local files can be captioned automatically by remote workers that don't have access to the same files.
  * **vLLM config broadcast**: model, tp size, dtype, max seq len, memory targets, batching, sampling params, and **inference prompts** are all pushed to workers; workers can apply many changes without a model reload.
  * **storage + checkpoints**: captions buffer to disk with periodic checkpoints. chunk state is tracked so restarts don’t double-work.
  * **auth**: token lists for `worker`, `monitor`, and `admin` roles.

- start flags you’ll likely use:
-
- ```text
- --config PATH             # yaml config for the orchestrator
- --port INT, --host STR    # bind controls
- --data-dir PATH           # overrides storage.data_dir
- --cert PATH, --key PATH   # enable TLS (or use --no-ssl for ws:// in dev)
- --vllm                    # use the vLLM-style orchestrator (webdataset/hf)
- ```
-
  ### vLLM worker

  * **one process per gpu**. select the device with `--gpu-id` (or `worker.gpu_id` in YAML).
@@ -133,27 +145,15 @@ start flags you’ll likely use:
  * **resilient**: detects disconnects, abandons the current chunk cleanly, clears queues, reconnects, and resumes.
  * **batched generate()**: images are resized down for consistent batching; each image can get multiple captions (one per prompt).

- start flags you’ll likely use:
-
- ```text
- --config PATH                 # yaml for the worker
- --server URL                  # ws(s)://host:port
- --token STR                   # must match an allowed worker token on the orchestrator
- --name STR                    # display name
- --batch-size INT              # override vLLM batch size
- --vllm                        # use the vLLM worker implementation
- --gpu-id INT                  # which gpu to use
- --precision STR, --model STR  # optional overrides for dtype/model
- --no-verify-ssl               # accept self-signed certs in dev
- ```
+ ---

- ### (optional) monitor
+ ## dataset formats

- * a CLI entry exists for a TUI monitor; wire in a `monitor` module to enable it. config lives in `monitor.yaml` or inside `orchestrator.yaml` under `monitor:`.
+ * huggingface hub or local URL-list datasets compatible with the `datasets` library
+ * webdataset shards containing full image data; these can also be hosted on the hub
+ * a local folder of images; the orchestrator will serve the data to workers

- ---
-
- ## configuration
+ ## configuration path

  ### config discovery order

@@ -167,98 +167,6 @@ for any component, the CLI looks for config in this order (first match wins):
  6. any `$XDG_CONFIG_DIRS` entries under `caption-flow/`
  7. `./examples/<component>.yaml` (fallback)

- ### orchestrator.yaml (highlights)
-
- ```yaml
- orchestrator:
-   host: 0.0.0.0
-   port: 8765
-   # ssl:
-   #   cert: /path/fullchain.pem
-   #   key: /path/privkey.pem
-
- dataset:
-   type: huggingface          # or "local"
-   path: <hf-dataset-or-local-path>
-   name: <logical-name>
-   version: "1.0"
-
- vllm:
-   model: Qwen/Qwen2.5-VL-3B-Instruct
-   tensor_parallel_size: 1
-   max_model_len: 16384
-   dtype: float16
-   gpu_memory_utilization: 0.92
-   enforce_eager: true
-   disable_mm_preprocessor_cache: true
-   limit_mm_per_prompt: { image: 1 }
-
-   batch_size: 8
-
-   sampling:
-     temperature: 0.7
-     top_p: 0.95
-     max_tokens: 256
-     repetition_penalty: 1.05
-     skip_special_tokens: true
-     stop: ["<|end|>", "<|endoftext|>", "<|im_end|>"]
-
-   inference_prompts:
-     - "describe this image in detail"
-     - "provide a comprehensive description of the visual content"
-     - "what are the key elements in this image?"
-
- storage:
-   data_dir: ./caption_data
-   checkpoint_dir: ./checkpoints
-   caption_buffer_size: 100
-   checkpoint_interval: 1000
-
- # chunking/queueing
- chunk_size: 1000
- chunks_per_request: 2
- chunk_buffer_multiplier: 3
- min_chunk_buffer: 10
-
- auth:
-   worker_tokens:
-     - { token: "example-worker-token", name: "Example Worker" }
-   monitor_tokens:
-     - { token: "letmein", name: "Default monitor" }
-   admin_tokens:
-     - { token: "admin-secret-2024", name: "Admin" }
- ```
-
- ### worker.yaml (highlights)
-
- ```yaml
- worker:
-   server: ws://localhost:8765   # use wss:// in prod
-   token: example-worker-token
-   name: local-gpu
-   gpu_id: 0
-   vllm: true
-
-   # local queues
-   readahead_size: 256
-   inference_queue_size: 128
- ```
-
- ### monitor.yaml (optional)
-
- ```yaml
- monitor:
-   server: ws://localhost:8765
-   token: letmein
-   refresh_rate: 1.0
-   show_contributors: true
-   show_quality_metrics: true
-   max_activity_items: 20
-   show_chunk_progress: true
-   show_worker_queues: true
-   show_throughput_graph: true
- ```
-
  ---

  ## tls / certificates
@@ -301,63 +209,24 @@ PRs welcome. keep it simple and fast.
  ```
  ┌─────────────┐     WebSocket      ┌─────────────┐
  │   Worker    │◄──────────────────►│             │
- └─────────────┘                    │             │     ┌──────────────┐
-                                    │ Orchestrator│────►│Arrow/Parquet │
- ┌─────────────┐                    │             │     │   Storage    │
- │   Worker    │◄──────────────────►│             │     └──────────────┘
- └─────────────┘                    └─────────────┘
+ │             │                    │             │     ┌──────────────┐
+ │             │◄───────────────────│             │────►│Arrow/Parquet │
+ └─────────────┘   HTTP (img data)  │ Orchestrator│     │   Storage    │
+                                    │             │     └──────────────┘
+ ┌─────────────┐                    │             │
+ │   Worker    │◄──────────────────►│             │
+ │             │                    │             │
+ │             │◄───────────────────│             │
+ └─────────────┘   HTTP (img data)  └─────────────┘

  ┌─────────────┐                           │
  │   Monitor   │◄──────────────────────────┘
  └─────────────┘
  ```

- ## Storage Schema
-
- ### captions.parquet
- - `job_id`: Unique job identifier
- - `dataset`: Dataset name
- - `shard`: Shard identifier
- - `item_key`: Item within shard
- - `caption`: Generated caption text
- - `contributor_id`: Worker who generated it
- - `timestamp`: Generation time
- - `quality_score`: Optional quality metric
-
- ### jobs.parquet
- - `job_id`: Unique identifier
- - `dataset`: Dataset name
- - `shard`: Shard identifier
- - `status`: pending/processing/completed/failed
- - `assigned_to`: Worker ID
- - `timestamp`: Status change time
-
- ### contributors.parquet
- - `contributor_id`: Unique identifier
- - `name`: Display name
- - `total_captions`: Lifetime count
- - `trust_level`: Quality tier (0-5)
-
- ## Development
-
- ```bash
- # Install with dev dependencies
- pip install -e ".[dev]"
-
- # Run tests
- pytest
-
- # Format code
- black src/
- ruff --fix src/
-
- # Type checking
- mypy src/
- ```
-
- ## Community Contribution
+ ## Community Clusters

- To contribute compute:
+ To contribute compute to a cluster:

  1. Install caption-flow: `pip install caption-flow`
  2. Get a worker token from the project maintainer
367
236
 
368
237
  ## License
369
238
 
370
- MIT
239
+ AGPLv3
@@ -0,0 +1,193 @@
+ # CaptionFlow
+
+ scalable, fault-tolerant **vLLM-powered image captioning**.
+
+ a fast websocket-based orchestrator paired with lightweight gpu workers achieves exceptional performance for batched requests through vLLM.
+
+ * **orchestrator**: hands out work in chunked shards, collects captions, checkpoints progress, and keeps simple stats.
+ * **workers (vLLM)**: connect to the orchestrator, stream in image samples, batch them, and generate 1..N captions per image using prompts supplied by the orchestrator.
+ * **config-driven**: all components read YAML config; flags can override.
+
+ > no conda. just `venv` + `pip`.
+
+ ---
+
+ ## install
+
+ ```bash
+ python -m venv .venv
+ source .venv/bin/activate   # windows: .venv\Scripts\activate
+ pip install --upgrade pip
+ pip install -e .            # installs the `caption-flow` command
+ ```
+
+ ## quickstart (single box)
+
+ 1. copy + edit the sample configs
+
+ ```bash
+ cp examples/orchestrator/local_image_files.yaml my-orchestrator.yaml
+ cp examples/worker.yaml my-worker.yaml
+ cp examples/monitor.yaml my-monitor.yaml   # optional terminal interface
+ ```
+
+ set a unique shared token in both `my-orchestrator.yaml` and `my-worker.yaml` (see `auth.worker_tokens` in the orchestrator config and `worker.token` in the worker config).
+
+ if you use private hugging face datasets/models, export `HUGGINGFACE_HUB_TOKEN` before starting anything.
+
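a minimal sketch of that setup (the generated value and the env var value are illustrative, not required syntax):

```bash
# mint a random shared secret, then paste it into both YAML files
# (auth.worker_tokens in my-orchestrator.yaml, worker.token in my-worker.yaml)
python -c "import secrets; print(secrets.token_hex(16))"

# for private hub datasets/models, set this before starting anything
export HUGGINGFACE_HUB_TOKEN=hf_xxxxxxxxxxxxxxxx
```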
+ 2. start the orchestrator
+
+ ```bash
+ caption-flow orchestrator --config my-orchestrator.yaml
+ ```
+
+ 3. start one or more vLLM workers
+
+ ```bash
+ # gpu 0 on the same host
+ caption-flow worker --config my-worker.yaml --gpu-id 0
+
+ # your second GPU
+ caption-flow worker --config my-worker.yaml --gpu-id 1
+
+ # on a remote host
+ caption-flow worker --config my-worker.yaml --server ws://your.hostname.address:8765
+ ```
+
+ 4. (optional) start the monitor
+
+ ```bash
+ caption-flow monitor --config my-monitor.yaml
+ ```
+
+ 5. export the data
+
+ ```bash
+ % caption-flow export --help
+ Usage: caption-flow export [OPTIONS]
+
+   Export caption data to various formats.
+
+ Options:
+   --format [jsonl|json|csv|txt|huggingface_hub|all]  Export format (default: jsonl)
+ ```
+
+ * **jsonl**: creates a JSON Lines file at the specified `--output` path
+ * **csv**: exports CSV-compatible data columns to the `--output` path; metadata is incomplete
+ * **json**: creates a `.json` file for each sample inside the `--output` subdirectory containing **complete** metadata; useful for webdatasets
+ * **txt**: creates a `.txt` file for each sample inside the `--output` subdirectory containing ONLY captions
+ * **huggingface_hub**: creates a dataset on the Hugging Face Hub; pass `--private` and `--nsfw` where necessary
+ * **all**: creates all export formats in a specified `--output` directory
+
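a concrete invocation, as a sketch (the output paths are illustrative; only `--format` and `--output` are documented above):

```bash
# default format: one JSON Lines record per caption
caption-flow export --format jsonl --output ./captions.jsonl

# every format at once, into one directory
caption-flow export --format all --output ./exports
```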
+ ---
+
+ ## how it’s wired
+
+ ### orchestrator
+
+ * **websocket server** (default `0.0.0.0:8765`) with three client roles: workers, data-feeders, and admin.
+ * **dataset control**: the orchestrator centrally defines the dataset (`huggingface` or `local`) and version/name. it chunk-slices shards and assigns work.
+ * **data serving to remote workers**: local files can be captioned automatically by remote workers that don't have access to the same files.
+ * **vLLM config broadcast**: model, tp size, dtype, max seq len, memory targets, batching, sampling params, and **inference prompts** are all pushed to workers; workers can apply many changes without a model reload.
+ * **storage + checkpoints**: captions buffer to disk with periodic checkpoints. chunk state is tracked so restarts don’t double-work.
+ * **auth**: token lists for `worker`, `monitor`, and `admin` roles.
+
+ ### vLLM worker
+
+ * **one process per gpu**. select the device with `--gpu-id` (or `worker.gpu_id` in YAML).
+ * **gets its marching orders** from the orchestrator: dataset info, model, prompts, batch size, and sampling.
+ * **resilient**: detects disconnects, abandons the current chunk cleanly, clears queues, reconnects, and resumes.
+ * **batched generate()**: images are resized down for consistent batching; each image can get multiple captions (one per prompt).
+
+ ---
+
+ ## dataset formats
+
+ * huggingface hub or local URL-list datasets compatible with the `datasets` library
+ * webdataset shards containing full image data; these can also be hosted on the hub
+ * a local folder of images; the orchestrator will serve the data to workers
+
+ ## configuration path
+
+ ### config discovery order
+
+ for any component, the CLI looks for config in this order (first match wins):
+
+ 1. `--config /path/to/file.yaml`
+ 2. `./<component>.yaml` (current directory)
+ 3. `~/.caption-flow/<component>.yaml`
+ 4. `$XDG_CONFIG_HOME/caption-flow/<component>.yaml`
+ 5. `/etc/caption-flow/<component>.yaml`
+ 6. any `$XDG_CONFIG_DIRS` entries under `caption-flow/`
+ 7. `./examples/<component>.yaml` (fallback)
+
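a sketch of what that order means for a worker in practice (the copy commands are illustrative):

```bash
caption-flow worker --config /path/to/file.yaml   # 1. an explicit flag always wins
cp my-worker.yaml ./worker.yaml                   # 2. otherwise the cwd copy is found
mkdir -p ~/.caption-flow
cp my-worker.yaml ~/.caption-flow/worker.yaml     # 3. otherwise the per-user copy
```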
+ ---
+
+ ## tls / certificates
+
+ use the built-in helpers during development:
+
+ ```bash
+ # self-signed certs for quick local testing
+ caption-flow generate_cert --self-signed --domain localhost --output-dir ./certs
+
+ # inspect any certificate file
+ caption-flow inspect_cert ./certs/fullchain.pem
+ ```
+
+ then point the orchestrator at the resulting cert/key (or run `--no-ssl` for dev-only ws\://).
+
+ ---
+
+ ## tips & notes
+
+ * **multi-gpu**: start one worker process per gpu (set `--gpu-id` or `worker.gpu_id`).
+ * **throughput**: tune `vllm.batch_size` in the orchestrator config (or override with `--batch-size` at worker start). higher isn’t always better; watch VRAM.
+ * **prompts**: add more strings under `vllm.inference_prompts` to get multiple captions per image; the worker returns only non-empty generations.
+ * **private HF**: if your dataset/model needs auth, export `HUGGINGFACE_HUB_TOKEN` before `caption-flow worker ...`.
+ * **self-signed ssl**: pass `--no-verify-ssl` to workers/monitors in dev.
+ * **recovery**: if you hard-crash mid-run, `caption-flow scan_chunks --fix` can reset abandoned chunks so the orchestrator can reissue them cleanly; see the full invocation below.
+
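the 0.2.2 README spelled the recovery command out in full; the flags below come from that older text, so confirm them against `caption-flow scan_chunks --help` on 0.2.4:

```bash
caption-flow scan_chunks --data-dir ./caption_data --checkpoint-dir ./checkpoints --fix
```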
+ ---
+
+ ## roadmap
+
+ * hot config reload via the admin websocket path.
+ * dedicated data-feeder clients (separate from gpu workers) that push samples into the orchestrator.
+ * richer monitor TUI.
+
+ PRs welcome. keep it simple and fast.
+
+ ## architecture
+
+ ```
+ ┌─────────────┐     WebSocket      ┌─────────────┐
+ │   Worker    │◄──────────────────►│             │
+ │             │                    │             │     ┌──────────────┐
+ │             │◄───────────────────│             │────►│Arrow/Parquet │
+ └─────────────┘   HTTP (img data)  │ Orchestrator│     │   Storage    │
+                                    │             │     └──────────────┘
+ ┌─────────────┐                    │             │
+ │   Worker    │◄──────────────────►│             │
+ │             │                    │             │
+ │             │◄───────────────────│             │
+ └─────────────┘   HTTP (img data)  └─────────────┘
+
+ ┌─────────────┐                           │
+ │   Monitor   │◄──────────────────────────┘
+ └─────────────┘
+ ```
+
+ ## Community Clusters
+
+ To contribute compute to a cluster:
+
+ 1. Install caption-flow: `pip install caption-flow`
+ 2. Get a worker token from the project maintainer
+ 3. Run: `caption-flow worker --server wss://project.domain.com:8765 --token YOUR_TOKEN`
+
+ Your contributions will be tracked and attributed in the final dataset!
+
+ ## License
+
+ AGPLv3
@@ -1,6 +1,6 @@
  [project]
  name = "caption-flow"
- version = "0.2.2"
+ version = "0.2.4"
  description = "Self-contained distributed community captioning system"
  readme = "README.md"
  requires-python = ">=3.10,<3.13"
@@ -38,6 +38,8 @@ dependencies = [
  "datasets (>=4.0.0,<5.0.0)",
  "boto3 (>=1.40.11,<2.0.0)",
  "torchdata (>=0.11.0,<0.12.0)",
+ "textual (>=5.3.0,<6.0.0)",
+ "urwid (>=3.0.2,<4.0.0)",
  ]

  [project.optional-dependencies]