caption-flow 0.2.4__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
caption_flow/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """CaptionFlow - Distributed community captioning system."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "0.3.2"
4
4
 
5
5
  from .orchestrator import Orchestrator
6
6
  from .workers.data import DataWorker
@@ -66,7 +66,7 @@ class Orchestrator:
66
66
  self.processor.initialize(processor_config, self.storage)
67
67
 
68
68
  # Processing configuration
69
- self.units_per_request = config.get("units_per_request", 2)
69
+ self.chunks_per_request = config.get("chunks_per_request", 2)
70
70
 
71
71
  # Track connections
72
72
  self.workers: Dict[str, WebSocketServerProtocol] = {}
@@ -284,10 +284,10 @@ class Orchestrator:
284
284
  self.processor.initialize(processor_config)
285
285
  updated_sections.append("processor_config")
286
286
 
287
- # Update units per request
288
- if "units_per_request" in orchestrator_config:
289
- self.units_per_request = orchestrator_config["units_per_request"]
290
- updated_sections.append("units_per_request")
287
+ # Update chunks per request
288
+ if "chunks_per_request" in orchestrator_config:
289
+ self.chunks_per_request = orchestrator_config["chunks_per_request"]
290
+ updated_sections.append("chunks_per_request")
291
291
 
292
292
  # Update auth configuration
293
293
  if "auth" in orchestrator_config:
@@ -332,8 +332,8 @@ class Orchestrator:
332
332
  """Process message from worker."""
333
333
  msg_type = data.get("type")
334
334
 
335
- if msg_type == "request_work":
336
- count = data.get("count", self.units_per_request)
335
+ if msg_type == "get_work_units":
336
+ count = data.get("count", self.chunks_per_request)
337
337
  units = self.processor.get_work_units(count, worker_id)
338
338
  logger.debug(f"Assigning units: {[unit.chunk_id for unit in units]}")
339
339
 
@@ -352,7 +352,8 @@ class Orchestrator:
352
352
 
353
353
  logger.debug(f"Assigned {len(units)} work units to worker {worker_id}")
354
354
  else:
355
- await self.workers[worker_id].send(safe_json_dumps({"type": "no_work"}))
355
+ if worker_id in self.workers:
356
+ await self.workers[worker_id].send(safe_json_dumps({"type": "no_work"}))
356
357
 
357
358
  elif msg_type == "work_complete":
358
359
  unit_id = data["unit_id"]
@@ -375,7 +376,6 @@ class Orchestrator:
375
376
  """Process results submission from worker."""
376
377
  # Extract user from worker_id
377
378
  worker_user = worker_id.rsplit("_", 1)[0] if "_" in worker_id else worker_id
378
-
379
379
  # Create work result
380
380
  _job_id = data.get("job_id")
381
381
  job_id = JobId.from_str(_job_id)
@@ -14,6 +14,7 @@ class WorkUnit:
14
14
  unit_id: str # usually, but not always, the chunk id
15
15
  chunk_id: str # always the chunk id
16
16
  source_id: str # the shard name
17
+ unit_size: int # how many elements are in the workunit
17
18
  data: Dict[str, Any]
18
19
  metadata: Dict[str, Any] = field(default_factory=dict)
19
20
  priority: int = 0
@@ -44,6 +45,7 @@ class WorkAssignment:
44
45
  "unit_id": u.unit_id,
45
46
  "source_id": u.source_id,
46
47
  "chunk_id": u.chunk_id,
48
+ "unit_size": u.unit_size,
47
49
  "data": u.data,
48
50
  "metadata": u.metadata,
49
51
  "priority": u.priority,
@@ -62,6 +64,7 @@ class WorkAssignment:
62
64
  unit_id=u["unit_id"],
63
65
  chunk_id=u["chunk_id"],
64
66
  source_id=u["source_id"],
67
+ unit_size=u["unit_size"],
65
68
  data=u["data"],
66
69
  metadata=u.get("metadata", {}),
67
70
  priority=u.get("priority", 0),