DeepFabric 4.11.0__py3-none-any.whl → 4.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deepfabric/tree.py CHANGED
@@ -83,6 +83,17 @@ class TreeConfig(BaseModel):
83
83
  default=None,
84
84
  description="Base URL for API endpoint (e.g., custom OpenAI-compatible servers)",
85
85
  )
86
+ max_concurrent: int = Field(
87
+ default=4,
88
+ ge=1,
89
+ le=20,
90
+ description="Maximum concurrent LLM calls during tree expansion (helps avoid rate limits)",
91
+ )
92
+ max_tokens: int = Field(
93
+ default=DEFAULT_MAX_TOKENS,
94
+ ge=1,
95
+ description="Maximum tokens for topic generation LLM calls",
96
+ )
86
97
 
87
98
 
88
99
  class TreeValidator:
@@ -148,6 +159,8 @@ class Tree(TopicModel):
148
159
  self.temperature = self.config.temperature
149
160
  self.provider = self.config.provider
150
161
  self.model_name = self.config.model_name
162
+ self.max_concurrent = self.config.max_concurrent
163
+ self.max_tokens = self.config.max_tokens
151
164
 
152
165
  # Initialize LLM client
153
166
  llm_kwargs = {}
@@ -300,7 +313,7 @@ class Tree(TopicModel):
300
313
  prompt=prompt,
301
314
  schema=TopicList,
302
315
  max_retries=MAX_RETRY_ATTEMPTS,
303
- max_tokens=DEFAULT_MAX_TOKENS,
316
+ max_tokens=self.max_tokens,
304
317
  temperature=self.temperature,
305
318
  )
306
319
 
@@ -311,19 +324,11 @@ class Tree(TopicModel):
311
324
  source="tree_generation",
312
325
  )
313
326
 
314
- # Extract and validate subtopics
327
+ # Extract subtopics — accept whatever the LLM returned
315
328
  subtopics = topic_response.subtopics
316
- if len(subtopics) >= num_subtopics:
317
- return subtopics[:num_subtopics]
318
-
319
- # If insufficient subtopics, pad with defaults
320
- while len(subtopics) < num_subtopics:
321
- subtopics.append(f"subtopic_{len(subtopics) + 1}_for_{node_path[-1]}")
322
-
323
329
  return subtopics[:num_subtopics]
324
330
 
325
331
  except Exception as e:
326
- # Log the failure and return default subtopics
327
332
  self.failed_generations.append(
328
333
  {
329
334
  "node_path": node_path,
@@ -331,9 +336,7 @@ class Tree(TopicModel):
331
336
  "timestamp": time.time(),
332
337
  }
333
338
  )
334
-
335
- # Generate default subtopics
336
- return [f"subtopic_{i + 1}_for_{node_path[-1]}" for i in range(num_subtopics)]
339
+ return []
337
340
 
338
341
  def _detect_domain(self, system_prompt: str, node_path: list[str]) -> str:
339
342
  """Detect the appropriate domain for prompt examples based on context."""
@@ -405,20 +408,32 @@ class Tree(TopicModel):
405
408
  yield {"event": "leaf_reached", "path": node_path}
406
409
  return
407
410
 
408
- async def _collect_child_events(child_subtopic: str) -> list[dict[str, Any]]:
409
- child_path = node_path + [child_subtopic]
410
- events: list[dict[str, Any]] = []
411
- async for child_event in self._build_subtree_generator(
412
- child_path, system_prompt, total_depth, n_child, current_depth + 1
413
- ):
414
- events.append(child_event)
415
- return events
411
+ event_queue: asyncio.Queue[dict[str, Any] | None] = asyncio.Queue()
412
+ semaphore = asyncio.Semaphore(self.max_concurrent)
413
+
414
+ async def _expand_child(child_subtopic: str) -> None:
415
+ async with semaphore:
416
+ child_path = node_path + [child_subtopic]
417
+ async for child_event in self._build_subtree_generator(
418
+ child_path, system_prompt, total_depth, n_child, current_depth + 1
419
+ ):
420
+ await event_queue.put(child_event)
421
+
422
+ tasks = [asyncio.create_task(_expand_child(s)) for s in subtopics]
423
+
424
+ async def _signal_done() -> None:
425
+ await asyncio.gather(*tasks)
426
+ await event_queue.put(None)
427
+
428
+ done_task = asyncio.create_task(_signal_done())
416
429
 
417
- tasks = [asyncio.create_task(_collect_child_events(subtopic)) for subtopic in subtopics]
430
+ while True:
431
+ event = await event_queue.get()
432
+ if event is None:
433
+ break
434
+ yield event
418
435
 
419
- for child_events in await asyncio.gather(*tasks):
420
- for child_event in child_events:
421
- yield child_event
436
+ await done_task
422
437
 
423
438
  def save(self, save_path: str) -> None:
424
439
  """Save the topic tree to a file.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: DeepFabric
3
- Version: 4.11.0
3
+ Version: 4.12.0
4
4
  Summary: Curate High Quality Datasets, Train, Evaluate and Ship
5
5
  Author-email: DeepFabric Team <oss@alwaysfurther.ai>
6
6
  License-File: LICENSE
@@ -3,10 +3,10 @@ deepfabric/__main__.py,sha256=Ojx6VFnAWx4wY52VehsWhW85IaEmPb8FP_sGvOk628g,113
3
3
  deepfabric/auth.py,sha256=7UEvk84XPx7wiXj0QfhIH_MJ5BlI7wQHodwgd4LPO80,12977
4
4
  deepfabric/builders.py,sha256=2-tJh0_yzGqcBn8qRy3ocd5qIhBciwLsGPiqP1PK75w,10821
5
5
  deepfabric/builders_agent.py,sha256=youDxc8tWJF0DbA3Al-6zcswx68gcI7z9Gz7hPsDTJo,28326
6
- deepfabric/cli.py,sha256=rVKzUcMUWTDQnpS_Si7FzrqdQbKAN-X2Sd1ks1__Wtw,70777
6
+ deepfabric/cli.py,sha256=cCJfGg6mhLgyldDekvq7OgmxBc4Ou74UT0MWLftJ0Sw,89114
7
7
  deepfabric/cloud_upload.py,sha256=Xu7xOycBnX0Jb7AhNBWEdEcAlxw5pXfKQQR4xcwD0BU,27819
8
- deepfabric/config.py,sha256=ru2eUEyHjrGdf822rn1eo179W8OxIC2YXbqUZnMRQ7Q,34006
9
- deepfabric/config_manager.py,sha256=c0N4Rb0P970khoiKgpN1ULDYV0bZEYIbec45cSfg92c,9334
8
+ deepfabric/config.py,sha256=ISJpay7CJvDoY48ydvsZJnc25-omwHr-mLrKYR9Tr3g,34235
9
+ deepfabric/config_manager.py,sha256=WH_usbD6JQ20K5xh9PzoT8oThAsnc2ywyfMy_06VuvM,9511
10
10
  deepfabric/constants.py,sha256=2DIJeIP2bqwvHjCXaZ0FKIx_8l7rf5yWSULN4Amd6_A,2846
11
11
  deepfabric/dataset.py,sha256=bZfx35A-dt0kMflgskU9Ge-NLVesq8xNKHsrxTnNn6Q,9740
12
12
  deepfabric/dataset_manager.py,sha256=3BC1fyECMxMvDkdx9POyUv1Lg98fFPAb3gAD8Z73Sn0,36151
@@ -14,7 +14,8 @@ deepfabric/error_codes.py,sha256=HGGWsahUTI8UG996C74X-XgNuaPX8RHo4gOidlaJql4,176
14
14
  deepfabric/exceptions.py,sha256=pEg4YFQaDEWtBoJaSkxsJJoBBp2-6EE3M7m5H7R6i_8,1586
15
15
  deepfabric/factory.py,sha256=OCqo3w-eiYNWvK_I_egDZuWj192kf18yD3SPj8rrPxU,753
16
16
  deepfabric/generator.py,sha256=9H_bDoGJYSxL9OSbEi7eUWk7RvhL1ofLaWvV8Z2AxWE,97072
17
- deepfabric/graph.py,sha256=mq__qIgMMm3YvRWTTz_ypaoX5g8iSVbdnpEEtDBe0xk,26675
17
+ deepfabric/graph.py,sha256=eOc5GsH0b7nojPztnxyHKvRPl3Q1MpC4Xi7AIx7Rcq0,32717
18
+ deepfabric/graph_pruner.py,sha256=bA8JhA70zI7dUX6Y6tiw6BtOMI3X3tdnwY1g6OTrMCw,3443
18
19
  deepfabric/hf_hub.py,sha256=hw2CWqZ3CzyAzMo552VPZKVWtuv-j0TQ2_gV5K0AUto,7670
19
20
  deepfabric/kaggle_hub.py,sha256=CXVO1Lv3IRhdO0bp9_IQr6nUs-v5jOWi5k4EwPkbJmw,7927
20
21
  deepfabric/loader.py,sha256=YNTGZZE-POjR0BIlx6WCT4bIzf0T4lW_fQl7ev9UFqE,18584
@@ -23,9 +24,10 @@ deepfabric/progress.py,sha256=_CWiNNBWLlNkBn-16NJnqb0DYQSzwveFaLLwOlqi-x0,10697
23
24
  deepfabric/prompts.py,sha256=pAFkw_0WAcYdzjqkHgywyLiSEdDlRdZwmyykUC307Gk,13872
24
25
  deepfabric/schemas.py,sha256=ebSwaXCu3lwshFGEhOhuAAUQA3deaYWXUcoRaeslqLc,37880
25
26
  deepfabric/stream_simulator.py,sha256=GzvAxWxHVsuTwgXlqwXNfrTUDn6sND2kJOoQuYg88FA,3028
26
- deepfabric/topic_manager.py,sha256=Q025z4Ri61nGrBlxdBEaSLcay_uJIo-Zr_rOoonj5gY,12109
27
+ deepfabric/topic_inspector.py,sha256=AmoVgPMbjgN_Xpd_i8Z8j7M6zrW5yAGCeGBrRF6YEeo,8435
28
+ deepfabric/topic_manager.py,sha256=5OLXUo7H98ukyHss8NL0S-mxySntlQky5fBbbxj2BA0,13837
27
29
  deepfabric/topic_model.py,sha256=GzxnlWhgYnAOgundlQrDxOHtaahC7bPX32nVhl-bWvM,2510
28
- deepfabric/tree.py,sha256=mMfHTvlBbe0x3cCLiiP6zdi-reBr-Tk54Hd0rrWt2J4,16578
30
+ deepfabric/tree.py,sha256=m3QlGjO1UFe_c4naBEAU96n6guOJaPTnDBn2IZRibB0,16870
29
31
  deepfabric/tui.py,sha256=-sXa-FC1bl-AwpXKAQTt4wxpdJEwhokJGk-V4kwa1c8,55612
30
32
  deepfabric/update_checker.py,sha256=AUa9iUdkGNzu7tWkQRxIlF19YRmKLetwxu-Ys2ONS8Y,5145
31
33
  deepfabric/utils.py,sha256=CZSijqXHuyqStlBLvfJLnnaNTB7c8sXXaMq0RgcwkoU,12544
@@ -69,8 +71,8 @@ deepfabric/training/api_key_prompt.py,sha256=pSIMX3eDGyV9x_r7MHE4TyIsIB2SqYb8gKC
69
71
  deepfabric/training/callback.py,sha256=5zdifbHA2PWILHl2cVFyO65aW7cGAQhcvDqm3s8_I0Q,13221
70
72
  deepfabric/training/dataset_utils.py,sha256=klx8DoawEwuMigBDP-RpMAfe7FvYxRbhj599MErxBr4,7313
71
73
  deepfabric/training/metrics_sender.py,sha256=ZCyvMv5hRu8XJnQYVGXJ9wh7HEMJ0l3Ktyi8_etOpZs,10833
72
- deepfabric-4.11.0.dist-info/METADATA,sha256=woNg0duKMI2zC-qTA2bFWyQVmViRMShb6r2l2s9uz0Y,20607
73
- deepfabric-4.11.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
74
- deepfabric-4.11.0.dist-info/entry_points.txt,sha256=zatevils13hfs8x29_vmUyivQ6rTtq7hE2RBusZw1Fo,50
75
- deepfabric-4.11.0.dist-info/licenses/LICENSE,sha256=OvJLTvm6YMMZmVsIiQYJI9zK9KoeeVbNZXPqQApENUM,11349
76
- deepfabric-4.11.0.dist-info/RECORD,,
74
+ deepfabric-4.12.0.dist-info/METADATA,sha256=sqyIyrdJi-4bgYy5S0cyum3u0F0SVqV-64X4tJjJres,20607
75
+ deepfabric-4.12.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
76
+ deepfabric-4.12.0.dist-info/entry_points.txt,sha256=zatevils13hfs8x29_vmUyivQ6rTtq7hE2RBusZw1Fo,50
77
+ deepfabric-4.12.0.dist-info/licenses/LICENSE,sha256=OvJLTvm6YMMZmVsIiQYJI9zK9KoeeVbNZXPqQApENUM,11349
78
+ deepfabric-4.12.0.dist-info/RECORD,,