mostlyai-mock 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mostlyai/mock/__init__.py CHANGED
@@ -15,4 +15,4 @@
15
15
  from mostlyai.mock.core import sample
16
16
 
17
17
  __all__ = ["sample"]
18
- __version__ = "0.1.8" # Do not set this manually. Use poetry version [params].
18
+ __version__ = "0.1.10" # Do not set this manually. Use poetry version [params].
mostlyai/mock/core.py CHANGED
@@ -449,6 +449,9 @@ def _completion_with_retries(*args, **kwargs):
449
449
 
450
450
 
451
451
  async def _yield_rows_from_json_chunks_stream(response: litellm.CustomStreamWrapper) -> AsyncGenerator[dict]:
452
+ def buffer_to_row(buffer: list[str]) -> dict:
453
+ return json.loads("".join(buffer))
454
+
452
455
  # starting with dirty buffer is to handle the `{"rows": []}` case
453
456
  buffer = list("garbage")
454
457
  rows_json_started = False
@@ -472,12 +475,14 @@ async def _yield_rows_from_json_chunks_stream(response: litellm.CustomStreamWrap
472
475
  # {"rows": [{"name": "Jo\}h\{n"}]}
473
476
  # * * * <- any of these
474
477
  try:
475
- row = json.loads("".join(buffer))
476
- yield row
478
+ row = buffer_to_row(buffer)
479
+ except Exception:
480
+ # in case of any error, silently drop the row
481
+ continue
482
+ finally:
477
483
  buffer = list()
478
484
  in_row_json = False
479
- except json.JSONDecodeError:
480
- continue
485
+ yield row
481
486
 
482
487
 
483
488
  async def _yield_rows_from_csv_chunks_stream(response: litellm.CustomStreamWrapper) -> AsyncGenerator[dict]:
@@ -493,7 +498,13 @@ async def _yield_rows_from_csv_chunks_stream(response: litellm.CustomStreamWrapp
493
498
  for char in delta:
494
499
  buffer.append(char)
495
500
  if char == "\n":
496
- row = buffer_to_row(buffer)
501
+ try:
502
+ row = buffer_to_row(buffer)
503
+ except Exception:
504
+ # in case of any error, silently drop the row
505
+ continue
506
+ finally:
507
+ buffer = list()
497
508
  if header is None:
498
509
  # column1,column2,column3\n
499
510
  # ** <- end of header row
@@ -502,11 +513,14 @@ async def _yield_rows_from_csv_chunks_stream(response: litellm.CustomStreamWrapp
502
513
  # value_1,value_2,value_3\n
503
514
  # ** <- end of data row
504
515
  yield dict(zip(header, row))
505
- buffer = list()
506
516
  if buffer:
507
517
  # last row might not finish with a newline, in which case the buffer would not be empty here
508
- last_row = buffer_to_row(buffer)
509
- yield dict(zip(header, last_row))
518
+ try:
519
+ last_row = buffer_to_row(buffer)
520
+ yield dict(zip(header, last_row))
521
+ except Exception:
522
+ # in case of any error, silently drop the row
523
+ pass
510
524
 
511
525
 
512
526
  def _create_structured_output_schema(
@@ -710,6 +724,7 @@ async def _create_table_rows_generator(
710
724
  batch_size = 20 # generate 20 root table rows at a time
711
725
 
712
726
  def supports_structured_outputs(model: str) -> bool:
727
+ model = model.removeprefix("litellm_proxy/")
713
728
  supported_params = litellm.get_supported_openai_params(model=model) or []
714
729
  return "response_format" in supported_params and litellm.supports_response_schema(model)
715
730
 
@@ -733,7 +748,7 @@ async def _create_table_rows_generator(
733
748
  context_data = data[context_table_name]
734
749
  batch_size = 1 # generate 1 sequence at a time
735
750
  sample_size = len(context_data)
736
- context_batches = [data.iloc[i : i + batch_size] for i in range(0, len(data), batch_size)]
751
+ context_batches = [context_data.iloc[i : i + batch_size] for i in range(0, len(context_data), batch_size)]
737
752
 
738
753
  # derive non-context data (if more than one foreign key is present)
739
754
  non_context_data: dict[str, pd.DataFrame] = {}
@@ -1038,7 +1053,7 @@ def sample(
1038
1053
  api_key (str | None): The API key to use for the LLM. If not provided, LiteLLM will take it from the environment variables.
1039
1054
  temperature (float): The temperature to use for the LLM. Default is 1.0.
1040
1055
  top_p (float): The top-p value to use for the LLM. Default is 0.95.
1041
- n_workers (int): The number of concurrent workers making the LLM calls. Default is 10.
1056
+ n_workers (int): The number of concurrent workers making the LLM calls. Default is 10. The value is clamped to the range [1, 10].
1042
1057
  If n_workers is 1, the generation of batches becomes sequential and certain features for better data consistency are enabled.
1043
1058
  return_type (Literal["auto", "dict"]): The format of the returned data. Default is "auto".
1044
1059
 
@@ -1228,6 +1243,8 @@ def sample(
1228
1243
  sample_size: dict[str, int] = _harmonize_sample_size(sample_size, config)
1229
1244
  primary_keys = {table_name: table_config.primary_key for table_name, table_config in config.root.items()}
1230
1245
 
1246
+ n_workers = max(min(n_workers, 10), 1)
1247
+
1231
1248
  execution_plan: list[str] = _build_execution_plan(config)
1232
1249
 
1233
1250
  data: dict[str, pd.DataFrame] = existing_data or {}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mostlyai-mock
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: Synthetic Mock Data
5
5
  Project-URL: homepage, https://github.com/mostly-ai/mostlyai-mock
6
6
  Project-URL: repository, https://github.com/mostly-ai/mostlyai-mock
@@ -0,0 +1,8 @@
1
+ mostlyai/mock/__init__.py,sha256=DJjsLegi5Qn6G2Jow_zn5iT3yUl3EO-WloSPw87oIJ4,715
2
+ mostlyai/mock/core.py,sha256=Cja8wPLVa9LrlOiahft-U-Bxsip5Uv7o8ZyfQh40rls,54593
3
+ mostlyai/mock/mcp_server.py,sha256=MrVUrIsAZsFzjK1suwNl1fxS1ES-wpc-YSM8cS8Fqcw,2259
4
+ mostlyai_mock-0.1.10.dist-info/METADATA,sha256=mHr9nwnKjXo9qIX_TD-dT0VH9YpbHVU3r8teMut5Z7Q,14100
5
+ mostlyai_mock-0.1.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
+ mostlyai_mock-0.1.10.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
7
+ mostlyai_mock-0.1.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
+ mostlyai_mock-0.1.10.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- mostlyai/mock/__init__.py,sha256=ig9Ifl6JwFYl952LopbiuZuoqO6W5IN2bdeWpZ2vMjg,714
2
- mostlyai/mock/core.py,sha256=Ga78HucN2ltGCzmhzJ5p9AOKgMMemL23g6pldHi90-c,53968
3
- mostlyai/mock/mcp_server.py,sha256=MrVUrIsAZsFzjK1suwNl1fxS1ES-wpc-YSM8cS8Fqcw,2259
4
- mostlyai_mock-0.1.8.dist-info/METADATA,sha256=tNOj8tufD0VgePB9F0_uovNCCym-TaQCCBKhk2_etIs,14099
5
- mostlyai_mock-0.1.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
- mostlyai_mock-0.1.8.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
7
- mostlyai_mock-0.1.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
- mostlyai_mock-0.1.8.dist-info/RECORD,,