mostlyai-mock 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mostlyai/mock/__init__.py +1 -1
- mostlyai/mock/core.py +27 -10
- {mostlyai_mock-0.1.8.dist-info → mostlyai_mock-0.1.10.dist-info}/METADATA +1 -1
- mostlyai_mock-0.1.10.dist-info/RECORD +8 -0
- mostlyai_mock-0.1.8.dist-info/RECORD +0 -8
- {mostlyai_mock-0.1.8.dist-info → mostlyai_mock-0.1.10.dist-info}/WHEEL +0 -0
- {mostlyai_mock-0.1.8.dist-info → mostlyai_mock-0.1.10.dist-info}/entry_points.txt +0 -0
- {mostlyai_mock-0.1.8.dist-info → mostlyai_mock-0.1.10.dist-info}/licenses/LICENSE +0 -0
mostlyai/mock/__init__.py
CHANGED
mostlyai/mock/core.py
CHANGED
@@ -449,6 +449,9 @@ def _completion_with_retries(*args, **kwargs):
|
|
449
449
|
|
450
450
|
|
451
451
|
async def _yield_rows_from_json_chunks_stream(response: litellm.CustomStreamWrapper) -> AsyncGenerator[dict]:
|
452
|
+
def buffer_to_row(buffer: list[str]) -> dict:
|
453
|
+
return json.loads("".join(buffer))
|
454
|
+
|
452
455
|
# starting with dirty buffer is to handle the `{"rows": []}` case
|
453
456
|
buffer = list("garbage")
|
454
457
|
rows_json_started = False
|
@@ -472,12 +475,14 @@ async def _yield_rows_from_json_chunks_stream(response: litellm.CustomStreamWrap
|
|
472
475
|
# {"rows": [{"name": "Jo\}h\{n"}]}
|
473
476
|
# * * * <- any of these
|
474
477
|
try:
|
475
|
-
row =
|
476
|
-
|
478
|
+
row = buffer_to_row(buffer)
|
479
|
+
except Exception:
|
480
|
+
# in case of any error, silently drop the row
|
481
|
+
continue
|
482
|
+
finally:
|
477
483
|
buffer = list()
|
478
484
|
in_row_json = False
|
479
|
-
|
480
|
-
continue
|
485
|
+
yield row
|
481
486
|
|
482
487
|
|
483
488
|
async def _yield_rows_from_csv_chunks_stream(response: litellm.CustomStreamWrapper) -> AsyncGenerator[dict]:
|
@@ -493,7 +498,13 @@ async def _yield_rows_from_csv_chunks_stream(response: litellm.CustomStreamWrapp
|
|
493
498
|
for char in delta:
|
494
499
|
buffer.append(char)
|
495
500
|
if char == "\n":
|
496
|
-
|
501
|
+
try:
|
502
|
+
row = buffer_to_row(buffer)
|
503
|
+
except Exception:
|
504
|
+
# in case of any error, silently drop the row
|
505
|
+
continue
|
506
|
+
finally:
|
507
|
+
buffer = list()
|
497
508
|
if header is None:
|
498
509
|
# column1,column2,column3\n
|
499
510
|
# ** <- end of header row
|
@@ -502,11 +513,14 @@ async def _yield_rows_from_csv_chunks_stream(response: litellm.CustomStreamWrapp
|
|
502
513
|
# value_1,value_2,value_3\n
|
503
514
|
# ** <- end of data row
|
504
515
|
yield dict(zip(header, row))
|
505
|
-
buffer = list()
|
506
516
|
if buffer:
|
507
517
|
# last row might not finish with a newline, in which case the buffer would not be empty here
|
508
|
-
|
509
|
-
|
518
|
+
try:
|
519
|
+
last_row = buffer_to_row(buffer)
|
520
|
+
yield dict(zip(header, last_row))
|
521
|
+
except Exception:
|
522
|
+
# in case of any error, silently drop the row
|
523
|
+
pass
|
510
524
|
|
511
525
|
|
512
526
|
def _create_structured_output_schema(
|
@@ -710,6 +724,7 @@ async def _create_table_rows_generator(
|
|
710
724
|
batch_size = 20 # generate 20 root table rows at a time
|
711
725
|
|
712
726
|
def supports_structured_outputs(model: str) -> bool:
|
727
|
+
model = model.removeprefix("litellm_proxy/")
|
713
728
|
supported_params = litellm.get_supported_openai_params(model=model) or []
|
714
729
|
return "response_format" in supported_params and litellm.supports_response_schema(model)
|
715
730
|
|
@@ -733,7 +748,7 @@ async def _create_table_rows_generator(
|
|
733
748
|
context_data = data[context_table_name]
|
734
749
|
batch_size = 1 # generate 1 sequence at a time
|
735
750
|
sample_size = len(context_data)
|
736
|
-
context_batches = [
|
751
|
+
context_batches = [context_data.iloc[i : i + batch_size] for i in range(0, len(context_data), batch_size)]
|
737
752
|
|
738
753
|
# derive non-context data (if more than one foreign key is present)
|
739
754
|
non_context_data: dict[str, pd.DataFrame] = {}
|
@@ -1038,7 +1053,7 @@ def sample(
|
|
1038
1053
|
api_key (str | None): The API key to use for the LLM. If not provided, LiteLLM will take it from the environment variables.
|
1039
1054
|
temperature (float): The temperature to use for the LLM. Default is 1.0.
|
1040
1055
|
top_p (float): The top-p value to use for the LLM. Default is 0.95.
|
1041
|
-
n_workers (int): The number of concurrent workers making the LLM calls. Default is 10.
|
1056
|
+
n_workers (int): The number of concurrent workers making the LLM calls. Default is 10. The value is clamped to the range [1, 10].
|
1042
1057
|
If n_workers is 1, the generation of batches becomes sequential and certain features for better data consistency are enabled.
|
1043
1058
|
return_type (Literal["auto", "dict"]): The format of the returned data. Default is "auto".
|
1044
1059
|
|
@@ -1228,6 +1243,8 @@ def sample(
|
|
1228
1243
|
sample_size: dict[str, int] = _harmonize_sample_size(sample_size, config)
|
1229
1244
|
primary_keys = {table_name: table_config.primary_key for table_name, table_config in config.root.items()}
|
1230
1245
|
|
1246
|
+
n_workers = max(min(n_workers, 10), 1)
|
1247
|
+
|
1231
1248
|
execution_plan: list[str] = _build_execution_plan(config)
|
1232
1249
|
|
1233
1250
|
data: dict[str, pd.DataFrame] = existing_data or {}
|
@@ -0,0 +1,8 @@
|
|
1
|
+
mostlyai/mock/__init__.py,sha256=DJjsLegi5Qn6G2Jow_zn5iT3yUl3EO-WloSPw87oIJ4,715
|
2
|
+
mostlyai/mock/core.py,sha256=Cja8wPLVa9LrlOiahft-U-Bxsip5Uv7o8ZyfQh40rls,54593
|
3
|
+
mostlyai/mock/mcp_server.py,sha256=MrVUrIsAZsFzjK1suwNl1fxS1ES-wpc-YSM8cS8Fqcw,2259
|
4
|
+
mostlyai_mock-0.1.10.dist-info/METADATA,sha256=mHr9nwnKjXo9qIX_TD-dT0VH9YpbHVU3r8teMut5Z7Q,14100
|
5
|
+
mostlyai_mock-0.1.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
6
|
+
mostlyai_mock-0.1.10.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
|
7
|
+
mostlyai_mock-0.1.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
8
|
+
mostlyai_mock-0.1.10.dist-info/RECORD,,
|
@@ -1,8 +0,0 @@
|
|
1
|
-
mostlyai/mock/__init__.py,sha256=ig9Ifl6JwFYl952LopbiuZuoqO6W5IN2bdeWpZ2vMjg,714
|
2
|
-
mostlyai/mock/core.py,sha256=Ga78HucN2ltGCzmhzJ5p9AOKgMMemL23g6pldHi90-c,53968
|
3
|
-
mostlyai/mock/mcp_server.py,sha256=MrVUrIsAZsFzjK1suwNl1fxS1ES-wpc-YSM8cS8Fqcw,2259
|
4
|
-
mostlyai_mock-0.1.8.dist-info/METADATA,sha256=tNOj8tufD0VgePB9F0_uovNCCym-TaQCCBKhk2_etIs,14099
|
5
|
-
mostlyai_mock-0.1.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
6
|
-
mostlyai_mock-0.1.8.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
|
7
|
-
mostlyai_mock-0.1.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
8
|
-
mostlyai_mock-0.1.8.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|