PyPI - mostlyai-mock - Versions diffs - 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl - Mend

mostlyai-mock 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

mostlyai/mock/__init__.py CHANGED Viewed

@@ -15,4 +15,4 @@
 from mostlyai.mock.core import sample
 __all__ = ["sample"]
-__version__ = "0.1.8"  # Do not set this manually. Use poetry version [params].
+__version__ = "0.1.10"  # Do not set this manually. Use poetry version [params].

mostlyai/mock/core.py CHANGED Viewed

@@ -449,6 +449,9 @@ def _completion_with_retries(*args, **kwargs):
 async def _yield_rows_from_json_chunks_stream(response: litellm.CustomStreamWrapper) -> AsyncGenerator[dict]:
+    def buffer_to_row(buffer: list[str]) -> dict:
+        return json.loads("".join(buffer))
     # starting with dirty buffer is to handle the `{"rows": []}` case
     buffer = list("garbage")
     rows_json_started = False
@@ -472,12 +475,14 @@ async def _yield_rows_from_json_chunks_stream(response: litellm.CustomStreamWrap
                 # {"rows": [{"name": "Jo\}h\{n"}]}
                 #                        *     * *  <- any of these
                 try:
-                    row = json.loads("".join(buffer))
-                    yield row
+                    row = buffer_to_row(buffer)
+                except Exception:
+                    # in case of any error, silently drop the row
+                    continue
+                finally:
                     buffer = list()
                     in_row_json = False
-                except json.JSONDecodeError:
-                    continue
+                yield row
 async def _yield_rows_from_csv_chunks_stream(response: litellm.CustomStreamWrapper) -> AsyncGenerator[dict]:
@@ -493,7 +498,13 @@ async def _yield_rows_from_csv_chunks_stream(response: litellm.CustomStreamWrapp
         for char in delta:
             buffer.append(char)
             if char == "\n":
-                row = buffer_to_row(buffer)
+                try:
+                    row = buffer_to_row(buffer)
+                except Exception:
+                    # in case of any error, silently drop the row
+                    continue
+                finally:
+                    buffer = list()
                 if header is None:
                     # column1,column2,column3\n
                     #                        ** <- end of header row
@@ -502,11 +513,14 @@ async def _yield_rows_from_csv_chunks_stream(response: litellm.CustomStreamWrapp
                     # value_1,value_2,value_3\n
                     #                        ** <- end of data row
                     yield dict(zip(header, row))
-                buffer = list()
     if buffer:
         # last row might not finish with a newline, in which case the buffer would not be empty here
-        last_row = buffer_to_row(buffer)
-        yield dict(zip(header, last_row))
+        try:
+            last_row = buffer_to_row(buffer)
+            yield dict(zip(header, last_row))
+        except Exception:
+            # in case of any error, silently drop the row
+            pass
 def _create_structured_output_schema(
@@ -710,6 +724,7 @@ async def _create_table_rows_generator(
     batch_size = 20  # generate 20 root table rows at a time
     def supports_structured_outputs(model: str) -> bool:
+        model = model.removeprefix("litellm_proxy/")
         supported_params = litellm.get_supported_openai_params(model=model) or []
         return "response_format" in supported_params and litellm.supports_response_schema(model)
@@ -733,7 +748,7 @@ async def _create_table_rows_generator(
         context_data = data[context_table_name]
         batch_size = 1  # generate 1 sequence at a time
         sample_size = len(context_data)
-        context_batches = [data.iloc[i : i + batch_size] for i in range(0, len(data), batch_size)]
+        context_batches = [context_data.iloc[i : i + batch_size] for i in range(0, len(context_data), batch_size)]
     # derive non-context data (if more than one foreign key is present)
     non_context_data: dict[str, pd.DataFrame] = {}
@@ -1038,7 +1053,7 @@ def sample(
         api_key (str | None): The API key to use for the LLM. If not provided, LiteLLM will take it from the environment variables.
         temperature (float): The temperature to use for the LLM. Default is 1.0.
         top_p (float): The top-p value to use for the LLM. Default is 0.95.
-        n_workers (int): The number of concurrent workers making the LLM calls. Default is 10.
+        n_workers (int): The number of concurrent workers making the LLM calls. Default is 10. The value is clamped to the range [1, 10].
             If n_workers is 1, the generation of batches becomes sequential and certain features for better data consistency are enabled.
         return_type (Literal["auto", "dict"]): The format of the returned data. Default is "auto".
@@ -1228,6 +1243,8 @@ def sample(
     sample_size: dict[str, int] = _harmonize_sample_size(sample_size, config)
     primary_keys = {table_name: table_config.primary_key for table_name, table_config in config.root.items()}
+    n_workers = max(min(n_workers, 10), 1)
     execution_plan: list[str] = _build_execution_plan(config)
     data: dict[str, pd.DataFrame] = existing_data or {}

{mostlyai_mock-0.1.8.dist-info → mostlyai_mock-0.1.10.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mostlyai-mock
-Version: 0.1.8
+Version: 0.1.10
 Summary: Synthetic Mock Data
 Project-URL: homepage, https://github.com/mostly-ai/mostlyai-mock
 Project-URL: repository, https://github.com/mostly-ai/mostlyai-mock

mostlyai_mock-0.1.10.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,8 @@
+mostlyai/mock/__init__.py,sha256=DJjsLegi5Qn6G2Jow_zn5iT3yUl3EO-WloSPw87oIJ4,715
+mostlyai/mock/core.py,sha256=Cja8wPLVa9LrlOiahft-U-Bxsip5Uv7o8ZyfQh40rls,54593
+mostlyai/mock/mcp_server.py,sha256=MrVUrIsAZsFzjK1suwNl1fxS1ES-wpc-YSM8cS8Fqcw,2259
+mostlyai_mock-0.1.10.dist-info/METADATA,sha256=mHr9nwnKjXo9qIX_TD-dT0VH9YpbHVU3r8teMut5Z7Q,14100
+mostlyai_mock-0.1.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+mostlyai_mock-0.1.10.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
+mostlyai_mock-0.1.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mostlyai_mock-0.1.10.dist-info/RECORD,,

mostlyai_mock-0.1.8.dist-info/RECORD DELETED Viewed

@@ -1,8 +0,0 @@
-mostlyai/mock/__init__.py,sha256=ig9Ifl6JwFYl952LopbiuZuoqO6W5IN2bdeWpZ2vMjg,714
-mostlyai/mock/core.py,sha256=Ga78HucN2ltGCzmhzJ5p9AOKgMMemL23g6pldHi90-c,53968
-mostlyai/mock/mcp_server.py,sha256=MrVUrIsAZsFzjK1suwNl1fxS1ES-wpc-YSM8cS8Fqcw,2259
-mostlyai_mock-0.1.8.dist-info/METADATA,sha256=tNOj8tufD0VgePB9F0_uovNCCym-TaQCCBKhk2_etIs,14099
-mostlyai_mock-0.1.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-mostlyai_mock-0.1.8.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
-mostlyai_mock-0.1.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-mostlyai_mock-0.1.8.dist-info/RECORD,,

{mostlyai_mock-0.1.8.dist-info → mostlyai_mock-0.1.10.dist-info}/WHEEL RENAMED Viewed

File without changes

{mostlyai_mock-0.1.8.dist-info → mostlyai_mock-0.1.10.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{mostlyai_mock-0.1.8.dist-info → mostlyai_mock-0.1.10.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

mostlyai-mock 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

mostlyai-mock 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl