PyPI - mostlyai-mock - Versions diffs - 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

mostlyai-mock 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

mostlyai/mock/__init__.py CHANGED Viewed

@@ -15,4 +15,4 @@
 from mostlyai.mock.core import sample
 __all__ = ["sample"]
-__version__ = "0.1.2"  # Do not set this manually. Use poetry version [params].
+__version__ = "0.1.4"  # Do not set this manually. Use poetry version [params].

mostlyai/mock/core.py CHANGED Viewed

@@ -25,6 +25,8 @@ import pandas as pd
 from pydantic import BaseModel, Field, RootModel, create_model, field_validator, model_validator
 from tqdm import tqdm
+litellm.suppress_debug_info = True
 SYSTEM_PROMPT = """
 You are a specialized mock data generator designed to create highly realistic, contextually appropriate data based on schema definitions.
@@ -624,24 +626,25 @@ def sample(
     return_type: Literal["auto", "dict"] = "auto",
 ) -> pd.DataFrame | dict[str, pd.DataFrame]:
     """
-    Generate mock data by prompting an LLM.
+    Generate mock data from scratch or enrich existing data by prompting an LLM.
     Args:
         tables (dict[str, dict]): The table specifications to generate mock data for. See examples for usage.
         sample_size (int | dict[str, int]): The number of rows to generate for each subject table.
             If a single integer is provided, the same number of rows will be generated for each subject table.
-            If a dictionary is provided, the number of rows to generate for each subject table can be specified
-            individually.
+            If a dictionary is provided, the number of rows to generate for each subject table can be specified individually.
             Default is 10. Ignored if existing_data is provided.
+            If a table has a foreign key, the sample size is determined by the corresponding foreign key prompt. If nothing is specified, a few rows per parent record are generated.
         existing_data (dict[str, pd.DataFrame] | None): Existing data to augment. If provided, the sample_size argument is ignored.
             Default is None.
-        model (str): The LiteLLM chat completion model to be used. Requires support for structured output / JSON mode.
+        model (str): The LiteLLM chat completion model to be used. Model needs to support structured output / JSON mode.
             Examples include:
-            - `openai/gpt-4.1-nano` (default)
-            - `openai/gpt-4.1-mini`
-            - `openai/gpt-4.1`
+            - `openai/gpt-4.1-nano` (default; fast, and smart)
+            - `openai/gpt-4.1-mini` (slower, but smarter)
+            - `openai/gpt-4.1` (slowest, but smartest)
             - `gemini/gemini-2.0-flash`
             - `gemini/gemini-2.5-flash-preview-04-17`
+            - `groq/gemma2-9b-it`
             - `groq/llama-3.3-70b-versatile`
             - `anthropic/claude-3-7-sonnet-latest`
             See https://docs.litellm.ai/docs/providers/ for more options.
@@ -654,7 +657,7 @@ def sample(
         - pd.DataFrame: A single DataFrame containing the generated mock data, if only one table is provided.
         - dict[str, pd.DataFrame]: A dictionary containing the generated mock data for each table, if multiple tables are provided.
-    Example of single table (without PK):
+    Example of generating mock data for a single table (without PK):
     ```python
     from mostlyai import mock
@@ -677,7 +680,7 @@ def sample(
     df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-4.1-nano")
     ```
-    Example of multiple tables (with PK/FK relationships):
+    Example of generating mock data for multiple tables (with PK/FK relationships):
     ```python
     from mostlyai import mock
@@ -688,7 +691,7 @@ def sample(
                 "customer_id": {"prompt": "the unique id of the customer", "dtype": "integer"},
                 "name": {"prompt": "first name and last name of the customer", "dtype": "string"},
             },
-            "primary_key": "customer_id",
+            "primary_key": "customer_id",  # single string; no composite keys allowed
         },
         "warehouses": {
             "prompt": "Warehouses of a hardware store",
@@ -744,7 +747,36 @@ def sample(
     df_items = data["items"]
     ```
-    Example of data augmentation:
+    Example of enriching a single dataframe:
+    ```python
+    from mostlyai import mock
+    import pandas as pd
+    tables = {
+        "patients": {
+            "prompt": "Patients of a hospital in Finland",
+            "columns": {
+                "age": {},
+                "gender": {},
+                "full_name": {"prompt": "first name and last name of the patient", "dtype": "string"},
+                "date_of_birth": {"prompt": "date of birth", "dtype": "date"},
+                "place_of_birth": {"prompt": "place of birth", "dtype": "string"},
+            },
+        },
+    }
+    existing_df = pd.DataFrame({
+        "age": [25, 30, 35, 40],
+        "gender": ["male", "male", "female", "female"],
+    })
+    enriched_df = mock.sample(
+        tables=tables,
+        existing_data={"patients": existing_df},
+        model="openai/gpt-4.1-nano"
+    )
+    enriched_df
+    ```
+    Example of enriching / augmenting an existing dataset:
     ```python
     from mostlyai import mock
     import pandas as pd

{mostlyai_mock-0.1.2.dist-info → mostlyai_mock-0.1.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mostlyai-mock
-Version: 0.1.2
+Version: 0.1.4
 Summary: LLM-generated Mock Data
 Project-URL: homepage, https://github.com/mostly-ai/mostlyai-mock
 Project-URL: repository, https://github.com/mostly-ai/mostlyai-mock

mostlyai_mock-0.1.4.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,8 @@
+mostlyai/mock/__init__.py,sha256=EvV_Tp6ExzQPq4apGq_8F25qw_paNTcQEC94nIVOEog,714
+mostlyai/mock/core.py,sha256=ubarMA3VUlXdjUsCXQK_mD_kWPkTMOYvLz9G4OughGk,36532
+mostlyai/mock/mcp_server.py,sha256=Vp0bWzE8wUyA6k4PHLa0TbkuI9s07E48xPrAUgf_5qU,1563
+mostlyai_mock-0.1.4.dist-info/METADATA,sha256=jibPe0pKcwqyPBoyc7H98LPd72vkGZBStdw_yMNVvJI,14161
+mostlyai_mock-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+mostlyai_mock-0.1.4.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
+mostlyai_mock-0.1.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mostlyai_mock-0.1.4.dist-info/RECORD,,

mostlyai_mock-0.1.2.dist-info/RECORD DELETED Viewed

@@ -1,8 +0,0 @@
-mostlyai/mock/__init__.py,sha256=eeJjZ-XPDr-e4iE44SBNNt_xYQKnT6OVm75Xr52CYWc,714
-mostlyai/mock/core.py,sha256=buDLbuCIGbNP91TtCnN-dg3wlHLtRcvzzlGQWm-7j8k,35183
-mostlyai/mock/mcp_server.py,sha256=Vp0bWzE8wUyA6k4PHLa0TbkuI9s07E48xPrAUgf_5qU,1563
-mostlyai_mock-0.1.2.dist-info/METADATA,sha256=pSjJ6D5ckyBdvpk_-5SjfF9-c6PXrEOjEc4oW5IE-g4,14161
-mostlyai_mock-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-mostlyai_mock-0.1.2.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
-mostlyai_mock-0.1.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-mostlyai_mock-0.1.2.dist-info/RECORD,,

{mostlyai_mock-0.1.2.dist-info → mostlyai_mock-0.1.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{mostlyai_mock-0.1.2.dist-info → mostlyai_mock-0.1.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{mostlyai_mock-0.1.2.dist-info → mostlyai_mock-0.1.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

mostlyai-mock 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

mostlyai-mock 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl