mostlyai-mock 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl
- mostlyai/mock/__init__.py +1 -1
- mostlyai/mock/core.py +37 -20
- mostlyai/mock/mcp_server.py +1 -1
- {mostlyai_mock-0.1.14.dist-info → mostlyai_mock-0.1.16.dist-info}/METADATA +23 -20
- mostlyai_mock-0.1.16.dist-info/RECORD +8 -0
- mostlyai_mock-0.1.14.dist-info/RECORD +0 -8
- {mostlyai_mock-0.1.14.dist-info → mostlyai_mock-0.1.16.dist-info}/WHEEL +0 -0
- {mostlyai_mock-0.1.14.dist-info → mostlyai_mock-0.1.16.dist-info}/entry_points.txt +0 -0
- {mostlyai_mock-0.1.14.dist-info → mostlyai_mock-0.1.16.dist-info}/licenses/LICENSE +0 -0
mostlyai/mock/__init__.py
CHANGED
mostlyai/mock/core.py
CHANGED
@@ -40,10 +40,10 @@ class LLMOutputFormat(str, Enum):
 
 
 class LLMConfig(BaseModel):
-    model: str
-    api_key: str | None
-    temperature: float
-    top_p: float
+    model: str
+    api_key: str | None
+    temperature: float
+    top_p: float
 
 
 class MockConfig(RootModel[dict[str, "TableConfig"]]):

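For context, these four fields are the knobs the generator later hands to LiteLLM. A minimal sketch of how such a config plugs into completion kwargs, under stated assumptions: the plumbing names here are mine, not the package's, and the default values mirror the `_sample_common`/`_asample` signatures shown further down in this diff.

```python
from pydantic import BaseModel

class LLMConfig(BaseModel):
    model: str
    api_key: str | None
    temperature: float
    top_p: float

# Defaults as they appear in the _sample_common/_asample signatures below.
config = LLMConfig(model="openai/gpt-5-nano", api_key=None, temperature=1.0, top_p=0.95)

# Hypothetical plumbing: the config fields become LiteLLM completion kwargs.
litellm_kwargs = {
    "model": config.model,
    "api_key": config.api_key,
    "temperature": config.temperature,
    "top_p": config.top_p,
}
```
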
@@ -270,8 +270,7 @@ async def _sample_table(
 
 
 def _create_system_prompt(llm_output_format: LLMOutputFormat) -> str:
-    return f"""
-You are a specialized data generator designed to create highly realistic, contextually appropriate data based on schema definitions.
+    return f"""You are a specialized data generator designed to create highly realistic, contextually appropriate data based on schema definitions.
 
 Your task is to:
 
@@ -289,8 +288,7 @@ appropriate content. For dates and timestamps, ensure logical chronology. Always
 across tables.
 
 When enriching existing data, carefully analyze the patterns and relationships in the existing columns \
-to generate compatible and realistic values for the missing columns.
-"""
+to generate compatible and realistic values for the missing columns."""
 
 
 def _create_table_prompt(

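Both prompt hunks are whitespace fixes: moving the text onto the same line as the opening `f"""`, and the closing `"""` onto the last text line, strips a leading and a trailing newline from the rendered system prompt. A quick self-contained illustration of the difference:

```python
# Triple-quoted string whose quotes sit on their own lines.
before = """
You are a specialized data generator.
"""
# Same text, with the quotes hugging the content (as in the new code).
after = """You are a specialized data generator."""

assert before == "\nYou are a specialized data generator.\n"
assert after == "You are a specialized data generator."
assert before.strip() == after  # only the surrounding newlines differ
```
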
@@ -647,6 +645,24 @@ async def _worker(
             "stream": True,
         }
 
+        # support for openai reasoning models
+        model_only = llm_config.model.split("/")[-1] if "/" in llm_config.model else llm_config.model
+        reasoning_effort = (
+            "low"
+            if (model_only.startswith("o") and (model_only[1:].isdigit() or model_only[1:].split("-")[0].isdigit()))
+            else "minimal"
+            if (
+                model_only.startswith("gpt-")
+                and model_only.split("-")[1].isdigit()
+                and int(model_only.split("-")[1]) >= 5
+            )
+            else None
+        )
+
+        if reasoning_effort:
+            litellm_kwargs.pop("top_p")
+            litellm_kwargs["reasoning_effort"] = reasoning_effort
+
         # construct messages
         system_prompt = _create_system_prompt(llm_output_format)
         user_prompt = _create_table_prompt(

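Restated as a standalone function for clarity (the function name and asserts are mine; the logic mirrors the added lines): o-series models such as `o3` or `o1-mini` get `reasoning_effort="low"`, `gpt-5`-family models get `"minimal"`, and everything else is left alone. When an effort level is set, the worker also drops `top_p`, which these reasoning models typically reject.

```python
def reasoning_effort_for(model: str) -> str | None:
    # Strip a provider prefix like "openai/".
    name = model.split("/")[-1] if "/" in model else model
    # o-series: "o3", "o1-mini", "o4-mini", ...
    if name.startswith("o") and (name[1:].isdigit() or name[1:].split("-")[0].isdigit()):
        return "low"
    # gpt-N with N >= 5: "gpt-5", "gpt-5-nano", ... ("gpt-4.1" fails the isdigit check)
    if name.startswith("gpt-") and name.split("-")[1].isdigit() and int(name.split("-")[1]) >= 5:
        return "minimal"
    return None

assert reasoning_effort_for("openai/o3-mini") == "low"
assert reasoning_effort_for("openai/gpt-5-nano") == "minimal"
assert reasoning_effort_for("openai/gpt-4.1") is None
assert reasoning_effort_for("gemini/gemini-2.0-flash") is None
```
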
@@ -715,7 +731,7 @@ async def _worker(
         if do_repeat_task:
             # allow 10 retries across all workers before propagating the exception to the orchestrator
             await retry_queue.put(1)
-            if retry_queue.qsize()
+            if retry_queue.qsize() <= 10:
                 # put task back to the front of the batch queue
                 await batch_queue.put((batch_idx, task))
             else:

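The pattern here is a global retry budget shared across workers, using an `asyncio.Queue` as a counter: every failure puts one token, and once more than 10 tokens have accumulated the exception propagates to the orchestrator instead of being retried. A simplified sketch with assumed names (the real worker has more context around this branch):

```python
import asyncio

async def handle_failure(
    retry_queue: asyncio.Queue,  # shared by all workers: one token per failed attempt
    batch_queue: asyncio.Queue,  # pending work items
    batch_idx: int,
    task: object,
    exc: Exception,
) -> None:
    await retry_queue.put(1)  # record this failure
    if retry_queue.qsize() <= 10:  # global budget: at most 10 retries in total
        await batch_queue.put((batch_idx, task))  # re-queue the task for another attempt
    else:
        raise exc  # budget exhausted: let the orchestrator see the error
```
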
@@ -1118,7 +1134,7 @@ async def _sample_common(
     tables: dict[str, dict],
     sample_size: int | dict[str, int] = 4,
     existing_data: dict[str, pd.DataFrame] | None = None,
-    model: str = "openai/gpt-
+    model: str = "openai/gpt-5-nano",
     api_key: str | None = None,
     temperature: float = 1.0,
     top_p: float = 0.95,

@@ -1166,7 +1182,7 @@ def sample(
     tables: dict[str, dict],
     sample_size: int | dict[str, int] = 4,
     existing_data: dict[str, pd.DataFrame] | None = None,
-    model: str = "openai/gpt-
+    model: str = "openai/gpt-5-nano",
    api_key: str | None = None,
    temperature: float = 1.0,
    top_p: float = 0.95,

@@ -1181,11 +1197,12 @@ def sample(
     or the enrichment of existing datasets with new, context-aware columns.
 
     It is particularly useful for quickly simulating production-like datasets for testing or prototyping purposes.
-    It is advised to limit mocking to small datasets for performance reasons (rows * cols <
+    It is advised to limit mocking to small datasets for performance reasons (rows * cols < 1000).
     It might take a couple of minutes for bigger datasets.
 
     Args:
         tables (dict[str, dict]): The table specifications to generate mock data for. See examples for usage.
+            Note: Avoid using double quotes (`"`) and other special characters in column names.
         sample_size (int | dict[str, int]): The number of rows to generate for each subject table.
             If a single integer is provided, the same number of rows will be generated for each subject table.
             If a dictionary is provided, the number of rows to generate for each subject table can be specified individually.

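The two `sample_size` forms described in that docstring, in use. This is a hedged sketch: the table spec is shortened, and the exact column-spec keys follow the package's README examples rather than anything shown in this hunk; running it requires an `OPENAI_API_KEY`.

```python
from mostlyai import mock

tables = {
    "guests": {
        "prompt": "Guests of a hotel",
        "columns": {
            "name": {"prompt": "first and last name", "dtype": "string"},
        },
    }
}

df_a = mock.sample(tables=tables, sample_size=4)              # one size for every subject table
df_b = mock.sample(tables=tables, sample_size={"guests": 8})  # per-table sizes via a dict
```
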
@@ -1195,9 +1212,9 @@ def sample(
             Default is None.
         model (str): The LiteLLM chat completion model to be used.
             Examples include:
-            - `openai/gpt-
-            - `openai/gpt-
-            - `openai/gpt-
+            - `openai/gpt-5-nano` (default; fast, and smart)
+            - `openai/gpt-5-mini` (slower, but smarter)
+            - `openai/gpt-5` (slowest, but smartest)
             - `gemini/gemini-2.0-flash`
             - `gemini/gemini-2.5-flash-preview-04-17`
             - 'groq/gemma2-9b-it`

@@ -1235,7 +1252,7 @@ def sample(
             },
         }
     }
-    df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-
+    df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-5-nano")
    ```
 
    Example of generating mock data for multiple tables (with PK/FK relationships):

@@ -1298,7 +1315,7 @@ def sample(
             ],
         },
     }
-    data = mock.sample(tables=tables, sample_size=2, model="openai/gpt-
+    data = mock.sample(tables=tables, sample_size=2, model="openai/gpt-5")
     df_customers = data["customers"]
     df_warehouses = data["warehouses"]
     df_orders = data["orders"]

@@ -1327,7 +1344,7 @@ def sample(
     enriched_df = mock.sample(
         tables=tables,
         existing_data={"patients": existing_df},
-        model="openai/gpt-
+        model="openai/gpt-5-nano"
     )
     enriched_df
     ```

@@ -1382,7 +1399,7 @@ def sample(
             "customers": existing_customers,
             "orders": existing_orders,
         },
-        model="openai/gpt-
+        model="openai/gpt-5-nano"
     )
     df_customers = data["customers"]
     df_orders = data["orders"]

@@ -1414,7 +1431,7 @@ async def _asample(
     tables: dict[str, dict],
     sample_size: int | dict[str, int] = 4,
     existing_data: dict[str, pd.DataFrame] | None = None,
-    model: str = "openai/gpt-
+    model: str = "openai/gpt-5-nano",
     api_key: str | None = None,
     temperature: float = 1.0,
     top_p: float = 0.95,

mostlyai/mock/mcp_server.py
CHANGED
{mostlyai_mock-0.1.14.dist-info → mostlyai_mock-0.1.16.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mostlyai-mock
-Version: 0.1.14
+Version: 0.1.16
 Summary: Synthetic Mock Data
 Project-URL: homepage, https://github.com/mostly-ai/mostlyai-mock
 Project-URL: repository, https://github.com/mostly-ai/mostlyai-mock

@@ -24,13 +24,16 @@ Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Software Development :: Libraries
 Classifier: Typing :: Typed
 Requires-Python: >=3.10
-Requires-Dist: fastmcp<3.0.0,>=2.0.0
 Requires-Dist: litellm>=1.67.0
 Requires-Dist: numpy>=1.26.3
 Requires-Dist: pandas>=2.0.0
 Requires-Dist: pyarrow>=14.0.0
 Requires-Dist: pydantic<3.0.0,>=2.0.0
 Requires-Dist: tenacity>=9.1.2
+Provides-Extra: litellm-proxy
+Requires-Dist: litellm[proxy]>=1.67.0; extra == 'litellm-proxy'
+Provides-Extra: mcp
+Requires-Dist: fastmcp<3.0.0,>=2.0.0; extra == 'mcp'
 Description-Content-Type: text/markdown
 
 # Synthetic Mock Data 🔮

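The practical effect of this hunk: `fastmcp` is no longer a hard dependency. MCP users now opt in with `pip install "mostlyai-mock[mcp]"`, and the new `litellm-proxy` extra likewise pulls in `litellm[proxy]` only on request.
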
@@ -92,7 +95,7 @@ tables = {
 df = mock.sample(
     tables=tables,  # provide table and column definitions
     sample_size=10,  # generate 10 records
-    model="openai/gpt-
+    model="openai/gpt-5-nano",  # select the LLM model (optional)
 )
 print(df)
 # nationality name gender age date_of_birth checkin_time is_vip price_per_night room_number

@@ -173,7 +176,7 @@ tables = {
 data = mock.sample(
     tables=tables,
     sample_size=2,
-    model="openai/gpt-
+    model="openai/gpt-5",
     n_workers=1,
 )
 print(data["customers"])

@@ -229,7 +232,7 @@ tables = {
         ],
     }
 }
-df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-
+df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-5", n_workers=1)
 print(df)
 # employee_id name boss_id role
 # 0 B0-1 Patricia Lee <NA> President

@@ -270,7 +273,7 @@ existing_guests = pd.DataFrame({
 df = mock.sample(
     tables=tables,
     existing_data={"guests": existing_guests},
-    model="openai/gpt-
+    model="openai/gpt-5-nano"
 )
 print(df)
 # guest_id name nationality gender age room_number is_vip

@@ -285,18 +288,18 @@ This repo comes with MCP Server. It can be easily consumed by any MCP Client by
 
 ```json
 {
-
-
-
-
-
-
-
-
-
-
-
-
+  "mcpServers": {
+    "mostlyai-mock-mcp": {
+      "command": "uvx",
+      "args": ["--from", "mostlyai-mock[mcp]", "mcp-server"],
+      "env": {
+        "OPENAI_API_KEY": "PROVIDE YOUR KEY",
+        "GEMINI_API_KEY": "PROVIDE YOUR KEY",
+        "GROQ_API_KEY": "PROVIDE YOUR KEY",
+        "ANTHROPIC_API_KEY": "PROVIDE YOUR KEY"
+      }
+    }
+  }
 }
 ```
 

@@ -306,5 +309,5 @@ For example:
 
 Troubleshooting:
 1. If the MCP Client fails to detect the MCP Server, provide the absolute path in the `command` field, for example: `/Users/johnsmith/.local/bin/uvx`
-2. To debug MCP Server issues, you can use MCP Inspector by running: `npx @modelcontextprotocol/inspector -- uvx --from mostlyai-mock mcp-server`
-3. In order to develop locally, modify the configuration by replacing `"command": "uv"` (or use the full path to `uv` if needed) and `"args": ["--directory", "/Users/johnsmith/mostlyai-mock", "run", "mcp-server"]`
+2. To debug MCP Server issues, you can use MCP Inspector by running: `npx @modelcontextprotocol/inspector -- uvx --from mostlyai-mock[mcp] mcp-server`
+3. In order to develop locally, modify the configuration by replacing `"command": "uv"` (or use the full path to `uv` if needed) and `"args": ["--directory", "/Users/johnsmith/mostlyai-mock", "run", "--extra", "mcp", "mcp-server"]`

mostlyai_mock-0.1.16.dist-info/RECORD
ADDED
@@ -0,0 +1,8 @@
+mostlyai/mock/__init__.py,sha256=XEezyGjkXQBReW_ORi83H2WEVhLolDDLbGjxA2g2yEs,715
+mostlyai/mock/core.py,sha256=FTF0BfJowxNHm_L0RpTk6BhS1mXzvjELP-3Z96aFVMQ,62454
+mostlyai/mock/mcp_server.py,sha256=uDLg0SeMPV2VZhXviM-F769W0xlmhGwlmQiQhY0Q-Ik,2365
+mostlyai_mock-0.1.16.dist-info/METADATA,sha256=CT6lcz2cAq5W-u3VjQLr_Dg8VbuEtU-JlvsXg5OsKTk,14297
+mostlyai_mock-0.1.16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+mostlyai_mock-0.1.16.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
+mostlyai_mock-0.1.16.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mostlyai_mock-0.1.16.dist-info/RECORD,,

mostlyai_mock-0.1.14.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
-mostlyai/mock/__init__.py,sha256=MLHwi5g6_lAEd8cDEISbVdRWmorOVAQ6IoMm8BsRpqg,715
-mostlyai/mock/core.py,sha256=JdWHix-Pp0s---b_Z3f2ui7J7LSl4_r_gPP0z8UHKY8,61663
-mostlyai/mock/mcp_server.py,sha256=0Vn1jmrdNAvUZSviaaU7Lhn7L7iHFyd8kGFigM0-4s0,2367
-mostlyai_mock-0.1.14.dist-info/METADATA,sha256=PHiUTSEvevYTPVvsMGT-kilTDwaEgIEL0T8Vr56PSiY,14123
-mostlyai_mock-0.1.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-mostlyai_mock-0.1.14.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
-mostlyai_mock-0.1.14.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-mostlyai_mock-0.1.14.dist-info/RECORD,,

{mostlyai_mock-0.1.14.dist-info → mostlyai_mock-0.1.16.dist-info}/WHEEL
File without changes

{mostlyai_mock-0.1.14.dist-info → mostlyai_mock-0.1.16.dist-info}/entry_points.txt
File without changes

{mostlyai_mock-0.1.14.dist-info → mostlyai_mock-0.1.16.dist-info}/licenses/LICENSE
File without changes