mostlyai-mock 0.1.14__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mostlyai/mock/__init__.py CHANGED
@@ -15,4 +15,4 @@
15
15
  from mostlyai.mock.core import sample
16
16
 
17
17
  __all__ = ["sample"]
18
- __version__ = "0.1.14" # Do not set this manually. Use poetry version [params].
18
+ __version__ = "0.1.16" # Do not set this manually. Use poetry version [params].
mostlyai/mock/core.py CHANGED
@@ -40,10 +40,10 @@ class LLMOutputFormat(str, Enum):
40
40
 
41
41
 
42
42
  class LLMConfig(BaseModel):
43
- model: str = "openai/gpt-4.1-nano"
44
- api_key: str | None = None
45
- temperature: float = 1.0
46
- top_p: float = 0.95
43
+ model: str
44
+ api_key: str | None
45
+ temperature: float
46
+ top_p: float
47
47
 
48
48
 
49
49
  class MockConfig(RootModel[dict[str, "TableConfig"]]):
@@ -270,8 +270,7 @@ async def _sample_table(
270
270
 
271
271
 
272
272
  def _create_system_prompt(llm_output_format: LLMOutputFormat) -> str:
273
- return f"""
274
- You are a specialized data generator designed to create highly realistic, contextually appropriate data based on schema definitions.
273
+ return f"""You are a specialized data generator designed to create highly realistic, contextually appropriate data based on schema definitions.
275
274
 
276
275
  Your task is to:
277
276
 
@@ -289,8 +288,7 @@ appropriate content. For dates and timestamps, ensure logical chronology. Always
289
288
  across tables.
290
289
 
291
290
  When enriching existing data, carefully analyze the patterns and relationships in the existing columns \
292
- to generate compatible and realistic values for the missing columns.
293
- """
291
+ to generate compatible and realistic values for the missing columns."""
294
292
 
295
293
 
296
294
  def _create_table_prompt(
@@ -647,6 +645,24 @@ async def _worker(
647
645
  "stream": True,
648
646
  }
649
647
 
648
+ # support for openai reasoning models
649
+ model_only = llm_config.model.split("/")[-1] if "/" in llm_config.model else llm_config.model
650
+ reasoning_effort = (
651
+ "low"
652
+ if (model_only.startswith("o") and (model_only[1:].isdigit() or model_only[1:].split("-")[0].isdigit()))
653
+ else "minimal"
654
+ if (
655
+ model_only.startswith("gpt-")
656
+ and model_only.split("-")[1].isdigit()
657
+ and int(model_only.split("-")[1]) >= 5
658
+ )
659
+ else None
660
+ )
661
+
662
+ if reasoning_effort:
663
+ litellm_kwargs.pop("top_p")
664
+ litellm_kwargs["reasoning_effort"] = reasoning_effort
665
+
650
666
  # construct messages
651
667
  system_prompt = _create_system_prompt(llm_output_format)
652
668
  user_prompt = _create_table_prompt(
@@ -715,7 +731,7 @@ async def _worker(
715
731
  if do_repeat_task:
716
732
  # allow 10 retries across all workers before propagating the exception to the orchestrator
717
733
  await retry_queue.put(1)
718
- if retry_queue.qsize() < 10:
734
+ if retry_queue.qsize() <= 10:
719
735
  # put task back to the front of the batch queue
720
736
  await batch_queue.put((batch_idx, task))
721
737
  else:
@@ -1118,7 +1134,7 @@ async def _sample_common(
1118
1134
  tables: dict[str, dict],
1119
1135
  sample_size: int | dict[str, int] = 4,
1120
1136
  existing_data: dict[str, pd.DataFrame] | None = None,
1121
- model: str = "openai/gpt-4.1-nano",
1137
+ model: str = "openai/gpt-5-nano",
1122
1138
  api_key: str | None = None,
1123
1139
  temperature: float = 1.0,
1124
1140
  top_p: float = 0.95,
@@ -1166,7 +1182,7 @@ def sample(
1166
1182
  tables: dict[str, dict],
1167
1183
  sample_size: int | dict[str, int] = 4,
1168
1184
  existing_data: dict[str, pd.DataFrame] | None = None,
1169
- model: str = "openai/gpt-4.1-nano",
1185
+ model: str = "openai/gpt-5-nano",
1170
1186
  api_key: str | None = None,
1171
1187
  temperature: float = 1.0,
1172
1188
  top_p: float = 0.95,
@@ -1181,11 +1197,12 @@ def sample(
1181
1197
  or the enrichment of existing datasets with new, context-aware columns.
1182
1198
 
1183
1199
  It is particularly useful for quickly simulating production-like datasets for testing or prototyping purposes.
1184
- It is advised to limit mocking to small datasets for performance reasons (rows * cols < 100).
1200
+ It is advised to limit mocking to small datasets for performance reasons (rows * cols < 1000).
1185
1201
  It might take a couple of minutes for bigger datasets.
1186
1202
 
1187
1203
  Args:
1188
1204
  tables (dict[str, dict]): The table specifications to generate mock data for. See examples for usage.
1205
+ Note: Avoid using double quotes (`"`) and other special characters in column names.
1189
1206
  sample_size (int | dict[str, int]): The number of rows to generate for each subject table.
1190
1207
  If a single integer is provided, the same number of rows will be generated for each subject table.
1191
1208
  If a dictionary is provided, the number of rows to generate for each subject table can be specified individually.
@@ -1195,9 +1212,9 @@ def sample(
1195
1212
  Default is None.
1196
1213
  model (str): The LiteLLM chat completion model to be used.
1197
1214
  Examples include:
1198
- - `openai/gpt-4.1-nano` (default; fast, and smart)
1199
- - `openai/gpt-4.1-mini` (slower, but smarter)
1200
- - `openai/gpt-4.1` (slowest, but smartest)
1215
+ - `openai/gpt-5-nano` (default; fast, and smart)
1216
+ - `openai/gpt-5-mini` (slower, but smarter)
1217
+ - `openai/gpt-5` (slowest, but smartest)
1201
1218
  - `gemini/gemini-2.0-flash`
1202
1219
  - `gemini/gemini-2.5-flash-preview-04-17`
1203
1220
  - `groq/gemma2-9b-it`
@@ -1235,7 +1252,7 @@ def sample(
1235
1252
  },
1236
1253
  }
1237
1254
  }
1238
- df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-4.1-nano")
1255
+ df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-5-nano")
1239
1256
  ```
1240
1257
 
1241
1258
  Example of generating mock data for multiple tables (with PK/FK relationships):
@@ -1298,7 +1315,7 @@ def sample(
1298
1315
  ],
1299
1316
  },
1300
1317
  }
1301
- data = mock.sample(tables=tables, sample_size=2, model="openai/gpt-4.1")
1318
+ data = mock.sample(tables=tables, sample_size=2, model="openai/gpt-5")
1302
1319
  df_customers = data["customers"]
1303
1320
  df_warehouses = data["warehouses"]
1304
1321
  df_orders = data["orders"]
@@ -1327,7 +1344,7 @@ def sample(
1327
1344
  enriched_df = mock.sample(
1328
1345
  tables=tables,
1329
1346
  existing_data={"patients": existing_df},
1330
- model="openai/gpt-4.1-nano"
1347
+ model="openai/gpt-5-nano"
1331
1348
  )
1332
1349
  enriched_df
1333
1350
  ```
@@ -1382,7 +1399,7 @@ def sample(
1382
1399
  "customers": existing_customers,
1383
1400
  "orders": existing_orders,
1384
1401
  },
1385
- model="openai/gpt-4.1-nano"
1402
+ model="openai/gpt-5-nano"
1386
1403
  )
1387
1404
  df_customers = data["customers"]
1388
1405
  df_orders = data["orders"]
@@ -1414,7 +1431,7 @@ async def _asample(
1414
1431
  tables: dict[str, dict],
1415
1432
  sample_size: int | dict[str, int] = 4,
1416
1433
  existing_data: dict[str, pd.DataFrame] | None = None,
1417
- model: str = "openai/gpt-4.1-nano",
1434
+ model: str = "openai/gpt-5-nano",
1418
1435
  api_key: str | None = None,
1419
1436
  temperature: float = 1.0,
1420
1437
  top_p: float = 0.95,
@@ -56,7 +56,7 @@ async def mock_data(
56
56
  *,
57
57
  tables: dict[str, dict],
58
58
  sample_size: int,
59
- model: str = "openai/gpt-4.1-nano",
59
+ model: str = "openai/gpt-5-nano",
60
60
  api_key: str | None = None,
61
61
  temperature: float = 1.0,
62
62
  top_p: float = 0.95,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mostlyai-mock
3
- Version: 0.1.14
3
+ Version: 0.1.16
4
4
  Summary: Synthetic Mock Data
5
5
  Project-URL: homepage, https://github.com/mostly-ai/mostlyai-mock
6
6
  Project-URL: repository, https://github.com/mostly-ai/mostlyai-mock
@@ -24,13 +24,16 @@ Classifier: Programming Language :: Python :: 3.13
24
24
  Classifier: Topic :: Software Development :: Libraries
25
25
  Classifier: Typing :: Typed
26
26
  Requires-Python: >=3.10
27
- Requires-Dist: fastmcp<3.0.0,>=2.0.0
28
27
  Requires-Dist: litellm>=1.67.0
29
28
  Requires-Dist: numpy>=1.26.3
30
29
  Requires-Dist: pandas>=2.0.0
31
30
  Requires-Dist: pyarrow>=14.0.0
32
31
  Requires-Dist: pydantic<3.0.0,>=2.0.0
33
32
  Requires-Dist: tenacity>=9.1.2
33
+ Provides-Extra: litellm-proxy
34
+ Requires-Dist: litellm[proxy]>=1.67.0; extra == 'litellm-proxy'
35
+ Provides-Extra: mcp
36
+ Requires-Dist: fastmcp<3.0.0,>=2.0.0; extra == 'mcp'
34
37
  Description-Content-Type: text/markdown
35
38
 
36
39
  # Synthetic Mock Data 🔮
@@ -92,7 +95,7 @@ tables = {
92
95
  df = mock.sample(
93
96
  tables=tables, # provide table and column definitions
94
97
  sample_size=10, # generate 10 records
95
- model="openai/gpt-4.1-nano", # select the LLM model (optional)
98
+ model="openai/gpt-5-nano", # select the LLM model (optional)
96
99
  )
97
100
  print(df)
98
101
  # nationality name gender age date_of_birth checkin_time is_vip price_per_night room_number
@@ -173,7 +176,7 @@ tables = {
173
176
  data = mock.sample(
174
177
  tables=tables,
175
178
  sample_size=2,
176
- model="openai/gpt-4.1",
179
+ model="openai/gpt-5",
177
180
  n_workers=1,
178
181
  )
179
182
  print(data["customers"])
@@ -229,7 +232,7 @@ tables = {
229
232
  ],
230
233
  }
231
234
  }
232
- df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-4.1", n_workers=1)
235
+ df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-5", n_workers=1)
233
236
  print(df)
234
237
  # employee_id name boss_id role
235
238
  # 0 B0-1 Patricia Lee <NA> President
@@ -270,7 +273,7 @@ existing_guests = pd.DataFrame({
270
273
  df = mock.sample(
271
274
  tables=tables,
272
275
  existing_data={"guests": existing_guests},
273
- model="openai/gpt-4.1-nano"
276
+ model="openai/gpt-5-nano"
274
277
  )
275
278
  print(df)
276
279
  # guest_id name nationality gender age room_number is_vip
@@ -285,18 +288,18 @@ This repo comes with MCP Server. It can be easily consumed by any MCP Client by
285
288
 
286
289
  ```json
287
290
  {
288
- "mcpServers": {
289
- "mostlyai-mock-mcp": {
290
- "command": "uvx",
291
- "args": ["--from", "mostlyai-mock", "mcp-server"],
292
- "env": {
293
- "OPENAI_API_KEY": "PROVIDE YOUR KEY",
294
- "GEMINI_API_KEY": "PROVIDE YOUR KEY",
295
- "GROQ_API_KEY": "PROVIDE YOUR KEY",
296
- "ANTHROPIC_API_KEY": "PROVIDE YOUR KEY"
297
- }
298
- }
299
- }
291
+ "mcpServers": {
292
+ "mostlyai-mock-mcp": {
293
+ "command": "uvx",
294
+ "args": ["--from", "mostlyai-mock[mcp]", "mcp-server"],
295
+ "env": {
296
+ "OPENAI_API_KEY": "PROVIDE YOUR KEY",
297
+ "GEMINI_API_KEY": "PROVIDE YOUR KEY",
298
+ "GROQ_API_KEY": "PROVIDE YOUR KEY",
299
+ "ANTHROPIC_API_KEY": "PROVIDE YOUR KEY"
300
+ }
301
+ }
302
+ }
300
303
  }
301
304
  ```
302
305
 
@@ -306,5 +309,5 @@ For example:
306
309
 
307
310
  Troubleshooting:
308
311
  1. If the MCP Client fails to detect the MCP Server, provide the absolute path in the `command` field, for example: `/Users/johnsmith/.local/bin/uvx`
309
- 2. To debug MCP Server issues, you can use MCP Inspector by running: `npx @modelcontextprotocol/inspector -- uvx --from mostlyai-mock mcp-server`
310
- 3. In order to develop locally, modify the configuration by replacing `"command": "uv"` (or use the full path to `uv` if needed) and `"args": ["--directory", "/Users/johnsmith/mostlyai-mock", "run", "mcp-server"]`
312
+ 2. To debug MCP Server issues, you can use MCP Inspector by running: `npx @modelcontextprotocol/inspector -- uvx --from mostlyai-mock[mcp] mcp-server`
313
+ 3. To develop locally, modify the configuration by setting `"command": "uv"` (or the full path to `uv` if needed) and `"args": ["--directory", "/Users/johnsmith/mostlyai-mock", "run", "--extra", "mcp", "mcp-server"]`
@@ -0,0 +1,8 @@
1
+ mostlyai/mock/__init__.py,sha256=XEezyGjkXQBReW_ORi83H2WEVhLolDDLbGjxA2g2yEs,715
2
+ mostlyai/mock/core.py,sha256=FTF0BfJowxNHm_L0RpTk6BhS1mXzvjELP-3Z96aFVMQ,62454
3
+ mostlyai/mock/mcp_server.py,sha256=uDLg0SeMPV2VZhXviM-F769W0xlmhGwlmQiQhY0Q-Ik,2365
4
+ mostlyai_mock-0.1.16.dist-info/METADATA,sha256=CT6lcz2cAq5W-u3VjQLr_Dg8VbuEtU-JlvsXg5OsKTk,14297
5
+ mostlyai_mock-0.1.16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
+ mostlyai_mock-0.1.16.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
7
+ mostlyai_mock-0.1.16.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
+ mostlyai_mock-0.1.16.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- mostlyai/mock/__init__.py,sha256=MLHwi5g6_lAEd8cDEISbVdRWmorOVAQ6IoMm8BsRpqg,715
2
- mostlyai/mock/core.py,sha256=JdWHix-Pp0s---b_Z3f2ui7J7LSl4_r_gPP0z8UHKY8,61663
3
- mostlyai/mock/mcp_server.py,sha256=0Vn1jmrdNAvUZSviaaU7Lhn7L7iHFyd8kGFigM0-4s0,2367
4
- mostlyai_mock-0.1.14.dist-info/METADATA,sha256=PHiUTSEvevYTPVvsMGT-kilTDwaEgIEL0T8Vr56PSiY,14123
5
- mostlyai_mock-0.1.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
- mostlyai_mock-0.1.14.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
7
- mostlyai_mock-0.1.14.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
- mostlyai_mock-0.1.14.dist-info/RECORD,,