mostlyai-mock 0.1.15__tar.gz → 0.1.17__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mostlyai-mock
3
- Version: 0.1.15
3
+ Version: 0.1.17
4
4
  Summary: Synthetic Mock Data
5
5
  Project-URL: homepage, https://github.com/mostly-ai/mostlyai-mock
6
6
  Project-URL: repository, https://github.com/mostly-ai/mostlyai-mock
@@ -95,7 +95,7 @@ tables = {
95
95
  df = mock.sample(
96
96
  tables=tables, # provide table and column definitions
97
97
  sample_size=10, # generate 10 records
98
- model="openai/gpt-4.1-nano", # select the LLM model (optional)
98
+ model="openai/gpt-5-nano", # select the LLM model (optional)
99
99
  )
100
100
  print(df)
101
101
  # nationality name gender age date_of_birth checkin_time is_vip price_per_night room_number
@@ -176,7 +176,7 @@ tables = {
176
176
  data = mock.sample(
177
177
  tables=tables,
178
178
  sample_size=2,
179
- model="openai/gpt-4.1",
179
+ model="openai/gpt-5",
180
180
  n_workers=1,
181
181
  )
182
182
  print(data["customers"])
@@ -208,7 +208,7 @@ print(data["items"])
208
208
  # 9 B4-200510 B1-3010022 Bottled Spring Water (24 Pack) 34.95
209
209
  ```
210
210
 
211
- 6. Create your first self-referencing mock table
211
+ 5. Create your first self-referencing mock table with auto-increment integer primary keys
212
212
 
213
213
  ```python
214
214
  from mostlyai import mock
@@ -217,9 +217,9 @@ tables = {
217
217
  "employees": {
218
218
  "prompt": "Employees of a company",
219
219
  "columns": {
220
- "employee_id": {"prompt": "the unique id of the employee; sequential", "dtype": "string"},
221
- "name": {"prompt": "first name and last name of the president", "dtype": "string"},
222
- "boss_id": {"prompt": "the id of the boss of the employee", "dtype": "string"},
220
+ "employee_id": {"dtype": "integer"},
221
+ "name": {"prompt": "first name and last name of the employee", "dtype": "string"},
222
+ "boss_id": {"dtype": "integer"},
223
223
  "role": {"prompt": "the role of the employee", "dtype": "string"},
224
224
  },
225
225
  "primary_key": "employee_id",
@@ -232,22 +232,22 @@ tables = {
232
232
  ],
233
233
  }
234
234
  }
235
- df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-4.1", n_workers=1)
235
+ df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-5", n_workers=1)
236
236
  print(df)
237
- # employee_id name boss_id role
238
- # 0 B0-1 Patricia Lee <NA> President
239
- # 1 B0-2 Edward Rodriguez B0-1 VP of Operations
240
- # 2 B0-3 Maria Cortez B0-1 VP of Finance
241
- # 3 B0-4 Thomas Nguyen B0-1 VP of Technology
242
- # 4 B0-5 Rachel Kim B0-2 Operations Manager
243
- # 5 B0-6 Jeffrey Patel B0-2 Supply Chain Lead
244
- # 6 B0-7 Olivia Smith B0-2 Facilities Supervisor
245
- # 7 B0-8 Brian Carter B0-3 Accounting Manager
246
- # 8 B0-9 Lauren Anderson B0-3 Financial Analyst
247
- # 9 B0-10 Santiago Romero B0-3 Payroll Specialist
237
+ # employee_id name boss_id role
238
+ # 0 1 Patricia Lee <NA> President
239
+ # 1 2 Edward Rodriguez 1 VP of Operations
240
+ # 2 3 Maria Cortez 1 VP of Finance
241
+ # 3 4 Thomas Nguyen 1 VP of Technology
242
+ # 4 5 Rachel Kim 2 Operations Manager
243
+ # 5 6 Jeffrey Patel 2 Supply Chain Lead
244
+ # 6 7 Olivia Smith 2 Facilities Supervisor
245
+ # 7 8 Brian Carter 3 Accounting Manager
246
+ # 8 9 Lauren Anderson 3 Financial Analyst
247
+ # 9 10 Santiago Romero 3 Payroll Specialist
248
248
  ```
249
249
 
250
- 7. Enrich existing data with additional columns
250
+ 6. Enrich existing data with additional columns
251
251
 
252
252
  ```python
253
253
  from mostlyai import mock
@@ -273,7 +273,7 @@ existing_guests = pd.DataFrame({
273
273
  df = mock.sample(
274
274
  tables=tables,
275
275
  existing_data={"guests": existing_guests},
276
- model="openai/gpt-4.1-nano"
276
+ model="openai/gpt-5-nano"
277
277
  )
278
278
  print(df)
279
279
  # guest_id name nationality gender age room_number is_vip
@@ -57,7 +57,7 @@ tables = {
57
57
  df = mock.sample(
58
58
  tables=tables, # provide table and column definitions
59
59
  sample_size=10, # generate 10 records
60
- model="openai/gpt-4.1-nano", # select the LLM model (optional)
60
+ model="openai/gpt-5-nano", # select the LLM model (optional)
61
61
  )
62
62
  print(df)
63
63
  # nationality name gender age date_of_birth checkin_time is_vip price_per_night room_number
@@ -138,7 +138,7 @@ tables = {
138
138
  data = mock.sample(
139
139
  tables=tables,
140
140
  sample_size=2,
141
- model="openai/gpt-4.1",
141
+ model="openai/gpt-5",
142
142
  n_workers=1,
143
143
  )
144
144
  print(data["customers"])
@@ -170,7 +170,7 @@ print(data["items"])
170
170
  # 9 B4-200510 B1-3010022 Bottled Spring Water (24 Pack) 34.95
171
171
  ```
172
172
 
173
- 6. Create your first self-referencing mock table
173
+ 5. Create your first self-referencing mock table with auto-increment integer primary keys
174
174
 
175
175
  ```python
176
176
  from mostlyai import mock
@@ -179,9 +179,9 @@ tables = {
179
179
  "employees": {
180
180
  "prompt": "Employees of a company",
181
181
  "columns": {
182
- "employee_id": {"prompt": "the unique id of the employee; sequential", "dtype": "string"},
183
- "name": {"prompt": "first name and last name of the president", "dtype": "string"},
184
- "boss_id": {"prompt": "the id of the boss of the employee", "dtype": "string"},
182
+ "employee_id": {"dtype": "integer"},
183
+ "name": {"prompt": "first name and last name of the employee", "dtype": "string"},
184
+ "boss_id": {"dtype": "integer"},
185
185
  "role": {"prompt": "the role of the employee", "dtype": "string"},
186
186
  },
187
187
  "primary_key": "employee_id",
@@ -194,22 +194,22 @@ tables = {
194
194
  ],
195
195
  }
196
196
  }
197
- df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-4.1", n_workers=1)
197
+ df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-5", n_workers=1)
198
198
  print(df)
199
- # employee_id name boss_id role
200
- # 0 B0-1 Patricia Lee <NA> President
201
- # 1 B0-2 Edward Rodriguez B0-1 VP of Operations
202
- # 2 B0-3 Maria Cortez B0-1 VP of Finance
203
- # 3 B0-4 Thomas Nguyen B0-1 VP of Technology
204
- # 4 B0-5 Rachel Kim B0-2 Operations Manager
205
- # 5 B0-6 Jeffrey Patel B0-2 Supply Chain Lead
206
- # 6 B0-7 Olivia Smith B0-2 Facilities Supervisor
207
- # 7 B0-8 Brian Carter B0-3 Accounting Manager
208
- # 8 B0-9 Lauren Anderson B0-3 Financial Analyst
209
- # 9 B0-10 Santiago Romero B0-3 Payroll Specialist
199
+ # employee_id name boss_id role
200
+ # 0 1 Patricia Lee <NA> President
201
+ # 1 2 Edward Rodriguez 1 VP of Operations
202
+ # 2 3 Maria Cortez 1 VP of Finance
203
+ # 3 4 Thomas Nguyen 1 VP of Technology
204
+ # 4 5 Rachel Kim 2 Operations Manager
205
+ # 5 6 Jeffrey Patel 2 Supply Chain Lead
206
+ # 6 7 Olivia Smith 2 Facilities Supervisor
207
+ # 7 8 Brian Carter 3 Accounting Manager
208
+ # 8 9 Lauren Anderson 3 Financial Analyst
209
+ # 9 10 Santiago Romero 3 Payroll Specialist
210
210
  ```
211
211
 
212
- 7. Enrich existing data with additional columns
212
+ 6. Enrich existing data with additional columns
213
213
 
214
214
  ```python
215
215
  from mostlyai import mock
@@ -235,7 +235,7 @@ existing_guests = pd.DataFrame({
235
235
  df = mock.sample(
236
236
  tables=tables,
237
237
  existing_data={"guests": existing_guests},
238
- model="openai/gpt-4.1-nano"
238
+ model="openai/gpt-5-nano"
239
239
  )
240
240
  print(df)
241
241
  # guest_id name nationality gender age room_number is_vip
@@ -15,4 +15,4 @@
15
15
  from mostlyai.mock.core import sample
16
16
 
17
17
  __all__ = ["sample"]
18
- __version__ = "0.1.15" # Do not set this manually. Use poetry version [params].
18
+ __version__ = "0.1.17" # Do not set this manually. Use poetry version [params].
@@ -124,14 +124,14 @@ class MockConfig(RootModel[dict[str, "TableConfig"]]):
124
124
  return self
125
125
 
126
126
  @model_validator(mode="after")
127
- def ensure_primary_key_is_string_dtype(self) -> MockConfig:
127
+ def ensure_primary_key_is_string_or_integer_dtype(self) -> MockConfig:
128
128
  for table_name, table_config in self.root.items():
129
129
  if table_config.primary_key:
130
130
  column_config = table_config.columns[table_config.primary_key]
131
- if column_config.dtype not in [DType.STRING]:
131
+ if column_config.dtype not in [DType.STRING, DType.INTEGER]:
132
132
  raise ValueError(
133
133
  f"Primary key column '{table_config.primary_key}' in table '{table_name}' must be one of the following types:"
134
- f" {[DType.STRING.value]}"
134
+ f" {[DType.STRING.value, DType.INTEGER.value]}"
135
135
  )
136
136
  return self
137
137
 
@@ -248,6 +248,7 @@ async def _sample_table(
248
248
  non_context_size: int | None,
249
249
  n_workers: int,
250
250
  llm_config: LLMConfig,
251
+ config: MockConfig,
251
252
  progress_callback: Callable | None = None,
252
253
  ) -> pd.DataFrame:
253
254
  table_rows_generator = _create_table_rows_generator(
@@ -265,7 +266,13 @@ async def _sample_table(
265
266
  progress_callback=progress_callback,
266
267
  )
267
268
  table_rows_generator = tqdm(table_rows_generator, desc=f"Generating rows for table `{name}`".ljust(45))
268
- table_df = await _convert_table_rows_generator_to_df(table_rows_generator=table_rows_generator, columns=columns)
269
+ table_df = await _convert_table_rows_generator_to_df(
270
+ table_rows_generator=table_rows_generator,
271
+ columns=columns,
272
+ primary_key=primary_keys.get(name),
273
+ foreign_keys=foreign_keys,
274
+ config=config,
275
+ )
269
276
  return table_df
270
277
 
271
278
 
@@ -326,6 +333,15 @@ def _create_table_prompt(
326
333
  column_specifications = {
327
334
  column: spec for column, spec in column_specifications.items() if column not in existing_data.columns
328
335
  }
336
+ # ensure primary keys stay as string in the prompt, even if dtype is integer
337
+ if target_primary_key and target_primary_key in column_specifications:
338
+ if columns[target_primary_key].dtype == DType.INTEGER:
339
+ column_specifications[target_primary_key]["dtype"] = DType.STRING.value
340
+ # ensure foreign keys referencing integer primary keys also stay as string in the prompt
341
+ for fk in foreign_keys:
342
+ if fk.column in column_specifications:
343
+ if columns[fk.column].dtype == DType.INTEGER:
344
+ column_specifications[fk.column]["dtype"] = DType.STRING.value
329
345
  prompt += f"{json.dumps(column_specifications, indent=2)}\n\n"
330
346
 
331
347
  # add previous rows as context to help the LLM generate consistent data
@@ -565,11 +581,17 @@ async def _yield_rows_from_csv_chunks_stream(response: litellm.CustomStreamWrapp
565
581
 
566
582
 
567
583
  def _create_structured_output_schema(
568
- columns: dict[str, ColumnConfig], existing_data: pd.DataFrame | None
584
+ columns: dict[str, ColumnConfig],
585
+ existing_data: pd.DataFrame | None,
586
+ primary_key: str | None,
587
+ foreign_keys: list[ForeignKeyConfig],
569
588
  ) -> type[BaseModel]:
570
- def create_annotation(column_config: ColumnConfig) -> type:
589
+ def create_annotation(column_config: ColumnConfig, is_int_pk_or_fk: bool = False) -> type:
571
590
  if column_config.values or column_config.dtype is DType.CATEGORY:
572
591
  return Literal[tuple(column_config.values)] # type: ignore
592
+ # ensure integer primary keys and foreign keys are treated as strings
593
+ if is_int_pk_or_fk:
594
+ return str | None
573
595
  return {
574
596
  DType.INTEGER: int | None,
575
597
  DType.FLOAT: float | None,
@@ -585,7 +607,9 @@ def _create_structured_output_schema(
585
607
  for column_name, column_config in columns.items():
586
608
  if existing_data is not None and column_name in existing_data.columns:
587
609
  continue # skip columns that already exist in existing data
588
- annotation = create_annotation(column_config)
610
+ is_int_pk = primary_key and column_name == primary_key and column_config.dtype == DType.INTEGER
611
+ is_int_fk = any(fk.column == column_name for fk in foreign_keys) and column_config.dtype == DType.INTEGER
612
+ annotation = create_annotation(column_config, is_int_pk or is_int_fk)
589
613
  fields[column_name] = (annotation, Field(...))
590
614
  TableRow = create_model("TableRow", **fields)
591
615
  TableRows = create_model("TableRows", rows=(list[TableRow], ...))
@@ -632,8 +656,9 @@ async def _worker(
632
656
  # construct schema for Structured Outputs (applies to JSON LLMOutputFormat only)
633
657
  structured_output_schema = None
634
658
  if llm_output_format == LLMOutputFormat.JSON:
659
+ pk_col = primary_keys.get(name)
635
660
  structured_output_schema = _create_structured_output_schema(
636
- columns=columns, existing_data=existing_batch
661
+ columns=columns, existing_data=existing_batch, primary_key=pk_col, foreign_keys=foreign_keys
637
662
  )
638
663
 
639
664
  # construct litellm kwargs
@@ -645,6 +670,24 @@ async def _worker(
645
670
  "stream": True,
646
671
  }
647
672
 
673
+ # support for openai reasoning models
674
+ model_only = llm_config.model.split("/")[-1] if "/" in llm_config.model else llm_config.model
675
+ reasoning_effort = (
676
+ "low"
677
+ if (model_only.startswith("o") and (model_only[1:].isdigit() or model_only[1:].split("-")[0].isdigit()))
678
+ else "minimal"
679
+ if (
680
+ model_only.startswith("gpt-")
681
+ and model_only.split("-")[1].isdigit()
682
+ and int(model_only.split("-")[1]) >= 5
683
+ )
684
+ else None
685
+ )
686
+
687
+ if reasoning_effort:
688
+ litellm_kwargs.pop("top_p")
689
+ litellm_kwargs["reasoning_effort"] = reasoning_effort
690
+
648
691
  # construct messages
649
692
  system_prompt = _create_system_prompt(llm_output_format)
650
693
  user_prompt = _create_table_prompt(
@@ -956,14 +999,47 @@ def _align_series_dtypes_with_column_config(series: pd.Series, column_config: Co
956
999
  return series
957
1000
 
958
1001
 
1002
+ def _get_integer_pk_fk_columns(
1003
+ columns: dict[str, ColumnConfig],
1004
+ primary_key: str | None,
1005
+ foreign_keys: list[ForeignKeyConfig],
1006
+ config: MockConfig,
1007
+ ) -> set[str]:
1008
+ """determine which columns should be kept as strings (integer PKs and FKs that reference integer PKs)"""
1009
+ skip_conversion = set()
1010
+
1011
+ # integer primary keys
1012
+ if primary_key and primary_key in columns and columns[primary_key].dtype == DType.INTEGER:
1013
+ skip_conversion.add(primary_key)
1014
+
1015
+ # foreign keys that reference integer primary keys
1016
+ # note: FK dtype is guaranteed to match referenced PK dtype by config validation
1017
+ for fk in foreign_keys:
1018
+ if fk.column in columns and columns[fk.column].dtype == DType.INTEGER:
1019
+ skip_conversion.add(fk.column)
1020
+
1021
+ return skip_conversion
1022
+
1023
+
959
1024
  async def _convert_table_rows_generator_to_df(
960
1025
  table_rows_generator: AsyncGenerator[dict],
961
1026
  columns: dict[str, ColumnConfig],
1027
+ primary_key: str | None = None,
1028
+ foreign_keys: list[ForeignKeyConfig] | None = None,
1029
+ config: MockConfig | None = None,
962
1030
  ) -> pd.DataFrame:
963
1031
  def align_df_dtypes_with_mock_dtypes(df: pd.DataFrame, columns: dict[str, ColumnConfig]) -> pd.DataFrame:
964
1032
  df = df.copy()
1033
+ skip_int_conversion = (
1034
+ _get_integer_pk_fk_columns(columns, primary_key, foreign_keys or [], config) if config else set()
1035
+ )
1036
+
965
1037
  for column_name, column_config in columns.items():
966
- df[column_name] = _align_series_dtypes_with_column_config(df[column_name], column_config)
1038
+ # keep integer PKs and FKs as strings for now (post-processing will convert them)
1039
+ if column_name in skip_int_conversion:
1040
+ df[column_name] = df[column_name].astype("string[pyarrow]")
1041
+ else:
1042
+ df[column_name] = _align_series_dtypes_with_column_config(df[column_name], column_config)
967
1043
  return df
968
1044
 
969
1045
  # consume entire generator
@@ -1007,11 +1083,6 @@ def _harmonize_tables(tables: dict[str, dict], existing_data: dict[str, pd.DataF
1007
1083
  }
1008
1084
  column_configs = {**existing_column_configs, **column_configs}
1009
1085
 
1010
- # primary keys are always strings
1011
- primary_key = table_config.get("primary_key", None)
1012
- if primary_key is not None:
1013
- column_configs[primary_key]["dtype"] = DType.STRING
1014
-
1015
1086
  table_config["columns"] = column_configs
1016
1087
  return tables
1017
1088
 
@@ -1111,12 +1182,51 @@ def _build_execution_plan(config: MockConfig) -> list[str]:
1111
1182
  return execution_plan
1112
1183
 
1113
1184
 
1185
+ def _postprocess_table(
1186
+ table_name: str,
1187
+ df: pd.DataFrame,
1188
+ table_config: TableConfig,
1189
+ config: MockConfig,
1190
+ pk_mappings: dict[str, dict[str, int]],
1191
+ ) -> pd.DataFrame:
1192
+ """convert integer PKs and FKs from strings to auto-incremented integers"""
1193
+ df = df.copy()
1194
+
1195
+ # convert integer primary keys to 1, 2, 3, ... and build mapping
1196
+ pk_col = table_config.primary_key
1197
+ if pk_col and table_config.columns[pk_col].dtype == DType.INTEGER:
1198
+ old_values = df[pk_col].tolist()
1199
+ new_values = list(range(1, len(df) + 1))
1200
+
1201
+ # build mapping: old LLM-generated string values -> new auto-incremented integers
1202
+ pk_mappings[table_name] = {str(old): new for old, new in zip(old_values, new_values)}
1203
+
1204
+ df[pk_col] = new_values
1205
+
1206
+ # convert foreign keys that reference integer primary keys
1207
+ # note: FK dtype is guaranteed to match referenced PK dtype by config validation
1208
+ for fk in table_config.foreign_keys:
1209
+ # skip if not an integer FK (which means it doesn't reference an integer PK)
1210
+ if table_config.columns[fk.column].dtype != DType.INTEGER:
1211
+ continue
1212
+ if fk.referenced_table not in pk_mappings:
1213
+ continue
1214
+
1215
+ # map FK values from strings to integers
1216
+ mapping = pk_mappings[fk.referenced_table]
1217
+ df[fk.column] = (
1218
+ df[fk.column].apply(lambda val: mapping.get(str(val)) if pd.notna(val) else None).astype("int64[pyarrow]")
1219
+ )
1220
+
1221
+ return df
1222
+
1223
+
1114
1224
  async def _sample_common(
1115
1225
  *,
1116
1226
  tables: dict[str, dict],
1117
1227
  sample_size: int | dict[str, int] = 4,
1118
1228
  existing_data: dict[str, pd.DataFrame] | None = None,
1119
- model: str = "openai/gpt-4.1-nano",
1229
+ model: str = "openai/gpt-5-nano",
1120
1230
  api_key: str | None = None,
1121
1231
  temperature: float = 1.0,
1122
1232
  top_p: float = 0.95,
@@ -1138,6 +1248,10 @@ async def _sample_common(
1138
1248
 
1139
1249
  data: dict[str, pd.DataFrame] = _harmonize_existing_data(existing_data, config) or {}
1140
1250
 
1251
+ # track mappings from old string PK values to new integer PK values
1252
+ pk_mappings: dict[str, dict[str, int]] = {}
1253
+
1254
+ # first, generate all tables (without postprocessing)
1141
1255
  for table_name in execution_plan:
1142
1256
  table_config = config.root[table_name]
1143
1257
  df = await _sample_table(
@@ -1152,10 +1266,16 @@ async def _sample_common(
1152
1266
  non_context_size=10, # pick 10 rows to choose from for each non-context foreign key
1153
1267
  n_workers=n_workers,
1154
1268
  llm_config=llm_config,
1269
+ config=config,
1155
1270
  progress_callback=progress_callback,
1156
1271
  )
1157
1272
  data[table_name] = df
1158
1273
 
1274
+ # then, postprocess all tables (convert integer PKs/FKs from strings to integers)
1275
+ for table_name in execution_plan:
1276
+ table_config = config.root[table_name]
1277
+ data[table_name] = _postprocess_table(table_name, data[table_name], table_config, config, pk_mappings)
1278
+
1159
1279
  return next(iter(data.values())) if len(data) == 1 and return_type == "auto" else data
1160
1280
 
1161
1281
 
@@ -1164,7 +1284,7 @@ def sample(
1164
1284
  tables: dict[str, dict],
1165
1285
  sample_size: int | dict[str, int] = 4,
1166
1286
  existing_data: dict[str, pd.DataFrame] | None = None,
1167
- model: str = "openai/gpt-4.1-nano",
1287
+ model: str = "openai/gpt-5-nano",
1168
1288
  api_key: str | None = None,
1169
1289
  temperature: float = 1.0,
1170
1290
  top_p: float = 0.95,
@@ -1194,9 +1314,9 @@ def sample(
1194
1314
  Default is None.
1195
1315
  model (str): The LiteLLM chat completion model to be used.
1196
1316
  Examples include:
1197
- - `openai/gpt-4.1-nano` (default; fast, and smart)
1198
- - `openai/gpt-4.1-mini` (slower, but smarter)
1199
- - `openai/gpt-4.1` (slowest, but smartest)
1317
+ - `openai/gpt-5-nano` (default; fast, and smart)
1318
+ - `openai/gpt-5-mini` (slower, but smarter)
1319
+ - `openai/gpt-5` (slowest, but smartest)
1200
1320
  - `gemini/gemini-2.0-flash`
1201
1321
  - `gemini/gemini-2.5-flash-preview-04-17`
1202
1322
  - 'groq/gemma2-9b-it`
@@ -1234,7 +1354,7 @@ def sample(
1234
1354
  },
1235
1355
  }
1236
1356
  }
1237
- df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-4.1-nano")
1357
+ df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-5-nano")
1238
1358
  ```
1239
1359
 
1240
1360
  Example of generating mock data for multiple tables (with PK/FK relationships):
@@ -1297,7 +1417,7 @@ def sample(
1297
1417
  ],
1298
1418
  },
1299
1419
  }
1300
- data = mock.sample(tables=tables, sample_size=2, model="openai/gpt-4.1")
1420
+ data = mock.sample(tables=tables, sample_size=2, model="openai/gpt-5")
1301
1421
  df_customers = data["customers"]
1302
1422
  df_warehouses = data["warehouses"]
1303
1423
  df_orders = data["orders"]
@@ -1326,7 +1446,7 @@ def sample(
1326
1446
  enriched_df = mock.sample(
1327
1447
  tables=tables,
1328
1448
  existing_data={"patients": existing_df},
1329
- model="openai/gpt-4.1-nano"
1449
+ model="openai/gpt-5-nano"
1330
1450
  )
1331
1451
  enriched_df
1332
1452
  ```
@@ -1381,7 +1501,7 @@ def sample(
1381
1501
  "customers": existing_customers,
1382
1502
  "orders": existing_orders,
1383
1503
  },
1384
- model="openai/gpt-4.1-nano"
1504
+ model="openai/gpt-5-nano"
1385
1505
  )
1386
1506
  df_customers = data["customers"]
1387
1507
  df_orders = data["orders"]
@@ -1413,7 +1533,7 @@ async def _asample(
1413
1533
  tables: dict[str, dict],
1414
1534
  sample_size: int | dict[str, int] = 4,
1415
1535
  existing_data: dict[str, pd.DataFrame] | None = None,
1416
- model: str = "openai/gpt-4.1-nano",
1536
+ model: str = "openai/gpt-5-nano",
1417
1537
  api_key: str | None = None,
1418
1538
  temperature: float = 1.0,
1419
1539
  top_p: float = 0.95,
@@ -56,7 +56,7 @@ async def mock_data(
56
56
  *,
57
57
  tables: dict[str, dict],
58
58
  sample_size: int,
59
- model: str = "openai/gpt-4.1-nano",
59
+ model: str = "openai/gpt-5-nano",
60
60
  api_key: str | None = None,
61
61
  temperature: float = 1.0,
62
62
  top_p: float = 0.95,
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mostlyai-mock"
3
- version = "0.1.15"
3
+ version = "0.1.17"
4
4
  description = "Synthetic Mock Data"
5
5
  authors = [{ name = "MOSTLY AI", email = "dev@mostly.ai" }]
6
6
  requires-python = ">=3.10"
File without changes