mostlyai-mock 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mostlyai/mock/__init__.py +1 -1
- mostlyai/mock/core.py +151 -33
- {mostlyai_mock-0.1.1.dist-info → mostlyai_mock-0.1.3.dist-info}/METADATA +46 -8
- mostlyai_mock-0.1.3.dist-info/RECORD +8 -0
- mostlyai_mock-0.1.1.dist-info/RECORD +0 -8
- {mostlyai_mock-0.1.1.dist-info → mostlyai_mock-0.1.3.dist-info}/WHEEL +0 -0
- {mostlyai_mock-0.1.1.dist-info → mostlyai_mock-0.1.3.dist-info}/entry_points.txt +0 -0
- {mostlyai_mock-0.1.1.dist-info → mostlyai_mock-0.1.3.dist-info}/licenses/LICENSE +0 -0
mostlyai/mock/__init__.py
CHANGED
mostlyai/mock/core.py
CHANGED
@@ -25,21 +25,28 @@ import pandas as pd
|
|
25
25
|
from pydantic import BaseModel, Field, RootModel, create_model, field_validator, model_validator
|
26
26
|
from tqdm import tqdm
|
27
27
|
|
28
|
+
litellm.suppress_debug_info = True
|
29
|
+
|
28
30
|
SYSTEM_PROMPT = """
|
29
|
-
You are a specialized
|
30
|
-
|
31
|
+
You are a specialized mock data generator designed to create highly realistic, contextually appropriate data based on schema definitions.
|
32
|
+
|
33
|
+
Your task is to:
|
31
34
|
|
32
35
|
1. Generate data that strictly adheres to the provided schema constraints (data types, ranges, formats)
|
33
36
|
2. Ensure logical consistency across related tables and foreign key relationships
|
34
37
|
3. Create contextually appropriate values that reflect real-world patterns and distributions
|
35
38
|
4. Produce diverse, non-repetitive data that avoids obvious patterns
|
36
39
|
5. Respect uniqueness constraints and other data integrity rules
|
37
|
-
6.
|
38
|
-
7.
|
40
|
+
6. When enriching existing data, ensure that new values are consistent with existing values
|
41
|
+
7. Return well-formatted JSON output that can be directly parsed
|
42
|
+
8. Don't use markdown formatting
|
39
43
|
|
40
44
|
For numeric fields, generate realistic distributions rather than random values. For text fields, create contextually \
|
41
45
|
appropriate content. For dates and timestamps, ensure logical chronology. Always maintain referential integrity \
|
42
46
|
across tables.
|
47
|
+
|
48
|
+
When enriching existing data, carefully analyze the patterns and relationships in the existing columns \
|
49
|
+
to generate compatible and realistic values for the missing columns.
|
43
50
|
"""
|
44
51
|
|
45
52
|
|
@@ -197,7 +204,7 @@ def _sample_table(
|
|
197
204
|
columns: dict[str, ColumnConfig],
|
198
205
|
foreign_keys: list[ForeignKeyConfig] | None,
|
199
206
|
primary_keys: dict[str, str] | None,
|
200
|
-
|
207
|
+
data: dict[str, pd.DataFrame],
|
201
208
|
sample_size: int,
|
202
209
|
batch_size: int,
|
203
210
|
previous_rows_size: int,
|
@@ -210,7 +217,7 @@ def _sample_table(
|
|
210
217
|
columns=columns,
|
211
218
|
primary_keys=primary_keys,
|
212
219
|
foreign_keys=foreign_keys,
|
213
|
-
|
220
|
+
data=data,
|
214
221
|
sample_size=sample_size,
|
215
222
|
batch_size=batch_size,
|
216
223
|
previous_rows_size=previous_rows_size,
|
@@ -230,6 +237,7 @@ def _create_table_prompt(
|
|
230
237
|
primary_keys: dict[str, str] | None,
|
231
238
|
batch_size: int | None,
|
232
239
|
foreign_keys: list[ForeignKeyConfig] | None,
|
240
|
+
existing_data: pd.DataFrame | None,
|
233
241
|
context_data: pd.DataFrame | None,
|
234
242
|
non_context_data: dict[str, pd.DataFrame] | None,
|
235
243
|
previous_rows: list[dict] | None,
|
@@ -251,6 +259,11 @@ def _create_table_prompt(
|
|
251
259
|
prompt += f"\n## Previous {len(previous_rows)} Rows:\n\n"
|
252
260
|
prompt += f"{json.dumps(previous_rows, indent=2)}\n\n"
|
253
261
|
|
262
|
+
# add existing data to augment
|
263
|
+
if existing_data is not None:
|
264
|
+
prompt += f"\n## Existing Data to Augment:\n\n"
|
265
|
+
prompt += f"{existing_data.to_json(orient='records', date_format='iso', indent=2)}\n\n"
|
266
|
+
|
254
267
|
# define foreign keys
|
255
268
|
if foreign_keys:
|
256
269
|
prompt += "## Foreign Keys:\n\n"
|
@@ -285,26 +298,46 @@ def _create_table_prompt(
|
|
285
298
|
|
286
299
|
# add instructions
|
287
300
|
prompt += "\n## Instructions:\n\n"
|
288
|
-
|
301
|
+
|
302
|
+
verb = "generate" if existing_data is None else "augment"
|
303
|
+
|
304
|
+
n_rows = None
|
305
|
+
if existing_data is not None:
|
306
|
+
n_rows = len(existing_data)
|
307
|
+
elif not foreign_keys:
|
289
308
|
assert batch_size is not None
|
290
|
-
|
291
|
-
|
309
|
+
n_rows = batch_size
|
310
|
+
|
311
|
+
prompt += f"{verb.capitalize()} data for the `{name}` table.\n\n"
|
312
|
+
if n_rows is not None:
|
313
|
+
prompt += f"Number of rows to {verb}: `{n_rows}`.\n\n"
|
314
|
+
|
315
|
+
if foreign_keys:
|
292
316
|
prompt += (
|
293
|
-
f"Generate data for the `{name}` table. "
|
294
317
|
f"The first Foreign Key column from Foreign Keys section may only contain values from Context Table Data. "
|
295
318
|
f"The following Foreign Key columns from Foreign Keys section (if exists) may only contain values from Non-Context Table Data sections. "
|
296
319
|
f"If either relevant Context Table Data or Non-Context Table Data is not present, this means that table has self-dependency. "
|
297
|
-
f"In this case, ensure that the
|
320
|
+
f"In this case, ensure that the foreign keys are consistent with primary keys of the table. "
|
298
321
|
f"Pay attention to prompt of the Foreign Key column to understand the relationship.\n\n"
|
299
322
|
)
|
300
323
|
|
324
|
+
if existing_data is not None:
|
325
|
+
prompt += (
|
326
|
+
f"You are given existing data for the `{name}` table and asked to generate "
|
327
|
+
f"values for the missing columns. The existing data contains column(s): {', '.join(existing_data.columns)}. "
|
328
|
+
f"You need to generate values for column(s): {', '.join(columns.keys() - existing_data.columns)}. "
|
329
|
+
f"Ensure that the generated values are contextually appropriate and consistent with the existing data. "
|
330
|
+
f"Use the existing columns' values to inform the generation of new values. "
|
331
|
+
f"Don't generate new rows, only augment the existing data.\n\n"
|
332
|
+
)
|
333
|
+
|
301
334
|
if previous_rows:
|
302
335
|
prompt += (
|
303
|
-
"
|
336
|
+
f"{verb.capitalize()} new rows that maintain consistency with the previous rows where appropriate. "
|
304
337
|
"Don't copy previous rows in the output. "
|
305
338
|
"Don't pay attention to the number of previous rows; there might have been more generated than provided.\n\n"
|
306
339
|
)
|
307
|
-
prompt += "Do not use code to
|
340
|
+
prompt += f"Do not use code to {verb} the data.\n\n"
|
308
341
|
prompt += "Return the full data as a JSON string.\n"
|
309
342
|
|
310
343
|
return prompt
|
@@ -317,7 +350,7 @@ def _create_table_rows_generator(
|
|
317
350
|
columns: dict[str, ColumnConfig],
|
318
351
|
foreign_keys: list[ForeignKeyConfig] | None,
|
319
352
|
primary_keys: dict[str, str] | None,
|
320
|
-
|
353
|
+
data: dict[str, pd.DataFrame],
|
321
354
|
sample_size: int,
|
322
355
|
batch_size: int,
|
323
356
|
previous_rows_size: int,
|
@@ -393,27 +426,31 @@ def _create_table_rows_generator(
|
|
393
426
|
"The model does not support structured output / JSON mode."
|
394
427
|
)
|
395
428
|
|
429
|
+
# derive data for augmentation
|
430
|
+
existing_data: pd.DataFrame | None = None
|
431
|
+
if name in data:
|
432
|
+
existing_data = data[name]
|
433
|
+
sample_size = len(existing_data)
|
434
|
+
|
396
435
|
# derive context data (if first foreign key is present) and harmonize sample size accordingly
|
397
436
|
context_data: pd.DataFrame | None = None
|
398
437
|
if foreign_keys and foreign_keys[0].referenced_table != name: # self-dependency is not considered as context
|
399
438
|
context_table_name = foreign_keys[0].referenced_table
|
400
|
-
assert
|
401
|
-
|
402
|
-
context_data = generated_data[context_table_name]
|
439
|
+
assert context_table_name in data
|
440
|
+
context_data = data[context_table_name]
|
403
441
|
batch_size = 1 # generate one sequence at a time
|
404
442
|
sample_size = len(context_data)
|
405
443
|
|
406
444
|
# derive non-context data (if more than one foreign key is present)
|
407
445
|
non_context_data: dict[str, pd.DataFrame] = {}
|
408
446
|
if foreign_keys and len(foreign_keys) > 1:
|
409
|
-
assert generated_data is not None
|
410
447
|
assert non_context_size is not None
|
411
448
|
for fk in foreign_keys[1:]:
|
412
449
|
if fk.referenced_table == name: # self-dependency is not considered as non-context
|
413
450
|
continue
|
414
451
|
non_context_table_name = fk.referenced_table
|
415
|
-
assert non_context_table_name in
|
416
|
-
non_context_data[non_context_table_name] =
|
452
|
+
assert non_context_table_name in data
|
453
|
+
non_context_data[non_context_table_name] = data[non_context_table_name]
|
417
454
|
|
418
455
|
litellm_kwargs = {
|
419
456
|
"response_format": create_table_response_format(columns=columns),
|
@@ -424,14 +461,33 @@ def _create_table_rows_generator(
|
|
424
461
|
"stream": True,
|
425
462
|
}
|
426
463
|
|
464
|
+
batch_idx = 0
|
427
465
|
yielded_sequences = 0
|
428
466
|
previous_rows = deque(maxlen=previous_rows_size)
|
429
467
|
for context_batch in batch_infinitely(context_data):
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
468
|
+
# pick existing rows for current batch
|
469
|
+
existing_batch: pd.DataFrame | None = None
|
470
|
+
if existing_data is not None:
|
471
|
+
if context_batch is None:
|
472
|
+
# progressively pick portions of existing data in case of root tables
|
473
|
+
assert batch_size is not None
|
474
|
+
existing_batch = existing_data.iloc[batch_idx * batch_size : (batch_idx + 1) * batch_size]
|
475
|
+
else:
|
476
|
+
# pick existing rows that match current context batch
|
477
|
+
assert foreign_keys is not None
|
478
|
+
context_table_name, foreign_key = foreign_keys[0].referenced_table, foreign_keys[0].column
|
479
|
+
context_primary_key = primary_keys[context_table_name]
|
480
|
+
existing_batch = existing_data[existing_data[foreign_key].isin(context_batch[context_primary_key])]
|
481
|
+
if existing_batch.empty:
|
482
|
+
existing_batch = None
|
483
|
+
|
484
|
+
# sample candidate rows from non-context tables for current batch
|
485
|
+
non_context_batch: dict[str, pd.DataFrame] | None = None
|
486
|
+
if non_context_data:
|
487
|
+
non_context_batch = {
|
488
|
+
table_name: df.sample(frac=1.0).head(non_context_size) for table_name, df in non_context_data.items()
|
489
|
+
}
|
490
|
+
|
435
491
|
llm_prompt = _create_table_prompt(
|
436
492
|
name=name,
|
437
493
|
prompt=prompt,
|
@@ -439,6 +495,7 @@ def _create_table_rows_generator(
|
|
439
495
|
primary_keys=primary_keys,
|
440
496
|
batch_size=batch_size,
|
441
497
|
foreign_keys=foreign_keys,
|
498
|
+
existing_data=existing_batch,
|
442
499
|
context_data=context_batch,
|
443
500
|
non_context_data=non_context_batch,
|
444
501
|
previous_rows=list(previous_rows),
|
@@ -466,6 +523,8 @@ def _create_table_rows_generator(
|
|
466
523
|
if yielded_sequences >= sample_size:
|
467
524
|
return # move to next table
|
468
525
|
|
526
|
+
batch_idx += 1
|
527
|
+
|
469
528
|
|
470
529
|
def _convert_table_rows_generator_to_df(
|
471
530
|
table_rows_generator: Generator[dict],
|
@@ -559,6 +618,7 @@ def sample(
|
|
559
618
|
*,
|
560
619
|
tables: dict[str, dict],
|
561
620
|
sample_size: int | dict[str, int] = 10,
|
621
|
+
existing_data: dict[str, pd.DataFrame] | None = None,
|
562
622
|
model: str = "openai/gpt-4.1-nano",
|
563
623
|
api_key: str | None = None,
|
564
624
|
temperature: float = 1.0,
|
@@ -574,12 +634,14 @@ def sample(
|
|
574
634
|
If a single integer is provided, the same number of rows will be generated for each subject table.
|
575
635
|
If a dictionary is provided, the number of rows to generate for each subject table can be specified
|
576
636
|
individually.
|
577
|
-
Default is 10.
|
637
|
+
Default is 10. Ignored if existing_data is provided.
|
638
|
+
existing_data (dict[str, pd.DataFrame] | None): Existing data to augment. If provided, the sample_size argument is ignored.
|
639
|
+
Default is None.
|
578
640
|
model (str): The LiteLLM chat completion model to be used. Requires support for structured output / JSON mode.
|
579
641
|
Examples include:
|
580
|
-
- `openai/gpt-4.1-nano` (default)
|
581
|
-
- `openai/gpt-4.1-mini`
|
582
|
-
- `openai/gpt-4.1`
|
642
|
+
- `openai/gpt-4.1-nano` (default; fastest)
|
643
|
+
- `openai/gpt-4.1-mini` (slower, but smarter)
|
644
|
+
- `openai/gpt-4.1` (slowest, but smartest)
|
583
645
|
- `gemini/gemini-2.0-flash`
|
584
646
|
- `gemini/gemini-2.5-flash-preview-04-17`
|
585
647
|
- `groq/llama-3.3-70b-versatile`
|
@@ -628,7 +690,7 @@ def sample(
|
|
628
690
|
"customer_id": {"prompt": "the unique id of the customer", "dtype": "integer"},
|
629
691
|
"name": {"prompt": "first name and last name of the customer", "dtype": "string"},
|
630
692
|
},
|
631
|
-
"primary_key": "customer_id",
|
693
|
+
"primary_key": "customer_id", # single string; no composite keys allowed
|
632
694
|
},
|
633
695
|
"warehouses": {
|
634
696
|
"prompt": "Warehouses of a hardware store",
|
@@ -683,17 +745,73 @@ def sample(
|
|
683
745
|
df_orders = data["orders"]
|
684
746
|
df_items = data["items"]
|
685
747
|
```
|
748
|
+
|
749
|
+
Example of data augmentation:
|
750
|
+
```python
|
751
|
+
from mostlyai import mock
|
752
|
+
import pandas as pd
|
753
|
+
|
754
|
+
tables = {
|
755
|
+
"customers": {
|
756
|
+
"prompt": "Customers of a hardware store",
|
757
|
+
"columns": {
|
758
|
+
"customer_id": {"prompt": "the unique id of the customer", "dtype": "integer"},
|
759
|
+
"name": {"prompt": "first name and last name of the customer", "dtype": "string"},
|
760
|
+
"email": {"prompt": "email address of the customer", "dtype": "string"},
|
761
|
+
"phone": {"prompt": "phone number of the customer", "dtype": "string"},
|
762
|
+
"loyalty_level": {"dtype": "category", "values": ["bronze", "silver", "gold", "platinum"]},
|
763
|
+
},
|
764
|
+
"primary_key": "customer_id",
|
765
|
+
},
|
766
|
+
"orders": {
|
767
|
+
"prompt": "Orders of a Customer",
|
768
|
+
"columns": {
|
769
|
+
"order_id": {"prompt": "the unique id of the order", "dtype": "string"},
|
770
|
+
"customer_id": {"prompt": "the customer id for that order", "dtype": "integer"},
|
771
|
+
"order_date": {"prompt": "the date when the order was placed", "dtype": "date"},
|
772
|
+
"total_amount": {"prompt": "order amount in USD", "dtype": "float"},
|
773
|
+
"status": {"dtype": "category", "values": ["pending", "shipped", "delivered", "cancelled"]},
|
774
|
+
},
|
775
|
+
"primary_key": "order_id",
|
776
|
+
"foreign_keys": [
|
777
|
+
{
|
778
|
+
"column": "customer_id",
|
779
|
+
"referenced_table": "customers",
|
780
|
+
"prompt": "each customer has anywhere between 1 and 3 orders",
|
781
|
+
},
|
782
|
+
],
|
783
|
+
},
|
784
|
+
}
|
785
|
+
existing_customers = pd.DataFrame({
|
786
|
+
"customer_id": [101, 102, 103],
|
787
|
+
"name": ["John Davis", "Maria Garcia", "Wei Chen"],
|
788
|
+
})
|
789
|
+
existing_orders = pd.DataFrame({
|
790
|
+
"order_id": ["ORD-001", "ORD-002"],
|
791
|
+
"customer_id": [101, 101],
|
792
|
+
})
|
793
|
+
data = mock.sample(
|
794
|
+
tables=tables,
|
795
|
+
existing_data={
|
796
|
+
"customers": existing_customers,
|
797
|
+
"orders": existing_orders,
|
798
|
+
},
|
799
|
+
model="openai/gpt-4.1-nano"
|
800
|
+
)
|
801
|
+
df_customers = data["customers"]
|
802
|
+
df_orders = data["orders"]
|
803
|
+
```
|
686
804
|
"""
|
687
805
|
|
688
806
|
config = MockConfig(tables)
|
689
807
|
llm_config = LLMConfig(model=model, api_key=api_key, temperature=temperature, top_p=top_p)
|
690
808
|
|
691
|
-
sample_size = _harmonize_sample_size(sample_size, config)
|
809
|
+
sample_size: dict[str, int] = _harmonize_sample_size(sample_size, config)
|
692
810
|
primary_keys = {table_name: table_config.primary_key for table_name, table_config in config.root.items()}
|
693
811
|
|
694
812
|
execution_plan: list[str] = _build_execution_plan(config)
|
695
813
|
|
696
|
-
data: dict[str, pd.DataFrame] = {}
|
814
|
+
data: dict[str, pd.DataFrame] = existing_data or {}
|
697
815
|
|
698
816
|
for table_name in execution_plan:
|
699
817
|
table_config = config.root[table_name]
|
@@ -703,7 +821,7 @@ def sample(
|
|
703
821
|
columns=table_config.columns,
|
704
822
|
foreign_keys=table_config.foreign_keys,
|
705
823
|
primary_keys=primary_keys,
|
706
|
-
|
824
|
+
data=data,
|
707
825
|
sample_size=sample_size[table_name],
|
708
826
|
batch_size=30, # generate 30 root table rows at a time
|
709
827
|
previous_rows_size=10, # present 10 previously generated rows to the LLM
|
@@ -1,7 +1,7 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: mostlyai-mock
|
3
|
-
Version: 0.1.
|
4
|
-
Summary:
|
3
|
+
Version: 0.1.3
|
4
|
+
Summary: LLM-generated Mock Data
|
5
5
|
Project-URL: homepage, https://github.com/mostly-ai/mostlyai-mock
|
6
6
|
Project-URL: repository, https://github.com/mostly-ai/mostlyai-mock
|
7
7
|
Project-URL: documentation, https://mostly-ai.github.io/mostlyai-mock/
|
@@ -32,7 +32,7 @@ Requires-Dist: pyarrow>=14.0.0
|
|
32
32
|
Requires-Dist: pydantic<3.0.0,>=2.0.0
|
33
33
|
Description-Content-Type: text/markdown
|
34
34
|
|
35
|
-
#
|
35
|
+
# LLM-generated Mock Data 🔮
|
36
36
|
|
37
37
|
[](https://mostly-ai.github.io/mostlyai-mock/) [](https://pypi.org/project/mostlyai-mock/)  
|
38
38
|
|
@@ -66,7 +66,7 @@ os.environ["OPENAI_API_KEY"] = "your-api-key"
|
|
66
66
|
|
67
67
|
Note: You will need to obtain your API key directly from the LLM service provider (e.g. for OpenAI from [here](https://platform.openai.com/api-keys)). The LLM endpoint will be determined by the chosen `model` when making calls to `mock.sample`.
|
68
68
|
|
69
|
-
3. Create your first basic
|
69
|
+
3. Create your first basic mock table from scratch
|
70
70
|
|
71
71
|
```python
|
72
72
|
from mostlyai import mock
|
@@ -88,7 +88,7 @@ tables = {
|
|
88
88
|
}
|
89
89
|
}
|
90
90
|
df = mock.sample(
|
91
|
-
tables=tables,
|
91
|
+
tables=tables, # provide table and column definitions
|
92
92
|
sample_size=10, # generate 10 records
|
93
93
|
model="openai/gpt-4.1-nano", # select the LLM model (optional)
|
94
94
|
)
|
@@ -106,7 +106,7 @@ print(df)
|
|
106
106
|
# 9 FR Louis Martin male 44 1980-12-05 2025-01-07 10:40:00 False 270.0 103
|
107
107
|
```
|
108
108
|
|
109
|
-
4. Create your first multi-table
|
109
|
+
4. Create your first multi-table mock dataset
|
110
110
|
|
111
111
|
```python
|
112
112
|
from mostlyai import mock
|
@@ -168,7 +168,7 @@ tables = {
|
|
168
168
|
},
|
169
169
|
}
|
170
170
|
data = mock.sample(
|
171
|
-
tables=tables,
|
171
|
+
tables=tables,
|
172
172
|
sample_size=2,
|
173
173
|
model="openai/gpt-4.1"
|
174
174
|
)
|
@@ -201,7 +201,7 @@ print(data["items"])
|
|
201
201
|
# 9 ITM-84312 ORD-11385 Standard Delivery Service 48.5
|
202
202
|
```
|
203
203
|
|
204
|
-
6. Create your first self-referencing
|
204
|
+
6. Create your first self-referencing mock table
|
205
205
|
|
206
206
|
```python
|
207
207
|
from mostlyai import mock
|
@@ -240,6 +240,44 @@ print(df)
|
|
240
240
|
# 9 10 Felix Bennett 3 Senior Systems Analyst
|
241
241
|
```
|
242
242
|
|
243
|
+
7. Enrich existing data with additional columns
|
244
|
+
|
245
|
+
```python
|
246
|
+
from mostlyai import mock
|
247
|
+
import pandas as pd
|
248
|
+
|
249
|
+
tables = {
|
250
|
+
"guests": {
|
251
|
+
"prompt": "Guests of an Alpine ski hotel in Austria",
|
252
|
+
"columns": {
|
253
|
+
"guest_id": {"prompt": "the unique id of the guest", "dtype": "integer"},
|
254
|
+
"name": {"prompt": "first name and last name of the guest", "dtype": "string"},
|
255
|
+
"nationality": {"prompt": "2-letter code for the nationality", "dtype": "string"},
|
256
|
+
"gender": {"dtype": "category", "values": ["male", "female"]},
|
257
|
+
"age": {"prompt": "age in years; min: 18, max: 80; avg: 25", "dtype": "integer"},
|
258
|
+
"room_number": {"prompt": "room number", "dtype": "integer"},
|
259
|
+
"is_vip": {"prompt": "is the guest a VIP", "dtype": "boolean"},
|
260
|
+
},
|
261
|
+
"primary_key": "guest_id",
|
262
|
+
}
|
263
|
+
}
|
264
|
+
existing_guests = pd.DataFrame({
|
265
|
+
"guest_id": [1, 2, 3],
|
266
|
+
"name": ["Anna Schmidt", "Marco Rossi", "Sophie Dupont"],
|
267
|
+
"nationality": ["DE", "IT", "FR"],
|
268
|
+
})
|
269
|
+
df = mock.sample(
|
270
|
+
tables=tables,
|
271
|
+
existing_data={"guests": existing_guests},
|
272
|
+
model="openai/gpt-4.1-nano"
|
273
|
+
)
|
274
|
+
print(df)
|
275
|
+
# guest_id name nationality gender age room_number is_vip
|
276
|
+
# 0 1 Anna Schmidt DE female 29 101 True
|
277
|
+
# 1 2 Marco Rossi IT male 34 102 False
|
278
|
+
# 2 3 Sophie Dupont FR female 27 103 False
|
279
|
+
```
|
280
|
+
|
243
281
|
## MCP Server
|
244
282
|
|
245
283
|
This repo comes with an MCP Server. It can be easily consumed by any MCP Client by providing the following configuration:
|
@@ -0,0 +1,8 @@
|
|
1
|
+
mostlyai/mock/__init__.py,sha256=38sp2aKJVtPa3koRxanlBS6fe_ccVQvIieILlKb-xuw,714
|
2
|
+
mostlyai/mock/core.py,sha256=lO5OzuOz7bvjaLHpfiN-wyjFBPD0oSHSqEA4v8q436Y,35318
|
3
|
+
mostlyai/mock/mcp_server.py,sha256=Vp0bWzE8wUyA6k4PHLa0TbkuI9s07E48xPrAUgf_5qU,1563
|
4
|
+
mostlyai_mock-0.1.3.dist-info/METADATA,sha256=rkHeGDlNUM2cqSxWY_R47FWXsOLktpdl_COja8zYz28,14161
|
5
|
+
mostlyai_mock-0.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
6
|
+
mostlyai_mock-0.1.3.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
|
7
|
+
mostlyai_mock-0.1.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
8
|
+
mostlyai_mock-0.1.3.dist-info/RECORD,,
|
@@ -1,8 +0,0 @@
|
|
1
|
-
mostlyai/mock/__init__.py,sha256=rwv3TboU77Sn6Py635JgvQu64d_R2s1Nc0dIDDbHAZA,714
|
2
|
-
mostlyai/mock/core.py,sha256=MEDVp_woSXlD0JanS3ocxWBa_XilpaWzPhsvNzTZuX0,30138
|
3
|
-
mostlyai/mock/mcp_server.py,sha256=Vp0bWzE8wUyA6k4PHLa0TbkuI9s07E48xPrAUgf_5qU,1563
|
4
|
-
mostlyai_mock-0.1.1.dist-info/METADATA,sha256=tY5BvODgzoiqox8yS8ISfxWtVB1wbch1KNW8CikRImc,12713
|
5
|
-
mostlyai_mock-0.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
6
|
-
mostlyai_mock-0.1.1.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
|
7
|
-
mostlyai_mock-0.1.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
8
|
-
mostlyai_mock-0.1.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|