mostlyai-mock 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mostlyai/mock/__init__.py +1 -1
- mostlyai/mock/core.py +38 -8
- {mostlyai_mock-0.1.3.dist-info → mostlyai_mock-0.1.4.dist-info}/METADATA +1 -1
- mostlyai_mock-0.1.4.dist-info/RECORD +8 -0
- mostlyai_mock-0.1.3.dist-info/RECORD +0 -8
- {mostlyai_mock-0.1.3.dist-info → mostlyai_mock-0.1.4.dist-info}/WHEEL +0 -0
- {mostlyai_mock-0.1.3.dist-info → mostlyai_mock-0.1.4.dist-info}/entry_points.txt +0 -0
- {mostlyai_mock-0.1.3.dist-info → mostlyai_mock-0.1.4.dist-info}/licenses/LICENSE +0 -0
mostlyai/mock/__init__.py
CHANGED
mostlyai/mock/core.py
CHANGED
@@ -626,24 +626,25 @@ def sample(
|
|
626
626
|
return_type: Literal["auto", "dict"] = "auto",
|
627
627
|
) -> pd.DataFrame | dict[str, pd.DataFrame]:
|
628
628
|
"""
|
629
|
-
Generate mock data by prompting an LLM.
|
629
|
+
Generate mock data from scratch or enrich existing data by prompting an LLM.
|
630
630
|
|
631
631
|
Args:
|
632
632
|
tables (dict[str, dict]): The table specifications to generate mock data for. See examples for usage.
|
633
633
|
sample_size (int | dict[str, int]): The number of rows to generate for each subject table.
|
634
634
|
If a single integer is provided, the same number of rows will be generated for each subject table.
|
635
|
-
If a dictionary is provided, the number of rows to generate for each subject table can be specified
|
636
|
-
individually.
|
635
|
+
If a dictionary is provided, the number of rows to generate for each subject table can be specified individually.
|
637
636
|
Default is 10. Ignored if existing_data is provided.
|
637
|
+
If a table has a foreign key, the sample size is determined by the corresponding foreign key prompt. If nothing is specified, a few rows per parent record are generated.
|
638
638
|
existing_data (dict[str, pd.DataFrame] | None): Existing data to augment. If provided, the sample_size argument is ignored.
|
639
639
|
Default is None.
|
640
|
-
model (str): The LiteLLM chat completion model to be used.
|
640
|
+
model (str): The LiteLLM chat completion model to be used. Model needs to support structured output / JSON mode.
|
641
641
|
Examples include:
|
642
|
-
- `openai/gpt-4.1-nano` (default;
|
642
|
+
- `openai/gpt-4.1-nano` (default; fast, and smart)
|
643
643
|
- `openai/gpt-4.1-mini` (slower, but smarter)
|
644
644
|
- `openai/gpt-4.1` (slowest, but smartest)
|
645
645
|
- `gemini/gemini-2.0-flash`
|
646
646
|
- `gemini/gemini-2.5-flash-preview-04-17`
|
647
|
+
- `groq/gemma2-9b-it`
|
647
648
|
- `groq/llama-3.3-70b-versatile`
|
648
649
|
- `anthropic/claude-3-7-sonnet-latest`
|
649
650
|
See https://docs.litellm.ai/docs/providers/ for more options.
|
@@ -656,7 +657,7 @@ def sample(
|
|
656
657
|
- pd.DataFrame: A single DataFrame containing the generated mock data, if only one table is provided.
|
657
658
|
- dict[str, pd.DataFrame]: A dictionary containing the generated mock data for each table, if multiple tables are provided.
|
658
659
|
|
659
|
-
Example of single table (without PK):
|
660
|
+
Example of generating mock data for a single table (without PK):
|
660
661
|
```python
|
661
662
|
from mostlyai import mock
|
662
663
|
|
@@ -679,7 +680,7 @@ def sample(
|
|
679
680
|
df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-4.1-nano")
|
680
681
|
```
|
681
682
|
|
682
|
-
Example of multiple tables (with PK/FK relationships):
|
683
|
+
Example of generating mock data for multiple tables (with PK/FK relationships):
|
683
684
|
```python
|
684
685
|
from mostlyai import mock
|
685
686
|
|
@@ -746,7 +747,36 @@ def sample(
|
|
746
747
|
df_items = data["items"]
|
747
748
|
```
|
748
749
|
|
749
|
-
Example of
|
750
|
+
Example of enriching a single dataframe:
|
751
|
+
```python
|
752
|
+
from mostlyai import mock
|
753
|
+
import pandas as pd
|
754
|
+
|
755
|
+
tables = {
|
756
|
+
"patients": {
|
757
|
+
"prompt": "Patients of a hospital in Finland",
|
758
|
+
"columns": {
|
759
|
+
"age": {},
|
760
|
+
"gender": {},
|
761
|
+
"full_name": {"prompt": "first name and last name of the patient", "dtype": "string"},
|
762
|
+
"date_of_birth": {"prompt": "date of birth", "dtype": "date"},
|
763
|
+
"place_of_birth": {"prompt": "place of birth", "dtype": "string"},
|
764
|
+
},
|
765
|
+
},
|
766
|
+
}
|
767
|
+
existing_df = pd.DataFrame({
|
768
|
+
"age": [25, 30, 35, 40],
|
769
|
+
"gender": ["male", "male", "female", "female"],
|
770
|
+
})
|
771
|
+
enriched_df = mock.sample(
|
772
|
+
tables=tables,
|
773
|
+
existing_data={"patients": existing_df},
|
774
|
+
model="openai/gpt-4.1-nano"
|
775
|
+
)
|
776
|
+
enriched_df
|
777
|
+
```
|
778
|
+
|
779
|
+
Example of enriching / augmenting an existing dataset:
|
750
780
|
```python
|
751
781
|
from mostlyai import mock
|
752
782
|
import pandas as pd
|
@@ -0,0 +1,8 @@
|
|
1
|
+
mostlyai/mock/__init__.py,sha256=EvV_Tp6ExzQPq4apGq_8F25qw_paNTcQEC94nIVOEog,714
|
2
|
+
mostlyai/mock/core.py,sha256=ubarMA3VUlXdjUsCXQK_mD_kWPkTMOYvLz9G4OughGk,36532
|
3
|
+
mostlyai/mock/mcp_server.py,sha256=Vp0bWzE8wUyA6k4PHLa0TbkuI9s07E48xPrAUgf_5qU,1563
|
4
|
+
mostlyai_mock-0.1.4.dist-info/METADATA,sha256=jibPe0pKcwqyPBoyc7H98LPd72vkGZBStdw_yMNVvJI,14161
|
5
|
+
mostlyai_mock-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
6
|
+
mostlyai_mock-0.1.4.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
|
7
|
+
mostlyai_mock-0.1.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
8
|
+
mostlyai_mock-0.1.4.dist-info/RECORD,,
|
@@ -1,8 +0,0 @@
|
|
1
|
-
mostlyai/mock/__init__.py,sha256=38sp2aKJVtPa3koRxanlBS6fe_ccVQvIieILlKb-xuw,714
|
2
|
-
mostlyai/mock/core.py,sha256=lO5OzuOz7bvjaLHpfiN-wyjFBPD0oSHSqEA4v8q436Y,35318
|
3
|
-
mostlyai/mock/mcp_server.py,sha256=Vp0bWzE8wUyA6k4PHLa0TbkuI9s07E48xPrAUgf_5qU,1563
|
4
|
-
mostlyai_mock-0.1.3.dist-info/METADATA,sha256=rkHeGDlNUM2cqSxWY_R47FWXsOLktpdl_COja8zYz28,14161
|
5
|
-
mostlyai_mock-0.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
6
|
-
mostlyai_mock-0.1.3.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
|
7
|
-
mostlyai_mock-0.1.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
8
|
-
mostlyai_mock-0.1.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|