mostlyai-mock 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mostlyai/mock/__init__.py CHANGED
@@ -15,4 +15,4 @@
15
15
  from mostlyai.mock.core import sample
16
16
 
17
17
  __all__ = ["sample"]
18
- __version__ = "0.1.3" # Do not set this manually. Use poetry version [params].
18
+ __version__ = "0.1.4" # Do not set this manually. Use poetry version [params].
mostlyai/mock/core.py CHANGED
@@ -626,24 +626,25 @@ def sample(
626
626
  return_type: Literal["auto", "dict"] = "auto",
627
627
  ) -> pd.DataFrame | dict[str, pd.DataFrame]:
628
628
  """
629
- Generate mock data by prompting an LLM.
629
+ Generate mock data from scratch or enrich existing data by prompting an LLM.
630
630
 
631
631
  Args:
632
632
  tables (dict[str, dict]): The table specifications to generate mock data for. See examples for usage.
633
633
  sample_size (int | dict[str, int]): The number of rows to generate for each subject table.
634
634
  If a single integer is provided, the same number of rows will be generated for each subject table.
635
- If a dictionary is provided, the number of rows to generate for each subject table can be specified
636
- individually.
635
+ If a dictionary is provided, the number of rows to generate for each subject table can be specified individually.
637
636
  Default is 10. Ignored if existing_data is provided.
637
+ If a table has a foreign key, the sample size is determined by the corresponding foreign key prompt. If nothing is specified, a few rows per parent record are generated.
638
638
  existing_data (dict[str, pd.DataFrame] | None): Existing data to augment. If provided, the sample_size argument is ignored.
639
639
  Default is None.
640
- model (str): The LiteLLM chat completion model to be used. Requires support for structured output / JSON mode.
640
+ model (str): The LiteLLM chat completion model to be used. Model needs to support structured output / JSON mode.
641
641
  Examples include:
642
- - `openai/gpt-4.1-nano` (default; fastest)
642
+ - `openai/gpt-4.1-nano` (default; fast and smart)
643
643
  - `openai/gpt-4.1-mini` (slower, but smarter)
644
644
  - `openai/gpt-4.1` (slowest, but smartest)
645
645
  - `gemini/gemini-2.0-flash`
646
646
  - `gemini/gemini-2.5-flash-preview-04-17`
647
+ - `groq/gemma2-9b-it`
647
648
  - `groq/llama-3.3-70b-versatile`
648
649
  - `anthropic/claude-3-7-sonnet-latest`
649
650
  See https://docs.litellm.ai/docs/providers/ for more options.
@@ -656,7 +657,7 @@ def sample(
656
657
  - pd.DataFrame: A single DataFrame containing the generated mock data, if only one table is provided.
657
658
  - dict[str, pd.DataFrame]: A dictionary containing the generated mock data for each table, if multiple tables are provided.
658
659
 
659
- Example of single table (without PK):
660
+ Example of generating mock data for a single table (without PK):
660
661
  ```python
661
662
  from mostlyai import mock
662
663
 
@@ -679,7 +680,7 @@ def sample(
679
680
  df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-4.1-nano")
680
681
  ```
681
682
 
682
- Example of multiple tables (with PK/FK relationships):
683
+ Example of generating mock data for multiple tables (with PK/FK relationships):
683
684
  ```python
684
685
  from mostlyai import mock
685
686
 
@@ -746,7 +747,36 @@ def sample(
746
747
  df_items = data["items"]
747
748
  ```
748
749
 
749
- Example of data augmentation:
750
+ Example of enriching a single dataframe:
751
+ ```python
752
+ from mostlyai import mock
753
+ import pandas as pd
754
+
755
+ tables = {
756
+ "patients": {
757
+ "prompt": "Patients of a hospital in Finland",
758
+ "columns": {
759
+ "age": {},
760
+ "gender": {},
761
+ "full_name": {"prompt": "first name and last name of the patient", "dtype": "string"},
762
+ "date_of_birth": {"prompt": "date of birth", "dtype": "date"},
763
+ "place_of_birth": {"prompt": "place of birth", "dtype": "string"},
764
+ },
765
+ },
766
+ }
767
+ existing_df = pd.DataFrame({
768
+ "age": [25, 30, 35, 40],
769
+ "gender": ["male", "male", "female", "female"],
770
+ })
771
+ enriched_df = mock.sample(
772
+ tables=tables,
773
+ existing_data={"patients": existing_df},
774
+ model="openai/gpt-4.1-nano"
775
+ )
776
+ enriched_df
777
+ ```
778
+
779
+ Example of enriching / augmenting an existing dataset:
750
780
  ```python
751
781
  from mostlyai import mock
752
782
  import pandas as pd
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mostlyai-mock
3
- Version: 0.1.3
3
+ Version: 0.1.4
4
4
  Summary: LLM-generated Mock Data
5
5
  Project-URL: homepage, https://github.com/mostly-ai/mostlyai-mock
6
6
  Project-URL: repository, https://github.com/mostly-ai/mostlyai-mock
@@ -0,0 +1,8 @@
1
+ mostlyai/mock/__init__.py,sha256=EvV_Tp6ExzQPq4apGq_8F25qw_paNTcQEC94nIVOEog,714
2
+ mostlyai/mock/core.py,sha256=ubarMA3VUlXdjUsCXQK_mD_kWPkTMOYvLz9G4OughGk,36532
3
+ mostlyai/mock/mcp_server.py,sha256=Vp0bWzE8wUyA6k4PHLa0TbkuI9s07E48xPrAUgf_5qU,1563
4
+ mostlyai_mock-0.1.4.dist-info/METADATA,sha256=jibPe0pKcwqyPBoyc7H98LPd72vkGZBStdw_yMNVvJI,14161
5
+ mostlyai_mock-0.1.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
+ mostlyai_mock-0.1.4.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
7
+ mostlyai_mock-0.1.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
+ mostlyai_mock-0.1.4.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- mostlyai/mock/__init__.py,sha256=38sp2aKJVtPa3koRxanlBS6fe_ccVQvIieILlKb-xuw,714
2
- mostlyai/mock/core.py,sha256=lO5OzuOz7bvjaLHpfiN-wyjFBPD0oSHSqEA4v8q436Y,35318
3
- mostlyai/mock/mcp_server.py,sha256=Vp0bWzE8wUyA6k4PHLa0TbkuI9s07E48xPrAUgf_5qU,1563
4
- mostlyai_mock-0.1.3.dist-info/METADATA,sha256=rkHeGDlNUM2cqSxWY_R47FWXsOLktpdl_COja8zYz28,14161
5
- mostlyai_mock-0.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
- mostlyai_mock-0.1.3.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
7
- mostlyai_mock-0.1.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
- mostlyai_mock-0.1.3.dist-info/RECORD,,