PyPI - mostlyai-mock - Versions diffs - 0.2.0__tar.gz → 0.2.2__tar.gz - Mend

mostlyai-mock 0.2.0__tar.gz → 0.2.2__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

{mostlyai_mock-0.2.0 → mostlyai_mock-0.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mostlyai-mock
-Version: 0.2.0
+Version: 0.2.2
 Summary: Synthetic Mock Data
 Project-URL: homepage, https://github.com/mostly-ai/mostlyai-mock
 Project-URL: repository, https://github.com/mostly-ai/mostlyai-mock

{mostlyai_mock-0.2.0 → mostlyai_mock-0.2.2}/mostlyai/mock/__init__.py RENAMED Viewed

@@ -15,4 +15,4 @@
 from mostlyai.mock.core import sample
 __all__ = ["sample"]
-__version__ = "0.2.0"  # Do not set this manually. Use poetry version [params].
+__version__ = "0.2.2"  # Do not set this manually. Use poetry version [params].

{mostlyai_mock-0.2.0 → mostlyai_mock-0.2.2}/mostlyai/mock/core.py RENAMED Viewed

@@ -880,6 +880,10 @@ async def _create_table_rows_generator(
         # +2 because LLM may not always count the rows correctly
         batch_sizes[-1] = sample_size - sum(batch_sizes[:-1]) + 2
+    # emit initial progress message right away
+    if progress_callback:
+        await progress_callback(table=name, progress=0, total=n_total_batches, rows=0, elapsed_time=0)
     # initialize queues for async communication
     batch_queue = asyncio.PriorityQueue()
     result_queue = asyncio.Queue()
@@ -944,6 +948,7 @@ async def _create_table_rows_generator(
     n_completed_batches = 0
     n_yielded_sequences = 0
+    n_generated_rows = 0
     table_start_time = time.time()
     while n_yielded_sequences < sample_size:
         if n_completed_batches >= n_total_batches:
@@ -968,6 +973,7 @@ async def _create_table_rows_generator(
         rows = result
         for row_idx, row in enumerate(rows):
             yield (batch_idx, row)
+            n_generated_rows += 1
             if context_batches is None or row_idx == len(rows) - 1:
                 # in case of flat table, each row is considered a single sequence
                 # in case of linked table, all rows are considered a single sequence
@@ -982,7 +988,7 @@ async def _create_table_rows_generator(
                 table=name,
                 progress=n_completed_batches,
                 total=n_total_batches,
-                rows=n_yielded_sequences,
+                rows=n_generated_rows,
                 elapsed_time=round(elapsed_time, 2),
             )
         result_queue.task_done()
@@ -1328,6 +1334,7 @@ def sample(
         tables (dict[str, dict]): The table specifications to generate mock data for. See examples for usage.
             Note: Avoid using double quotes (`"`) and other special characters in column names.
             Available dtypes: `string`, `integer`, `float`, `category`, `boolean`, `date`, `datetime`.
+            Primary key dtypes: `integer` → auto-increment (1, 2, 3, ...); `string` → LLM-generated unique IDs.
         sample_size (int | dict[str, int]): The number of rows to generate for each subject table.
             If a single integer is provided, the same number of rows will be generated for each subject table.
             If a dictionary is provided, the number of rows to generate for each subject table can be specified individually.
@@ -1452,6 +1459,32 @@ def sample(
     df_items = data["items"]
     ```
+    Example of auto-increment integer primary keys (self-referencing table):
+    ```python
+    from mostlyai import mock
+    tables = {
+        "employees": {
+            "prompt": "Employees of a company",
+            "columns": {
+                "employee_id": {"dtype": "integer"},  # integer PK → auto-increment (1, 2, 3, ...)
+                "name": {"prompt": "first name and last name of the employee", "dtype": "string"},
+                "boss_id": {"dtype": "integer"},  # integer FK → references auto-incremented values
+                "role": {"prompt": "the role of the employee", "dtype": "string"},
+            },
+            "primary_key": "employee_id",
+            "foreign_keys": [
+                {
+                    "column": "boss_id",
+                    "referenced_table": "employees",
+                    "prompt": "each boss has at most 3 employees",
+                },
+            ],
+        }
+    }
+    df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-5", n_workers=1)
+    ```
     Example of enriching a single dataframe:
     ```python
     from mostlyai import mock

{mostlyai_mock-0.2.0 → mostlyai_mock-0.2.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "mostlyai-mock"
-version = "0.2.0"
+version = "0.2.2"
 description = "Synthetic Mock Data"
 authors = [{ name = "MOSTLY AI", email = "dev@mostly.ai" }]
 requires-python = ">=3.10"

{mostlyai_mock-0.2.0 → mostlyai_mock-0.2.2}/.gitignore RENAMED Viewed

File without changes

{mostlyai_mock-0.2.0 → mostlyai_mock-0.2.2}/LICENSE RENAMED Viewed

File without changes

{mostlyai_mock-0.2.0 → mostlyai_mock-0.2.2}/README.md RENAMED Viewed

File without changes

{mostlyai_mock-0.2.0 → mostlyai_mock-0.2.2}/mostlyai/mock/mcp_server.py RENAMED Viewed

File without changes

mostlyai-mock 0.2.0__tar.gz → 0.2.2__tar.gz

mostlyai-mock 0.2.0__tar.gz → 0.2.2__tar.gz