mostlyai-mock 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- mostlyai/mock/__init__.py +1 -1
- mostlyai/mock/core.py +34 -1
- {mostlyai_mock-0.2.0.dist-info → mostlyai_mock-0.2.2.dist-info}/METADATA +1 -1
- mostlyai_mock-0.2.2.dist-info/RECORD +8 -0
- mostlyai_mock-0.2.0.dist-info/RECORD +0 -8
- {mostlyai_mock-0.2.0.dist-info → mostlyai_mock-0.2.2.dist-info}/WHEEL +0 -0
- {mostlyai_mock-0.2.0.dist-info → mostlyai_mock-0.2.2.dist-info}/entry_points.txt +0 -0
- {mostlyai_mock-0.2.0.dist-info → mostlyai_mock-0.2.2.dist-info}/licenses/LICENSE +0 -0
mostlyai/mock/__init__.py
CHANGED
mostlyai/mock/core.py
CHANGED
|
@@ -880,6 +880,10 @@ async def _create_table_rows_generator(
|
|
|
880
880
|
# +2 because LLM may not always count the rows correctly
|
|
881
881
|
batch_sizes[-1] = sample_size - sum(batch_sizes[:-1]) + 2
|
|
882
882
|
|
|
883
|
+
# emit initial progress message right away
|
|
884
|
+
if progress_callback:
|
|
885
|
+
await progress_callback(table=name, progress=0, total=n_total_batches, rows=0, elapsed_time=0)
|
|
886
|
+
|
|
883
887
|
# initialize queues for async communication
|
|
884
888
|
batch_queue = asyncio.PriorityQueue()
|
|
885
889
|
result_queue = asyncio.Queue()
|
|
@@ -944,6 +948,7 @@ async def _create_table_rows_generator(
|
|
|
944
948
|
|
|
945
949
|
n_completed_batches = 0
|
|
946
950
|
n_yielded_sequences = 0
|
|
951
|
+
n_generated_rows = 0
|
|
947
952
|
table_start_time = time.time()
|
|
948
953
|
while n_yielded_sequences < sample_size:
|
|
949
954
|
if n_completed_batches >= n_total_batches:
|
|
@@ -968,6 +973,7 @@ async def _create_table_rows_generator(
|
|
|
968
973
|
rows = result
|
|
969
974
|
for row_idx, row in enumerate(rows):
|
|
970
975
|
yield (batch_idx, row)
|
|
976
|
+
n_generated_rows += 1
|
|
971
977
|
if context_batches is None or row_idx == len(rows) - 1:
|
|
972
978
|
# in case of flat table, each row is considered a single sequence
|
|
973
979
|
# in case of linked table, all rows are considered a single sequence
|
|
@@ -982,7 +988,7 @@ async def _create_table_rows_generator(
|
|
|
982
988
|
table=name,
|
|
983
989
|
progress=n_completed_batches,
|
|
984
990
|
total=n_total_batches,
|
|
985
|
-
rows=
|
|
991
|
+
rows=n_generated_rows,
|
|
986
992
|
elapsed_time=round(elapsed_time, 2),
|
|
987
993
|
)
|
|
988
994
|
result_queue.task_done()
|
|
@@ -1328,6 +1334,7 @@ def sample(
|
|
|
1328
1334
|
tables (dict[str, dict]): The table specifications to generate mock data for. See examples for usage.
|
|
1329
1335
|
Note: Avoid using double quotes (`"`) and other special characters in column names.
|
|
1330
1336
|
Available dtypes: `string`, `integer`, `float`, `category`, `boolean`, `date`, `datetime`.
|
|
1337
|
+
Primary key dtypes: `integer` → auto-increment (1, 2, 3, ...); `string` → LLM-generated unique IDs.
|
|
1331
1338
|
sample_size (int | dict[str, int]): The number of rows to generate for each subject table.
|
|
1332
1339
|
If a single integer is provided, the same number of rows will be generated for each subject table.
|
|
1333
1340
|
If a dictionary is provided, the number of rows to generate for each subject table can be specified individually.
|
|
@@ -1452,6 +1459,32 @@ def sample(
|
|
|
1452
1459
|
df_items = data["items"]
|
|
1453
1460
|
```
|
|
1454
1461
|
|
|
1462
|
+
Example of auto-increment integer primary keys (self-referencing table):
|
|
1463
|
+
```python
|
|
1464
|
+
from mostlyai import mock
|
|
1465
|
+
|
|
1466
|
+
tables = {
|
|
1467
|
+
"employees": {
|
|
1468
|
+
"prompt": "Employees of a company",
|
|
1469
|
+
"columns": {
|
|
1470
|
+
"employee_id": {"dtype": "integer"}, # integer PK → auto-increment (1, 2, 3, ...)
|
|
1471
|
+
"name": {"prompt": "first name and last name of the employee", "dtype": "string"},
|
|
1472
|
+
"boss_id": {"dtype": "integer"}, # integer FK → references auto-incremented values
|
|
1473
|
+
"role": {"prompt": "the role of the employee", "dtype": "string"},
|
|
1474
|
+
},
|
|
1475
|
+
"primary_key": "employee_id",
|
|
1476
|
+
"foreign_keys": [
|
|
1477
|
+
{
|
|
1478
|
+
"column": "boss_id",
|
|
1479
|
+
"referenced_table": "employees",
|
|
1480
|
+
"prompt": "each boss has at most 3 employees",
|
|
1481
|
+
},
|
|
1482
|
+
],
|
|
1483
|
+
}
|
|
1484
|
+
}
|
|
1485
|
+
df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-5", n_workers=1)
|
|
1486
|
+
```
|
|
1487
|
+
|
|
1455
1488
|
Example of enriching a single dataframe:
|
|
1456
1489
|
```python
|
|
1457
1490
|
from mostlyai import mock
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
mostlyai/mock/__init__.py,sha256=AH7k50HRTH8fZE6Sy-ZjssidQ7k9rknhICE5f33Z45A,714
|
|
2
|
+
mostlyai/mock/core.py,sha256=mfeEs60vkUlo0aaaGjrYi_Dww1mj98fMBGmNtcznteE,70373
|
|
3
|
+
mostlyai/mock/mcp_server.py,sha256=uDLg0SeMPV2VZhXviM-F769W0xlmhGwlmQiQhY0Q-Ik,2365
|
|
4
|
+
mostlyai_mock-0.2.2.dist-info/METADATA,sha256=7Uf-YoCh0sMxXr8uSYAz__HfFrT1io2NvtDXk_psmEo,14253
|
|
5
|
+
mostlyai_mock-0.2.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
6
|
+
mostlyai_mock-0.2.2.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
|
|
7
|
+
mostlyai_mock-0.2.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
8
|
+
mostlyai_mock-0.2.2.dist-info/RECORD,,
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
mostlyai/mock/__init__.py,sha256=BawpuIqBl4JCGc_o1k07sP2b19pOnwRPAI9kHjn5odk,714
|
|
2
|
-
mostlyai/mock/core.py,sha256=GU-TyR8NlIT32g1H9-VRZ5QVczOWIGgAPerQlLDiE24,68994
|
|
3
|
-
mostlyai/mock/mcp_server.py,sha256=uDLg0SeMPV2VZhXviM-F769W0xlmhGwlmQiQhY0Q-Ik,2365
|
|
4
|
-
mostlyai_mock-0.2.0.dist-info/METADATA,sha256=Ae7bJzxECQuLRGPh3roPNV_I-p21-uSgPWsV-BYBinY,14253
|
|
5
|
-
mostlyai_mock-0.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
6
|
-
mostlyai_mock-0.2.0.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
|
|
7
|
-
mostlyai_mock-0.2.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
8
|
-
mostlyai_mock-0.2.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|