mostlyai-mock 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mostlyai/mock/__init__.py +1 -1
- mostlyai/mock/core.py +487 -214
- mostlyai/mock/mcp_server.py +3 -1
- {mostlyai_mock-0.1.6.dist-info → mostlyai_mock-0.1.8.dist-info}/METADATA +7 -6
- mostlyai_mock-0.1.8.dist-info/RECORD +8 -0
- mostlyai_mock-0.1.6.dist-info/RECORD +0 -8
- {mostlyai_mock-0.1.6.dist-info → mostlyai_mock-0.1.8.dist-info}/WHEEL +0 -0
- {mostlyai_mock-0.1.6.dist-info → mostlyai_mock-0.1.8.dist-info}/entry_points.txt +0 -0
- {mostlyai_mock-0.1.6.dist-info → mostlyai_mock-0.1.8.dist-info}/licenses/LICENSE +0 -0
mostlyai/mock/__init__.py
CHANGED
mostlyai/mock/core.py
CHANGED
@@ -14,42 +14,29 @@
|
|
14
14
|
|
15
15
|
from __future__ import annotations
|
16
16
|
|
17
|
-
import
|
17
|
+
import asyncio
|
18
|
+
import concurrent.futures
|
18
19
|
import json
|
20
|
+
import math
|
19
21
|
from collections import deque
|
20
|
-
from collections.abc import
|
22
|
+
from collections.abc import AsyncGenerator
|
21
23
|
from enum import Enum
|
24
|
+
from io import StringIO
|
22
25
|
from typing import Any, Literal
|
23
26
|
|
27
|
+
import dateutil.parser
|
24
28
|
import litellm
|
25
29
|
import pandas as pd
|
26
30
|
import tenacity
|
27
31
|
from pydantic import BaseModel, Field, RootModel, create_model, field_validator, model_validator
|
28
|
-
from tqdm import tqdm
|
32
|
+
from tqdm.asyncio import tqdm
|
29
33
|
|
30
34
|
litellm.suppress_debug_info = True
|
31
35
|
|
32
|
-
SYSTEM_PROMPT = """
|
33
|
-
You are a specialized mock data generator designed to create highly realistic, contextually appropriate data based on schema definitions.
|
34
36
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
2. Ensure logical consistency across related tables and foreign key relationships
|
39
|
-
3. Create contextually appropriate values that reflect real-world patterns and distributions
|
40
|
-
4. Produce diverse, non-repetitive data that avoids obvious patterns
|
41
|
-
5. Respect uniqueness constraints and other data integrity rules
|
42
|
-
6. When enriching existing data, ensure that new values are consistent with existing values
|
43
|
-
7. Return well-formatted JSON output that can be directly parsed
|
44
|
-
8. Don't use markdown formatting
|
45
|
-
|
46
|
-
For numeric fields, generate realistic distributions rather than random values. For text fields, create contextually \
|
47
|
-
appropriate content. For dates and timestamps, ensure logical chronology. Always maintain referential integrity \
|
48
|
-
across tables.
|
49
|
-
|
50
|
-
When enriching existing data, carefully analyze the patterns and relationships in the existing columns \
|
51
|
-
to generate compatible and realistic values for the missing columns.
|
52
|
-
"""
|
37
|
+
class LLMOutputFormat(str, Enum):
|
38
|
+
JSON = "JSON"
|
39
|
+
CSV = "CSV"
|
53
40
|
|
54
41
|
|
55
42
|
class LLMConfig(BaseModel):
|
@@ -162,6 +149,12 @@ class ColumnConfig(BaseModel):
|
|
162
149
|
raise ValueError("At least one value must be provided when dtype is 'category'")
|
163
150
|
return self
|
164
151
|
|
152
|
+
@model_validator(mode="after")
|
153
|
+
def override_values_for_boolean_dtype(self) -> ColumnConfig:
|
154
|
+
if self.dtype == DType.BOOLEAN:
|
155
|
+
self.values = [True, False]
|
156
|
+
return self
|
157
|
+
|
165
158
|
@model_validator(mode="after")
|
166
159
|
def harmonize_values_with_dtypes(self) -> ColumnConfig:
|
167
160
|
if self.values:
|
@@ -199,18 +192,18 @@ class ForeignKeyConfig(BaseModel):
|
|
199
192
|
prompt: str | None = None
|
200
193
|
|
201
194
|
|
202
|
-
def _sample_table(
|
195
|
+
async def _sample_table(
|
203
196
|
*,
|
204
197
|
name: str,
|
205
198
|
prompt: str,
|
206
199
|
columns: dict[str, ColumnConfig],
|
207
|
-
foreign_keys: list[ForeignKeyConfig]
|
208
|
-
primary_keys: dict[str, str]
|
200
|
+
foreign_keys: list[ForeignKeyConfig],
|
201
|
+
primary_keys: dict[str, str],
|
209
202
|
data: dict[str, pd.DataFrame],
|
210
203
|
sample_size: int,
|
211
|
-
batch_size: int,
|
212
204
|
previous_rows_size: int,
|
213
205
|
non_context_size: int | None,
|
206
|
+
n_workers: int,
|
214
207
|
llm_config: LLMConfig,
|
215
208
|
) -> pd.DataFrame:
|
216
209
|
table_rows_generator = _create_table_rows_generator(
|
@@ -221,28 +214,62 @@ def _sample_table(
|
|
221
214
|
foreign_keys=foreign_keys,
|
222
215
|
data=data,
|
223
216
|
sample_size=sample_size,
|
224
|
-
batch_size=batch_size,
|
225
217
|
previous_rows_size=previous_rows_size,
|
226
218
|
non_context_size=non_context_size,
|
219
|
+
n_workers=n_workers,
|
227
220
|
llm_config=llm_config,
|
228
221
|
)
|
229
222
|
table_rows_generator = tqdm(table_rows_generator, desc=f"Generating rows for table `{name}`".ljust(45))
|
230
|
-
table_df = _convert_table_rows_generator_to_df(table_rows_generator=table_rows_generator, columns=columns)
|
223
|
+
table_df = await _convert_table_rows_generator_to_df(table_rows_generator=table_rows_generator, columns=columns)
|
231
224
|
return table_df
|
232
225
|
|
233
226
|
|
227
|
+
def _sample_table_sync(*args, **kwargs) -> pd.DataFrame:
|
228
|
+
loop = asyncio.new_event_loop()
|
229
|
+
asyncio.set_event_loop(loop)
|
230
|
+
try:
|
231
|
+
return loop.run_until_complete(_sample_table(*args, **kwargs))
|
232
|
+
finally:
|
233
|
+
loop.close()
|
234
|
+
|
235
|
+
|
236
|
+
def _create_system_prompt(llm_output_format: LLMOutputFormat) -> str:
|
237
|
+
return f"""
|
238
|
+
You are a specialized data generator designed to create highly realistic, contextually appropriate data based on schema definitions.
|
239
|
+
|
240
|
+
Your task is to:
|
241
|
+
|
242
|
+
1. Generate data that strictly adheres to the provided schema constraints (data types, ranges, formats)
|
243
|
+
2. Ensure logical consistency across related tables and foreign key relationships
|
244
|
+
3. Create contextually appropriate values that reflect real-world patterns and distributions
|
245
|
+
4. Produce diverse, non-repetitive data that avoids obvious patterns
|
246
|
+
5. Respect uniqueness constraints and other data integrity rules
|
247
|
+
6. When enriching existing data, ensure that new values are consistent with existing values
|
248
|
+
7. Return well-formatted {llm_output_format.value} output that can be directly parsed
|
249
|
+
8. Don't use markdown formatting
|
250
|
+
|
251
|
+
For numeric fields, generate realistic distributions rather than random values. For text fields, create contextually \
|
252
|
+
appropriate content. For dates and timestamps, ensure logical chronology. Always maintain referential integrity \
|
253
|
+
across tables.
|
254
|
+
|
255
|
+
When enriching existing data, carefully analyze the patterns and relationships in the existing columns \
|
256
|
+
to generate compatible and realistic values for the missing columns.
|
257
|
+
"""
|
258
|
+
|
259
|
+
|
234
260
|
def _create_table_prompt(
|
235
261
|
*,
|
236
262
|
name: str,
|
237
263
|
prompt: str,
|
238
264
|
columns: dict[str, ColumnConfig],
|
239
|
-
primary_keys: dict[str, str]
|
265
|
+
primary_keys: dict[str, str],
|
240
266
|
batch_size: int | None,
|
241
|
-
foreign_keys: list[ForeignKeyConfig]
|
267
|
+
foreign_keys: list[ForeignKeyConfig],
|
242
268
|
existing_data: pd.DataFrame | None,
|
243
269
|
context_data: pd.DataFrame | None,
|
244
270
|
non_context_data: dict[str, pd.DataFrame] | None,
|
245
271
|
previous_rows: list[dict] | None,
|
272
|
+
llm_output_format: LLMOutputFormat,
|
246
273
|
) -> str:
|
247
274
|
# add table prompt
|
248
275
|
prompt = f"# {prompt}\n\n"
|
@@ -345,7 +372,7 @@ def _create_table_prompt(
|
|
345
372
|
|
346
373
|
prompt += f"{verb.capitalize()} data for the Target Table `{name}`.\n\n"
|
347
374
|
if n_rows is not None:
|
348
|
-
prompt += f"Number of rows to {verb}: `{n_rows}`.\n\n"
|
375
|
+
prompt += f"Number of data rows to {verb}: `{n_rows}`.\n\n"
|
349
376
|
|
350
377
|
if has_context_table_section:
|
351
378
|
assert foreign_keys
|
@@ -387,131 +414,326 @@ def _create_table_prompt(
|
|
387
414
|
|
388
415
|
prompt += f"Do not use code to {verb} the data.\n\n"
|
389
416
|
|
390
|
-
prompt += "Return data as a
|
391
|
-
|
392
|
-
|
417
|
+
prompt += f"Return data as a {llm_output_format.value} string."
|
418
|
+
if llm_output_format == LLMOutputFormat.JSON:
|
419
|
+
prompt += " The JSON string should have 'rows' key at the top level."
|
420
|
+
prompt += " The value of 'rows' key should be a list of JSON objects."
|
421
|
+
prompt += " Each JSON object should have column names as keys and values as column values."
|
422
|
+
else: # llm_output_format == LLMOutputFormat.CSV
|
423
|
+
prompt += " The CSV string should have a header row with column names."
|
424
|
+
prompt += " The CSV string should have a data row for each row to be generated."
|
425
|
+
prompt += " The CSV string should have a newline character at the end of each row."
|
426
|
+
prompt += " Each value in the CSV string should be enclosed in double quotes."
|
427
|
+
|
393
428
|
if existing_data is not None:
|
394
|
-
prompt += (
|
395
|
-
|
396
|
-
|
429
|
+
prompt += f" Only include the following columns in the {llm_output_format.value} string: {list(columns.keys() - existing_data.columns)}."
|
430
|
+
|
431
|
+
if llm_output_format == LLMOutputFormat.CSV and batch_size > 10:
|
432
|
+
prompt += " Additionally, add column called `_ROW_IDX` that is a counter from 1 to the number of rows generated so far within current batch."
|
433
|
+
|
397
434
|
prompt += "\n"
|
398
435
|
return prompt
|
399
436
|
|
400
437
|
|
401
|
-
def
|
438
|
+
def _completion_with_retries(*args, **kwargs):
|
439
|
+
n_attempts = 3
|
440
|
+
|
441
|
+
def print_on_retry(_):
|
442
|
+
print(" * Calling LLM again... * ", end="", flush=True)
|
443
|
+
|
444
|
+
# try up to 3 times, print a message to the user on each retry
|
445
|
+
retryer = tenacity.AsyncRetrying(
|
446
|
+
stop=tenacity.stop_after_attempt(n_attempts), reraise=True, before_sleep=print_on_retry
|
447
|
+
)
|
448
|
+
return retryer(litellm.acompletion, *args, **kwargs)
|
449
|
+
|
450
|
+
|
451
|
+
async def _yield_rows_from_json_chunks_stream(response: litellm.CustomStreamWrapper) -> AsyncGenerator[dict]:
|
452
|
+
# starting with dirty buffer is to handle the `{"rows": []}` case
|
453
|
+
buffer = list("garbage")
|
454
|
+
rows_json_started = False
|
455
|
+
in_row_json = False
|
456
|
+
async for chunk in response:
|
457
|
+
delta = chunk.choices[0].delta.content
|
458
|
+
if delta is None:
|
459
|
+
continue
|
460
|
+
for char in delta:
|
461
|
+
buffer.append(char)
|
462
|
+
if char == "{" and not rows_json_started:
|
463
|
+
# {"rows": [{"name": "Jo\}h\{n"}]}
|
464
|
+
# * <- start of rows json stream
|
465
|
+
rows_json_started = True
|
466
|
+
elif char == "{" and not in_row_json:
|
467
|
+
# {"rows": [{"name": "Jo\}h\{n"}]}
|
468
|
+
# * <- start of single row json stream
|
469
|
+
buffer = list("{")
|
470
|
+
in_row_json = True
|
471
|
+
elif char == "}":
|
472
|
+
# {"rows": [{"name": "Jo\}h\{n"}]}
|
473
|
+
# * * * <- any of these
|
474
|
+
try:
|
475
|
+
row = json.loads("".join(buffer))
|
476
|
+
yield row
|
477
|
+
buffer = list()
|
478
|
+
in_row_json = False
|
479
|
+
except json.JSONDecodeError:
|
480
|
+
continue
|
481
|
+
|
482
|
+
|
483
|
+
async def _yield_rows_from_csv_chunks_stream(response: litellm.CustomStreamWrapper) -> AsyncGenerator[dict]:
|
484
|
+
def buffer_to_row(buffer: list[str]) -> list[str]:
|
485
|
+
return pd.read_csv(StringIO("".join(buffer)), header=None).astype(str).iloc[0].to_list()
|
486
|
+
|
487
|
+
buffer = list()
|
488
|
+
header = None
|
489
|
+
async for chunk in response:
|
490
|
+
delta = chunk.choices[0].delta.content
|
491
|
+
if delta is None:
|
492
|
+
continue
|
493
|
+
for char in delta:
|
494
|
+
buffer.append(char)
|
495
|
+
if char == "\n":
|
496
|
+
row = buffer_to_row(buffer)
|
497
|
+
if header is None:
|
498
|
+
# column1,column2,column3\n
|
499
|
+
# ** <- end of header row
|
500
|
+
header = row
|
501
|
+
else:
|
502
|
+
# value_1,value_2,value_3\n
|
503
|
+
# ** <- end of data row
|
504
|
+
yield dict(zip(header, row))
|
505
|
+
buffer = list()
|
506
|
+
if buffer:
|
507
|
+
# last row might not finish with a newline, in which case the buffer would not be empty here
|
508
|
+
last_row = buffer_to_row(buffer)
|
509
|
+
yield dict(zip(header, last_row))
|
510
|
+
|
511
|
+
|
512
|
+
def _create_structured_output_schema(
|
513
|
+
columns: dict[str, ColumnConfig], existing_data: pd.DataFrame | None
|
514
|
+
) -> type[BaseModel]:
|
515
|
+
def create_annotation(column_config: ColumnConfig) -> type:
|
516
|
+
if column_config.values or column_config.dtype is DType.CATEGORY:
|
517
|
+
return Literal[tuple(column_config.values)]
|
518
|
+
return {
|
519
|
+
DType.INTEGER: int | None,
|
520
|
+
DType.FLOAT: float | None,
|
521
|
+
DType.STRING: str | None,
|
522
|
+
DType.BOOLEAN: bool | None,
|
523
|
+
# response_format has limited support for JSON Schema features
|
524
|
+
# thus we represent dates and datetimes as strings
|
525
|
+
DType.DATE: str | None,
|
526
|
+
DType.DATETIME: str | None,
|
527
|
+
}[column_config.dtype]
|
528
|
+
|
529
|
+
fields = {}
|
530
|
+
for column_name, column_config in columns.items():
|
531
|
+
if existing_data is not None and column_name in existing_data.columns:
|
532
|
+
continue # skip columns that already exist in existing data
|
533
|
+
annotation = create_annotation(column_config)
|
534
|
+
fields[column_name] = (annotation, Field(...))
|
535
|
+
TableRow = create_model("TableRow", **fields)
|
536
|
+
TableRows = create_model("TableRows", rows=(list[TableRow], ...))
|
537
|
+
return TableRows
|
538
|
+
|
539
|
+
|
540
|
+
async def _worker(
|
541
|
+
*,
|
542
|
+
name: str,
|
543
|
+
prompt: str,
|
544
|
+
columns: dict[str, ColumnConfig],
|
545
|
+
foreign_keys: list[ForeignKeyConfig],
|
546
|
+
primary_keys: dict[str, str],
|
547
|
+
previous_rows: deque[dict],
|
548
|
+
batch_queue: asyncio.Queue,
|
549
|
+
result_queue: asyncio.Queue,
|
550
|
+
retry_queue: asyncio.Queue,
|
551
|
+
n_workers: int,
|
552
|
+
llm_output_format: LLMOutputFormat,
|
553
|
+
llm_config: LLMConfig,
|
554
|
+
):
|
555
|
+
try:
|
556
|
+
while True:
|
557
|
+
do_repeat_task = False
|
558
|
+
|
559
|
+
# get task from the batch_queue
|
560
|
+
batch_idx, task = await batch_queue.get()
|
561
|
+
if task is None:
|
562
|
+
# no more tasks for the worker; break the loop
|
563
|
+
batch_queue.task_done()
|
564
|
+
break
|
565
|
+
|
566
|
+
# deconstruct task
|
567
|
+
batch_size = task["batch_size"]
|
568
|
+
existing_batch = task.get("existing_batch")
|
569
|
+
context_batch = task.get("context_batch")
|
570
|
+
non_context_batch = task.get("non_context_batch")
|
571
|
+
|
572
|
+
# resolve columns to generate
|
573
|
+
generated_columns = set(columns.keys())
|
574
|
+
if existing_batch is not None:
|
575
|
+
generated_columns = generated_columns - set(existing_batch.columns)
|
576
|
+
|
577
|
+
# construct schema for Structured Outputs (applies to JSON LLMOutputFormat only)
|
578
|
+
structured_output_schema = None
|
579
|
+
if llm_output_format == LLMOutputFormat.JSON:
|
580
|
+
structured_output_schema = _create_structured_output_schema(
|
581
|
+
columns=columns, existing_data=existing_batch
|
582
|
+
)
|
583
|
+
|
584
|
+
# construct litellm kwargs
|
585
|
+
litellm_kwargs = {
|
586
|
+
"temperature": llm_config.temperature,
|
587
|
+
"top_p": llm_config.top_p,
|
588
|
+
"model": llm_config.model,
|
589
|
+
"api_key": llm_config.api_key,
|
590
|
+
"stream": True,
|
591
|
+
}
|
592
|
+
|
593
|
+
# construct messages
|
594
|
+
system_prompt = _create_system_prompt(llm_output_format)
|
595
|
+
user_prompt = _create_table_prompt(
|
596
|
+
name=name,
|
597
|
+
prompt=prompt,
|
598
|
+
columns=columns,
|
599
|
+
primary_keys=primary_keys,
|
600
|
+
batch_size=batch_size,
|
601
|
+
foreign_keys=foreign_keys,
|
602
|
+
existing_data=existing_batch,
|
603
|
+
context_data=context_batch,
|
604
|
+
non_context_data=non_context_batch,
|
605
|
+
previous_rows=list(previous_rows),
|
606
|
+
llm_output_format=llm_output_format,
|
607
|
+
)
|
608
|
+
messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
|
609
|
+
|
610
|
+
if generated_columns:
|
611
|
+
# make LLM call
|
612
|
+
response = await _completion_with_retries(
|
613
|
+
messages=messages, response_format=structured_output_schema, **litellm_kwargs
|
614
|
+
)
|
615
|
+
yield_rows_from_chunks_stream = {
|
616
|
+
LLMOutputFormat.JSON: _yield_rows_from_json_chunks_stream,
|
617
|
+
LLMOutputFormat.CSV: _yield_rows_from_csv_chunks_stream,
|
618
|
+
}[llm_output_format]
|
619
|
+
rows_stream = yield_rows_from_chunks_stream(response)
|
620
|
+
else:
|
621
|
+
# skip roundtrip to LLM in case all columns are provided in existing data
|
622
|
+
assert existing_batch is not None
|
623
|
+
|
624
|
+
async def _yield_empty_rows(n_rows: int) -> AsyncGenerator[dict]:
|
625
|
+
for _ in range(n_rows):
|
626
|
+
yield {}
|
627
|
+
|
628
|
+
rows_stream = _yield_empty_rows(len(existing_batch))
|
629
|
+
|
630
|
+
# we first generate all rows in the batch, in order to run consistency checks
|
631
|
+
rows_generated_part = []
|
632
|
+
async for row_generated_part in rows_stream:
|
633
|
+
# remove internal columns, if exist
|
634
|
+
row_generated_part = {k: v for k, v in row_generated_part.items() if k in generated_columns}
|
635
|
+
|
636
|
+
if set(row_generated_part.keys()) != generated_columns:
|
637
|
+
if context_batch is not None or existing_batch is not None:
|
638
|
+
# in case of linked tables and data enrichment, it's critical that all rows have expected columns
|
639
|
+
print(" * Malformed row, repeating batch... * ", end="", flush=True)
|
640
|
+
do_repeat_task = True
|
641
|
+
break
|
642
|
+
else:
|
643
|
+
# in case of flat tables generation, each row is independent, therefore we only skip the invalid row
|
644
|
+
continue
|
645
|
+
rows_generated_part.append(row_generated_part)
|
646
|
+
|
647
|
+
# at least some valid rows are expected per batch, repeat the batch otherwise
|
648
|
+
if len(rows_generated_part) == 0:
|
649
|
+
print(" * No valid rows were generated, repeating batch... * ", end="", flush=True)
|
650
|
+
do_repeat_task = True
|
651
|
+
|
652
|
+
# in case of data enrichment, check that all rows were completed successfully
|
653
|
+
if existing_batch is not None and len(rows_generated_part) != len(existing_batch):
|
654
|
+
print(" * Some rows were not enriched successfully, repeating batch... * ", end="", flush=True)
|
655
|
+
do_repeat_task = True
|
656
|
+
|
657
|
+
if do_repeat_task:
|
658
|
+
# allow 10 retries across all workers before propagating the exception to the orchestrator
|
659
|
+
await retry_queue.put(1)
|
660
|
+
if retry_queue.qsize() < 10:
|
661
|
+
# put task back to the front of the batch queue
|
662
|
+
await batch_queue.put((batch_idx, task))
|
663
|
+
else:
|
664
|
+
# inform the orchestrator that max retries were reached
|
665
|
+
raise RuntimeError(
|
666
|
+
"Too many malformed batches were generated. "
|
667
|
+
"Consider changing the model in order to make generation more stable."
|
668
|
+
)
|
669
|
+
|
670
|
+
# mark current task as done
|
671
|
+
batch_queue.task_done()
|
672
|
+
continue
|
673
|
+
|
674
|
+
# collapse existing and generated parts into coherent rows
|
675
|
+
rows = []
|
676
|
+
for row_idx, row_generated_part in enumerate(rows_generated_part):
|
677
|
+
row_existing_part = existing_batch.iloc[row_idx].to_dict() if existing_batch is not None else {}
|
678
|
+
row = {**row_generated_part, **row_existing_part}
|
679
|
+
# keep columns order according to user's spec
|
680
|
+
row = {column: row[column] for column in columns.keys()}
|
681
|
+
rows.append(row)
|
682
|
+
|
683
|
+
# track previous rows for improved data consistency, in case of sequential generation
|
684
|
+
if n_workers == 1:
|
685
|
+
previous_rows.extend(rows)
|
686
|
+
|
687
|
+
# put rows to the result queue and mark current task as done
|
688
|
+
await result_queue.put((batch_idx, rows))
|
689
|
+
batch_queue.task_done()
|
690
|
+
except Exception as e:
|
691
|
+
# propagate any exception through the result queue
|
692
|
+
await result_queue.put((batch_idx, e))
|
693
|
+
raise
|
694
|
+
|
695
|
+
|
696
|
+
async def _create_table_rows_generator(
|
402
697
|
*,
|
403
698
|
name: str,
|
404
699
|
prompt: str,
|
405
700
|
columns: dict[str, ColumnConfig],
|
406
|
-
foreign_keys: list[ForeignKeyConfig]
|
407
|
-
primary_keys: dict[str, str]
|
701
|
+
foreign_keys: list[ForeignKeyConfig],
|
702
|
+
primary_keys: dict[str, str],
|
408
703
|
data: dict[str, pd.DataFrame],
|
409
704
|
sample_size: int,
|
410
|
-
batch_size: int,
|
411
705
|
previous_rows_size: int,
|
412
706
|
non_context_size: int | None,
|
707
|
+
n_workers: int,
|
413
708
|
llm_config: LLMConfig,
|
414
|
-
) ->
|
415
|
-
|
416
|
-
columns: dict[str, ColumnConfig], existing_data: pd.DataFrame | None
|
417
|
-
) -> tuple[type[BaseModel], int]:
|
418
|
-
def create_annotation(column_config: ColumnConfig) -> type:
|
419
|
-
if column_config.values or column_config.dtype is DType.CATEGORY:
|
420
|
-
return Literal[tuple(column_config.values)]
|
421
|
-
return {
|
422
|
-
DType.INTEGER: int | None,
|
423
|
-
DType.FLOAT: float | None,
|
424
|
-
DType.STRING: str | None,
|
425
|
-
DType.BOOLEAN: bool | None,
|
426
|
-
# response_format has limited support for JSON Schema features
|
427
|
-
# thus we represent dates and datetimes as strings
|
428
|
-
DType.DATE: str | None,
|
429
|
-
DType.DATETIME: str | None,
|
430
|
-
}[column_config.dtype]
|
431
|
-
|
432
|
-
fields = {}
|
433
|
-
for column_name, column_config in columns.items():
|
434
|
-
if existing_data is not None and column_name in existing_data.columns:
|
435
|
-
continue # skip columns that already exist in existing data
|
436
|
-
annotation = create_annotation(column_config)
|
437
|
-
fields[column_name] = (annotation, Field(...))
|
438
|
-
TableRow = create_model("TableRow", **fields)
|
439
|
-
TableRows = create_model("TableRows", rows=(list[TableRow], ...))
|
440
|
-
n_enforced_columns = len(fields)
|
441
|
-
return TableRows, n_enforced_columns
|
442
|
-
|
443
|
-
def yield_rows_from_json_chunks_stream(response: litellm.CustomStreamWrapper) -> Generator[dict]:
|
444
|
-
# starting with dirty buffer is to handle the `{"rows": []}` case
|
445
|
-
buffer = "garbage"
|
446
|
-
rows_json_started = False
|
447
|
-
in_row_json = False
|
448
|
-
for chunk in response:
|
449
|
-
delta = chunk.choices[0].delta.content
|
450
|
-
if delta is None:
|
451
|
-
continue
|
452
|
-
for char in delta:
|
453
|
-
buffer += char
|
454
|
-
if char == "{" and not rows_json_started:
|
455
|
-
# {"rows": [{"name": "Jo\}h\{n"}]}
|
456
|
-
# * <- start of rows json stream
|
457
|
-
rows_json_started = True
|
458
|
-
elif char == "{" and not in_row_json:
|
459
|
-
# {"rows": [{"name": "Jo\}h\{n"}]}
|
460
|
-
# * <- start of single row json stream
|
461
|
-
buffer = "{"
|
462
|
-
in_row_json = True
|
463
|
-
elif char == "}":
|
464
|
-
# {"rows": [{"name": "Jo\}h\{n"}]}
|
465
|
-
# * * * <- any of these
|
466
|
-
try:
|
467
|
-
row = json.loads(buffer)
|
468
|
-
yield row
|
469
|
-
buffer = ""
|
470
|
-
in_row_json = False
|
471
|
-
except json.JSONDecodeError:
|
472
|
-
continue
|
709
|
+
) -> AsyncGenerator[dict]:
|
710
|
+
batch_size = 20 # generate 20 root table rows at a time
|
473
711
|
|
474
|
-
def
|
475
|
-
|
476
|
-
|
477
|
-
yield None
|
478
|
-
else:
|
479
|
-
for i in range(0, len(data), batch_size):
|
480
|
-
yield data.iloc[i : i + batch_size]
|
481
|
-
|
482
|
-
def completion_with_retries(*args, **kwargs):
|
483
|
-
n_attempts = 3
|
712
|
+
def supports_structured_outputs(model: str) -> bool:
|
713
|
+
supported_params = litellm.get_supported_openai_params(model=model) or []
|
714
|
+
return "response_format" in supported_params and litellm.supports_response_schema(model)
|
484
715
|
|
485
|
-
|
486
|
-
print(" * Trying again... * ", end="", flush=True)
|
716
|
+
llm_output_format = LLMOutputFormat.JSON if supports_structured_outputs(llm_config.model) else LLMOutputFormat.CSV
|
487
717
|
|
488
|
-
|
489
|
-
retryer = tenacity.Retrying(
|
490
|
-
stop=tenacity.stop_after_attempt(n_attempts), reraise=True, before_sleep=print_on_retry
|
491
|
-
)
|
492
|
-
return retryer(litellm.completion, *args, **kwargs)
|
493
|
-
|
494
|
-
if not llm_config.model.startswith("litellm_proxy/"):
|
495
|
-
# ensure model supports response_format and json schema (this check does not work with litellm_proxy)
|
496
|
-
supported_params = litellm.get_supported_openai_params(model=llm_config.model) or []
|
497
|
-
assert "response_format" in supported_params and litellm.supports_response_schema(llm_config.model), (
|
498
|
-
"The model does not support structured output / JSON mode."
|
499
|
-
)
|
718
|
+
previous_rows = deque(maxlen=previous_rows_size)
|
500
719
|
|
501
720
|
# derive data for augmentation
|
502
721
|
existing_data: pd.DataFrame | None = None
|
503
722
|
if name in data:
|
504
723
|
existing_data = data[name]
|
505
724
|
sample_size = len(existing_data)
|
725
|
+
batch_size = 10 # augment 10 root table rows at a time
|
506
726
|
|
507
727
|
# derive context data (if first foreign key is present) and harmonize sample size accordingly
|
508
728
|
context_data: pd.DataFrame | None = None
|
729
|
+
context_batches: list[pd.DataFrame] | None = None
|
509
730
|
if foreign_keys and foreign_keys[0].referenced_table != name: # self-dependency is not considered as context
|
510
731
|
context_table_name = foreign_keys[0].referenced_table
|
511
732
|
assert context_table_name in data
|
512
733
|
context_data = data[context_table_name]
|
513
|
-
batch_size = 1 # generate
|
734
|
+
batch_size = 1 # generate 1 sequence at a time
|
514
735
|
sample_size = len(context_data)
|
736
|
+
context_batches = [data.iloc[i : i + batch_size] for i in range(0, len(data), batch_size)]
|
515
737
|
|
516
738
|
# derive non-context data (if more than one foreign key is present)
|
517
739
|
non_context_data: dict[str, pd.DataFrame] = {}
|
@@ -524,18 +746,23 @@ def _create_table_rows_generator(
|
|
524
746
|
assert non_context_table_name in data
|
525
747
|
non_context_data[non_context_table_name] = data[non_context_table_name]
|
526
748
|
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
749
|
+
# calculate batch_sizes
|
750
|
+
n_total_batches = len(context_batches) if context_batches is not None else math.ceil(sample_size / batch_size)
|
751
|
+
batch_sizes = [batch_size] * n_total_batches
|
752
|
+
if context_batches is None:
|
753
|
+
# optimise the last batch size for flat tables
|
754
|
+
# +2 because LLM may not always count the rows correctly
|
755
|
+
batch_sizes[-1] = sample_size - sum(batch_sizes[:-1]) + 2
|
756
|
+
|
757
|
+
# initialize queues for async communication
|
758
|
+
batch_queue = asyncio.PriorityQueue()
|
759
|
+
result_queue = asyncio.Queue()
|
760
|
+
retry_queue = asyncio.Queue()
|
761
|
+
|
762
|
+
# populate batch queue
|
763
|
+
for batch_idx in range(n_total_batches):
|
764
|
+
context_batch = context_batches[batch_idx] if context_batches is not None else None
|
534
765
|
|
535
|
-
batch_idx = 0
|
536
|
-
yielded_sequences = 0
|
537
|
-
previous_rows = deque(maxlen=previous_rows_size)
|
538
|
-
for context_batch in batch_infinitely(context_data):
|
539
766
|
# pick existing rows for current batch
|
540
767
|
existing_batch: pd.DataFrame | None = None
|
541
768
|
if existing_data is not None:
|
@@ -559,71 +786,94 @@ def _create_table_rows_generator(
|
|
559
786
|
table_name: df.sample(frac=1.0).head(non_context_size) for table_name, df in non_context_data.items()
|
560
787
|
}
|
561
788
|
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
789
|
+
task = {
|
790
|
+
"batch_size": batch_sizes[batch_idx],
|
791
|
+
"existing_batch": existing_batch,
|
792
|
+
"context_batch": context_batch,
|
793
|
+
"non_context_batch": non_context_batch,
|
794
|
+
}
|
795
|
+
await batch_queue.put((batch_idx, task))
|
796
|
+
|
797
|
+
# initialize workers
|
798
|
+
n_workers = min(n_total_batches, n_workers)
|
799
|
+
workers = [
|
800
|
+
asyncio.create_task(
|
801
|
+
_worker(
|
802
|
+
name=name,
|
803
|
+
prompt=prompt,
|
804
|
+
columns=columns,
|
805
|
+
foreign_keys=foreign_keys,
|
806
|
+
primary_keys=primary_keys,
|
807
|
+
previous_rows=previous_rows,
|
808
|
+
batch_queue=batch_queue,
|
809
|
+
result_queue=result_queue,
|
810
|
+
retry_queue=retry_queue,
|
811
|
+
n_workers=n_workers,
|
812
|
+
llm_output_format=llm_output_format,
|
813
|
+
llm_config=llm_config,
|
814
|
+
)
|
583
815
|
)
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
if
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
816
|
+
for _ in range(n_workers)
|
817
|
+
]
|
818
|
+
|
819
|
+
n_completed_batches = 0
|
820
|
+
n_yielded_sequences = 0
|
821
|
+
while n_yielded_sequences < sample_size:
|
822
|
+
if n_completed_batches >= n_total_batches:
|
823
|
+
assert context_data is None, "n_total_batches is fixed for linked tables"
|
824
|
+
assert existing_data is None, "n_total_batches is fixed for data enrichment"
|
825
|
+
# LLMs may not generate exactly the number of rows requested
|
826
|
+
# in case of flat tables, we still accept such incomplete batches,
|
827
|
+
# but that means we may need to generate more batches to reach the sample size
|
828
|
+
# +2 because LLM may not always count the rows correctly
|
829
|
+
n_total_batches += 1
|
830
|
+
task = {
|
831
|
+
"batch_size": sample_size - n_yielded_sequences + 2,
|
832
|
+
}
|
833
|
+
await batch_queue.put((n_total_batches, task))
|
834
|
+
batch_idx, result = await result_queue.get()
|
835
|
+
if isinstance(result, Exception):
|
836
|
+
# if an exception is raised by any worker, cancel all workers and raise that exception
|
837
|
+
for worker in workers:
|
838
|
+
worker.cancel()
|
839
|
+
await asyncio.gather(*workers)
|
840
|
+
raise result
|
841
|
+
rows = result
|
842
|
+
for row_idx, row in enumerate(rows):
|
843
|
+
yield (batch_idx, row)
|
844
|
+
if context_batches is None or row_idx == len(rows) - 1:
|
845
|
+
# in case of flat table, each row is considered a single sequence
|
846
|
+
# in case of linked table, all rows are considered a single sequence
|
847
|
+
# NOTE: this assumes that we generate a single sequence per batch
|
848
|
+
n_yielded_sequences += 1
|
849
|
+
if n_yielded_sequences >= sample_size:
|
850
|
+
break
|
851
|
+
n_completed_batches += 1
|
852
|
+
result_queue.task_done()
|
853
|
+
|
854
|
+
# gracefully shutdown workers
|
855
|
+
await batch_queue.join()
|
856
|
+
for _ in workers:
|
857
|
+
await batch_queue.put((n_total_batches + 1, None))
|
858
|
+
await asyncio.gather(*workers)
|
859
|
+
|
860
|
+
|
861
|
+
async def _convert_table_rows_generator_to_df(
|
862
|
+
table_rows_generator: AsyncGenerator[dict],
|
621
863
|
columns: dict[str, ColumnConfig],
|
622
864
|
) -> pd.DataFrame:
|
623
865
|
def align_df_dtypes_with_mock_dtypes(df: pd.DataFrame, columns: dict[str, ColumnConfig]) -> pd.DataFrame:
|
866
|
+
df = df.copy()
|
624
867
|
for column_name, column_config in columns.items():
|
625
868
|
if column_config.dtype in [DType.DATE, DType.DATETIME]:
|
626
|
-
|
869
|
+
|
870
|
+
def harmonize_datetime(x):
|
871
|
+
try:
|
872
|
+
return dateutil.parser.parse(x)
|
873
|
+
except Exception:
|
874
|
+
return pd.NaT
|
875
|
+
|
876
|
+
df[column_name] = pd.to_datetime(df[column_name].apply(harmonize_datetime), errors="coerce")
|
627
877
|
elif column_config.dtype is DType.INTEGER:
|
628
878
|
df[column_name] = pd.to_numeric(df[column_name], errors="coerce", downcast="integer").astype(
|
629
879
|
"int64[pyarrow]"
|
@@ -631,6 +881,8 @@ def _convert_table_rows_generator_to_df(
|
|
631
881
|
elif column_config.dtype is DType.FLOAT:
|
632
882
|
df[column_name] = pd.to_numeric(df[column_name], errors="coerce").astype("double[pyarrow]")
|
633
883
|
elif column_config.dtype is DType.BOOLEAN:
|
884
|
+
df[column_name] = df[column_name].map(lambda x: True if str(x).lower() == "true" else x)
|
885
|
+
df[column_name] = df[column_name].map(lambda x: False if str(x).lower() == "false" else x)
|
634
886
|
df[column_name] = pd.to_numeric(df[column_name], errors="coerce").astype("boolean[pyarrow]")
|
635
887
|
elif column_config.dtype is DType.CATEGORY:
|
636
888
|
df[column_name] = pd.Categorical(df[column_name], categories=column_config.values)
|
@@ -638,7 +890,13 @@ def _convert_table_rows_generator_to_df(
|
|
638
890
|
df[column_name] = df[column_name].astype("string[pyarrow]")
|
639
891
|
return df
|
640
892
|
|
641
|
-
|
893
|
+
# consume entire generator
|
894
|
+
items = [{"batch_idx": batch_idx, "row": row} async for batch_idx, row in table_rows_generator]
|
895
|
+
# sort items by batch_idx to maintain order (relevant especially for keeping the order of existing data)
|
896
|
+
items = sorted(items, key=lambda x: x["batch_idx"])
|
897
|
+
# extract rows and convert to DataFrame
|
898
|
+
rows = [item["row"] for item in items]
|
899
|
+
df = pd.DataFrame(rows)
|
642
900
|
df = align_df_dtypes_with_mock_dtypes(df, columns)
|
643
901
|
return df
|
644
902
|
|
@@ -737,27 +995,36 @@ def _build_execution_plan(config: MockConfig) -> list[str]:
|
|
737
995
|
def sample(
|
738
996
|
*,
|
739
997
|
tables: dict[str, dict],
|
740
|
-
sample_size: int | dict[str, int] =
|
998
|
+
sample_size: int | dict[str, int] = 4,
|
741
999
|
existing_data: dict[str, pd.DataFrame] | None = None,
|
742
1000
|
model: str = "openai/gpt-4.1-nano",
|
743
1001
|
api_key: str | None = None,
|
744
1002
|
temperature: float = 1.0,
|
745
1003
|
top_p: float = 0.95,
|
1004
|
+
n_workers: int = 10,
|
746
1005
|
return_type: Literal["auto", "dict"] = "auto",
|
747
1006
|
) -> pd.DataFrame | dict[str, pd.DataFrame]:
|
748
1007
|
"""
|
749
|
-
Generate
|
1008
|
+
Generate synthetic data from scratch or enrich existing data with new columns.
|
1009
|
+
|
1010
|
+
While faker and numpy are useful to create fake data, this utility is unique as it allows
|
1011
|
+
the creation of coherent, realistic multi-table tabular mock data
|
1012
|
+
or the enrichment of existing datasets with new, context-aware columns.
|
1013
|
+
|
1014
|
+
It is particularly useful for quickly simulating production-like datasets for testing or prototyping purposes.
|
1015
|
+
It is advised to limit mocking to small datasets for performance reasons (rows * cols < 100).
|
1016
|
+
It might take a couple of minutes for bigger datasets.
|
750
1017
|
|
751
1018
|
Args:
|
752
1019
|
tables (dict[str, dict]): The table specifications to generate mock data for. See examples for usage.
|
753
1020
|
sample_size (int | dict[str, int]): The number of rows to generate for each subject table.
|
754
1021
|
If a single integer is provided, the same number of rows will be generated for each subject table.
|
755
1022
|
If a dictionary is provided, the number of rows to generate for each subject table can be specified individually.
|
756
|
-
Default is
|
1023
|
+
Default is 4. Ignored if existing_data is provided.
|
757
1024
|
If a table has a foreign key, the sample size is determined by the corresponding foreign key prompt. If nothing is specified, a few rows per parent record are generated.
|
758
1025
|
existing_data (dict[str, pd.DataFrame] | None): Existing data to augment. If provided, the sample_size argument is ignored.
|
759
1026
|
Default is None.
|
760
|
-
model (str): The LiteLLM chat completion model to be used.
|
1027
|
+
model (str): The LiteLLM chat completion model to be used.
|
761
1028
|
Examples include:
|
762
1029
|
- `openai/gpt-4.1-nano` (default; fast, and smart)
|
763
1030
|
- `openai/gpt-4.1-mini` (slower, but smarter)
|
@@ -771,6 +1038,8 @@ def sample(
|
|
771
1038
|
api_key (str | None): The API key to use for the LLM. If not provided, LiteLLM will take it from the environment variables.
|
772
1039
|
temperature (float): The temperature to use for the LLM. Default is 1.0.
|
773
1040
|
top_p (float): The top-p value to use for the LLM. Default is 0.95.
|
1041
|
+
n_workers (int): The number of concurrent workers making the LLM calls. Default is 10.
|
1042
|
+
If n_workers is 1, the generation of batches becomes sequential and certain features for better data consistency are enabled.
|
774
1043
|
return_type (Literal["auto", "dict"]): The format of the returned data. Default is "auto".
|
775
1044
|
|
776
1045
|
Returns:
|
@@ -965,19 +1234,23 @@ def sample(
|
|
965
1234
|
|
966
1235
|
for table_name in execution_plan:
|
967
1236
|
table_config = config.root[table_name]
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
|
978
|
-
|
979
|
-
|
980
|
-
|
1237
|
+
|
1238
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
1239
|
+
future = executor.submit(
|
1240
|
+
_sample_table_sync,
|
1241
|
+
name=table_name,
|
1242
|
+
prompt=table_config.prompt,
|
1243
|
+
columns=table_config.columns,
|
1244
|
+
foreign_keys=table_config.foreign_keys,
|
1245
|
+
primary_keys=primary_keys,
|
1246
|
+
data=data,
|
1247
|
+
sample_size=sample_size[table_name],
|
1248
|
+
previous_rows_size=10, # present 10 previously generated rows to the LLM
|
1249
|
+
non_context_size=10, # pick 10 rows to choose from for each non-context foreign key
|
1250
|
+
n_workers=n_workers,
|
1251
|
+
llm_config=llm_config,
|
1252
|
+
)
|
1253
|
+
df = future.result()
|
981
1254
|
data[table_name] = df
|
982
1255
|
|
983
1256
|
return next(iter(data.values())) if len(data) == 1 and return_type == "auto" else data
|
mostlyai/mock/mcp_server.py
CHANGED
@@ -21,7 +21,9 @@ from fastmcp import FastMCP
|
|
21
21
|
from mostlyai import mock
|
22
22
|
|
23
23
|
SAMPLE_MOCK_TOOL_DESCRIPTION = f"""
|
24
|
-
|
24
|
+
Synthetic Mock Data.
|
25
|
+
|
26
|
+
Use LLMs to generate any Tabular Data towards your needs. Create from scratch, expand existing datasets, or enrich tables with new columns.
|
25
27
|
|
26
28
|
This tool is a proxy to the `mostlyai.mock.sample` function, but returns a dictionary of paths to the generated CSV files.
|
27
29
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: mostlyai-mock
|
3
|
-
Version: 0.1.6
|
4
|
-
Summary:
|
3
|
+
Version: 0.1.8
|
4
|
+
Summary: Synthetic Mock Data
|
5
5
|
Project-URL: homepage, https://github.com/mostly-ai/mostlyai-mock
|
6
6
|
Project-URL: repository, https://github.com/mostly-ai/mostlyai-mock
|
7
7
|
Project-URL: documentation, https://mostly-ai.github.io/mostlyai-mock/
|
@@ -33,19 +33,20 @@ Requires-Dist: pydantic<3.0.0,>=2.0.0
|
|
33
33
|
Requires-Dist: tenacity>=9.1.2
|
34
34
|
Description-Content-Type: text/markdown
|
35
35
|
|
36
|
-
#
|
36
|
+
# Synthetic Mock Data 🔮
|
37
37
|
|
38
38
|
[](https://mostly-ai.github.io/mostlyai-mock/) [](https://pypi.org/project/mostlyai-mock/)  
|
39
39
|
|
40
|
-
Create
|
40
|
+
Use LLMs to generate any Tabular Data towards your needs. Create from scratch, expand existing datasets, or enrich tables with new columns. Your prompts, your rules, your data.
|
41
41
|
|
42
42
|
## Key Features
|
43
43
|
|
44
|
-
* A light-weight python client for prompting LLMs for mixed-type tabular data
|
45
|
-
* Select from a range of LLM endpoints
|
44
|
+
* A light-weight python client for prompting LLMs for mixed-type tabular data.
|
45
|
+
* Select from a wide range of LLM endpoints and LLM models.
|
46
46
|
* Supports single-table as well as multi-table scenarios.
|
47
47
|
* Supports a variety of data types: `string`, `categorical`, `integer`, `float`, `boolean`, `date`, and `datetime`.
|
48
48
|
* Specify context, distributions and rules via dataset-, table- or column-level prompts.
|
49
|
+
* Create from scratch or enrich existing datasets with new columns and/or rows.
|
49
50
|
* Tailor the diversity and realism of your generated data via temperature and top_p.
|
50
51
|
|
51
52
|
## Getting Started
|
@@ -0,0 +1,8 @@
|
|
1
|
+
mostlyai/mock/__init__.py,sha256=ig9Ifl6JwFYl952LopbiuZuoqO6W5IN2bdeWpZ2vMjg,714
|
2
|
+
mostlyai/mock/core.py,sha256=Ga78HucN2ltGCzmhzJ5p9AOKgMMemL23g6pldHi90-c,53968
|
3
|
+
mostlyai/mock/mcp_server.py,sha256=MrVUrIsAZsFzjK1suwNl1fxS1ES-wpc-YSM8cS8Fqcw,2259
|
4
|
+
mostlyai_mock-0.1.8.dist-info/METADATA,sha256=tNOj8tufD0VgePB9F0_uovNCCym-TaQCCBKhk2_etIs,14099
|
5
|
+
mostlyai_mock-0.1.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
6
|
+
mostlyai_mock-0.1.8.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
|
7
|
+
mostlyai_mock-0.1.8.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
8
|
+
mostlyai_mock-0.1.8.dist-info/RECORD,,
|
@@ -1,8 +0,0 @@
|
|
1
|
-
mostlyai/mock/__init__.py,sha256=8UddMHmwpfwSb7ChuVNvIaWNLTlWkN0Cxh63CskmtBw,714
|
2
|
-
mostlyai/mock/core.py,sha256=NFfyucqjT3iC9lqfu4dPmRnYizxtfFH1Tf3KHRRxHvg,42242
|
3
|
-
mostlyai/mock/mcp_server.py,sha256=kWMIjKCwnvYfjY8B2IdP4JNs8ik_8jA6ISCDqrG9utc,2137
|
4
|
-
mostlyai_mock-0.1.6.dist-info/METADATA,sha256=RMYEgGG4P3WfhavNC_4ph6dTCtumqQ3uA-swot9WKyc,13918
|
5
|
-
mostlyai_mock-0.1.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
6
|
-
mostlyai_mock-0.1.6.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
|
7
|
-
mostlyai_mock-0.1.6.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
8
|
-
mostlyai_mock-0.1.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|