mostlyai-mock 0.0.12__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- mostlyai/mock/__init__.py +1 -1
- mostlyai/mock/core.py +11 -10
- mostlyai/mock/mcp_server.py +5 -15
- {mostlyai_mock-0.0.12.dist-info → mostlyai_mock-0.1.1.dist-info}/METADATA +2 -29
- mostlyai_mock-0.1.1.dist-info/RECORD +8 -0
- mostlyai_mock-0.0.12.dist-info/RECORD +0 -8
- {mostlyai_mock-0.0.12.dist-info → mostlyai_mock-0.1.1.dist-info}/WHEEL +0 -0
- {mostlyai_mock-0.0.12.dist-info → mostlyai_mock-0.1.1.dist-info}/entry_points.txt +0 -0
- {mostlyai_mock-0.0.12.dist-info → mostlyai_mock-0.1.1.dist-info}/licenses/LICENSE +0 -0
mostlyai/mock/__init__.py
CHANGED
mostlyai/mock/core.py
CHANGED
@@ -18,7 +18,6 @@ import json
 from collections import deque
 from collections.abc import Generator
 from enum import Enum
-import os
 from typing import Any, Literal, Type
 
 import litellm
@@ -26,7 +25,7 @@ import pandas as pd
 from pydantic import BaseModel, Field, RootModel, create_model, field_validator, model_validator
 from tqdm import tqdm
 
-SYSTEM_PROMPT =
+SYSTEM_PROMPT = """
 You are a specialized synthetic data generator designed to create
 highly realistic, contextually appropriate data based on schema definitions. Your task is to:
 
@@ -265,7 +264,7 @@ def _create_table_prompt(
 
     prompt += f"## Context Table Primary Key: `{primary_keys[fk.referenced_table]}`\n\n"
 
-    prompt +=
+    prompt += "## Context Table Data:\n\n"
     prompt += f"{context_data.to_json(orient='records', date_format='iso', indent=2)}\n\n"
 
     # add non-context table names, primary keys and data
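The prompt embeds the context table verbatim as JSON records. For reference, a minimal sketch of what the `to_json(orient='records', date_format='iso', indent=2)` call produces; the DataFrame below is illustrative, not from the package:

```python
import pandas as pd

# hypothetical context table, for illustration only
context_data = pd.DataFrame(
    {
        "guest_id": [1, 2],
        "checked_in": pd.to_datetime(["2024-01-05", "2024-01-06"]),
    }
)

# same call as in _create_table_prompt: emits one JSON object per row,
# with dates serialized in ISO-8601 and a 2-space indent
print(context_data.to_json(orient="records", date_format="iso", indent=2))
```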
@@ -279,8 +278,10 @@ def _create_table_prompt(
 
     prompt += f"## Non-Context Table Primary Key: `{primary_keys[fk.referenced_table]}`\n\n"
 
-    prompt +=
-    prompt +=
+    prompt += "## Non-Context Table Data:\n\n"
+    prompt += (
+        f"{non_context_data[fk.referenced_table].to_json(orient='records', date_format='iso', indent=2)}\n\n"
+    )
 
     # add instructions
     prompt += "\n## Instructions:\n\n"
@@ -303,8 +304,8 @@ def _create_table_prompt(
         "Don't copy previous rows in the output. "
         "Don't pay attention to the number of previous rows; there might have been more generated than provided.\n\n"
     )
-    prompt +=
-    prompt +=
+    prompt += "Do not use code to generate the data.\n\n"
+    prompt += "Return the full data as a JSON string.\n"
 
     return prompt
 
@@ -387,7 +388,7 @@ def _create_table_rows_generator(
 
     if not llm_config.model.startswith("litellm_proxy/"):
         # ensure model supports response_format and json schema (this check does not work with litellm_proxy)
-        supported_params =
+        supported_params = litellm.get_supported_openai_params(model=llm_config.model) or []
        assert "response_format" in supported_params and litellm.supports_response_schema(llm_config.model), (
            "The model does not support structured output / JSON mode."
        )
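The two litellm helpers in this guard can be exercised on their own; a minimal sketch, where the model name is only an example:

```python
import litellm

model = "openai/gpt-4.1-nano"  # example model name

# may return None for unknown models, hence the `or []` fallback above
supported_params = litellm.get_supported_openai_params(model=model) or []

# mirrors the assert in _create_table_rows_generator
if "response_format" in supported_params and litellm.supports_response_schema(model):
    print(f"{model} supports structured output / JSON mode")
else:
    print(f"{model} does not support structured output / JSON mode")
```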
@@ -431,7 +432,7 @@ def _create_table_rows_generator(
         if non_context_data
         else None
     )
-
+    llm_prompt = _create_table_prompt(
         name=name,
         prompt=prompt,
         columns=columns,
@@ -442,7 +443,7 @@ def _create_table_rows_generator(
         non_context_data=non_context_batch,
         previous_rows=list(previous_rows),
     )
-    messages = [{"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content":
+    messages = [{"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": llm_prompt}]
 
     response = litellm.completion(messages=messages, **litellm_kwargs)
     rows_stream = yield_rows_from_json_chunks_stream(response)
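`yield_rows_from_json_chunks_stream` is internal to the package; the streaming interface it consumes is plain litellm. A sketch of that underlying pattern, not the package's implementation:

```python
import litellm

# stream=True turns the response into an iterator of incremental chunks
response = litellm.completion(
    model="openai/gpt-4.1-nano",  # example model name
    messages=[{"role": "user", "content": "Return a JSON list of three first names."}],
    stream=True,
)

# the package parses complete rows out of the growing JSON text as it
# arrives; here we simply accumulate the deltas into one buffer
buffer = ""
for chunk in response:
    delta = chunk.choices[0].delta.content
    if delta:
        buffer += delta
print(buffer)
```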
mostlyai/mock/mcp_server.py
CHANGED
@@ -2,13 +2,14 @@ import os
 import tempfile
 
 import pandas as pd
-from fastmcp import
+from fastmcp import FastMCP
 
 from mostlyai import mock
 
 SAMPLE_MOCK_TOOL_DESCRIPTION = f"""
-
-
+Generate mock data by prompting an LLM.
+
+This tool is a proxy to the `mostlyai.mock.sample` function, but returns a dictionary of paths to the generated CSV files.
 
 Present the result nicely to the user, in Markdown format. Example:
 
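The import is narrowed to the `FastMCP` class, and the `ctx: Context` parameter disappears in the hunks below. For orientation, a FastMCP server exposing a tool follows roughly this shape; a sketch assuming the fastmcp 2.x API, not the package's actual server:

```python
from fastmcp import FastMCP

mcp = FastMCP("MostlyAI Mock")  # server name shown to MCP clients

@mcp.tool()
def mock_data(tables: dict, sample_size: int = 10) -> dict[str, str]:
    """Generate mock data and return paths to the generated CSV files."""
    raise NotImplementedError  # the real tool delegates to mock.sample(...)

if __name__ == "__main__":
    mcp.run()  # defaults to the stdio transport
```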
@@ -16,10 +17,7 @@ Mock data can be found under the following paths:
 - `/tmp/tmpl41bwa6n/players.csv`
 - `/tmp/tmpl41bwa6n/seasons.csv`
 
-
-What comes after the `=============================` is the documentation of the `mostlyai.mock.sample` function.
-
-=============================
+== mostlyai.mock.sample DocString ==
 {mock.sample.__doc__}
 """
 
@@ -45,14 +43,7 @@ def mock_data(
     api_key: str | None = None,
     temperature: float = 1.0,
     top_p: float = 0.95,
-    ctx: Context,
 ) -> dict[str, str]:
-    # Notes:
-    # 1. Returning DataFrames directly results in converting them into truncated string.
-    # 2. The logs / progress bars are not propagated to the MCP Client. There is a dedicated API to do that (e.g. `ctx.info(...)`)
-    # 3. MCP Server inherits only selected environment variables (PATH, USER...); one way to pass LLM keys is through client configuration (`mcpServers->env`)
-    # 4. Some MCP Clients, e.g. Cursor, do not like Unions or Optionals in type hints
-    ctx.info(f"Generating mock data for `{len(tables)}` tables")
     data = mock.sample(
         tables=tables,
         sample_size=sample_size,
@@ -62,7 +53,6 @@ def mock_data(
         top_p=top_p,
         return_type="dict",
     )
-    ctx.info(f"Generated mock data for `{len(tables)}` tables")
    locations = _store_locally(data)
    return locations
 
{mostlyai_mock-0.0.12.dist-info → mostlyai_mock-0.1.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mostlyai-mock
-Version: 0.0.12
+Version: 0.1.1
 Summary: Synthetic Mock Data
 Project-URL: homepage, https://github.com/mostly-ai/mostlyai-mock
 Project-URL: repository, https://github.com/mostly-ai/mostlyai-mock
@@ -225,7 +225,7 @@ tables = {
     ],
   }
 }
-df = sample(tables=tables, sample_size=10, model="openai/gpt-4.1")
+df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-4.1")
 print(df)
 # employee_id name boss_id role
 # 0 1 Sandra Phillips <NA> President
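The corrected call goes through the imported `mock` namespace. A minimal end-to-end example in the same style as the README, with the table definition borrowed from the LiteLLM section removed further down in this METADATA diff:

```python
from mostlyai import mock

# single-table schema: one prompt per table, one prompt per column
tables = {
    "guests": {
        "prompt": "Guests of an Alpine ski hotel in Austria",
        "columns": {
            "name": {"prompt": "first name and last name of the guest", "dtype": "string"},
        },
    }
}
df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-4.1")
print(df)
```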
@@ -269,30 +269,3 @@ Troubleshooting:
 1. If the MCP Client fails to detect the MCP Server, provide the absolute path in the `command` field, for example: `/Users/johnsmith/.local/bin/uvx`
 2. To debug MCP Server issues, you can use MCP Inspector by running: `npx @modelcontextprotocol/inspector -- uvx --from mostlyai-mock mcp-server`
 3. In order to develop locally, modify the configuration by replacing `"command": "uv"` (or use the full path to `uv` if needed) and `"args": ["--directory", "/Users/johnsmith/mostlyai-mock", "run", "mcp-server"]`
-
-
-## LiteLLM Proxy Server
-
-In order to consume LiteLLM Proxy Server, the user must:
-- Set `LITELLM_PROXY_API_KEY` and `LITELLM_PROXY_API_BASE`
-- Prefix the `model` with LiteLLM Proxy Server provider: `litellm_proxy`. For example, `litellm_proxy/openai/gpt-4.1-nano`
-
-```python
-from mostlyai import mock
-import os
-
-os.environ["LITELLM_PROXY_API_BASE"] = "https://litellm-proxy-production-7a86.up.railway.app/"
-tables = {
-    "guests": {
-        "prompt": "Guests of an Alpine ski hotel in Austria",
-        "columns": {
-            "name": {"prompt": "first name and last name of the guest", "dtype": "string"},
-        },
-    }
-}
-df = mock.sample(tables=tables, sample_size=10, model="litellm_proxy/mostlyai/openai/gpt-4.1-nano")
-
-print(df)
-```
-
-Read more [here](https://docs.litellm.ai/docs/providers/litellm_proxy).
mostlyai_mock-0.1.1.dist-info/RECORD
ADDED
@@ -0,0 +1,8 @@
+mostlyai/mock/__init__.py,sha256=rwv3TboU77Sn6Py635JgvQu64d_R2s1Nc0dIDDbHAZA,714
+mostlyai/mock/core.py,sha256=MEDVp_woSXlD0JanS3ocxWBa_XilpaWzPhsvNzTZuX0,30138
+mostlyai/mock/mcp_server.py,sha256=Vp0bWzE8wUyA6k4PHLa0TbkuI9s07E48xPrAUgf_5qU,1563
+mostlyai_mock-0.1.1.dist-info/METADATA,sha256=tY5BvODgzoiqox8yS8ISfxWtVB1wbch1KNW8CikRImc,12713
+mostlyai_mock-0.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+mostlyai_mock-0.1.1.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
+mostlyai_mock-0.1.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+mostlyai_mock-0.1.1.dist-info/RECORD,,
mostlyai_mock-0.0.12.dist-info/RECORD
DELETED
@@ -1,8 +0,0 @@
-mostlyai/mock/__init__.py,sha256=EzC1pduN2tfMeNk5Q4DHpNCZ8Erlo4KdsEoiZSq76GY,715
-mostlyai/mock/core.py,sha256=OpVipCV-7t9WtdQ7NDs2Nl58Y_9jsLhwqT8vFrQKgLM,30115
-mostlyai/mock/mcp_server.py,sha256=FqtgGdtuncpLcPySQk2V2RXASFvfV27rqlrnXnmSE7M,2311
-mostlyai_mock-0.0.12.dist-info/METADATA,sha256=O7ACxfwzQE7M8pipE1kLwf-sdWkIaAZzBfHFOS1vwA8,13526
-mostlyai_mock-0.0.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-mostlyai_mock-0.0.12.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
-mostlyai_mock-0.0.12.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-mostlyai_mock-0.0.12.dist-info/RECORD,,
{mostlyai_mock-0.0.12.dist-info → mostlyai_mock-0.1.1.dist-info}/WHEEL
File without changes
{mostlyai_mock-0.0.12.dist-info → mostlyai_mock-0.1.1.dist-info}/entry_points.txt
File without changes
{mostlyai_mock-0.0.12.dist-info → mostlyai_mock-0.1.1.dist-info}/licenses/LICENSE
File without changes