mostlyai-mock 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mostlyai/mock/__init__.py +1 -1
- mostlyai/mock/core.py +10 -8
- mostlyai/mock/mcp_server.py +19 -19
- {mostlyai_mock-0.0.10.dist-info → mostlyai_mock-0.0.12.dist-info}/METADATA +32 -4
- mostlyai_mock-0.0.12.dist-info/RECORD +8 -0
- mostlyai_mock-0.0.10.dist-info/RECORD +0 -8
- {mostlyai_mock-0.0.10.dist-info → mostlyai_mock-0.0.12.dist-info}/WHEEL +0 -0
- {mostlyai_mock-0.0.10.dist-info → mostlyai_mock-0.0.12.dist-info}/entry_points.txt +0 -0
- {mostlyai_mock-0.0.10.dist-info → mostlyai_mock-0.0.12.dist-info}/licenses/LICENSE +0 -0
mostlyai/mock/__init__.py
CHANGED
mostlyai/mock/core.py
CHANGED
@@ -18,6 +18,7 @@ import json
|
|
18
18
|
from collections import deque
|
19
19
|
from collections.abc import Generator
|
20
20
|
from enum import Enum
|
21
|
+
import os
|
21
22
|
from typing import Any, Literal, Type
|
22
23
|
|
23
24
|
import litellm
|
@@ -265,7 +266,7 @@ def _create_table_prompt(
|
|
265
266
|
prompt += f"## Context Table Primary Key: `{primary_keys[fk.referenced_table]}`\n\n"
|
266
267
|
|
267
268
|
prompt += f"## Context Table Data:\n\n"
|
268
|
-
prompt += f"{context_data.to_json(orient='records', indent=2)}\n\n"
|
269
|
+
prompt += f"{context_data.to_json(orient='records', date_format='iso', indent=2)}\n\n"
|
269
270
|
|
270
271
|
# add non-context table names, primary keys and data
|
271
272
|
if foreign_keys and len(foreign_keys) > 1:
|
@@ -279,7 +280,7 @@ def _create_table_prompt(
|
|
279
280
|
prompt += f"## Non-Context Table Primary Key: `{primary_keys[fk.referenced_table]}`\n\n"
|
280
281
|
|
281
282
|
prompt += f"## Non-Context Table Data:\n\n"
|
282
|
-
prompt += f"{non_context_data[fk.referenced_table].to_json(orient='records', indent=2)}\n\n"
|
283
|
+
prompt += f"{non_context_data[fk.referenced_table].to_json(orient='records', date_format='iso', indent=2)}\n\n"
|
283
284
|
|
284
285
|
# add instructions
|
285
286
|
prompt += "\n## Instructions:\n\n"
|
@@ -384,12 +385,12 @@ def _create_table_rows_generator(
|
|
384
385
|
for i in range(0, len(data), batch_size):
|
385
386
|
yield data.iloc[i : i + batch_size]
|
386
387
|
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
388
|
+
if not llm_config.model.startswith("litellm_proxy/"):
|
389
|
+
# ensure model supports response_format and json schema (this check does not work with litellm_proxy)
|
390
|
+
supported_params = (litellm.get_supported_openai_params(model=llm_config.model) or [])
|
391
|
+
assert "response_format" in supported_params and litellm.supports_response_schema(llm_config.model), (
|
392
|
+
"The model does not support structured output / JSON mode."
|
393
|
+
)
|
393
394
|
|
394
395
|
# derive context data (if first foreign key is present) and harmonize sample size accordingly
|
395
396
|
context_data: pd.DataFrame | None = None
|
@@ -398,6 +399,7 @@ def _create_table_rows_generator(
|
|
398
399
|
assert generated_data is not None
|
399
400
|
assert context_table_name in generated_data
|
400
401
|
context_data = generated_data[context_table_name]
|
402
|
+
batch_size = 1 # generate one sequence at a time
|
401
403
|
sample_size = len(context_data)
|
402
404
|
|
403
405
|
# derive non-context data (if more than one foreign key is present)
|
mostlyai/mock/mcp_server.py
CHANGED
@@ -1,19 +1,21 @@
|
|
1
1
|
import os
|
2
2
|
import tempfile
|
3
|
-
import zipfile
|
4
3
|
|
4
|
+
import pandas as pd
|
5
5
|
from fastmcp import Context, FastMCP
|
6
6
|
|
7
7
|
from mostlyai import mock
|
8
8
|
|
9
9
|
SAMPLE_MOCK_TOOL_DESCRIPTION = f"""
|
10
|
-
|
10
|
+
This tool is a proxy to the `mostlyai.mock.sample` function.
|
11
|
+
It returns a dictionary. The keys are the table names, the values are the Paths to the generated CSV files.
|
11
12
|
|
12
|
-
|
13
|
-
|
13
|
+
Present the result nicely to the user, in Markdown format. Example:
|
14
|
+
|
15
|
+
Mock data can be found under the following paths:
|
16
|
+
- `/tmp/tmpl41bwa6n/players.csv`
|
17
|
+
- `/tmp/tmpl41bwa6n/seasons.csv`
|
14
18
|
|
15
|
-
"Mock data is ready to download: [Mock Data](https://example.com/mock_data.zip)" (if result is a link)
|
16
|
-
"Mock data can be found in `/tmp/tmpl41bwa6n/mock_data.zip`" (if result is a path)
|
17
19
|
|
18
20
|
What comes after the `=============================` is the documentation of the `mostlyai.mock.sample` function.
|
19
21
|
|
@@ -24,20 +26,18 @@ What comes after the `=============================` is the documentation of the
|
|
24
26
|
mcp = FastMCP(name="MostlyAI Mock MCP Server")
|
25
27
|
|
26
28
|
|
27
|
-
def _store_locally(data: dict) -> str:
|
29
|
+
def _store_locally(data: dict[str, pd.DataFrame]) -> dict[str, str]:
|
28
30
|
temp_dir = tempfile.mkdtemp()
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
return os.path.abspath(zip_path)
|
31
|
+
locations = {}
|
32
|
+
for table_name, df in data.items():
|
33
|
+
csv_path = os.path.join(temp_dir, f"{table_name}.csv")
|
34
|
+
df.to_csv(csv_path, index=False)
|
35
|
+
locations[table_name] = csv_path
|
36
|
+
return locations
|
37
37
|
|
38
38
|
|
39
39
|
@mcp.tool(description=SAMPLE_MOCK_TOOL_DESCRIPTION)
|
40
|
-
def sample_mock_data(
|
40
|
+
def mock_data(
|
41
41
|
*,
|
42
42
|
tables: dict[str, dict],
|
43
43
|
sample_size: int,
|
@@ -46,7 +46,7 @@ def sample_mock_data(
|
|
46
46
|
temperature: float = 1.0,
|
47
47
|
top_p: float = 0.95,
|
48
48
|
ctx: Context,
|
49
|
-
) -> str:
|
49
|
+
) -> dict[str, str]:
|
50
50
|
# Notes:
|
51
51
|
# 1. Returning DataFrames directly results in converting them into truncated string.
|
52
52
|
# 2. The logs / progress bars are not propagated to the MCP Client. There is a dedicated API to do that (e.g. `ctx.info(...)`)
|
@@ -63,8 +63,8 @@ def sample_mock_data(
|
|
63
63
|
return_type="dict",
|
64
64
|
)
|
65
65
|
ctx.info(f"Generated mock data for `{len(tables)}` tables")
|
66
|
-
|
67
|
-
return
|
66
|
+
locations = _store_locally(data)
|
67
|
+
return locations
|
68
68
|
|
69
69
|
|
70
70
|
def main():
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: mostlyai-mock
|
3
|
-
Version: 0.0.10
|
3
|
+
Version: 0.0.12
|
4
4
|
Summary: Synthetic Mock Data
|
5
5
|
Project-URL: homepage, https://github.com/mostly-ai/mostlyai-mock
|
6
6
|
Project-URL: repository, https://github.com/mostly-ai/mostlyai-mock
|
@@ -244,7 +244,7 @@ print(df)
|
|
244
244
|
|
245
245
|
This repo comes with an MCP Server. It can be easily consumed by any MCP Client by providing the following configuration:
|
246
246
|
|
247
|
-
```
|
247
|
+
```json
|
248
248
|
{
|
249
249
|
"mcpServers": {
|
250
250
|
"mostlyai-mock-mcp": {
|
@@ -266,5 +266,33 @@ For example:
|
|
266
266
|
- in Cursor, go to "Settings" > "Cursor Settings" > "MCP" > "Add new global MCP server" and paste the above into `mcp.json`
|
267
267
|
|
268
268
|
Troubleshooting:
|
269
|
-
1. If MCP
|
270
|
-
2. MCP
|
269
|
+
1. If the MCP Client fails to detect the MCP Server, provide the absolute path in the `command` field, for example: `/Users/johnsmith/.local/bin/uvx`
|
270
|
+
2. To debug MCP Server issues, you can use MCP Inspector by running: `npx @modelcontextprotocol/inspector -- uvx --from mostlyai-mock mcp-server`
|
271
|
+
3. In order to develop locally, modify the configuration by setting `"command": "uv"` (or the full path to `uv` if needed) and `"args": ["--directory", "/Users/johnsmith/mostlyai-mock", "run", "mcp-server"]`
|
272
|
+
|
273
|
+
|
274
|
+
## LiteLLM Proxy Server
|
275
|
+
|
276
|
+
In order to consume LiteLLM Proxy Server, the user must:
|
277
|
+
- Set the `LITELLM_PROXY_API_KEY` and `LITELLM_PROXY_API_BASE` environment variables
|
278
|
+
- Prefix the `model` with the LiteLLM Proxy Server provider: `litellm_proxy`. For example, `litellm_proxy/openai/gpt-4.1-nano`
|
279
|
+
|
280
|
+
```python
|
281
|
+
from mostlyai import mock
|
282
|
+
import os
|
283
|
+
|
284
|
+
os.environ["LITELLM_PROXY_API_BASE"] = "https://litellm-proxy-production-7a86.up.railway.app/"
|
285
|
+
tables = {
|
286
|
+
"guests": {
|
287
|
+
"prompt": "Guests of an Alpine ski hotel in Austria",
|
288
|
+
"columns": {
|
289
|
+
"name": {"prompt": "first name and last name of the guest", "dtype": "string"},
|
290
|
+
},
|
291
|
+
}
|
292
|
+
}
|
293
|
+
df = mock.sample(tables=tables, sample_size=10, model="litellm_proxy/mostlyai/openai/gpt-4.1-nano")
|
294
|
+
|
295
|
+
print(df)
|
296
|
+
```
|
297
|
+
|
298
|
+
Read more [here](https://docs.litellm.ai/docs/providers/litellm_proxy).
|
@@ -0,0 +1,8 @@
|
|
1
|
+
mostlyai/mock/__init__.py,sha256=EzC1pduN2tfMeNk5Q4DHpNCZ8Erlo4KdsEoiZSq76GY,715
|
2
|
+
mostlyai/mock/core.py,sha256=OpVipCV-7t9WtdQ7NDs2Nl58Y_9jsLhwqT8vFrQKgLM,30115
|
3
|
+
mostlyai/mock/mcp_server.py,sha256=FqtgGdtuncpLcPySQk2V2RXASFvfV27rqlrnXnmSE7M,2311
|
4
|
+
mostlyai_mock-0.0.12.dist-info/METADATA,sha256=O7ACxfwzQE7M8pipE1kLwf-sdWkIaAZzBfHFOS1vwA8,13526
|
5
|
+
mostlyai_mock-0.0.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
6
|
+
mostlyai_mock-0.0.12.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
|
7
|
+
mostlyai_mock-0.0.12.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
8
|
+
mostlyai_mock-0.0.12.dist-info/RECORD,,
|
@@ -1,8 +0,0 @@
|
|
1
|
-
mostlyai/mock/__init__.py,sha256=grEzN1CxhOdKf-ow8fWVNTzUrB2Ogg8IBQIKqyaOc7I,715
|
2
|
-
mostlyai/mock/core.py,sha256=p5VAsRppzAc4P8FqKEunfQ3cPjImUU2cEc6yqHJVhMg,29884
|
3
|
-
mostlyai/mock/mcp_server.py,sha256=juy5n6-Xo-ZVC3u5o2zylLgN1CaAwzIaS_bk2qXxpcU,2439
|
4
|
-
mostlyai_mock-0.0.10.dist-info/METADATA,sha256=OHHgoJQ6_RL20Ze5ofFvMscXK5oBOXTfYyEYy0XwsCU,12443
|
5
|
-
mostlyai_mock-0.0.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
6
|
-
mostlyai_mock-0.0.10.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
|
7
|
-
mostlyai_mock-0.0.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
8
|
-
mostlyai_mock-0.0.10.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|