mostlyai-mock 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mostlyai/mock/__init__.py CHANGED
@@ -15,4 +15,4 @@
15
15
  from mostlyai.mock.core import sample
16
16
 
17
17
  __all__ = ["sample"]
18
- __version__ = "0.0.10" # Do not set this manually. Use poetry version [params].
18
+ __version__ = "0.0.12" # Do not set this manually. Use poetry version [params].
mostlyai/mock/core.py CHANGED
@@ -18,6 +18,7 @@ import json
18
18
  from collections import deque
19
19
  from collections.abc import Generator
20
20
  from enum import Enum
21
+ import os
21
22
  from typing import Any, Literal, Type
22
23
 
23
24
  import litellm
@@ -265,7 +266,7 @@ def _create_table_prompt(
265
266
  prompt += f"## Context Table Primary Key: `{primary_keys[fk.referenced_table]}`\n\n"
266
267
 
267
268
  prompt += f"## Context Table Data:\n\n"
268
- prompt += f"{context_data.to_json(orient='records', indent=2)}\n\n"
269
+ prompt += f"{context_data.to_json(orient='records', date_format='iso', indent=2)}\n\n"
269
270
 
270
271
  # add non-context table names, primary keys and data
271
272
  if foreign_keys and len(foreign_keys) > 1:
@@ -279,7 +280,7 @@ def _create_table_prompt(
279
280
  prompt += f"## Non-Context Table Primary Key: `{primary_keys[fk.referenced_table]}`\n\n"
280
281
 
281
282
  prompt += f"## Non-Context Table Data:\n\n"
282
- prompt += f"{non_context_data[fk.referenced_table].to_json(orient='records', indent=2)}\n\n"
283
+ prompt += f"{non_context_data[fk.referenced_table].to_json(orient='records', date_format='iso', indent=2)}\n\n"
283
284
 
284
285
  # add instructions
285
286
  prompt += "\n## Instructions:\n\n"
@@ -384,12 +385,12 @@ def _create_table_rows_generator(
384
385
  for i in range(0, len(data), batch_size):
385
386
  yield data.iloc[i : i + batch_size]
386
387
 
387
- # ensure model supports response_format and json schema
388
- supported_params = litellm.get_supported_openai_params(model=llm_config.model)
389
- assert "response_format" in supported_params
390
- assert litellm.supports_response_schema(llm_config.model), (
391
- "The model does not support structured output / JSON mode."
392
- )
388
+ if not llm_config.model.startswith("litellm_proxy/"):
389
+ # ensure model supports response_format and json schema (this check does not work with litellm_proxy)
390
+ supported_params = (litellm.get_supported_openai_params(model=llm_config.model) or [])
391
+ assert "response_format" in supported_params and litellm.supports_response_schema(llm_config.model), (
392
+ "The model does not support structured output / JSON mode."
393
+ )
393
394
 
394
395
  # derive context data (if first foreign key is present) and harmonize sample size accordingly
395
396
  context_data: pd.DataFrame | None = None
@@ -398,6 +399,7 @@ def _create_table_rows_generator(
398
399
  assert generated_data is not None
399
400
  assert context_table_name in generated_data
400
401
  context_data = generated_data[context_table_name]
402
+ batch_size = 1 # generate one sequence at a time
401
403
  sample_size = len(context_data)
402
404
 
403
405
  # derive non-context data (if more than one foreign key is present)
@@ -1,19 +1,21 @@
1
1
  import os
2
2
  import tempfile
3
- import zipfile
4
3
 
4
+ import pandas as pd
5
5
  from fastmcp import Context, FastMCP
6
6
 
7
7
  from mostlyai import mock
8
8
 
9
9
  SAMPLE_MOCK_TOOL_DESCRIPTION = f"""
10
- It is proxy to the `mostlyai.mock.sample` function.
10
+ This tool is a proxy to the `mostlyai.mock.sample` function.
11
+ It returns a dictionary. The keys are the table names, the values are the Paths to the generated CSV files.
11
12
 
12
- This tool returns an URL or a Path to the generated CSV bundle (as ZIP file).
13
- Present the result nicely to the user, in Markdown format. Some examples:
13
+ Present the result nicely to the user, in Markdown format. Example:
14
+
15
+ Mock data can be found under the following paths:
16
+ - `/tmp/tmpl41bwa6n/players.csv`
17
+ - `/tmp/tmpl41bwa6n/seasons.csv`
14
18
 
15
- "Mock data is ready to download: [Mock Data](https://example.com/mock_data.zip)" (if result is a link)
16
- "Mock data can be found in `/tmp/tmpl41bwa6n/mock_data.zip`" (if result is a path)
17
19
 
18
20
  What comes after the `=============================` is the documentation of the `mostlyai.mock.sample` function.
19
21
 
@@ -24,20 +26,18 @@ What comes after the `=============================` is the documentation of the
24
26
  mcp = FastMCP(name="MostlyAI Mock MCP Server")
25
27
 
26
28
 
27
- def _store_locally(data: dict) -> str:
29
+ def _store_locally(data: dict[str, pd.DataFrame]) -> dict[str, str]:
28
30
  temp_dir = tempfile.mkdtemp()
29
- zip_path = os.path.join(temp_dir, "mock_data.zip")
30
- with zipfile.ZipFile(zip_path, "w") as zip_file:
31
- for table_name, df in data.items():
32
- csv_path = os.path.join(temp_dir, f"{table_name}.csv")
33
- df.to_csv(csv_path, index=False)
34
- zip_file.write(csv_path, arcname=f"{table_name}.csv")
35
-
36
- return os.path.abspath(zip_path)
31
+ locations = {}
32
+ for table_name, df in data.items():
33
+ csv_path = os.path.join(temp_dir, f"{table_name}.csv")
34
+ df.to_csv(csv_path, index=False)
35
+ locations[table_name] = csv_path
36
+ return locations
37
37
 
38
38
 
39
39
  @mcp.tool(description=SAMPLE_MOCK_TOOL_DESCRIPTION)
40
- def sample_mock_data(
40
+ def mock_data(
41
41
  *,
42
42
  tables: dict[str, dict],
43
43
  sample_size: int,
@@ -46,7 +46,7 @@ def sample_mock_data(
46
46
  temperature: float = 1.0,
47
47
  top_p: float = 0.95,
48
48
  ctx: Context,
49
- ) -> str:
49
+ ) -> dict[str, str]:
50
50
  # Notes:
51
51
  # 1. Returning DataFrames directly results in converting them into truncated string.
52
52
  # 2. The logs / progress bars are not propagated to the MCP Client. There is a dedicated API to do that (e.g. `ctx.info(...)`)
@@ -63,8 +63,8 @@ def sample_mock_data(
63
63
  return_type="dict",
64
64
  )
65
65
  ctx.info(f"Generated mock data for `{len(tables)}` tables")
66
- url = _store_locally(data)
67
- return url
66
+ locations = _store_locally(data)
67
+ return locations
68
68
 
69
69
 
70
70
  def main():
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mostlyai-mock
3
- Version: 0.0.10
3
+ Version: 0.0.12
4
4
  Summary: Synthetic Mock Data
5
5
  Project-URL: homepage, https://github.com/mostly-ai/mostlyai-mock
6
6
  Project-URL: repository, https://github.com/mostly-ai/mostlyai-mock
@@ -244,7 +244,7 @@ print(df)
244
244
 
245
245
  This repo comes with an MCP Server. It can be easily consumed by any MCP Client by providing the following configuration:
246
246
 
247
- ```yaml
247
+ ```json
248
248
  {
249
249
  "mcpServers": {
250
250
  "mostlyai-mock-mcp": {
@@ -266,5 +266,33 @@ For example:
266
266
  - in Cursor, go to "Settings" > "Cursor Settings" > "MCP" > "Add new global MCP server" and paste the above into `mcp.json`
267
267
 
268
268
  Troubleshooting:
269
- 1. If MCP Server is not picked up by the MCP Client, specify full path in `command`, e.g. `/Users/johnsmith/.local/bin/uvx`
270
- 2. MCP Inspector can be used for debugging: `npx @modelcontextprotocol/inspector -- uvx --from mostlyai-mock mcp-server`
269
+ 1. If the MCP Client fails to detect the MCP Server, provide the absolute path in the `command` field, for example: `/Users/johnsmith/.local/bin/uvx`
270
+ 2. To debug MCP Server issues, you can use MCP Inspector by running: `npx @modelcontextprotocol/inspector -- uvx --from mostlyai-mock mcp-server`
271
+ 3. To develop locally, modify the configuration by setting `"command": "uv"` (or the full path to `uv` if needed) and `"args": ["--directory", "/Users/johnsmith/mostlyai-mock", "run", "mcp-server"]`
272
+
273
+
274
+ ## LiteLLM Proxy Server
275
+
276
+ In order to consume LiteLLM Proxy Server, the user must:
277
+ - Set the `LITELLM_PROXY_API_KEY` and `LITELLM_PROXY_API_BASE` environment variables
278
+ - Prefix the `model` with the LiteLLM Proxy Server provider, `litellm_proxy`. For example: `litellm_proxy/openai/gpt-4.1-nano`
279
+
280
+ ```python
281
+ from mostlyai import mock
282
+ import os
283
+
284
+ os.environ["LITELLM_PROXY_API_BASE"] = "https://litellm-proxy-production-7a86.up.railway.app/"
285
+ tables = {
286
+ "guests": {
287
+ "prompt": "Guests of an Alpine ski hotel in Austria",
288
+ "columns": {
289
+ "name": {"prompt": "first name and last name of the guest", "dtype": "string"},
290
+ },
291
+ }
292
+ }
293
+ df = mock.sample(tables=tables, sample_size=10, model="litellm_proxy/mostlyai/openai/gpt-4.1-nano")
294
+
295
+ print(df)
296
+ ```
297
+
298
+ Read more [here](https://docs.litellm.ai/docs/providers/litellm_proxy).
@@ -0,0 +1,8 @@
1
+ mostlyai/mock/__init__.py,sha256=EzC1pduN2tfMeNk5Q4DHpNCZ8Erlo4KdsEoiZSq76GY,715
2
+ mostlyai/mock/core.py,sha256=OpVipCV-7t9WtdQ7NDs2Nl58Y_9jsLhwqT8vFrQKgLM,30115
3
+ mostlyai/mock/mcp_server.py,sha256=FqtgGdtuncpLcPySQk2V2RXASFvfV27rqlrnXnmSE7M,2311
4
+ mostlyai_mock-0.0.12.dist-info/METADATA,sha256=O7ACxfwzQE7M8pipE1kLwf-sdWkIaAZzBfHFOS1vwA8,13526
5
+ mostlyai_mock-0.0.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
+ mostlyai_mock-0.0.12.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
7
+ mostlyai_mock-0.0.12.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
+ mostlyai_mock-0.0.12.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- mostlyai/mock/__init__.py,sha256=grEzN1CxhOdKf-ow8fWVNTzUrB2Ogg8IBQIKqyaOc7I,715
2
- mostlyai/mock/core.py,sha256=p5VAsRppzAc4P8FqKEunfQ3cPjImUU2cEc6yqHJVhMg,29884
3
- mostlyai/mock/mcp_server.py,sha256=juy5n6-Xo-ZVC3u5o2zylLgN1CaAwzIaS_bk2qXxpcU,2439
4
- mostlyai_mock-0.0.10.dist-info/METADATA,sha256=OHHgoJQ6_RL20Ze5ofFvMscXK5oBOXTfYyEYy0XwsCU,12443
5
- mostlyai_mock-0.0.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
- mostlyai_mock-0.0.10.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
7
- mostlyai_mock-0.0.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
- mostlyai_mock-0.0.10.dist-info/RECORD,,