mostlyai-mock 0.0.12__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
mostlyai/mock/__init__.py CHANGED
@@ -15,4 +15,4 @@
  from mostlyai.mock.core import sample

  __all__ = ["sample"]
- __version__ = "0.0.12" # Do not set this manually. Use poetry version [params].
+ __version__ = "0.1.1" # Do not set this manually. Use poetry version [params].
mostlyai/mock/core.py CHANGED
@@ -18,7 +18,6 @@ import json
  from collections import deque
  from collections.abc import Generator
  from enum import Enum
- import os
  from typing import Any, Literal, Type

  import litellm
@@ -26,7 +25,7 @@ import pandas as pd
  from pydantic import BaseModel, Field, RootModel, create_model, field_validator, model_validator
  from tqdm import tqdm

- SYSTEM_PROMPT = f"""
+ SYSTEM_PROMPT = """
  You are a specialized synthetic data generator designed to create
  highly realistic, contextually appropriate data based on schema definitions. Your task is to:

@@ -265,7 +264,7 @@ def _create_table_prompt(

  prompt += f"## Context Table Primary Key: `{primary_keys[fk.referenced_table]}`\n\n"

- prompt += f"## Context Table Data:\n\n"
+ prompt += "## Context Table Data:\n\n"
  prompt += f"{context_data.to_json(orient='records', date_format='iso', indent=2)}\n\n"

  # add non-context table names, primary keys and data
@@ -279,8 +278,10 @@ def _create_table_prompt(

  prompt += f"## Non-Context Table Primary Key: `{primary_keys[fk.referenced_table]}`\n\n"

- prompt += f"## Non-Context Table Data:\n\n"
- prompt += f"{non_context_data[fk.referenced_table].to_json(orient='records', date_format='iso', indent=2)}\n\n"
+ prompt += "## Non-Context Table Data:\n\n"
+ prompt += (
+     f"{non_context_data[fk.referenced_table].to_json(orient='records', date_format='iso', indent=2)}\n\n"
+ )

  # add instructions
  prompt += "\n## Instructions:\n\n"
@@ -303,8 +304,8 @@ def _create_table_prompt(
  "Don't copy previous rows in the output. "
  "Don't pay attention to the number of previous rows; there might have been more generated than provided.\n\n"
  )
- prompt += f"Do not use code to generate the data.\n\n"
- prompt += f"Return the full data as a JSON string.\n"
+ prompt += "Do not use code to generate the data.\n\n"
+ prompt += "Return the full data as a JSON string.\n"

  return prompt

@@ -387,7 +388,7 @@ def _create_table_rows_generator(

  if not llm_config.model.startswith("litellm_proxy/"):
  # ensure model supports response_format and json schema (this check does not work with litellm_proxy)
- supported_params = (litellm.get_supported_openai_params(model=llm_config.model) or [])
+ supported_params = litellm.get_supported_openai_params(model=llm_config.model) or []
  assert "response_format" in supported_params and litellm.supports_response_schema(llm_config.model), (
  "The model does not support structured output / JSON mode."
  )
@@ -431,7 +432,7 @@ def _create_table_rows_generator(
  if non_context_data
  else None
  )
- prompt = _create_table_prompt(
+ llm_prompt = _create_table_prompt(
  name=name,
  prompt=prompt,
  columns=columns,
@@ -442,7 +443,7 @@ def _create_table_rows_generator(
  non_context_data=non_context_batch,
  previous_rows=list(previous_rows),
  )
- messages = [{"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": prompt}]
+ messages = [{"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": llm_prompt}]

  response = litellm.completion(messages=messages, **litellm_kwargs)
  rows_stream = yield_rows_from_json_chunks_stream(response)
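In the last two hunks above, the assembled LLM prompt is renamed from `prompt` to `llm_prompt`. The surrounding generator also receives a table-level `prompt` argument, so reusing the same name inside the batch loop would overwrite the original table description after the first batch. A minimal sketch of that pattern, using hypothetical helper names rather than the package's actual loop:

```python
def build_llm_prompt(table_prompt: str, rows: list[str]) -> str:
    # Hypothetical stand-in for the real prompt builder (`_create_table_prompt`).
    return f"{table_prompt}\nPrevious rows: {rows}"


def generate_batches(prompt: str, batches: list[list[str]]) -> None:
    """Shows why rebinding `prompt` inside the loop is a bug."""
    for batch in batches:
        # Before the fix: `prompt = build_llm_prompt(prompt, batch)` rebinds the
        # caller-supplied table prompt, so every batch after the first would be
        # built on top of the previous LLM prompt instead of the original description.
        llm_prompt = build_llm_prompt(prompt, batch)  # the fix: a distinct name
        print(llm_prompt)


generate_batches("Employees of a tech company", [["row 1"], ["row 2"]])
```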
mostlyai/mock/mcp_server.py CHANGED
@@ -2,13 +2,14 @@ import os
  import tempfile

  import pandas as pd
- from fastmcp import Context, FastMCP
+ from fastmcp import FastMCP

  from mostlyai import mock

  SAMPLE_MOCK_TOOL_DESCRIPTION = f"""
- This tool is a proxy to the `mostlyai.mock.sample` function.
- It returns a dictionary. The keys are the table names, the values are the Paths to the generated CSV files.
+ Generate mock data by prompting an LLM.
+
+ This tool is a proxy to the `mostlyai.mock.sample` function, but returns a dictionary of paths to the generated CSV files.

  Present the result nicely to the user, in Markdown format. Example:

@@ -16,10 +17,7 @@ Mock data can be found under the following paths:
  - `/tmp/tmpl41bwa6n/players.csv`
  - `/tmp/tmpl41bwa6n/seasons.csv`

-
- What comes after the `=============================` is the documentation of the `mostlyai.mock.sample` function.
-
- =============================
+ == mostlyai.mock.sample DocString ==
  {mock.sample.__doc__}
  """

@@ -45,14 +43,7 @@ def mock_data(
  api_key: str | None = None,
  temperature: float = 1.0,
  top_p: float = 0.95,
- ctx: Context,
  ) -> dict[str, str]:
- # Notes:
- # 1. Returning DataFrames directly results in converting them into truncated string.
- # 2. The logs / progress bars are not propagated to the MCP Client. There is a dedicated API to do that (e.g. `ctx.info(...)`)
- # 3. MCP Server inherits only selected environment variables (PATH, USER...); one way to pass LLM keys is through client configuration (`mcpServers->env`)
- # 4. Some MCP Clients, e.g. Cursor, do not like Unions or Optionals in type hints
- ctx.info(f"Generating mock data for `{len(tables)}` tables")
  data = mock.sample(
  tables=tables,
  sample_size=sample_size,
@@ -62,7 +53,6 @@ def mock_data(
  top_p=top_p,
  return_type="dict",
  )
- ctx.info(f"Generated mock data for `{len(tables)}` tables")
  locations = _store_locally(data)
  return locations
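With the `Context` parameter and the `ctx.info(...)` calls removed, `mock_data` is a plain function that FastMCP can register directly. A minimal sketch of that wiring, assuming standard `fastmcp` usage; the server name, simplified signature, and placeholder paths here are illustrative, not taken from the package:

```python
from fastmcp import FastMCP

from mostlyai import mock

mcp = FastMCP("MostlyAI Mock")  # server name chosen for this sketch


@mcp.tool(description="Generate mock data by prompting an LLM.")
def mock_data(tables: dict, sample_size: int, model: str) -> dict[str, str]:
    # Proxy to mock.sample; the real tool also writes the tables to CSV files
    # and returns their paths instead of the placeholder paths used here.
    data = mock.sample(tables=tables, sample_size=sample_size, model=model, return_type="dict")
    return {name: f"/tmp/{name}.csv" for name in data}  # placeholder paths


if __name__ == "__main__":
    mcp.run()  # stdio transport by default
```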
{mostlyai_mock-0.0.12.dist-info → mostlyai_mock-0.1.1.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mostlyai-mock
- Version: 0.0.12
+ Version: 0.1.1
  Summary: Synthetic Mock Data
  Project-URL: homepage, https://github.com/mostly-ai/mostlyai-mock
  Project-URL: repository, https://github.com/mostly-ai/mostlyai-mock
@@ -225,7 +225,7 @@ tables = {
  ],
  }
  }
- df = sample(tables=tables, sample_size=10, model="openai/gpt-4.1")
+ df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-4.1")
  print(df)
  # employee_id name boss_id role
  # 0 1 Sandra Phillips <NA> President
@@ -269,30 +269,3 @@ Troubleshooting:
  1. If the MCP Client fails to detect the MCP Server, provide the absolute path in the `command` field, for example: `/Users/johnsmith/.local/bin/uvx`
  2. To debug MCP Server issues, you can use MCP Inspector by running: `npx @modelcontextprotocol/inspector -- uvx --from mostlyai-mock mcp-server`
  3. In order to develop locally, modify the configuration by replacing `"command": "uv"` (or use the full path to `uv` if needed) and `"args": ["--directory", "/Users/johnsmith/mostlyai-mock", "run", "mcp-server"]`
-
-
- ## LiteLLM Proxy Server
-
- In order to consume LiteLLM Proxy Server, the user must:
- - Set `LITELLM_PROXY_API_KEY` and `LITELLM_PROXY_API_BASE`
- - Prefix the `model` with LiteLLM Proxy Server provider: `litellm_proxy`. For example, `litellm_proxy/openai/gpt-4.1-nano`
-
- ```python
- from mostlyai import mock
- import os
-
- os.environ["LITELLM_PROXY_API_BASE"] = "https://litellm-proxy-production-7a86.up.railway.app/"
- tables = {
-     "guests": {
-         "prompt": "Guests of an Alpine ski hotel in Austria",
-         "columns": {
-             "name": {"prompt": "first name and last name of the guest", "dtype": "string"},
-         },
-     }
- }
- df = mock.sample(tables=tables, sample_size=10, model="litellm_proxy/mostlyai/openai/gpt-4.1-nano")
-
- print(df)
- ```
-
- Read more [here](https://docs.litellm.ai/docs/providers/litellm_proxy).
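The `sample` → `mock.sample` correction above aligns the README example with the `from mostlyai import mock` import used throughout the docs. A minimal sketch of the corrected call, using a hypothetical single-table schema in the same format as the README examples (an OpenAI API key is assumed to be available in the environment):

```python
from mostlyai import mock

# Hypothetical single-table schema in the format used by the README examples.
tables = {
    "employees": {
        "prompt": "Employees of a mid-sized tech company",
        "columns": {
            "name": {"prompt": "first and last name of the employee", "dtype": "string"},
            "role": {"prompt": "job title of the employee", "dtype": "string"},
        },
    }
}

# Call through the package namespace, matching `from mostlyai import mock`.
df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-4.1")
print(df)
```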
mostlyai_mock-0.1.1.dist-info/RECORD ADDED
@@ -0,0 +1,8 @@
+ mostlyai/mock/__init__.py,sha256=rwv3TboU77Sn6Py635JgvQu64d_R2s1Nc0dIDDbHAZA,714
+ mostlyai/mock/core.py,sha256=MEDVp_woSXlD0JanS3ocxWBa_XilpaWzPhsvNzTZuX0,30138
+ mostlyai/mock/mcp_server.py,sha256=Vp0bWzE8wUyA6k4PHLa0TbkuI9s07E48xPrAUgf_5qU,1563
+ mostlyai_mock-0.1.1.dist-info/METADATA,sha256=tY5BvODgzoiqox8yS8ISfxWtVB1wbch1KNW8CikRImc,12713
+ mostlyai_mock-0.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ mostlyai_mock-0.1.1.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
+ mostlyai_mock-0.1.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ mostlyai_mock-0.1.1.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- mostlyai/mock/__init__.py,sha256=EzC1pduN2tfMeNk5Q4DHpNCZ8Erlo4KdsEoiZSq76GY,715
2
- mostlyai/mock/core.py,sha256=OpVipCV-7t9WtdQ7NDs2Nl58Y_9jsLhwqT8vFrQKgLM,30115
3
- mostlyai/mock/mcp_server.py,sha256=FqtgGdtuncpLcPySQk2V2RXASFvfV27rqlrnXnmSE7M,2311
4
- mostlyai_mock-0.0.12.dist-info/METADATA,sha256=O7ACxfwzQE7M8pipE1kLwf-sdWkIaAZzBfHFOS1vwA8,13526
5
- mostlyai_mock-0.0.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
- mostlyai_mock-0.0.12.dist-info/entry_points.txt,sha256=XDbppUIAaCWW0nresVep8zb71pkzZuFA16jCBHq8CU8,61
7
- mostlyai_mock-0.0.12.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
8
- mostlyai_mock-0.0.12.dist-info/RECORD,,