mostlyai-mock 0.0.6__tar.gz → 0.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mostlyai-mock
3
- Version: 0.0.6
3
+ Version: 0.0.8
4
4
  Summary: Synthetic Mock Data
5
5
  Project-URL: homepage, https://github.com/mostly-ai/mostlyai-mock
6
6
  Project-URL: repository, https://github.com/mostly-ai/mostlyai-mock
@@ -24,16 +24,18 @@ Classifier: Programming Language :: Python :: 3.13
24
24
  Classifier: Topic :: Software Development :: Libraries
25
25
  Classifier: Typing :: Typed
26
26
  Requires-Python: >=3.10
27
+ Requires-Dist: fastmcp<3.0.0,>=2.0.0
27
28
  Requires-Dist: litellm>=1.67.0
28
29
  Requires-Dist: numpy>=1.26.3
29
30
  Requires-Dist: pandas>=2.0.0
30
31
  Requires-Dist: pyarrow>=14.0.0
31
32
  Requires-Dist: pydantic<3.0.0,>=2.0.0
33
+ Requires-Dist: typer<1.0.0,>=0.9.0
32
34
  Description-Content-Type: text/markdown
33
35
 
34
36
  # Synthetic Mock Data 🔮
35
37
 
36
- [![Documentation](https://img.shields.io/badge/docs-latest-green)](https://mostly-ai.github.io/mostlyai-mock/) [![stats](https://pepy.tech/badge/mostlyai-mock)](https://pypi.org/project/mostlyai-mock/) ![license](https://img.shields.io/github/license/mostly-ai/mostlyai-mock) ![GitHub Release](https://img.shields.io/github/v/release/mostly-ai/mostlyai-mock) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mostlyai-mock)
38
+ [![Documentation](https://img.shields.io/badge/docs-latest-green)](https://mostly-ai.github.io/mostlyai-mock/) [![stats](https://pepy.tech/badge/mostlyai-mock)](https://pypi.org/project/mostlyai-mock/) ![license](https://img.shields.io/github/license/mostly-ai/mostlyai-mock) ![GitHub Release](https://img.shields.io/github/v/release/mostly-ai/mostlyai-mock)
37
39
 
38
40
  Create data out of nothing. Prompt LLMs for Tabular Data.
39
41
 
@@ -72,7 +74,7 @@ from mostlyai import mock
72
74
 
73
75
  tables = {
74
76
  "guests": {
75
- "description": "Guests of an Alpine ski hotel in Austria",
77
+ "prompt": "Guests of an Alpine ski hotel in Austria",
76
78
  "columns": {
77
79
  "nationality": {"prompt": "2-letter code for the nationality", "dtype": "string"},
78
80
  "name": {"prompt": "first name and last name of the guest", "dtype": "string"},
@@ -112,17 +114,26 @@ from mostlyai import mock
112
114
 
113
115
  tables = {
114
116
  "customers": {
115
- "description": "Customers of a hardware store",
117
+ "prompt": "Customers of a hardware store",
116
118
  "columns": {
117
119
  "customer_id": {"prompt": "the unique id of the customer", "dtype": "integer"},
118
120
  "name": {"prompt": "first name and last name of the customer", "dtype": "string"},
119
121
  },
120
122
  "primary_key": "customer_id",
121
123
  },
124
+ "warehouses": {
125
+ "prompt": "Warehouses of a hardware store",
126
+ "columns": {
127
+ "warehouse_id": {"prompt": "the unique id of the warehouse", "dtype": "integer"},
128
+ "name": {"prompt": "the name of the warehouse", "dtype": "string"},
129
+ },
130
+ "primary_key": "warehouse_id",
131
+ },
122
132
  "orders": {
123
- "description": "Orders of a Customer",
133
+ "prompt": "Orders of a Customer",
124
134
  "columns": {
125
135
  "customer_id": {"prompt": "the customer id for that order", "dtype": "integer"},
136
+ "warehouse_id": {"prompt": "the warehouse id for that order", "dtype": "integer"},
126
137
  "order_id": {"prompt": "the unique id of the order", "dtype": "string"},
127
138
  "text": {"prompt": "order text description", "dtype": "string"},
128
139
  "amount": {"prompt": "order amount in USD", "dtype": "float"},
@@ -132,12 +143,16 @@ tables = {
132
143
  {
133
144
  "column": "customer_id",
134
145
  "referenced_table": "customers",
135
- "description": "each customer has anywhere between 2 and 3 orders",
136
- }
146
+ "prompt": "each customer has anywhere between 2 and 3 orders",
147
+ },
148
+ {
149
+ "column": "warehouse_id",
150
+ "referenced_table": "warehouses",
151
+ },
137
152
  ],
138
153
  },
139
154
  "items": {
140
- "description": "Items in an Order",
155
+ "prompt": "Items in an Order",
141
156
  "columns": {
142
157
  "item_id": {"prompt": "the unique id of the item", "dtype": "string"},
143
158
  "order_id": {"prompt": "the order id for that item", "dtype": "string"},
@@ -148,7 +163,7 @@ tables = {
148
163
  {
149
164
  "column": "order_id",
150
165
  "referenced_table": "orders",
151
- "description": "each order has between 1 and 2 items",
166
+ "prompt": "each order has between 1 and 2 items",
152
167
  }
153
168
  ],
154
169
  },
@@ -159,28 +174,69 @@ data = mock.sample(
159
174
  model="openai/gpt-4.1"
160
175
  )
161
176
  print(data["customers"])
162
- # customer_id name
163
- # 0 1 Michael Torres
164
- # 1 2 Elaine Kim
177
+ # customer_id name
178
+ # 0 1 Matthew Carlson
179
+ # 1 2 Priya Shah
180
+ print(data["warehouses"])
181
+ # warehouse_id name
182
+ # 0 1 Central Distribution Hub
183
+ # 1 2 Northgate Storage Facility
165
184
  print(data["orders"])
166
- # customer_id order_id text amount
167
- # 0 1 ORD20240612001 Home office desk and ergonomic chair bundle 412.95
168
- # 1 1 ORD20240517322 Wireless noise-cancelling headphones 226.49
169
- # 2 1 ORD20240430307 Smart LED desk lamp with USB charging port 69.99
170
- # 3 2 ORD20240614015 Eco-friendly bamboo kitchen utensil set 39.95
171
- # 4 2 ORD20240528356 Air fryer with digital touch screen, 5-quart c... 129.99
172
- # 5 2 ORD20240510078 Double-walled glass coffee mugs, set of 4 48.5
185
+ # customer_id warehouse_id order_id text amount
186
+ # 0 1 2 ORD-10294 3-tier glass shelving units, expedited deliver... 649.25
187
+ # 1 1 1 ORD-10541 Office desk chairs, set of 6, with assembly se... 824.9
188
+ # 2 1 1 ORD-10802 Executive standing desk, walnut finish, standa... 519.0
189
+ # 3 2 1 ORD-11017 Maple conference table, cable management inclu... 1225.5
190
+ # 4 2 2 ORD-11385 Set of ergonomic task chairs, black mesh, stan... 767.75
173
191
  print(data["items"])
174
- # item_id order_id name price
175
- # 0 ITEM100001A ORD20240612001 Ergonomic Mesh Office Chair 179.99
176
- # 1 ITEM100001B ORD20240612001 Adjustable Home Office Desk 232.96
177
- # 2 ITEM100002A ORD20240517322 Wireless Noise-Cancelling Headphones 226.49
178
- # 3 ITEM100003A ORD20240430307 Smart LED Desk Lamp 59.99
179
- # 4 ITEM100003B ORD20240430307 USB Charging Cable (Desk Lamp Compatible) 10.0
180
- # 5 ITEM100004A ORD20240614015 Bamboo Cooking Spoon 13.49
181
- # 6 ITEM100004B ORD20240614015 Bamboo Slotted Turner 12.99
182
- # 7 ITEM100005A ORD20240528356 Digital Air Fryer (5-Quart, Black) 115.99
183
- # 8 ITEM100005B ORD20240528356 Silicone Liner for Air Fryer (5-Quart) 13.99
184
- # 9 ITEM100006A ORD20240510078 Double-Walled Glass Coffee Mug (12oz) 13.75
185
- # 10 ITEM100006B ORD20240510078 Double-Walled Glass Coffee Mug (8oz) 11.25
192
+ # item_id order_id name price
193
+ # 0 ITM-80265 ORD-10294 3-Tier Tempered Glass Shelving Unit 409.0
194
+ # 1 ITM-80266 ORD-10294 Brushed Aluminum Shelf Brackets (Set of 4) 240.25
195
+ # 2 ITM-81324 ORD-10541 Ergonomic Mesh-Back Desk Chair 132.5
196
+ # 3 ITM-81325 ORD-10541 Professional Office Chair Assembly Service 45.0
197
+ # 4 ITM-82101 ORD-10802 Executive Standing Desk, Walnut Finish 469.0
198
+ # 5 ITM-82102 ORD-10802 Desk Installation and Setup Service 50.0
199
+ # 6 ITM-83391 ORD-11017 Maple Conference Table, 10-Seat 1125.5
200
+ # 7 ITM-83392 ORD-11017 Integrated Table Cable Management Kit 100.0
201
+ # 8 ITM-84311 ORD-11385 Ergonomic Task Chair, Black Mesh 359.25
202
+ # 9 ITM-84312 ORD-11385 Standard Delivery Service 48.5
186
203
  ```
204
+
205
+ 6. Create your first self-referencing synthetic table
206
+
207
+ ```python
208
+ from mostlyai import mock
209
+
210
+ tables = {
211
+ "employees": {
212
+ "prompt": "Employees of a company",
213
+ "columns": {
214
+ "employee_id": {"prompt": "the unique id of the employee", "dtype": "integer"},
215
+ "name": {"prompt": "first name and last name of the employee", "dtype": "string"},
216
+ "boss_id": {"prompt": "the id of the boss of the employee", "dtype": "integer"},
217
+ "role": {"prompt": "the role of the employee", "dtype": "string"},
218
+ },
219
+ "primary_key": "employee_id",
220
+ "foreign_keys": [
221
+ {
222
+ "column": "boss_id",
223
+ "referenced_table": "employees",
224
+ "prompt": "each boss has at most 3 employees",
225
+ },
226
+ ],
227
+ }
228
+ }
229
+ df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-4.1")
230
+ print(df)
231
+ # employee_id name boss_id role
232
+ # 0 1 Sandra Phillips <NA> President
233
+ # 1 2 Marcus Tran 1 Chief Financial Officer
234
+ # 2 3 Ava Whittaker 1 Chief Technology Officer
235
+ # 3 4 Sophie Martin 1 Chief Operations Officer
236
+ # 4 5 Chad Nelson 2 Finance Manager
237
+ # 5 6 Ethan Glover 2 Senior Accountant
238
+ # 6 7 Kimberly Ortiz 2 Junior Accountant
239
+ # 7 8 Lucas Romero 3 IT Manager
240
+ # 8 9 Priya Desai 3 Lead Software Engineer
241
+ # 9 10 Felix Bennett 3 Senior Systems Analyst
242
+ ```
@@ -1,6 +1,6 @@
1
1
  # Synthetic Mock Data 🔮
2
2
 
3
- [![Documentation](https://img.shields.io/badge/docs-latest-green)](https://mostly-ai.github.io/mostlyai-mock/) [![stats](https://pepy.tech/badge/mostlyai-mock)](https://pypi.org/project/mostlyai-mock/) ![license](https://img.shields.io/github/license/mostly-ai/mostlyai-mock) ![GitHub Release](https://img.shields.io/github/v/release/mostly-ai/mostlyai-mock) ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mostlyai-mock)
3
+ [![Documentation](https://img.shields.io/badge/docs-latest-green)](https://mostly-ai.github.io/mostlyai-mock/) [![stats](https://pepy.tech/badge/mostlyai-mock)](https://pypi.org/project/mostlyai-mock/) ![license](https://img.shields.io/github/license/mostly-ai/mostlyai-mock) ![GitHub Release](https://img.shields.io/github/v/release/mostly-ai/mostlyai-mock)
4
4
 
5
5
  Create data out of nothing. Prompt LLMs for Tabular Data.
6
6
 
@@ -39,7 +39,7 @@ from mostlyai import mock
39
39
 
40
40
  tables = {
41
41
  "guests": {
42
- "description": "Guests of an Alpine ski hotel in Austria",
42
+ "prompt": "Guests of an Alpine ski hotel in Austria",
43
43
  "columns": {
44
44
  "nationality": {"prompt": "2-letter code for the nationality", "dtype": "string"},
45
45
  "name": {"prompt": "first name and last name of the guest", "dtype": "string"},
@@ -79,17 +79,26 @@ from mostlyai import mock
79
79
 
80
80
  tables = {
81
81
  "customers": {
82
- "description": "Customers of a hardware store",
82
+ "prompt": "Customers of a hardware store",
83
83
  "columns": {
84
84
  "customer_id": {"prompt": "the unique id of the customer", "dtype": "integer"},
85
85
  "name": {"prompt": "first name and last name of the customer", "dtype": "string"},
86
86
  },
87
87
  "primary_key": "customer_id",
88
88
  },
89
+ "warehouses": {
90
+ "prompt": "Warehouses of a hardware store",
91
+ "columns": {
92
+ "warehouse_id": {"prompt": "the unique id of the warehouse", "dtype": "integer"},
93
+ "name": {"prompt": "the name of the warehouse", "dtype": "string"},
94
+ },
95
+ "primary_key": "warehouse_id",
96
+ },
89
97
  "orders": {
90
- "description": "Orders of a Customer",
98
+ "prompt": "Orders of a Customer",
91
99
  "columns": {
92
100
  "customer_id": {"prompt": "the customer id for that order", "dtype": "integer"},
101
+ "warehouse_id": {"prompt": "the warehouse id for that order", "dtype": "integer"},
93
102
  "order_id": {"prompt": "the unique id of the order", "dtype": "string"},
94
103
  "text": {"prompt": "order text description", "dtype": "string"},
95
104
  "amount": {"prompt": "order amount in USD", "dtype": "float"},
@@ -99,12 +108,16 @@ tables = {
99
108
  {
100
109
  "column": "customer_id",
101
110
  "referenced_table": "customers",
102
- "description": "each customer has anywhere between 2 and 3 orders",
103
- }
111
+ "prompt": "each customer has anywhere between 2 and 3 orders",
112
+ },
113
+ {
114
+ "column": "warehouse_id",
115
+ "referenced_table": "warehouses",
116
+ },
104
117
  ],
105
118
  },
106
119
  "items": {
107
- "description": "Items in an Order",
120
+ "prompt": "Items in an Order",
108
121
  "columns": {
109
122
  "item_id": {"prompt": "the unique id of the item", "dtype": "string"},
110
123
  "order_id": {"prompt": "the order id for that item", "dtype": "string"},
@@ -115,7 +128,7 @@ tables = {
115
128
  {
116
129
  "column": "order_id",
117
130
  "referenced_table": "orders",
118
- "description": "each order has between 1 and 2 items",
131
+ "prompt": "each order has between 1 and 2 items",
119
132
  }
120
133
  ],
121
134
  },
@@ -126,28 +139,69 @@ data = mock.sample(
126
139
  model="openai/gpt-4.1"
127
140
  )
128
141
  print(data["customers"])
129
- # customer_id name
130
- # 0 1 Michael Torres
131
- # 1 2 Elaine Kim
142
+ # customer_id name
143
+ # 0 1 Matthew Carlson
144
+ # 1 2 Priya Shah
145
+ print(data["warehouses"])
146
+ # warehouse_id name
147
+ # 0 1 Central Distribution Hub
148
+ # 1 2 Northgate Storage Facility
132
149
  print(data["orders"])
133
- # customer_id order_id text amount
134
- # 0 1 ORD20240612001 Home office desk and ergonomic chair bundle 412.95
135
- # 1 1 ORD20240517322 Wireless noise-cancelling headphones 226.49
136
- # 2 1 ORD20240430307 Smart LED desk lamp with USB charging port 69.99
137
- # 3 2 ORD20240614015 Eco-friendly bamboo kitchen utensil set 39.95
138
- # 4 2 ORD20240528356 Air fryer with digital touch screen, 5-quart c... 129.99
139
- # 5 2 ORD20240510078 Double-walled glass coffee mugs, set of 4 48.5
150
+ # customer_id warehouse_id order_id text amount
151
+ # 0 1 2 ORD-10294 3-tier glass shelving units, expedited deliver... 649.25
152
+ # 1 1 1 ORD-10541 Office desk chairs, set of 6, with assembly se... 824.9
153
+ # 2 1 1 ORD-10802 Executive standing desk, walnut finish, standa... 519.0
154
+ # 3 2 1 ORD-11017 Maple conference table, cable management inclu... 1225.5
155
+ # 4 2 2 ORD-11385 Set of ergonomic task chairs, black mesh, stan... 767.75
140
156
  print(data["items"])
141
- # item_id order_id name price
142
- # 0 ITEM100001A ORD20240612001 Ergonomic Mesh Office Chair 179.99
143
- # 1 ITEM100001B ORD20240612001 Adjustable Home Office Desk 232.96
144
- # 2 ITEM100002A ORD20240517322 Wireless Noise-Cancelling Headphones 226.49
145
- # 3 ITEM100003A ORD20240430307 Smart LED Desk Lamp 59.99
146
- # 4 ITEM100003B ORD20240430307 USB Charging Cable (Desk Lamp Compatible) 10.0
147
- # 5 ITEM100004A ORD20240614015 Bamboo Cooking Spoon 13.49
148
- # 6 ITEM100004B ORD20240614015 Bamboo Slotted Turner 12.99
149
- # 7 ITEM100005A ORD20240528356 Digital Air Fryer (5-Quart, Black) 115.99
150
- # 8 ITEM100005B ORD20240528356 Silicone Liner for Air Fryer (5-Quart) 13.99
151
- # 9 ITEM100006A ORD20240510078 Double-Walled Glass Coffee Mug (12oz) 13.75
152
- # 10 ITEM100006B ORD20240510078 Double-Walled Glass Coffee Mug (8oz) 11.25
157
+ # item_id order_id name price
158
+ # 0 ITM-80265 ORD-10294 3-Tier Tempered Glass Shelving Unit 409.0
159
+ # 1 ITM-80266 ORD-10294 Brushed Aluminum Shelf Brackets (Set of 4) 240.25
160
+ # 2 ITM-81324 ORD-10541 Ergonomic Mesh-Back Desk Chair 132.5
161
+ # 3 ITM-81325 ORD-10541 Professional Office Chair Assembly Service 45.0
162
+ # 4 ITM-82101 ORD-10802 Executive Standing Desk, Walnut Finish 469.0
163
+ # 5 ITM-82102 ORD-10802 Desk Installation and Setup Service 50.0
164
+ # 6 ITM-83391 ORD-11017 Maple Conference Table, 10-Seat 1125.5
165
+ # 7 ITM-83392 ORD-11017 Integrated Table Cable Management Kit 100.0
166
+ # 8 ITM-84311 ORD-11385 Ergonomic Task Chair, Black Mesh 359.25
167
+ # 9 ITM-84312 ORD-11385 Standard Delivery Service 48.5
153
168
  ```
169
+
170
+ 6. Create your first self-referencing synthetic table
171
+
172
+ ```python
173
+ from mostlyai import mock
174
+
175
+ tables = {
176
+ "employees": {
177
+ "prompt": "Employees of a company",
178
+ "columns": {
179
+ "employee_id": {"prompt": "the unique id of the employee", "dtype": "integer"},
180
+ "name": {"prompt": "first name and last name of the employee", "dtype": "string"},
181
+ "boss_id": {"prompt": "the id of the boss of the employee", "dtype": "integer"},
182
+ "role": {"prompt": "the role of the employee", "dtype": "string"},
183
+ },
184
+ "primary_key": "employee_id",
185
+ "foreign_keys": [
186
+ {
187
+ "column": "boss_id",
188
+ "referenced_table": "employees",
189
+ "prompt": "each boss has at most 3 employees",
190
+ },
191
+ ],
192
+ }
193
+ }
194
+ df = mock.sample(tables=tables, sample_size=10, model="openai/gpt-4.1")
195
+ print(df)
196
+ # employee_id name boss_id role
197
+ # 0 1 Sandra Phillips <NA> President
198
+ # 1 2 Marcus Tran 1 Chief Financial Officer
199
+ # 2 3 Ava Whittaker 1 Chief Technology Officer
200
+ # 3 4 Sophie Martin 1 Chief Operations Officer
201
+ # 4 5 Chad Nelson 2 Finance Manager
202
+ # 5 6 Ethan Glover 2 Senior Accountant
203
+ # 6 7 Kimberly Ortiz 2 Junior Accountant
204
+ # 7 8 Lucas Romero 3 IT Manager
205
+ # 8 9 Priya Desai 3 Lead Software Engineer
206
+ # 9 10 Felix Bennett 3 Senior Systems Analyst
207
+ ```
@@ -15,4 +15,4 @@
15
15
  from mostlyai.mock.core import sample
16
16
 
17
17
  __all__ = ["sample"]
18
- __version__ = "0.0.6" # Do not set this manually. Use poetry version [params].
18
+ __version__ = "0.0.8" # Do not set this manually. Use poetry version [params].
@@ -44,8 +44,10 @@ across tables.
44
44
 
45
45
 
46
46
  class LLMConfig(BaseModel):
47
- model: str
47
+ model: str = "openai/gpt-4.1-nano"
48
48
  api_key: str | None = None
49
+ temperature: float = 1.0
50
+ top_p: float = 0.95
49
51
 
50
52
 
51
53
  class MockConfig(RootModel[dict[str, "TableConfig"]]):
@@ -100,7 +102,8 @@ class MockConfig(RootModel[dict[str, "TableConfig"]]):
100
102
  if table_name in path:
101
103
  cycle_start = path.index(table_name)
102
104
  cycle = path[cycle_start:] + [table_name]
103
- raise ValueError(f"Circular dependency detected: {' -> '.join(cycle)}")
105
+ if len(cycle) > 2: # len(cycle) == 2 means self-referencing table, which is allowed
106
+ raise ValueError(f"Circular dependency detected: {' -> '.join(cycle)}.")
104
107
  if table_name in visited:
105
108
  return
106
109
  visited.add(table_name)
@@ -116,10 +119,10 @@ class MockConfig(RootModel[dict[str, "TableConfig"]]):
116
119
 
117
120
 
118
121
  class TableConfig(BaseModel):
119
- description: str = ""
122
+ prompt: str = ""
120
123
  columns: dict[str, ColumnConfig] = Field(..., min_items=1)
121
124
  primary_key: str | None = None
122
- foreign_keys: list[ForeignKeyConfig] = Field(default_factory=list, min_length=0, max_length=1)
125
+ foreign_keys: list[ForeignKeyConfig] = Field(default_factory=list)
123
126
 
124
127
 
125
128
  class ColumnConfig(BaseModel):
@@ -163,7 +166,7 @@ class ColumnConfig(BaseModel):
163
166
  DType.DATETIME: (str, "strings"),
164
167
  }[self.dtype]
165
168
  try:
166
- self.values = [cast_fn(c) for c in self.values]
169
+ self.values = [cast_fn(c) if pd.notna(c) else None for c in self.values]
167
170
  except ValueError:
168
171
  raise ValueError(
169
172
  f"All values must be convertible to {convertible_to} when dtype is '{self.dtype.value}'"
@@ -184,85 +187,78 @@ class DType(str, Enum):
184
187
  class ForeignKeyConfig(BaseModel):
185
188
  column: str
186
189
  referenced_table: str
187
- description: str | None = None
190
+ prompt: str | None = None
188
191
 
189
192
 
190
193
  def _sample_table(
191
194
  *,
192
- table_name: str,
193
- table_config: TableConfig,
195
+ name: str,
196
+ prompt: str,
197
+ columns: dict[str, ColumnConfig],
198
+ foreign_keys: list[ForeignKeyConfig] | None,
194
199
  primary_keys: dict[str, str] | None,
195
- sample_size: int | None,
196
- context_data: pd.DataFrame | None,
197
- temperature: float,
198
- top_p: float,
200
+ generated_data: dict[str, pd.DataFrame] | None,
201
+ sample_size: int,
199
202
  batch_size: int,
200
203
  previous_rows_size: int,
204
+ non_context_size: int | None,
201
205
  llm_config: LLMConfig,
202
206
  ) -> pd.DataFrame:
203
- assert (sample_size is None) != (context_data is None), (
204
- "Exactly one of sample_size or context_data must be provided"
205
- )
206
- if sample_size is None:
207
- sample_size = len(context_data)
208
207
  table_rows_generator = _create_table_rows_generator(
209
- table_name=table_name,
210
- table_config=table_config,
208
+ name=name,
209
+ prompt=prompt,
210
+ columns=columns,
211
211
  primary_keys=primary_keys,
212
+ foreign_keys=foreign_keys,
213
+ generated_data=generated_data,
212
214
  sample_size=sample_size,
213
- context_data=context_data,
214
- temperature=temperature,
215
- top_p=top_p,
216
215
  batch_size=batch_size,
217
216
  previous_rows_size=previous_rows_size,
217
+ non_context_size=non_context_size,
218
218
  llm_config=llm_config,
219
219
  )
220
- table_rows_generator = tqdm(table_rows_generator, desc=f"Generating rows for table `{table_name}`".ljust(45))
221
- table_df = _convert_table_rows_generator_to_df(table_rows_generator=table_rows_generator, table_config=table_config)
220
+ table_rows_generator = tqdm(table_rows_generator, desc=f"Generating rows for table `{name}`".ljust(45))
221
+ table_df = _convert_table_rows_generator_to_df(table_rows_generator=table_rows_generator, columns=columns)
222
222
  return table_df
223
223
 
224
224
 
225
225
  def _create_table_prompt(
226
226
  *,
227
- table_name: str,
228
- table_description: str,
227
+ name: str,
228
+ prompt: str,
229
229
  columns: dict[str, ColumnConfig],
230
230
  primary_keys: dict[str, str] | None,
231
231
  batch_size: int | None,
232
232
  foreign_keys: list[ForeignKeyConfig] | None,
233
233
  context_data: pd.DataFrame | None,
234
- previous_rows: list[dict],
234
+ non_context_data: dict[str, pd.DataFrame] | None,
235
+ previous_rows: list[dict] | None,
235
236
  ) -> str:
236
- if batch_size is not None:
237
- assert foreign_keys is None
238
- assert context_data is None
239
- else:
240
- assert foreign_keys is not None
241
- assert context_data is not None
242
- assert primary_keys is not None
243
-
244
- # add description
245
- prompt = f"# {table_description}\n\n"
237
+ # add table prompt
238
+ prompt = f"# {prompt}\n\n"
246
239
 
247
240
  # define table
248
- prompt += f"## Table: {table_name}\n\n"
241
+ prompt += f"## Table: {name}\n\n"
242
+
243
+ prompt += f"## Table Primary Key: `{primary_keys[name]}`\n\n"
249
244
 
250
245
  # add columns specifications
251
246
  prompt += "## Columns Specifications:\n\n"
252
247
  prompt += f"{json.dumps({name: config.model_dump() for name, config in columns.items()}, indent=2)}\n\n"
253
248
 
254
- # define foreign keys
255
- if foreign_keys is not None:
256
- prompt += "## Foreign Keys:\n\n"
257
- prompt += f"{json.dumps([fk.model_dump() for fk in foreign_keys], indent=2)}\n\n"
258
-
259
249
  # add previous rows as context to help the LLM generate consistent data
260
250
  if previous_rows:
261
251
  prompt += f"\n## Previous {len(previous_rows)} Rows:\n\n"
262
252
  prompt += f"{json.dumps(previous_rows, indent=2)}\n\n"
263
253
 
254
+ # define foreign keys
255
+ if foreign_keys:
256
+ prompt += "## Foreign Keys:\n\n"
257
+ prompt += f"{json.dumps([fk.model_dump() for fk in foreign_keys], indent=2)}\n\n"
258
+
264
259
  # add context table name, primary key and data
265
- if context_data is not None:
260
+ if foreign_keys and foreign_keys[0].referenced_table != name: # self-dependency is not considered as context
261
+ assert context_data is not None
266
262
  fk = foreign_keys[0]
267
263
  prompt += f"## Context Table: `{fk.referenced_table}`\n\n"
268
264
 
@@ -271,16 +267,35 @@ def _create_table_prompt(
271
267
  prompt += f"## Context Table Data:\n\n"
272
268
  prompt += f"{context_data.to_json(orient='records', indent=2)}\n\n"
273
269
 
270
+ # add non-context table names, primary keys and data
271
+ if foreign_keys and len(foreign_keys) > 1:
272
+ for fk in foreign_keys[1:]:
273
+ if fk.referenced_table == name: # self-dependency is not considered as non-context
274
+ continue
275
+ assert non_context_data is not None
276
+ assert fk.referenced_table in non_context_data
277
+ prompt += f"## Non-Context Table: `{fk.referenced_table}`\n\n"
278
+
279
+ prompt += f"## Non-Context Table Primary Key: `{primary_keys[fk.referenced_table]}`\n\n"
280
+
281
+ prompt += f"## Non-Context Table Data:\n\n"
282
+ prompt += f"{non_context_data[fk.referenced_table].to_json(orient='records', indent=2)}\n\n"
283
+
274
284
  # add instructions
275
285
  prompt += "\n## Instructions:\n\n"
276
- if batch_size is not None:
277
- prompt += f"Generate {batch_size} rows for the `{table_name}` table.\n\n"
286
+ if not foreign_keys:
287
+ assert batch_size is not None
288
+ prompt += f"Generate {batch_size} rows for the `{name}` table.\n\n"
278
289
  else:
279
290
  prompt += (
280
- f"Generate data for the `{table_name}` table. "
281
- f"The Foreign Key column may only contain values from Context Table Data. "
282
- f"Pay attention to description of the Foreign Key column to understand the relationship.\n\n"
291
+ f"Generate data for the `{name}` table. "
292
+ f"The first Foreign Key column from Foreign Keys section may only contain values from Context Table Data. "
293
+ f"The following Foreign Key columns from Foreign Keys section (if exists) may only contain values from Non-Context Table Data sections. "
294
+ f"If either relevant Context Table Data or Non-Context Table Data is not present, this means that table has self-dependency. "
295
+ f"In this case, ensure that the generated foreign keys are consistent with generated primary keys of the table. "
296
+ f"Pay attention to prompt of the Foreign Key column to understand the relationship.\n\n"
283
297
  )
298
+
284
299
  if previous_rows:
285
300
  prompt += (
286
301
  "Generate new rows that maintain consistency with the previous rows where appropriate. "
@@ -295,15 +310,16 @@ def _create_table_prompt(
295
310
 
296
311
  def _create_table_rows_generator(
297
312
  *,
298
- table_name: str,
299
- table_config: TableConfig,
313
+ name: str,
314
+ prompt: str,
315
+ columns: dict[str, ColumnConfig],
316
+ foreign_keys: list[ForeignKeyConfig] | None,
300
317
  primary_keys: dict[str, str] | None,
318
+ generated_data: dict[str, pd.DataFrame] | None,
301
319
  sample_size: int,
302
- temperature: float,
303
- top_p: float,
304
- context_data: pd.DataFrame | None,
305
320
  batch_size: int,
306
321
  previous_rows_size: int,
322
+ non_context_size: int | None,
307
323
  llm_config: LLMConfig,
308
324
  ) -> Generator[dict]:
309
325
  def create_table_response_format(columns: dict[str, ColumnConfig]) -> BaseModel:
@@ -311,14 +327,14 @@ def _create_table_rows_generator(
311
327
  if column_config.values or column_config.dtype is DType.CATEGORY:
312
328
  return Literal[tuple(column_config.values)]
313
329
  return {
314
- DType.INTEGER: int,
315
- DType.FLOAT: float,
316
- DType.STRING: str,
317
- DType.BOOLEAN: bool,
330
+ DType.INTEGER: int | None,
331
+ DType.FLOAT: float | None,
332
+ DType.STRING: str | None,
333
+ DType.BOOLEAN: bool | None,
318
334
  # response_format has limited support for JSON Schema features
319
335
  # thus we represent dates and datetimes as strings
320
- DType.DATE: str,
321
- DType.DATETIME: str,
336
+ DType.DATE: str | None,
337
+ DType.DATETIME: str | None,
322
338
  }[column_config.dtype]
323
339
 
324
340
  fields = {}
@@ -375,10 +391,31 @@ def _create_table_rows_generator(
375
391
  "The model does not support structured output / JSON mode."
376
392
  )
377
393
 
394
+ # derive context data (if first foreign key is present) and harmonize sample size accordingly
395
+ context_data: pd.DataFrame | None = None
396
+ if foreign_keys and foreign_keys[0].referenced_table != name: # self-dependency is not considered as context
397
+ context_table_name = foreign_keys[0].referenced_table
398
+ assert generated_data is not None
399
+ assert context_table_name in generated_data
400
+ context_data = generated_data[context_table_name]
401
+ sample_size = len(context_data)
402
+
403
+ # derive non-context data (if more than one foreign key is present)
404
+ non_context_data: dict[str, pd.DataFrame] = {}
405
+ if foreign_keys and len(foreign_keys) > 1:
406
+ assert generated_data is not None
407
+ assert non_context_size is not None
408
+ for fk in foreign_keys[1:]:
409
+ if fk.referenced_table == name: # self-dependency is not considered as non-context
410
+ continue
411
+ non_context_table_name = fk.referenced_table
412
+ assert non_context_table_name in generated_data
413
+ non_context_data[non_context_table_name] = generated_data[non_context_table_name]
414
+
378
415
  litellm_kwargs = {
379
- "response_format": create_table_response_format(columns=table_config.columns),
380
- "temperature": temperature,
381
- "top_p": top_p,
416
+ "response_format": create_table_response_format(columns=columns),
417
+ "temperature": llm_config.temperature,
418
+ "top_p": llm_config.top_p,
382
419
  "model": llm_config.model,
383
420
  "api_key": llm_config.api_key,
384
421
  "stream": True,
@@ -387,17 +424,22 @@ def _create_table_rows_generator(
387
424
  yielded_sequences = 0
388
425
  previous_rows = deque(maxlen=previous_rows_size)
389
426
  for context_batch in batch_infinitely(context_data):
390
- prompt_kwargs = {
391
- "table_name": table_name,
392
- "table_description": table_config.description,
393
- "columns": table_config.columns,
394
- "primary_keys": primary_keys,
395
- "batch_size": batch_size if context_batch is None else None,
396
- "foreign_keys": table_config.foreign_keys if context_batch is not None else None,
397
- "context_data": context_batch if context_batch is not None else None,
398
- "previous_rows": list(previous_rows),
399
- }
400
- prompt = _create_table_prompt(**prompt_kwargs)
427
+ non_context_batch = (
428
+ {table_name: df.sample(frac=1.0).head(non_context_size) for table_name, df in non_context_data.items()}
429
+ if non_context_data
430
+ else None
431
+ )
432
+ prompt = _create_table_prompt(
433
+ name=name,
434
+ prompt=prompt,
435
+ columns=columns,
436
+ primary_keys=primary_keys,
437
+ batch_size=batch_size,
438
+ foreign_keys=foreign_keys,
439
+ context_data=context_batch,
440
+ non_context_data=non_context_batch,
441
+ previous_rows=list(previous_rows),
442
+ )
401
443
  messages = [{"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": prompt}]
402
444
 
403
445
  response = litellm.completion(messages=messages, **litellm_kwargs)
@@ -423,16 +465,21 @@ def _create_table_rows_generator(
423
465
 
424
466
 
425
467
  def _convert_table_rows_generator_to_df(
426
- table_rows_generator: Generator[dict], table_config: TableConfig
468
+ table_rows_generator: Generator[dict],
469
+ columns: dict[str, ColumnConfig],
427
470
  ) -> pd.DataFrame:
428
471
  def align_df_dtypes_with_mock_dtypes(df: pd.DataFrame, columns: dict[str, ColumnConfig]) -> pd.DataFrame:
429
472
  for column_name, column_config in columns.items():
430
473
  if column_config.dtype in [DType.DATE, DType.DATETIME]:
431
474
  df[column_name] = pd.to_datetime(df[column_name], errors="coerce")
432
- elif column_config.dtype in [DType.INTEGER, DType.FLOAT]:
433
- df[column_name] = pd.to_numeric(df[column_name], errors="coerce", dtype_backend="pyarrow")
475
+ elif column_config.dtype is DType.INTEGER:
476
+ df[column_name] = pd.to_numeric(df[column_name], errors="coerce", downcast="integer").astype(
477
+ "int64[pyarrow]"
478
+ )
479
+ elif column_config.dtype is DType.FLOAT:
480
+ df[column_name] = pd.to_numeric(df[column_name], errors="coerce").astype("double[pyarrow]")
434
481
  elif column_config.dtype is DType.BOOLEAN:
435
- df[column_name] = df[column_name].astype(bool)
482
+ df[column_name] = pd.to_numeric(df[column_name], errors="coerce").astype("boolean[pyarrow]")
436
483
  elif column_config.dtype is DType.CATEGORY:
437
484
  df[column_name] = pd.Categorical(df[column_name], categories=column_config.values)
438
485
  else:
@@ -440,7 +487,7 @@ def _convert_table_rows_generator_to_df(
440
487
  return df
441
488
 
442
489
  df = pd.DataFrame(list(table_rows_generator))
443
- df = align_df_dtypes_with_mock_dtypes(df, table_config.columns)
490
+ df = align_df_dtypes_with_mock_dtypes(df, columns)
444
491
  return df
445
492
 
446
493
 
@@ -453,28 +500,32 @@ def _harmonize_sample_size(sample_size: int | dict[str, int], config: MockConfig
453
500
  return sample_size
454
501
 
455
502
 
456
- def _build_dependency_graph(config: MockConfig) -> tuple[dict[str, list[str]], dict[str, list[str]], list[str]]:
457
- child_to_parents = {}
458
- parent_to_children = {}
503
+ def _build_execution_plan(config: MockConfig) -> list[str]:
504
+ def build_dependency_mappings(config: MockConfig) -> tuple[dict[str, list[str]], dict[str, list[str]], list[str]]:
505
+ child_to_parents = {}
506
+ parent_to_children = {}
459
507
 
460
- for table_name in config.root:
461
- child_to_parents[table_name] = []
462
- parent_to_children[table_name] = []
508
+ for table_name in config.root:
509
+ child_to_parents[table_name] = set()
510
+ parent_to_children[table_name] = set()
463
511
 
464
- for table_name, table_config in config.root.items():
465
- if table_config.foreign_keys:
466
- for fk in table_config.foreign_keys:
467
- referenced_table = fk.referenced_table
468
- child_to_parents[table_name].append(referenced_table)
469
- parent_to_children[referenced_table].append(table_name)
512
+ for table_name, table_config in config.root.items():
513
+ if table_config.foreign_keys:
514
+ for fk in table_config.foreign_keys:
515
+ referenced_table = fk.referenced_table
516
+ child_to_parents[table_name].add(referenced_table)
517
+ parent_to_children[referenced_table].add(table_name)
470
518
 
471
- subject_tables = [table_name for table_name, deps in child_to_parents.items() if not deps]
472
- return child_to_parents, parent_to_children, subject_tables
519
+ root_tables = []
520
+ for table_name, parents in child_to_parents.items():
521
+ if not parents or parents == {table_name}: # no dependencies or only self-dependency
522
+ root_tables.append(table_name)
523
+ return child_to_parents, parent_to_children, root_tables
473
524
 
525
+ child_to_parents, parent_to_children, root_tables = build_dependency_mappings(config)
474
526
 
475
- def _build_execution_plan(parent_to_children: dict[str, list[str]], subject_tables: list[str]) -> list[str]:
476
527
  execution_plan = []
477
- bfs_queue = list(subject_tables)
528
+ bfs_queue = list(root_tables)
478
529
  processed = set()
479
530
 
480
531
  while bfs_queue:
@@ -482,6 +533,16 @@ def _build_execution_plan(parent_to_children: dict[str, list[str]], subject_tabl
482
533
  if table_name in processed:
483
534
  continue
484
535
 
536
+ # ensure all parents are processed before processing this table
537
+ unprocessed_parents = []
538
+ for parent in child_to_parents[table_name]:
539
+ if parent not in processed and parent != table_name: # exclude self-dependency
540
+ unprocessed_parents.append(parent)
541
+ if unprocessed_parents:
542
+ bfs_queue.extend(unprocessed_parents)
543
+ bfs_queue.append(table_name)
544
+ continue
545
+
485
546
  execution_plan.append(table_name)
486
547
  processed.add(table_name)
487
548
 
@@ -499,6 +560,7 @@ def sample(
499
560
  api_key: str | None = None,
500
561
  temperature: float = 1.0,
501
562
  top_p: float = 0.95,
563
+ return_type: Literal["auto", "dict"] = "auto",
502
564
  ) -> pd.DataFrame | dict[str, pd.DataFrame]:
503
565
  """
504
566
  Generate mock data by prompting an LLM.
@@ -523,6 +585,7 @@ def sample(
523
585
  api_key (str | None): The API key to use for the LLM. If not provided, LiteLLM will take it from the environment variables.
524
586
  temperature (float): The temperature to use for the LLM. Default is 1.0.
525
587
  top_p (float): The top-p value to use for the LLM. Default is 0.95.
588
+ return_type (Literal["auto", "dict"]): The format of the returned data. Default is "auto".
526
589
 
527
590
  Returns:
528
591
  - pd.DataFrame: A single DataFrame containing the generated mock data, if only one table is provided.
@@ -534,7 +597,7 @@ def sample(
534
597
 
535
598
  tables = {
536
599
  "guests": {
537
- "description": "Guests of an Alpine ski hotel in Austria",
600
+ "prompt": "Guests of an Alpine ski hotel in Austria",
538
601
  "columns": {
539
602
  "nationality": {"prompt": "2-letter code for the nationality", "dtype": "string"},
540
603
  "name": {"prompt": "first name and last name of the guest", "dtype": "string"},
@@ -557,17 +620,26 @@ def sample(
557
620
 
558
621
  tables = {
559
622
  "customers": {
560
- "description": "Customers of a hardware store",
623
+ "prompt": "Customers of a hardware store",
561
624
  "columns": {
562
625
  "customer_id": {"prompt": "the unique id of the customer", "dtype": "integer"},
563
626
  "name": {"prompt": "first name and last name of the customer", "dtype": "string"},
564
627
  },
565
628
  "primary_key": "customer_id",
566
629
  },
630
+ "warehouses": {
631
+ "prompt": "Warehouses of a hardware store",
632
+ "columns": {
633
+ "warehouse_id": {"prompt": "the unique id of the warehouse", "dtype": "integer"},
634
+ "name": {"prompt": "the name of the warehouse", "dtype": "string"},
635
+ },
636
+ "primary_key": "warehouse_id",
637
+ },
567
638
  "orders": {
568
- "description": "Orders of a Customer",
639
+ "prompt": "Orders of a Customer",
569
640
  "columns": {
570
641
  "customer_id": {"prompt": "the customer id for that order", "dtype": "integer"},
642
+ "warehouse_id": {"prompt": "the warehouse id for that order", "dtype": "integer"},
571
643
  "order_id": {"prompt": "the unique id of the order", "dtype": "string"},
572
644
  "text": {"prompt": "order text description", "dtype": "string"},
573
645
  "amount": {"prompt": "order amount in USD", "dtype": "float"},
@@ -577,12 +649,16 @@ def sample(
577
649
  {
578
650
  "column": "customer_id",
579
651
  "referenced_table": "customers",
580
- "description": "each customer has anywhere between 2 and 3 orders",
581
- }
652
+ "prompt": "each customer has anywhere between 2 and 3 orders",
653
+ },
654
+ {
655
+ "column": "warehouse_id",
656
+ "referenced_table": "warehouses",
657
+ },
582
658
  ],
583
659
  },
584
660
  "items": {
585
- "description": "Items in an Order",
661
+ "prompt": "Items in an Order",
586
662
  "columns": {
587
663
  "item_id": {"prompt": "the unique id of the item", "dtype": "string"},
588
664
  "order_id": {"prompt": "the order id for that item", "dtype": "string"},
@@ -593,59 +669,44 @@ def sample(
593
669
  {
594
670
  "column": "order_id",
595
671
  "referenced_table": "orders",
596
- "description": "each order has between 1 and 2 items",
672
+ "prompt": "each order has between 1 and 2 items",
597
673
  }
598
674
  ],
599
675
  },
600
676
  }
601
677
  data = mock.sample(tables=tables, sample_size=2, model="openai/gpt-4.1")
602
678
  df_customers = data["customers"]
679
+ df_warehouses = data["warehouses"]
603
680
  df_orders = data["orders"]
604
681
  df_items = data["items"]
605
682
  ```
606
683
  """
607
684
 
608
685
  config = MockConfig(tables)
686
+ llm_config = LLMConfig(model=model, api_key=api_key, temperature=temperature, top_p=top_p)
609
687
 
610
688
  sample_size = _harmonize_sample_size(sample_size, config)
611
689
  primary_keys = {table_name: table_config.primary_key for table_name, table_config in config.root.items()}
612
690
 
613
- child_to_parents, parent_to_children, subject_tables = _build_dependency_graph(config)
614
- execution_plan: list[str] = _build_execution_plan(parent_to_children, subject_tables)
691
+ execution_plan: list[str] = _build_execution_plan(config)
615
692
 
616
- results: dict[str, pd.DataFrame] = {}
693
+ data: dict[str, pd.DataFrame] = {}
617
694
 
618
695
  for table_name in execution_plan:
619
696
  table_config = config.root[table_name]
620
- if not child_to_parents[table_name]:
621
- # subject table
622
- df = _sample_table(
623
- table_name=table_name,
624
- table_config=table_config,
625
- primary_keys=None,
626
- sample_size=sample_size[table_name],
627
- context_data=None,
628
- temperature=temperature,
629
- top_p=top_p,
630
- batch_size=20, # generate 20 subjects at a time
631
- previous_rows_size=5,
632
- llm_config=LLMConfig(model=model, api_key=api_key),
633
- )
634
- else:
635
- # sequencial table
636
- referenced_table = table_config.foreign_keys[0].referenced_table
637
- df = _sample_table(
638
- table_name=table_name,
639
- table_config=table_config,
640
- primary_keys=primary_keys,
641
- sample_size=None,
642
- context_data=results[referenced_table],
643
- temperature=temperature,
644
- top_p=top_p,
645
- batch_size=1, # generate one sequence at a time
646
- previous_rows_size=5,
647
- llm_config=LLMConfig(model=model, api_key=api_key),
648
- )
649
- results[table_name] = df
650
-
651
- return results if len(results) > 1 else next(iter(results.values()))
697
+ df = _sample_table(
698
+ name=table_name,
699
+ prompt=table_config.prompt,
700
+ columns=table_config.columns,
701
+ foreign_keys=table_config.foreign_keys,
702
+ primary_keys=primary_keys,
703
+ generated_data=data,
704
+ sample_size=sample_size[table_name],
705
+ batch_size=30, # generate 30 root table rows at a time
706
+ previous_rows_size=10, # present 10 previously generated rows to the LLM
707
+ non_context_size=10, # pick 10 rows to choose from for each non-context foreign key
708
+ llm_config=llm_config,
709
+ )
710
+ data[table_name] = df
711
+
712
+ return next(iter(data.values())) if len(data) == 1 and return_type == "auto" else data
@@ -0,0 +1,46 @@
1
+ import json
2
+
3
+ import pandas as pd
4
+ from fastmcp import Context, FastMCP
5
+
6
+ from mostlyai import mock
7
+
8
+ mcp = FastMCP(name="MostlyAI Mock MCP Server")
9
+
10
+
11
@mcp.tool(description=mock.sample.__doc__)
async def sample_mock_data(
    *,
    tables: dict[str, dict],
    sample_size: int,
    model: str = "openai/gpt-4.1-nano",
    api_key: str | None = None,
    temperature: float = 1.0,
    top_p: float = 0.95,
    ctx: Context,
) -> str:
    # MCP tool that forwards its arguments to `mock.sample` and returns the generated
    # tables as a JSON string of the form `{table_name: [row, ...]}`.
    #
    # Notes:
    # 1. Returning DataFrames directly results in converting them into a truncated string,
    #    hence every table is serialized to JSON records explicitly.
    # 2. The logs / progress bars are not propagated to the MCP Client. There is a dedicated
    #    API to do that (e.g. `ctx.info(...)`); these methods are coroutines and have to be
    #    awaited, otherwise the message is never sent.
    # 3. MCP Server inherits only selected environment variables (PATH, USER...); one way to
    #    pass LLM keys is through client configuration (`mcpServers->env`)
    # 4. Some MCP Clients, e.g. Cursor, do not like Unions or Optionals in type hints
    import asyncio  # local import: only needed to off-load the blocking call below

    await ctx.info(f"Generating mock data for `{len(tables)}` tables")
    # `mock.sample` performs blocking LLM requests; run it in a worker thread so that the
    # server's event loop stays responsive while the data is being generated.
    data = await asyncio.to_thread(
        mock.sample,
        tables=tables,
        sample_size=sample_size,
        model=model,
        api_key=api_key,
        temperature=temperature,
        top_p=top_p,
        return_type="dict",  # always a dict, even when only a single table is requested
    )
    await ctx.info(f"Generated mock data for `{len(tables)}` tables")
    # Go through `DataFrame.to_json` so that timestamps become ISO strings and missing
    # values become `null`; the declared return type is `str`, so emit one JSON document.
    records = {
        table_name: json.loads(df.to_json(orient="records", date_format="iso"))
        for table_name, df in data.items()
    }
    return json.dumps(records)
39
+
40
+
41
def main():
    """Entry point of the `mcp-server` script: run the MCP server over stdio."""
    transport = "stdio"
    mcp.run(transport=transport)
43
+
44
+
45
+ if __name__ == "__main__":
46
+ main()
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "mostlyai-mock"
3
- version = "0.0.6"
3
+ version = "0.0.8"
4
4
  description = "Synthetic Mock Data"
5
5
  authors = [{ name = "MOSTLY AI", email = "dev@mostly.ai" }]
6
6
  requires-python = ">=3.10"
@@ -29,8 +29,13 @@ dependencies = [
29
29
  "pandas>=2.0.0",
30
30
  "pyarrow>=14.0.0",
31
31
  "litellm>=1.67.0",
32
+ "typer>=0.9.0,<1.0.0",
33
+ "fastmcp>=2.0.0,<3.0.0",
32
34
  ]
33
35
 
36
+ [project.scripts]
37
+ mcp-server = "mostlyai.mock.mcp:main"
38
+
34
39
  [project.urls]
35
40
  homepage = "https://github.com/mostly-ai/mostlyai-mock"
36
41
  repository = "https://github.com/mostly-ai/mostlyai-mock"
File without changes
File without changes