openbusdata-mcp 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ # OpenBusData MCP Server package
@@ -0,0 +1,975 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ OpenBusData MCP Server
4
+
5
+ Exposes OpenBusData Services APIs as MCP tools based on local OpenAPI YAML specs
6
+ and rich timetable data parsing for route/stop discovery, timetable search,
7
+ and multi-leg journey planning.
8
+
9
+ API key is read from the OPENBUS_API_KEY environment variable.
10
+ """
11
+
12
+ import os
13
+ import sys
14
+ import yaml
15
+ import json
16
+ import zipfile
17
+ import io
18
+ import xml.etree.ElementTree as ET
19
+ import re
20
+ from pathlib import Path
21
+ from typing import Any, Optional
22
+ from urllib.parse import urljoin, urlencode
23
+ from dataclasses import dataclass, field, asdict
24
+ from datetime import datetime, timedelta, time
25
+ from collections import defaultdict
26
+
27
+ import httpx
28
+ from mcp.server.fastmcp import FastMCP
29
+
30
+ # Suppress noisy httpx logs (they would break stdio MCP transport)
31
+ import logging
32
+ logging.getLogger("httpx").setLevel(logging.WARNING)
33
+
34
+ # ---------------------------------------------------------------------------
35
+ # Configuration
36
+ # ---------------------------------------------------------------------------
37
# Endpoint and credentials are taken from the environment; the API key falls
# back to an empty string when OPENBUS_API_KEY is not set.
BASE_URL = os.getenv("OPENBUS_BASE_URL", "https://data.bus-data.dft.gov.uk")
API_KEY = os.getenv("OPENBUS_API_KEY", "")

# Resolve specs from project root (where server.json and YAML files live):
# the third parent of this file's path.
_THIS_FILE = Path(__file__)
SPECS_DIR = _THIS_FILE.parent.parent.parent

# Per-user cache directory; created (with parents) as soon as the module is imported.
CACHE_DIR = Path.home().joinpath(".cache", "openbusdata")
CACHE_DIR.mkdir(parents=True, exist_ok=True)

mcp = FastMCP("openbusdata")
45
+
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Data structures for timetable parsing
49
+ # ---------------------------------------------------------------------------
50
@dataclass
class Stop:
    """A bus stop identified by its NaPTAN code."""

    # Identifier of the stop (the StopPointRef text when parsed from TransXChange).
    naptan: str
    # Human-readable stop name.
    name: str
    # Optional coordinates; both default to None when unknown.
    lat: Optional[float] = None
    lon: Optional[float] = None
56
+
57
+
58
@dataclass
class Route:
    """A route run by one operator, with its directions and stop sequence."""

    operator: str
    route_num: str
    # Direction labels seen for this route; a fresh set per instance.
    directions: set = field(default_factory=set)
    # Ordered NaPTAN codes of the stops served; a fresh list per instance.
    stops: list = field(default_factory=list)
64
+
65
+
66
@dataclass
class JourneyStop:
    """One call at a stop within a journey."""

    naptan: str
    # Scheduled times at this stop; either may be absent.
    arrival: Optional[time] = None
    departure: Optional[time] = None
71
+
72
+
73
@dataclass
class Journey:
    """A single timetabled vehicle journey along a route."""

    operator: str
    route_num: str
    direction: str
    journey_code: str
    # Calls in travel order; a fresh list per instance.
    stops: list[JourneyStop] = field(default_factory=list)
    # Operating days as three-letter codes: mon, tue, wed, thu, fri, sat, sun.
    days: set[str] = field(default_factory=set)
81
+
82
+
83
class TimetableIndex:
    """In-memory index of all parsed timetable data.

    Attributes:
        stops: NaPTAN code -> Stop.
        routes: "operator|route_num" key -> Route.
        stop_to_routes: NaPTAN code -> set of route keys that list the stop.
        journeys: every Journey added so far, in insertion order.
        loaded_datasets: ids of the datasets already merged into the index.
    """

    def __init__(self):
        self.stops: dict[str, Stop] = {}
        self.routes: dict[str, Route] = {}
        self.stop_to_routes: dict[str, set[str]] = {}
        self.journeys: list[Journey] = []
        self.loaded_datasets: set[int] = set()

    def add_stop(self, stop: "Stop") -> None:
        """Register *stop*, or enrich the entry already held for its NaPTAN code.

        An existing entry keeps its name unless that name is empty or the
        "Unknown" placeholder the parser uses for stops without a CommonName;
        missing coordinates are filled in from *stop* when it has them.
        """
        known = self.stops.get(stop.naptan)
        if known is None:
            self.stops[stop.naptan] = stop
            return
        if (not known.name or known.name == "Unknown") and stop.name:
            known.name = stop.name
        if known.lat is None and stop.lat is not None:
            known.lat = stop.lat
        if known.lon is None and stop.lon is not None:
            known.lon = stop.lon

    def add_route(self, route: "Route") -> None:
        """Merge *route* into the index under its "operator|route_num" key.

        Directions are unioned, the longest stop list seen wins, and every stop
        of *route* is linked back to the route key in ``stop_to_routes``.
        """
        key = f"{route.operator}|{route.route_num}"
        known = self.routes.get(key)
        if known is None:
            self.routes[key] = route
        else:
            known.directions |= route.directions
            if len(route.stops) > len(known.stops):
                known.stops = route.stops
        for naptan in route.stops:
            self.stop_to_routes.setdefault(naptan, set()).add(key)

    def add_journey(self, journey: "Journey") -> None:
        """Append *journey* to the list of known journeys."""
        self.journeys.append(journey)

    def save_cache(self) -> None:
        """Serialise the whole index to ``timetable_cache.json`` in CACHE_DIR.

        Sets become lists and times become ISO strings so the payload is JSON.
        The data is written to a temporary sibling file that then replaces the
        cache, so an interrupted write cannot leave a truncated cache behind.
        """
        cache = {
            "stops": {k: asdict(v) for k, v in self.stops.items()},
            "routes": {
                k: {"operator": v.operator, "route_num": v.route_num,
                    "directions": list(v.directions), "stops": v.stops}
                for k, v in self.routes.items()
            },
            "stop_to_routes": {k: list(v) for k, v in self.stop_to_routes.items()},
            "journeys": [
                {
                    "operator": j.operator,
                    "route_num": j.route_num,
                    "direction": j.direction,
                    "journey_code": j.journey_code,
                    "stops": [
                        {"naptan": s.naptan,
                         "arrival": s.arrival.isoformat() if s.arrival else None,
                         "departure": s.departure.isoformat() if s.departure else None}
                        for s in j.stops
                    ],
                    "days": list(j.days),
                }
                for j in self.journeys
            ],
            "loaded_datasets": list(self.loaded_datasets),
        }
        cache_file = CACHE_DIR / "timetable_cache.json"
        tmp_file = cache_file.with_name(cache_file.name + ".tmp")
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump(cache, f, indent=2)
        tmp_file.replace(cache_file)

    def load_cache(self) -> bool:
        """Replace the index contents with the cached data, if a cache exists.

        Returns:
            True when the cache was read and applied; False when the file is
            missing or cannot be decoded (the reason is printed to stderr).

        Everything is decoded into local variables first and only assigned to
        the instance once decoding has fully succeeded, so a corrupt cache
        never leaves the index half-overwritten.
        """
        cache_file = CACHE_DIR / "timetable_cache.json"
        if not cache_file.exists():
            return False
        try:
            with open(cache_file, "r", encoding="utf-8") as f:
                cache = json.load(f)
            stops = {k: Stop(**v) for k, v in cache.get("stops", {}).items()}
            routes = {
                k: Route(operator=v["operator"], route_num=v["route_num"],
                         directions=set(v.get("directions", [])), stops=v.get("stops", []))
                for k, v in cache.get("routes", {}).items()
            }
            stop_to_routes = {k: set(v) for k, v in cache.get("stop_to_routes", {}).items()}
            loaded_datasets = set(cache.get("loaded_datasets", []))
            journeys = []
            for j in cache.get("journeys", []):
                calls = []
                for s in j.get("stops", []):
                    arr = time.fromisoformat(s["arrival"]) if s.get("arrival") else None
                    dep = time.fromisoformat(s["departure"]) if s.get("departure") else None
                    calls.append(JourneyStop(naptan=s["naptan"], arrival=arr, departure=dep))
                journeys.append(Journey(
                    operator=j["operator"], route_num=j["route_num"],
                    direction=j["direction"], journey_code=j["journey_code"],
                    stops=calls, days=set(j.get("days", []))
                ))
        except Exception as e:
            # Best-effort cache: any decoding problem just means "no usable cache".
            print(f"Cache load failed: {e}", file=sys.stderr)
            return False

        self.stops = stops
        self.routes = routes
        self.stop_to_routes = stop_to_routes
        self.loaded_datasets = loaded_datasets
        self.journeys = journeys
        return True

    def clear(self) -> None:
        """Empty every container of the index in place."""
        self.stops.clear()
        self.routes.clear()
        self.stop_to_routes.clear()
        self.journeys.clear()
        self.loaded_datasets.clear()
184
+
185
+
186
+ # Global index
187
+ index = TimetableIndex()
188
+
189
+
190
+ # ---------------------------------------------------------------------------
191
+ # TransXChange XML parsing helpers
192
+ # ---------------------------------------------------------------------------
193
def _get_ns(tag: str, ns: str) -> str:
    """Return *tag* in ElementTree's ``{namespace}tag`` form, or unchanged when *ns* is empty."""
    if not ns:
        return tag
    return f"{{{ns}}}{tag}"
195
+
196
+
197
def _parse_duration(text: str) -> timedelta:
    """Parse an ISO 8601 / xsd:duration string such as PT5M, PT1H30M or -PT1.5S.

    Supported components are days, hours, minutes and (optionally fractional)
    seconds, with an optional leading minus sign. Year and month components
    have no fixed length and are ignored.

    Parameters:
        text: The duration text; None or an empty string is accepted.

    Returns:
        The parsed duration, or a zero timedelta when nothing could be parsed.
    """
    if not text:
        return timedelta(0)
    text = text.strip()
    negative = text.startswith("-")

    # Split at the "T" designator so that "M" is read as minutes, not months.
    date_part, designator, time_part = text.partition("T")
    if not designator:
        # Lenient input without a "T" (e.g. "5M"): scan the whole text for
        # time fields, as this parser has always done.
        time_part = date_part

    total = timedelta(0)
    d_match = re.search(r'(\d+)D', date_part)
    if d_match:
        total += timedelta(days=int(d_match.group(1)))
    h_match = re.search(r'(\d+)H', time_part)
    if h_match:
        total += timedelta(hours=int(h_match.group(1)))
    m_match = re.search(r'(\d+)M', time_part)
    if m_match:
        total += timedelta(minutes=int(m_match.group(1)))
    # Capture the whole decimal number: matching only the digits directly
    # before "S" would turn "1.5S" into five seconds.
    s_match = re.search(r'(\d+(?:\.\d+)?)S', time_part)
    if s_match:
        total += timedelta(seconds=float(s_match.group(1)))
    return -total if negative else total
218
+
219
+
220
def _parse_time(text: str) -> Optional[time]:
    """Parse HH:MM or HH:MM:SS.

    Missing minute/second fields count as zero, fields beyond the third are
    ignored and the hour wraps modulo 24. Returns None for empty or
    unparseable input.
    """
    if not text:
        return None
    fields = text.strip().split(":")
    try:
        numbers = [int(piece) for piece in fields[:3]]
        # Pad to (hour, minute, second) when fewer than three fields were given.
        numbers.extend([0] * (3 - len(numbers)))
        hour, minute, second = numbers
        return time(hour=hour % 24, minute=minute, second=second)
    except (ValueError, IndexError):
        return None
232
+
233
+
234
def _parse_days(op_profile) -> set[str]:
    """Extract operating days from an OperatingProfile element.

    TransXChange marks days with empty elements (``<Monday/>``,
    ``<MondayToFriday/>``, ``<Weekend/>`` ...), so the presence of a day
    element means the service runs on that day. An element whose text is
    explicitly "false" or "0" is treated as not operating.

    Parameters:
        op_profile: An ElementTree element, or None when the journey has no profile.

    Returns:
        A set of three-letter day codes (mon..sun). Every day is returned when
        the profile is missing or names no day at all.
    """
    week = ("mon", "tue", "wed", "thu", "fri", "sat", "sun")
    day_map = {
        "Monday": "mon", "Tuesday": "tue", "Wednesday": "wed",
        "Thursday": "thu", "Friday": "fri", "Saturday": "sat", "Sunday": "sun",
    }
    day_groups = {
        "MondayToFriday": week[:5],
        "MondayToSaturday": week[:6],
        "MondayToSunday": week,
        "Weekend": week[5:],
    }
    if op_profile is None:
        return set(week)  # Assume every day if not specified

    days = set()
    for elem in op_profile.iter():
        if not isinstance(elem.tag, str):
            continue  # comments / processing instructions carry no tag name
        tag = elem.tag.split("}")[-1]
        flag = (elem.text or "").strip().lower()
        if flag in ("false", "0"):
            continue
        if tag in day_map:
            days.add(day_map[tag])
        elif tag in day_groups:
            days.update(day_groups[tag])
        elif tag.startswith("Not") and tag[3:] in day_map:
            # e.g. NotSaturday: every day except the named one
            days.update(d for d in week if d != day_map[tag[3:]])

    return days if days else set(week)
252
+
253
+
254
def parse_transxchange(content: str, operator_name: str) -> tuple[list[Stop], list[Route], list[Journey]]:
    """Parse a single TransXChange XML string. Returns (stops, routes, journeys).

    Parameters:
        content: The XML document text.
        operator_name: Operator label stored on every Route and Journey produced.

    Returns:
        A tuple ``(stops, routes, journeys)``. All three lists are empty when
        the document is not well-formed XML. At most one Route is produced per
        document, using the longest stop sequence among its journey patterns.
    """
    # NOTE(review): content comes from a download, and xml.etree is not hardened
    # against maliciously constructed XML (see the Python "XML vulnerabilities" notes).
    try:
        root = ET.fromstring(content)
    except ET.ParseError:
        return [], [], []

    # Take the namespace from the parsed root tag ("{uri}TransXChange") rather
    # than sniffing the raw text for xmlns="...", which breaks on single-quoted
    # attributes and on declarations that belong to a nested element.
    root_tag = root.tag if isinstance(root.tag, str) else ""
    ns = root_tag[1:].partition("}")[0] if root_tag.startswith("{") else ""

    def q(tag: str) -> str:
        return _get_ns(tag, ns)

    stop_ref_tag = q("StopPointRef")
    from_ref_path = f"{q('From')}/{stop_ref_tag}"
    to_ref_path = f"{q('To')}/{stop_ref_tag}"

    stops: list[Stop] = []
    routes: list[Route] = []
    journeys: list[Journey] = []

    # --- Extract StopPoints ---
    for asp in root.iter(q("AnnotatedStopPointRef")):
        naptan = asp.findtext(stop_ref_tag)
        if not naptan:
            continue  # no usable stop code: nothing to index
        # A missing or empty CommonName both fall back to the placeholder name.
        name = asp.findtext(q("CommonName")) or "Unknown"
        stops.append(Stop(naptan=naptan, name=name))

    # --- Extract JourneyPatternSections with timing ---
    # jps_id -> list of (from_stop, to_stop, runtime)
    jps_links: dict[str, list[tuple[str, str, timedelta]]] = {}
    for jps in root.iter(q("JourneyPatternSection")):
        jps_id = jps.get("id")
        if not jps_id:
            continue
        links = []
        for link in jps.iter(q("JourneyPatternTimingLink")):
            # findtext yields None/"" when From, To or their StopPointRef is
            # missing, so malformed links are skipped instead of raising.
            from_ref = link.findtext(from_ref_path)
            to_ref = link.findtext(to_ref_path)
            runtime = _parse_duration(link.findtext(q("RunTime")) or "")
            if from_ref and to_ref:
                links.append((from_ref, to_ref, runtime))
        jps_links[jps_id] = links

    # --- Extract JourneyPatterns ---
    jp_map: dict[str, dict] = {}  # jp_id -> {direction, section_ids}
    for jp in root.iter(q("JourneyPattern")):
        jp_id = jp.get("id")
        direction_elem = jp.find(q("Direction"))
        direction = direction_elem.text if direction_elem is not None else "unknown"
        section_ids = [ref.text for ref in jp.findall(q("JourneyPatternSectionRefs")) if ref.text]
        jp_map[jp_id] = {"direction": direction, "section_ids": section_ids}

    # --- Extract Routes ---
    # Route number: first non-empty PublishedLineName, else the last
    # ":"-separated part of the first non-empty LineRef, else "Unknown".
    route_num = None
    for pln in root.iter(q("PublishedLineName")):
        if pln.text:
            route_num = pln.text
            break
    if not route_num:
        for lr in root.iter(q("LineRef")):
            if lr.text:
                route_num = lr.text.split(":")[-1]
                break
    if not route_num:
        route_num = "Unknown"

    # Build one stop sequence per journey pattern; the longest becomes the route.
    all_stop_seqs = []
    all_directions = set()
    for jp_data in jp_map.values():
        seq = []
        for sid in jp_data["section_ids"]:
            for from_ref, to_ref, _ in jps_links.get(sid, ()):
                # Consecutive links share a stop: do not list it twice.
                if not seq or seq[-1] != from_ref:
                    seq.append(from_ref)
                seq.append(to_ref)
        if seq:
            all_stop_seqs.append(seq)
            all_directions.add(jp_data["direction"])

    if all_stop_seqs:
        longest = max(all_stop_seqs, key=len)
        routes.append(Route(
            operator=operator_name, route_num=route_num,
            directions=all_directions, stops=longest,
        ))

    # --- Extract VehicleJourneys with times ---
    for vj in root.iter(q("VehicleJourney")):
        jpref = vj.find(q("JourneyPatternRef"))
        if jpref is None or jpref.text not in jp_map:
            continue

        jp_data = jp_map[jpref.text]
        dep_time = _parse_time(vj.findtext(q("DepartureTime")))
        if dep_time is None:
            continue

        journey_code = vj.findtext(q("VehicleJourneyCode")) or "unknown"

        # Operating profile (days)
        days = _parse_days(vj.find(q("OperatingProfile")))

        # Build stop schedule by accumulating run times. The date part is only
        # an anchor for the arithmetic; times past midnight wrap around.
        journey_stops: list[JourneyStop] = []
        current_time = datetime.combine(datetime.today(), dep_time)

        for sid in jp_data["section_ids"]:
            for from_ref, to_ref, runtime in jps_links.get(sid, ()):
                if not journey_stops:
                    # First stop of the journey: only a departure is known.
                    journey_stops.append(JourneyStop(naptan=from_ref, departure=current_time.time()))
                # Travel to next stop
                current_time += runtime
                journey_stops.append(JourneyStop(naptan=to_ref, arrival=current_time.time()))

        if len(journey_stops) >= 2:
            journeys.append(Journey(
                operator=operator_name, route_num=route_num,
                direction=jp_data["direction"], journey_code=journey_code,
                stops=journey_stops, days=days,
            ))

    return stops, routes, journeys
388
+
389
+
390
+ # ---------------------------------------------------------------------------
391
+ # Dataset loading
392
+ # ---------------------------------------------------------------------------
393
async def load_dataset(ds_id: int) -> dict:
    """Download and parse a single timetable dataset. Returns metadata.

    Parameters:
        ds_id: Identifier of the dataset on the OpenBusData API.

    Returns:
        The dataset metadata dict from the API. An empty dict is returned when
        the dataset is already indexed or its metadata request does not return
        HTTP 200. The dataset id is added to ``index.loaded_datasets`` only
        after its archive has been opened and its XML members processed.
    """
    if ds_id in index.loaded_datasets:
        return {}

    def with_api_key(url: str) -> str:
        # The download URL from the metadata may already carry a query string,
        # so choose the separator accordingly and percent-encode the key.
        separator = "&" if "?" in url else "?"
        return url + separator + urlencode({"api_key": API_KEY})

    async with httpx.AsyncClient(timeout=120.0, follow_redirects=True) as client:
        meta_resp = await client.get(with_api_key(f"{BASE_URL}/api/v1/dataset/{ds_id}/"))
        if meta_resp.status_code != 200:
            return {}
        meta = meta_resp.json()

        operator = meta.get("operatorName", "Unknown")
        download_url = meta.get("url")
        if not download_url:
            return meta

        zip_resp = await client.get(with_api_key(download_url))
        # Bodies under 100 bytes are treated as "no data" rather than an archive.
        if zip_resp.status_code != 200 or len(zip_resp.content) < 100:
            return meta
        payload = zip_resp.content

    try:
        archive = zipfile.ZipFile(io.BytesIO(payload))
    except zipfile.BadZipFile:
        return meta

    with archive:  # make sure the archive handle is released
        for fname in archive.namelist():
            if not fname.lower().endswith(".xml"):
                continue
            try:
                content = archive.read(fname).decode("utf-8", errors="ignore")
            except Exception as exc:
                # Best effort: one unreadable member must not abort the dataset.
                print(f"Dataset {ds_id}: skipping {fname}: {type(exc).__name__}", file=sys.stderr)
                continue
            stops, routes, journeys = parse_transxchange(content, operator)
            for stop in stops:
                index.add_stop(stop)
            for route in routes:
                index.add_route(route)
            for journey in journeys:
                index.add_journey(journey)

    index.loaded_datasets.add(ds_id)
    return meta
434
+
435
+
436
async def load_all_timetable_data(force_refresh: bool = False) -> str:
    """Load all accessible timetable datasets and build indexes.

    Parameters:
        force_refresh: When true, skip the on-disk cache, clear the index and
            download everything again.

    Returns:
        A one-line human-readable summary. In the download path, "loaded"
        counts datasets that actually ended up in the index and "errors"
        counts every dataset that did not (exception or unusable response).
    """
    if not force_refresh and index.load_cache():
        return (f"Loaded {len(index.loaded_datasets)} datasets from cache "
                f"({len(index.stops)} stops, {len(index.routes)} routes, {len(index.journeys)} journeys).")

    if force_refresh:
        index.clear()

    # Page through the dataset listing until a short or empty page is returned.
    all_ids: list[int] = []
    limit = 100
    offset = 0
    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
        while True:
            query = urlencode({"limit": limit, "offset": offset, "api_key": API_KEY})
            resp = await client.get(f"{BASE_URL}/api/v1/dataset/?{query}")
            if resp.status_code != 200:
                break
            results = resp.json().get("results", [])
            if not results:
                break
            all_ids.extend(r["id"] for r in results)
            if len(results) < limit:
                break
            offset += limit

    loaded = 0
    errors = 0
    for ds_id in all_ids:
        try:
            await load_dataset(ds_id)
        except Exception as exc:
            # Keep going: one broken dataset must not abort the whole refresh.
            # Only the exception type is logged, to keep URLs out of the log.
            errors += 1
            print(f"Dataset {ds_id} failed to load: {type(exc).__name__}", file=sys.stderr)
            continue
        # load_dataset returns quietly on HTTP errors and bad archives, so only
        # count the dataset as loaded when it really reached the index.
        if ds_id in index.loaded_datasets:
            loaded += 1
        else:
            errors += 1

    index.save_cache()
    return (f"Loaded {loaded} datasets ({errors} errors). "
            f"Total: {len(index.stops)} stops, {len(index.routes)} routes, {len(index.journeys)} journeys.")
475
+
476
+
477
+ # ---------------------------------------------------------------------------
478
+ # Existing OpenAPI-based tools (from specs)
479
+ # ---------------------------------------------------------------------------
480
def load_specs() -> dict[str, Any]:
    """Load every ``*.yml`` file in SPECS_DIR as an OpenAPI spec.

    Returns:
        A dict mapping each file's stem to its parsed content, in sorted file
        order. Files whose top-level YAML value is not a mapping (for example
        an empty file, which parses to None) are skipped with a note on
        stderr, because tool registration calls ``.get`` on every spec.
    """
    specs: dict[str, Any] = {}
    for yml_file in sorted(SPECS_DIR.glob("*.yml")):
        with open(yml_file, "r", encoding="utf-8") as f:
            spec = yaml.safe_load(f)
        if not isinstance(spec, dict):
            print(f"Skipping {yml_file.name}: top-level YAML value is not a mapping", file=sys.stderr)
            continue
        specs[yml_file.stem] = spec
    return specs
486
+
487
+
488
def build_param_schema(param: dict) -> dict:
    """Condense one OpenAPI parameter object into a small schema dict.

    The result always has "type" (default "string") and a single-line
    "description"; "enum" and "example" are copied from the parameter's schema
    when present, and "required" is set to True only for required parameters.
    """
    spec = param.get("schema", {})
    one_line_description = param.get("description", "").strip().replace("\n", " ")
    condensed = {
        "type": spec.get("type", "string"),
        "description": one_line_description,
    }
    for optional_key in ("enum", "example"):
        if optional_key in spec:
            condensed[optional_key] = spec[optional_key]
    if param.get("required"):
        condensed["required"] = True
    return condensed
501
+
502
+
503
def register_tools_from_specs(specs: dict[str, Any]):
    """Register one MCP tool for every GET operation found in *specs*.

    Parameters:
        specs: Mapping of spec name -> parsed OpenAPI document.

    The tool name is the operationId when present, otherwise it is derived
    from the first tag and the path; "-" and "." are replaced by "_". Each
    tool builds the request URL from its keyword arguments, performs the GET
    and returns pretty-printed JSON, the raw body when it is not JSON, or an
    error string. Parameter entries without "name"/"in" (for example "$ref"
    entries) are ignored.
    """
    # Local import: the file-level import only brings in urljoin and urlencode.
    from urllib.parse import quote

    for spec in specs.values():
        servers = spec.get("servers", [{}])
        base_path = servers[0].get("url", "/api/v1/") if servers else "/api/v1/"
        for path_template, methods in spec.get("paths", {}).items():
            for method, operation in methods.items():
                if method.lower() != "get" or not isinstance(operation, dict):
                    continue

                tag = "general"
                if operation.get("tags"):
                    tag = operation["tags"][0].replace(" ", "_").replace("-", "_")
                op_id = operation.get("operationId")
                summary = operation.get("summary", "")
                description = operation.get("description", summary or "No description")
                if op_id:
                    tool_name = op_id
                else:
                    clean_path = path_template.strip("/").replace("/", "_").replace("{", "by_").replace("}", "")
                    tool_name = f"{tag}_{clean_path}"
                tool_name = tool_name.replace("-", "_").replace(".", "_")

                parameters = [
                    p for p in operation.get("parameters", [])
                    if isinstance(p, dict) and "name" in p and "in" in p
                ]
                param_defs = {}
                required_params = []
                for p in parameters:
                    pname = p["name"]
                    param_defs[pname] = build_param_schema(p)
                    if p.get("required"):
                        required_params.append(pname)

                # Bind the per-operation values as defaults so each tool keeps
                # its own copy instead of the loop's last values.
                def make_tool(path_tpl=path_template, bp=base_path, params_def=parameters):
                    async def tool_func(**kwargs) -> str:
                        url_path = bp.rstrip("/") + path_tpl
                        query = {}
                        for p in params_def:
                            pname = p["name"]
                            if pname not in kwargs:
                                continue
                            val = kwargs[pname]
                            if p["in"] == "path":
                                # Percent-encode so a value cannot add path
                                # segments or a query string to the URL.
                                url_path = url_path.replace(f"{{{pname}}}", quote(str(val), safe=""))
                            elif p["in"] == "query" and val is not None:
                                ptype = p.get("schema", {}).get("type")
                                if ptype == "array":
                                    query[pname] = ",".join(str(v) for v in val) if isinstance(val, list) else str(val)
                                elif ptype == "boolean":
                                    query[pname] = "true" if val else "false"
                                else:
                                    query[pname] = str(val)
                        full_url = urljoin(BASE_URL + "/", url_path.lstrip("/"))
                        if API_KEY:
                            # The configured key goes through urlencode with the
                            # other parameters and replaces a caller-supplied one.
                            query["api_key"] = API_KEY
                        if query:
                            sep = "&" if "?" in full_url else "?"
                            full_url += sep + urlencode(query)
                        try:
                            async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
                                resp = await client.get(full_url)
                                resp.raise_for_status()
                                try:
                                    return json.dumps(resp.json(), indent=2, ensure_ascii=False)
                                except ValueError:
                                    # Body is not JSON: hand it back unchanged.
                                    return resp.text
                        except httpx.HTTPStatusError as e:
                            return f"HTTP Error {e.response.status_code}: {e.response.text}"
                        except Exception as e:
                            # Tool boundary: report the failure as text instead of raising.
                            return f"Error: {type(e).__name__}: {str(e)}"
                    return tool_func

                tool_func = make_tool()
                tool_func.__name__ = tool_name
                doc_lines = [f"{description}\n\nParameters:\n"]
                for pname, pdef in param_defs.items():
                    req_flag = " (required)" if pname in required_params else ""
                    doc_lines.append(f" {pname}{req_flag}: {pdef.get('description', '')}\n")
                tool_func.__doc__ = "".join(doc_lines)
                mcp.tool(name=tool_name)(tool_func)
578
+
579
+
580
+ # ---------------------------------------------------------------------------
581
+ # NEW Rich timetable tools
582
+ # ---------------------------------------------------------------------------
583
@mcp.tool()
async def load_timetable_index(force_refresh: bool = False) -> str:
    """
    Download and index all accessible timetable datasets.
    Call this first if stop/route search tools return no results.

    Parameters:
        force_refresh: If true, re-download all data instead of using cache.
    """
    # Thin tool wrapper: the loader does the work and produces the summary text.
    summary = await load_all_timetable_data(force_refresh=force_refresh)
    return summary
593
+
594
+
595
@mcp.tool()
async def search_stops(query: str) -> str:
    """
    Search for bus stops by name across all loaded timetable data.
    Returns matching stops with their NaPTAN codes.

    Parameters:
        query: Substring to search for in stop names (case-insensitive).
    """
    # Fall back to the on-disk cache when nothing has been loaded in this process.
    if not index.stops and not index.load_cache():
        return "No timetable data loaded. Please call load_timetable_index() first."

    needle = query.lower()
    # Stops can carry no name at all (the parser stores an empty CommonName as
    # None); skip them instead of failing on .lower().
    matches = [
        {"naptan": naptan, "name": stop.name}
        for naptan, stop in index.stops.items()
        if stop.name and needle in stop.name.lower()
    ]
    if not matches:
        return f'No stops found matching "{query}".'
    matches.sort(key=lambda m: m["name"])
    # Cap the answer at 50 entries to keep the tool output small.
    return json.dumps(matches[:50], indent=2, ensure_ascii=False)
616
+
617
+
618
@mcp.tool()
async def find_routes_between_stops(stop_a: str, stop_b: str) -> str:
    """
    Find all bus routes that serve BOTH of the given stops.
    Stops can be specified by NaPTAN code or by name.

    Parameters:
        stop_a: First stop (NaPTAN code or name substring).
        stop_b: Second stop (NaPTAN code or name substring).
    """
    if not index.stops and not index.load_cache():
        return "No timetable data loaded. Please call load_timetable_index() first."

    naptans_a = _resolve_stop(stop_a)
    naptans_b = _resolve_stop(stop_b)
    if not naptans_a:
        return f'Could not resolve stop_a: "{stop_a}". Try search_stops().'
    if not naptans_b:
        return f'Could not resolve stop_b: "{stop_b}". Try search_stops().'

    # Routes touching any candidate for A, and likewise for B; a route serves
    # both stops exactly when it appears in both collections.
    serving_a = set()
    for code in naptans_a:
        serving_a |= index.stop_to_routes.get(code, set())
    serving_b = set()
    for code in naptans_b:
        serving_b |= index.stop_to_routes.get(code, set())

    results = []
    for route_key in serving_a & serving_b:
        route = index.routes[route_key]
        # Position of the first stop matching each side in the route's sequence.
        pos_a = next((i for i, s in enumerate(route.stops) if s in naptans_a), None)
        pos_b = next((i for i, s in enumerate(route.stops) if s in naptans_b), None)
        if pos_a is None or pos_b is None:
            stop_order = "unknown"
        elif pos_a < pos_b:
            stop_order = "A->B"
        else:
            stop_order = "B->A"
        results.append({
            "operator": route.operator, "route": route.route_num,
            "directions": sorted(route.directions), "stop_order": stop_order,
        })

    if not results:
        return f"No single route serves both '{stop_a}' and '{stop_b}'."
    results.sort(key=lambda entry: (entry["operator"], entry["route"]))
    return json.dumps(results, indent=2, ensure_ascii=False)
661
+
662
+
663
@mcp.tool()
async def get_route_stops(operator: str, route: str, direction: Optional[str] = None) -> str:
    """
    Get the full ordered list of stops for a specific bus route.

    Parameters:
        operator: Operator name (exact or partial match).
        route: Route number/identifier.
        direction: Optional filter: 'inbound', 'outbound', or leave blank for all.
    """
    if not index.routes and not index.load_cache():
        return "No timetable data loaded. Please call load_timetable_index() first."

    def display_name(naptan):
        # Stops missing from the index are shown as "Unknown".
        known = index.stops.get(naptan)
        return known.name if known is not None else "Unknown"

    found = []
    for candidate in index.routes.values():
        # Both filters are case-insensitive substring matches.
        if route.lower() not in candidate.route_num.lower():
            continue
        if operator.lower() not in candidate.operator.lower():
            continue
        if direction:
            served = [d.lower() for d in candidate.directions]
            if direction.lower() not in served:
                continue
        found.append({
            "operator": candidate.operator,
            "route": candidate.route_num,
            "directions": sorted(candidate.directions),
            "stops": [{"naptan": n, "name": display_name(n)} for n in candidate.stops],
        })

    if not found:
        return "No route found."
    return json.dumps(found, indent=2, ensure_ascii=False)
685
+
686
+
687
@mcp.tool()
async def find_buses_by_arrival_time(stop_a: str, stop_b: str, arrive_by: str, day: Optional[str] = None) -> str:
    """
    Find scheduled buses that board at stop_a and arrive at stop_b by the given time.

    Parameters:
        stop_a: Boarding stop (NaPTAN code or name).
        stop_b: Alighting stop (NaPTAN code or name).
        arrive_by: Target arrival time (HH:MM, 24h format).
        day: Optional day filter: mon, tue, wed, thu, fri, sat, sun. Defaults to today.
    """
    if not index.journeys and not index.load_cache():
        return "No timetable data loaded. Please call load_timetable_index() first."

    naptans_a = _resolve_stop(stop_a)
    naptans_b = _resolve_stop(stop_b)
    if not naptans_a:
        return f'Could not resolve stop_a: "{stop_a}". Try search_stops().'
    if not naptans_b:
        return f'Could not resolve stop_b: "{stop_b}". Try search_stops().'

    target_time = _parse_time(arrive_by)
    if target_time is None:
        return f'Invalid time format: "{arrive_by}". Use HH:MM (24h).'

    if day is None:
        # Index by weekday number rather than strftime("%a"), whose output
        # depends on the process locale and may not be an English abbreviation.
        day = ("mon", "tue", "wed", "thu", "fri", "sat", "sun")[datetime.now().weekday()]
    day = day.lower()[:3]

    def stop_name(naptan):
        known = index.stops.get(naptan)
        return known.name if known is not None else "Unknown"

    results = []
    for journey in index.journeys:
        if day not in journey.days:
            continue

        # Board at the first call at A, alight at the first call at B after it.
        # Keeping the last index of each stop would wrongly reject journeys
        # that come back to A after B (out-and-back / circular patterns).
        idx_a = next((i for i, js in enumerate(journey.stops) if js.naptan in naptans_a), None)
        if idx_a is None:
            continue
        idx_b = next(
            (i for i, js in enumerate(journey.stops) if i > idx_a and js.naptan in naptans_b),
            None,
        )
        if idx_b is None:
            continue

        board = journey.stops[idx_a]
        alight = journey.stops[idx_b]
        if alight.arrival is None or alight.arrival > target_time:
            continue

        # Only a journey's first stop carries a departure time; intermediate
        # stops carry an arrival, which is the best available boarding time.
        departure_from_a = board.departure or board.arrival
        results.append({
            "operator": journey.operator,
            "route": journey.route_num,
            "direction": journey.direction,
            "journey_code": journey.journey_code,
            "board_at": stop_name(board.naptan),
            "depart": departure_from_a.isoformat() if departure_from_a else None,
            "alight_at": stop_name(alight.naptan),
            "arrive": alight.arrival.isoformat(),
        })

    if not results:
        return f"No buses found arriving at '{stop_b}' by {arrive_by} on {day}."
    results.sort(key=lambda x: x["arrive"] or "")
    # Keep the 20 arrivals closest to the deadline (still in ascending order):
    # for an "arrive by" query those are the useful ones, not the day's first buses.
    return json.dumps(results[-20:], indent=2, ensure_ascii=False)
754
+
755
+
756
@mcp.tool()
async def plan_journey(stop_a: str, stop_b: str, arrive_by: str, day: Optional[str] = None, max_changes: int = 1) -> str:
    """
    Plan a journey from stop_a to stop_b arriving by a given time.
    Supports direct routes and single changes.

    Parameters:
        stop_a: Starting stop (NaPTAN code or name).
        stop_b: Destination stop (NaPTAN code or name).
        arrive_by: Target arrival time (HH:MM, 24h format).
        day: Optional day filter: mon, tue, wed, thu, fri, sat, sun. Defaults to today.
        max_changes: Maximum number of bus changes (0 = direct only, 1 = one change). Default 1.
    """
    if not index.journeys and not index.load_cache():
        return "No timetable data loaded. Please call load_timetable_index() first."

    naptans_a = _resolve_stop(stop_a)
    naptans_b = _resolve_stop(stop_b)
    if not naptans_a:
        return f'Could not resolve stop_a: "{stop_a}". Try search_stops().'
    if not naptans_b:
        return f'Could not resolve stop_b: "{stop_b}". Try search_stops().'

    target_time = _parse_time(arrive_by)
    if target_time is None:
        return f'Invalid time format: "{arrive_by}". Use HH:MM (24h).'

    if day is None:
        # Index by weekday number rather than strftime("%a"), whose output
        # depends on the process locale and may not be an English abbreviation.
        day = ("mon", "tue", "wed", "thu", "fri", "sat", "sun")[datetime.now().weekday()]
    day = day.lower()[:3]

    def stop_name(naptan):
        known = index.stops.get(naptan)
        return known.name if known is not None else "Unknown"

    def first_index(journey, naptans, start=0):
        # Position of the first call at or after *start* whose stop is in *naptans*.
        return next(
            (i for i, s in enumerate(journey.stops) if i >= start and s.naptan in naptans),
            None,
        )

    def board_time(call):
        # Only a journey's first stop carries a departure; elsewhere the
        # arrival is the best available boarding time.
        return call.departure or call.arrival

    # Filter by operating day once instead of inside every loop.
    todays = [j for j in index.journeys if day in j.days]
    plans = []

    # --- Direct journeys ---
    for journey in todays:
        idx_a = first_index(journey, naptans_a)
        if idx_a is None:
            continue
        # Look for B after the boarding stop so journeys that call at B both
        # before and after A are not rejected.
        idx_b = first_index(journey, naptans_b, idx_a + 1)
        if idx_b is None:
            continue
        arrival_at_b = journey.stops[idx_b].arrival
        if arrival_at_b is None or arrival_at_b > target_time:
            continue
        depart = board_time(journey.stops[idx_a])
        plans.append({
            "type": "direct",
            "legs": [{
                "operator": journey.operator,
                "route": journey.route_num,
                "board": stop_name(journey.stops[idx_a].naptan),
                "depart": depart.isoformat() if depart else None,
                "alight": stop_name(journey.stops[idx_b].naptan),
                "arrive": arrival_at_b.isoformat(),
            }],
            "total_changes": 0,
        })

    if max_changes >= 1:
        # --- Single change ---
        # Index the possible second legs once: for every stop a journey calls
        # at before reaching B in time, remember (journey, departure there,
        # position of B, arrival at B). This replaces a scan over every
        # journey for each candidate change point.
        onward = {}
        for j2 in todays:
            b_positions = [i for i, s in enumerate(j2.stops) if s.naptan in naptans_b]
            if not b_positions:
                continue
            seen_here = set()
            for i, call in enumerate(j2.stops[:b_positions[-1]]):
                if call.naptan in seen_here:
                    continue  # only the first call at a stop is a boarding point
                seen_here.add(call.naptan)
                idx_b2 = next(b for b in b_positions if b > i)
                dep_mid2 = board_time(call)
                arr_b2 = j2.stops[idx_b2].arrival
                if dep_mid2 is None or arr_b2 is None or arr_b2 > target_time:
                    continue
                onward.setdefault(call.naptan, []).append((j2, dep_mid2, idx_b2, arr_b2))

        for j1 in todays:
            idx_a1 = first_index(j1, naptans_a)
            if idx_a1 is None:
                continue
            depart_a1 = board_time(j1.stops[idx_a1])
            if depart_a1 is None:
                continue

            # Every stop after A on j1 is a potential change point
            for mid in j1.stops[idx_a1 + 1:]:
                if mid.arrival is None:
                    continue
                for j2, dep_mid2, idx_b2, arr_b2 in onward.get(mid.naptan, ()):
                    if (j2.operator == j1.operator and j2.route_num == j1.route_num
                            and j2.direction == j1.direction):
                        continue  # same service: not a change
                    # Connection time: must depart mid after arriving there
                    if dep_mid2 < mid.arrival:
                        continue
                    plans.append({
                        "type": "change",
                        "legs": [
                            {
                                "operator": j1.operator,
                                "route": j1.route_num,
                                "board": stop_name(j1.stops[idx_a1].naptan),
                                "depart": depart_a1.isoformat(),
                                "alight": stop_name(mid.naptan),
                                "arrive": mid.arrival.isoformat(),
                            },
                            {
                                "operator": j2.operator,
                                "route": j2.route_num,
                                "board": stop_name(mid.naptan),
                                "depart": dep_mid2.isoformat(),
                                "alight": stop_name(j2.stops[idx_b2].naptan),
                                "arrive": arr_b2.isoformat(),
                            },
                        ],
                        "total_changes": 1,
                    })

    # Deduplicate by (route, departure) of every leg. With real boarding times
    # filled in, distinct mid-route boardings no longer share one key.
    seen = set()
    deduped = []
    for plan in plans:
        key = tuple(leg.get("route", "") + "@" + (leg.get("depart") or "") for leg in plan["legs"])
        if key not in seen:
            seen.add(key)
            deduped.append(plan)

    if not deduped:
        return f"No journey found from '{stop_a}' to '{stop_b}' by {arrive_by} on {day}."
    deduped.sort(key=lambda p: p["legs"][-1]["arrive"])
    # Keep the 15 plans arriving closest to the deadline (ascending order):
    # for an "arrive by" query those matter more than the day's earliest options.
    return json.dumps(deduped[-15:], indent=2, ensure_ascii=False)
892
+
893
+
894
@mcp.tool()
async def get_live_buses_on_route(operator_ref: str, line_ref: str) -> str:
    """
    Get real-time bus locations for a specific operator and route.

    Parameters:
        operator_ref: Operator NOC code (e.g. ARBB, SCCM, CBBH).
        line_ref: Route number (e.g. 12, MK1, 100).

    Returns:
        A JSON array with one object per vehicle (vehicle_id, direction,
        origin, destination, location, bearing), the text
        "No live buses found." when the feed has no matching vehicles, or an
        "Error: ..." string if the request or XML parsing fails.
    """
    ns = "http://www.siri.org.uk/siri"

    def text_of(parent, tag):
        """Return the text of SIRI child *tag* under *parent*, or "N/A"."""
        el = parent.find(f"{{{ns}}}{tag}")
        # A missing element and a present-but-empty one both report "N/A".
        return (el.text if el is not None else None) or "N/A"

    # Let httpx build the query string so caller-supplied values are
    # URL-encoded and cannot inject or truncate query parameters.
    params = {"operatorRef": operator_ref, "lineRef": line_ref, "api_key": API_KEY}
    try:
        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            resp = await client.get(f"{BASE_URL}/api/v1/datafeed/", params=params)
            resp.raise_for_status()

        root = ET.fromstring(resp.content)
        buses = []
        for activity in root.iter(f"{{{ns}}}VehicleActivity"):
            mvj = activity.find(f"{{{ns}}}MonitoredVehicleJourney")
            if mvj is None:
                continue
            loc = mvj.find(f"{{{ns}}}VehicleLocation")
            lat = text_of(loc, "Latitude") if loc is not None else "N/A"
            lon = text_of(loc, "Longitude") if loc is not None else "N/A"
            buses.append({
                "vehicle_id": text_of(mvj, "VehicleRef"),
                "direction": text_of(mvj, "DirectionRef"),
                "origin": text_of(mvj, "OriginName"),
                "destination": text_of(mvj, "DestinationName"),
                "location": {"lat": lat, "lon": lon},
                "bearing": text_of(mvj, "Bearing"),
            })
        if not buses:
            return "No live buses found."
        return json.dumps(buses, indent=2, ensure_ascii=False)
    except Exception as e:
        # httpx error messages embed the request URL, which carries the API
        # key as a query parameter; redact it before returning the text.
        detail = re.sub(r"(api_key=)[^&'\"\s]+", r"\1***", str(e))
        return f"Error: {type(e).__name__}: {detail}"
933
+
934
+
935
+ # ---------------------------------------------------------------------------
936
+ # Helpers
937
+ # ---------------------------------------------------------------------------
938
def _resolve_stop(stop_query: str) -> set[str]:
    """Resolve a stop query to a set of NaPTAN codes.

    A query that is all digits, or is at least 8 characters long and starts
    with two digits, is treated as a NaPTAN code and returned as-is.
    Any other query is matched as a case-insensitive substring against the
    names in ``index.stops``.

    Parameters:
        stop_query: A NaPTAN code or a fragment of a stop name.

    Returns:
        The set of matching NaPTAN codes; empty when nothing matches or the
        query is blank.
    """
    query = stop_query.strip()
    if not query:
        # "" is a substring of every string, so a blank query would
        # otherwise resolve to every stop in the index.
        return set()
    if query.isdigit() or (len(query) >= 8 and query[:2].isdigit()):
        return {query}
    needle = query.lower()
    return {
        naptan
        for naptan, stop in index.stops.items()
        if needle in stop.name.lower()
    }
948
+
949
+
950
+ # ---------------------------------------------------------------------------
951
+ # Startup
952
+ # ---------------------------------------------------------------------------
953
# Import-time startup: register the spec-driven tools, then try to restore
# the timetable index from its cache. Status messages go to stderr.
specs = load_specs()
if specs:
    register_tools_from_specs(specs)

if not index.load_cache():
    print(
        "[OpenBusData MCP] No timetable cache found. Call load_timetable_index() to download and parse all timetable data.",
        file=sys.stderr,
    )
else:
    print(
        f"[OpenBusData MCP] Loaded timetable cache: {len(index.stops)} stops, {len(index.routes)} routes, {len(index.journeys)} journeys from {len(index.loaded_datasets)} datasets.",
        file=sys.stderr,
    )
961
+
962
+
963
def main():
    """Entry point for the openbusdata-mcp console script.

    Prints configuration warnings to stderr, then starts the MCP server.
    Missing OpenAPI spec files are reported but are not fatal: only the
    spec-generated tools depend on them, and the tools registered directly
    with ``@mcp.tool()`` stay available.
    """
    if not API_KEY:
        print("WARNING: OPENBUS_API_KEY not set.", file=sys.stderr)
    if specs:
        print(f"Loaded {len(specs)} OpenAPI specs: {', '.join(specs.keys())}", file=sys.stderr)
    else:
        # The published wheel ships no .yml specs, so exiting here would make
        # every pip-installed server unusable. Warn and keep serving instead.
        print(f"No .yml spec files found in {SPECS_DIR}", file=sys.stderr)
        print("Continuing without spec-generated API tools.", file=sys.stderr)
    mcp.run()


if __name__ == "__main__":
    main()
@@ -0,0 +1,90 @@
1
+ Metadata-Version: 2.4
2
+ Name: openbusdata-mcp
3
+ Version: 1.0.0
4
+ Summary: MCP server for UK Bus Open Data Service with timetable parsing and journey planning
5
+ Project-URL: Homepage, https://github.com/AndrewAubury/openbusdata-mcp
6
+ Project-URL: Repository, https://github.com/AndrewAubury/openbusdata-mcp
7
+ Project-URL: Issues, https://github.com/AndrewAubury/openbusdata-mcp/issues
8
+ Author-email: Andrew Aubury <projects@aubury.me>
9
+ License: MIT
10
+ Keywords: bus,mcp,open-data,transport,uk
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
20
+ Requires-Python: >=3.10
21
+ Requires-Dist: httpx>=0.27
22
+ Requires-Dist: mcp>=1.12
23
+ Requires-Dist: pyyaml>=6.0
24
+ Description-Content-Type: text/markdown
25
+
26
+ # OpenBusData MCP Server
27
+
28
+ mcp-name: io.github.AndrewAubury/openbusdata
29
+
30
+ MCP server for the [UK Bus Open Data Service](https://data.bus-data.dft.gov.uk) with rich
31
+ timetable parsing, stop search, route discovery, journey planning and real-time bus tracking.
32
+
33
+ ## Features
34
+
35
+ - **Live API tools** — query timetables, fares, disruptions, cancellations and real-time bus locations
36
+ - **Stop search** — fuzzy text search across every bus stop in the UK
37
+ - **Route finder** — discover all routes serving a pair of stops
38
+ - **Journey planner** — "get to X by Y o'clock" with support for direct and chained multi-leg journeys
39
+ - **Live tracking** — see exactly where buses are right now
40
+
41
+ ## Installation
42
+
43
+ ```bash
44
+ pip install openbusdata-mcp
45
+ ```
46
+
47
+ ## Configuration
48
+
49
+ Set your Bus Open Data Service API key as an environment variable:
50
+
51
+ ```bash
52
+ export OPENBUS_API_KEY="your-api-key-here"
53
+ ```
54
+
55
+ Get a free key at [data.bus-data.dft.gov.uk](https://data.bus-data.dft.gov.uk).
56
+
57
+ ## Usage
58
+
59
+ Add the server to your MCP client's configuration (Claude Desktop, Cursor, etc.):
60
+
61
+ ```json
62
+ {
63
+ "mcpServers": {
64
+ "openbusdata": {
65
+ "command": "openbusdata-mcp",
66
+ "env": {
67
+ "OPENBUS_API_KEY": "your-api-key-here"
68
+ }
69
+ }
70
+ }
71
+ }
72
+ ```
73
+
74
+ ## Development
75
+
76
+ ```bash
77
+ # Create virtual environment
78
+ python -m venv .venv
79
+ source .venv/bin/activate # or .venv\Scripts\activate on Windows
80
+
81
+ # Install dependencies
82
+ pip install -e ".[dev]"
83
+
84
+ # Run server
85
+ python -m openbusdata_mcp.server
86
+ ```
87
+
88
+ ## License
89
+
90
+ MIT
@@ -0,0 +1,6 @@
1
+ openbusdata_mcp/__init__.py,sha256=seaLZ4wG6idGVmWX_LSxoGl4YpU4kvfPa-X57VHT7sg,33
2
+ openbusdata_mcp/server.py,sha256=rP9vLf0YPQaxuE1T7J0zRt2no65luZ2bs5MxNj_zQ8w,39039
3
+ openbusdata_mcp-1.0.0.dist-info/METADATA,sha256=WynIzMvDIiyUOchzmbB0dtMBAXUfixwdDjEsWiL6yNs,2527
4
+ openbusdata_mcp-1.0.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
5
+ openbusdata_mcp-1.0.0.dist-info/entry_points.txt,sha256=6l7GTyptX8Uzo0YgRd-8Vna2tPWvX1KoVKiFdC2LsQk,64
6
+ openbusdata_mcp-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ openbusdata-mcp = openbusdata_mcp.server:main