awslabs.terraform-mcp-server 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of awslabs.terraform-mcp-server might be problematic. Click here for more details.

Files changed (32) hide show
  1. awslabs/__init__.py +2 -0
  2. awslabs/terraform_mcp_server/__init__.py +3 -0
  3. awslabs/terraform_mcp_server/impl/resources/__init__.py +11 -0
  4. awslabs/terraform_mcp_server/impl/resources/terraform_aws_provider_resources_listing.py +52 -0
  5. awslabs/terraform_mcp_server/impl/resources/terraform_awscc_provider_resources_listing.py +55 -0
  6. awslabs/terraform_mcp_server/impl/tools/__init__.py +15 -0
  7. awslabs/terraform_mcp_server/impl/tools/execute_terraform_command.py +206 -0
  8. awslabs/terraform_mcp_server/impl/tools/run_checkov_scan.py +359 -0
  9. awslabs/terraform_mcp_server/impl/tools/search_aws_provider_docs.py +677 -0
  10. awslabs/terraform_mcp_server/impl/tools/search_awscc_provider_docs.py +627 -0
  11. awslabs/terraform_mcp_server/impl/tools/search_specific_aws_ia_modules.py +444 -0
  12. awslabs/terraform_mcp_server/impl/tools/utils.py +558 -0
  13. awslabs/terraform_mcp_server/models/__init__.py +27 -0
  14. awslabs/terraform_mcp_server/models/models.py +260 -0
  15. awslabs/terraform_mcp_server/scripts/generate_aws_provider_resources.py +1224 -0
  16. awslabs/terraform_mcp_server/scripts/generate_awscc_provider_resources.py +1020 -0
  17. awslabs/terraform_mcp_server/scripts/scrape_aws_terraform_best_practices.py +129 -0
  18. awslabs/terraform_mcp_server/server.py +329 -0
  19. awslabs/terraform_mcp_server/static/AWSCC_PROVIDER_RESOURCES.md +3125 -0
  20. awslabs/terraform_mcp_server/static/AWS_PROVIDER_RESOURCES.md +3833 -0
  21. awslabs/terraform_mcp_server/static/AWS_TERRAFORM_BEST_PRACTICES.md +2523 -0
  22. awslabs/terraform_mcp_server/static/MCP_INSTRUCTIONS.md +126 -0
  23. awslabs/terraform_mcp_server/static/TERRAFORM_WORKFLOW_GUIDE.md +198 -0
  24. awslabs/terraform_mcp_server/static/__init__.py +22 -0
  25. awslabs/terraform_mcp_server/tests/__init__.py +1 -0
  26. awslabs/terraform_mcp_server/tests/run_tests.sh +35 -0
  27. awslabs/terraform_mcp_server/tests/test_parameter_annotations.py +207 -0
  28. awslabs/terraform_mcp_server/tests/test_tool_implementations.py +309 -0
  29. awslabs_terraform_mcp_server-0.0.1.dist-info/METADATA +97 -0
  30. awslabs_terraform_mcp_server-0.0.1.dist-info/RECORD +32 -0
  31. awslabs_terraform_mcp_server-0.0.1.dist-info/WHEEL +4 -0
  32. awslabs_terraform_mcp_server-0.0.1.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1224 @@
1
+ """Script to generate AWS provider resources markdown for the Terraform Expert MCP server.
2
+
3
+ This script scrapes the Terraform AWS provider documentation using Playwright
4
+ and generates a comprehensive markdown file listing all AWS service categories,
5
+ resources, and data sources.
6
+
7
+ The generated markdown is saved to the static directory for use by the MCP server.
8
+
9
+ Usage:
10
+ python generate_aws_provider_resources.py [--max-categories N] [--output PATH] [--no-fallback]
11
+
12
+ Options:
13
+ --max-categories N Limit to N categories (default: all)
14
+ --output PATH Output file path (default: awslabs/terraform_mcp_server/static/AWS_PROVIDER_RESOURCES.md)
15
+ --no-fallback Don't use fallback data if scraping fails
16
+ """
17
+
18
+ import argparse
19
+ import asyncio
20
+ import os
21
+ import re
22
+ import sys
23
+ import tempfile
24
+ import time
25
+ from bs4 import BeautifulSoup, Tag
26
+ from bs4.element import PageElement, ResultSet
27
+ from bs4.filter import SoupStrainer
28
+ from datetime import datetime
29
+ from loguru import logger
30
+ from pathlib import Path
31
+ from typing import Any, Dict, List, Optional, Tuple, TypedDict, TypeVar, cast
32
+
33
+
34
+ ## Playwright optional import
35
+ try:
36
+ from playwright.async_api import async_playwright
37
+ except ImportError:
38
+ # Playwright is optional, we'll use fallback data if it's not available
39
+ async_playwright = None
40
+
41
# Put the repository root on sys.path so project packages can be imported
# when this file is run directly as a script.
script_dir = Path(__file__).resolve().parent  # directory that contains this script
repo_root = script_dir.parent.parent.parent  # three directory levels above script_dir
sys.path.insert(0, str(repo_root))  # inserted at index 0, so it is searched first
45
+
46
+
47
# Send all log output to stderr with full backtraces and variable diagnostics,
# using a colourised "timestamp | level | module:function:line - message" layout.
logger.configure(
    handlers=[
        dict(
            sink=sys.stderr,
            backtrace=True,
            diagnose=True,
            format=(
                '<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | '
                '<level>{level: <8}</level> | '
                '<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - '
                '<level>{message}</level>'
            ),
        )
    ]
)
58
+
59
# Scraping with Playwright is enabled unless the USE_PLAYWRIGHT environment
# variable is set to something other than 1/true/yes (case-insensitive).
USE_PLAYWRIGHT = os.environ.get('USE_PLAYWRIGHT', '1').lower() in {'1', 'true', 'yes'}
# Page timeout in milliseconds (20 seconds); kept short so a dead site fails fast.
NAVIGATION_TIMEOUT = 20_000
# Where the generated markdown is written when no output path is supplied.
DEFAULT_OUTPUT_PATH = repo_root.joinpath(
    'awslabs', 'terraform_mcp_server', 'static', 'AWS_PROVIDER_RESOURCES.md'
)
# Landing page of the Terraform AWS provider documentation.
AWS_PROVIDER_URL = 'https://registry.terraform.io/providers/hashicorp/aws/latest/docs'
69
+
70
+
71
+ # Define TypedDict classes for the structures used in the script
72
class ResourceItem(TypedDict):
    """A single Terraform resource or data source entry.

    Keys:
        name: Identifier of the entry, e.g. 'aws_acm_certificate'.
        url: Link to the entry's documentation page.
        type: Kind of entry; 'resource' or 'data_source'.
    """

    name: str
    url: str
    type: str
84
+
85
+
86
class CategoryData(TypedDict):
    """The resources and data sources that belong to one service category.

    Keys:
        resources: ResourceItem entries for the category's Terraform resources.
        data_sources: ResourceItem entries for the category's Terraform data sources.
    """

    resources: List[ResourceItem]
    data_sources: List[ResourceItem]
96
+
97
+
98
class ProviderResult(TypedDict):
    """Outcome of fetching the AWS provider documentation.

    Keys:
        categories: Maps each AWS service category name to its CategoryData.
        version: AWS provider version string, e.g. "5.91.0".
    """

    categories: Dict[str, CategoryData]
    version: str
108
+
109
+
110
# Type helpers for BeautifulSoup
# Generic type variable. NOTE(review): not referenced by the helper functions
# directly below; presumably used further down the file -- confirm before removing.
T = TypeVar('T')
112
+
113
+
114
def ensure_tag(element: Optional[PageElement]) -> Optional[Tag]:
    """Return ``element`` unchanged when it is a Tag; otherwise return None."""
    return element if isinstance(element, Tag) else None
119
+
120
+
121
def safe_find(element: Any, *args: Any, **kwargs: Any) -> Optional[Tag]:
    """Run ``element.find(...)`` only when ``element`` is a Tag.

    Returns the match when it is itself a Tag, and None in every other case
    (non-Tag input, no match, or a non-Tag match).
    """
    if isinstance(element, Tag):
        return ensure_tag(element.find(*args, **kwargs))
    return None
127
+
128
+
129
def safe_find_all(element: Any, *args: Any, **kwargs: Any) -> ResultSet:
    """Run ``element.find_all(...)`` only when ``element`` is a Tag.

    A non-Tag input yields an empty ResultSet so callers can always iterate.
    """
    if isinstance(element, Tag):
        return element.find_all(*args, **kwargs)
    return ResultSet(SoupStrainer(), [])
134
+
135
+
136
def safe_get_text(element: Any, strip: bool = False) -> str:
    """Return the text of ``element`` without raising on unexpected inputs.

    Objects exposing ``get_text`` are asked for their text (forwarding
    ``strip``); None becomes an empty string; anything else is passed to str().
    """
    if not hasattr(element, 'get_text'):
        return '' if element is None else str(element)
    return element.get_text(strip=strip)
141
+
142
+
143
# Base URL prepended to relative documentation links found in the menu.
_REGISTRY_BASE_URL = 'https://registry.terraform.io'
# CSS class carried by every category / section title <span> in the docs menu.
_CATEGORY_TITLE_CLASS = 'menu-list-category-link-title'
# Menu entries (lower-cased) that are not AWS service categories.
_SKIPPED_CATEGORIES = ('guides', 'functions', 'aws provider')
# Candidate selectors for the navigation menu, in order of preference.
_NAVIGATION_SELECTORS = (
    '.provider-docs-menu-content',
    'nav',
    '.docs-nav',
    'aside',
    'ul.nav',
    'div[role="navigation"]',
)
# Breadcrumb element that normally shows "Version X.Y.Z".
_VERSION_BREADCRUMB_SELECTOR = (
    'body > div.provider-view > div.provider-nav > nav.bread-crumbs.is-light '
    '> div > div > ul > li:nth-child(4) > span'
)
_VERSION_DROPDOWN_JS = """
() => {
    const versionEl = document.querySelector('.version-dropdown button span');
    return versionEl ? versionEl.innerText.trim() : null;
}
"""
# NOTE: 'span:contains("Version")' is not valid CSS (it is jQuery syntax); the
# exception querySelector throws for it is swallowed by the JS try/catch below.
_VERSION_ALTERNATIVE_JS = """
() => {
    // Try different selectors that might contain the version
    const selectors = [
        '.version-dropdown button span',
        '.dropdown-trigger button span',
        'span:contains("Version")'
    ];
    for (const selector of selectors) {
        try {
            const el = document.querySelector(selector);
            if (el && el.innerText.includes('Version')) {
                return el.innerText.trim();
            }
        } catch (e) {}
    }
    return null;
}
"""
_REMOVE_CONSENT_BANNER_JS = """() => {
    const banner = document.getElementById('consent-banner');
    if (banner) banner.remove();
    return true;
}"""
_REMOVE_OVERLAYS_JS = """() => {
    // Remove common overlay patterns
    document.querySelectorAll('[id*="banner"],[id*="overlay"],[id*="popup"],[class*="banner"],[class*="overlay"],[class*="popup"]')
        .forEach(el => el.remove());
    return true;
}"""


def _fallback_result() -> ProviderResult:
    """Build the result returned whenever live scraping is skipped or fails."""
    return cast(
        ProviderResult, {'categories': get_fallback_resource_data(), 'version': 'unknown'}
    )


async def _extract_provider_version(page: Any) -> str:
    """Read the provider version from the rendered page; never returns None.

    Returns the numeric version when one can be found, the raw text of the
    version element when it contains no number, and 'unknown' otherwise.
    """
    try:
        logger.info('Attempting to extract AWS provider version')
        version_element = await page.query_selector(_VERSION_BREADCRUMB_SELECTOR)
        if version_element:
            version_text = await version_element.inner_text()
            logger.debug(f'Found version element with text: {version_text}')
            version_match = re.search(r'Version\s+([0-9.]+)', version_text)
            if version_match:
                provider_version = version_match.group(1)  # e.g., "5.91.0"
                logger.info(f'Extracted AWS provider version: {provider_version}')
                return provider_version
            logger.debug("Regex pattern didn't match, trying JavaScript approach")
            raw_version = await page.evaluate(_VERSION_DROPDOWN_JS)
            source_label = 'via JavaScript'
        else:
            logger.debug('Specific version selector not found, trying alternative selectors')
            raw_version = await page.evaluate(_VERSION_ALTERNATIVE_JS)
            source_label = 'via alternative selector'

        # The JavaScript snippets return null when nothing matched; keep the
        # 'unknown' placeholder instead of leaking None into the result.
        if not raw_version:
            return 'unknown'
        raw_version = raw_version.strip()
        version_match = re.search(r'([0-9.]+)', raw_version)
        if version_match:
            provider_version = version_match.group(1)
            logger.info(f'Extracted AWS provider version {source_label}: {provider_version}')
            return provider_version
        return raw_version or 'unknown'
    except Exception as version_error:
        logger.warning(f'Error extracting AWS provider version: {version_error}')
        return 'unknown'


async def _dismiss_consent_banner(page: Any) -> None:
    """Best-effort removal of the cookie consent banner; errors are only logged."""
    logger.info('Checking for cookie consent banner')
    try:
        if not await page.query_selector('#consent-banner'):
            return
        logger.info('Cookie consent banner detected, attempting to dismiss')

        dismiss_button_selectors = [
            'button.hds-button:has-text("Dismiss")',
            'button.hds-button .hds-button__text:has-text("Dismiss")',
            'button.hds-button--color-primary',
        ]
        for selector in dismiss_button_selectors:
            try:
                button = await page.query_selector(selector)
                if not button:
                    continue
                logger.info(f'Found dismiss button with selector: {selector}')
                await button.click()
                logger.info('Clicked the dismiss button')

                # Wait a moment for the banner to disappear
                await asyncio.sleep(1)
                if not await page.query_selector('#consent-banner'):
                    logger.info('Banner successfully dismissed')
                    break
            except Exception as button_error:
                logger.warning(f'Failed to click button {selector}: {button_error}')

        # If clicking did not work, remove the banner node directly.
        if await page.query_selector('#consent-banner'):
            logger.info('Attempting to remove banner via JavaScript')
            try:
                await page.evaluate(_REMOVE_CONSENT_BANNER_JS)
                logger.info('Removed banner using JavaScript')
            except Exception as js_error:
                logger.warning(f'Failed to remove banner via JavaScript: {js_error}')
    except Exception as banner_error:
        logger.warning(f'Error handling consent banner: {banner_error}')


async def _wait_for_navigation_menu(page: Any) -> None:
    """Wait (5 s per selector) until one of the known navigation selectors appears."""
    for selector in _NAVIGATION_SELECTORS:
        try:
            logger.info(f'Trying to locate element with selector: {selector}')
            await page.wait_for_selector(selector, timeout=5000)
            logger.info(f'Found element with selector: {selector}')
            return
        except Exception as se:
            logger.warning(f"Selector '{selector}' not found: {se}")


def _save_debug_html(content: str) -> None:
    """Write the rendered HTML to a temp file for debugging; failures are non-fatal."""
    try:
        # Explicit UTF-8: the page may contain characters the platform default
        # encoding cannot represent.
        with tempfile.NamedTemporaryFile(
            prefix='terraform_aws_debug_playwright_',
            suffix='.html',
            mode='w',
            encoding='utf-8',
            delete=False,
        ) as temp_file:
            temp_file.write(content)
            debug_file_path = temp_file.name
        logger.debug(f'Saved rendered HTML content to {debug_file_path}')
    except OSError as save_error:
        logger.warning(f'Could not save rendered HTML for debugging: {save_error}')


def _find_menu_content(soup: BeautifulSoup) -> Optional[Tag]:
    """Locate the navigation menu element, or return None when nothing plausible exists."""
    menu_content = soup.select_one('.provider-docs-menu-content')
    if menu_content is not None:
        return menu_content

    logger.warning("Couldn't find the .provider-docs-menu-content element, trying alternatives")
    for selector in _NAVIGATION_SELECTORS:
        menu_content = soup.select_one(selector)
        if menu_content is not None:
            logger.info(f'Found menu content with selector: {selector}')
            return menu_content

    logger.warning("Still couldn't find navigation using standard selectors")
    # Last resort: treat the element holding the most links (more than 10) as the menu.
    potential_menus: List[Tuple[Tag, int]] = []
    for elem in soup.find_all(['div', 'nav', 'ul']):
        if isinstance(elem, Tag):
            link_count = len(elem.find_all('a'))
            if link_count > 10:
                potential_menus.append((elem, link_count))
    if not potential_menus:
        return None
    # max() keeps the first of equally large candidates, as a stable sort would.
    best_menu, best_count = max(potential_menus, key=lambda candidate: candidate[1])
    logger.info(f'Using element with {best_count} links as menu')
    return best_menu


async def _click_category(page: Any, category_name: str, href: Optional[str]) -> None:
    """Expand a category in the menu, retrying up to three times.

    Clicks by ``href`` when available and falls back to the visible text.
    Raises the last click error once all attempts are exhausted.
    """
    max_attempts = 3
    escaped_name = category_name.replace("'", "\\'")
    for attempt in range(1, max_attempts + 1):
        try:
            if href:
                try:
                    selector = f"a[href='{href}']"
                    await page.click(selector, timeout=8000)
                    logger.debug(f'Clicked category using href selector: {selector}')
                except Exception as href_error:
                    logger.warning(f'Failed to click using href, trying text: {href_error}')
                    await page.click(f"text='{escaped_name}'", timeout=8000)
            else:
                await page.click(f"text='{escaped_name}'", timeout=8000)
            return
        except Exception as click_error:
            logger.warning(f'Click attempt {attempt} failed for {category_name}: {click_error}')
            if attempt >= max_attempts:
                logger.error(
                    f'Failed to click category {category_name} after {max_attempts} attempts'
                )
                raise

            # An overlay may be intercepting the click; remove the usual suspects.
            try:
                await page.evaluate(_REMOVE_OVERLAYS_JS)
                await asyncio.sleep(0.5)  # Brief pause between attempts
            except Exception as overlay_error:
                logger.debug(f'Ignoring error while removing overlays: {overlay_error}')


def _collect_section_items(
    category_menu_list: Tag, section_title: str, item_type: str
) -> List[ResourceItem]:
    """Collect the links listed under one titled section of an expanded category.

    Args:
        category_menu_list: The ``ul.menu-list`` element of the expanded category.
        section_title: Exact section heading, 'Resources' or 'Data Sources'.
        item_type: Value stored under 'type' for every collected item.
    """
    section = None
    for span in category_menu_list.find_all('span', class_=_CATEGORY_TITLE_CLASS):
        if span.get_text(strip=True) == section_title:
            section_li = span.find_parent('li')
            if section_li is not None:
                section = safe_find(section_li, 'ul', attrs={'class': 'menu-list'})
            break

    items: List[ResourceItem] = []
    if section is None:
        return items

    for entry in safe_find_all(section, 'li', class_='menu-list-link'):
        link = safe_find(entry, 'a')
        if link is None:
            continue
        href = link.attrs.get('href')
        link_text = safe_get_text(link, strip=True)
        if not href or not link_text:
            continue
        # Complete the URL if it's a relative path
        is_relative = isinstance(href, str) and href.startswith('/')
        full_url = f'{_REGISTRY_BASE_URL}{href}' if is_relative else href
        items.append(
            cast(ResourceItem, {'name': link_text, 'url': full_url, 'type': item_type})
        )
    return items


def _parse_category_items(
    html: str, category_name: str
) -> Optional[Tuple[List[ResourceItem], List[ResourceItem]]]:
    """Parse the page HTML captured after a category was clicked.

    Returns ``(resources, data_sources)``, or None (after logging a warning)
    when the category's menu cannot be located in the updated DOM.
    """
    current_soup = BeautifulSoup(html, 'html.parser')

    # The DOM changes after clicking, so find the category span again by text.
    clicked_category_span = None
    for span in current_soup.find_all('span', class_=_CATEGORY_TITLE_CLASS):
        if span.get_text(strip=True) == category_name:
            clicked_category_span = span
            break
    if clicked_category_span is None:
        logger.warning(f'Could not find clicked category {category_name} in updated DOM')
        return None

    # The parent LI contains all content for this category.
    parent_li = clicked_category_span.find_parent('li')
    if parent_li is None:
        logger.warning(f'Could not find parent LI for category {category_name}')
        return None

    category_menu_list = safe_find(parent_li, 'ul', attrs={'class': 'menu-list'})
    if category_menu_list is None:
        logger.warning(f'Could not find menu-list for category {category_name}')
        return None

    return (
        _collect_section_items(category_menu_list, 'Resources', 'resource'),
        _collect_section_items(category_menu_list, 'Data Sources', 'data_source'),
    )


async def _scrape_provider(browser: Any, start_time: float) -> ProviderResult:
    """Drive an already-launched browser through the docs site and build the result.

    The caller owns the browser and is responsible for closing it.
    """
    context = await browser.new_context(
        viewport={'width': 1280, 'height': 800},
        user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    )
    page = await context.new_page()
    page.set_default_timeout(NAVIGATION_TIMEOUT)

    logger.info(
        f'Navigating to Terraform AWS provider documentation (timeout: {NAVIGATION_TIMEOUT}ms)'
    )
    try:
        # 'domcontentloaded' rather than 'networkidle'
        await page.goto(AWS_PROVIDER_URL, wait_until='domcontentloaded')
        logger.info('Basic page loaded successfully')
    except Exception as nav_error:
        logger.error(f'Error during navigation: {nav_error}')
        return _fallback_result()

    logger.info('Waiting for page to render completely')
    # Small fixed delay to let JavaScript finish rendering
    await asyncio.sleep(2)

    provider_version = await _extract_provider_version(page)
    await _dismiss_consent_banner(page)
    await _wait_for_navigation_menu(page)

    logger.info('Extracting page content')
    content = await page.content()
    _save_debug_html(content)

    menu_content = _find_menu_content(BeautifulSoup(content, 'html.parser'))
    if menu_content is None:
        logger.error("Couldn't find any navigation element, using fallback data")
        return _fallback_result()

    category_titles = menu_content.select(f'.{_CATEGORY_TITLE_CLASS}')
    if not category_titles:
        logger.error("Couldn't find any .menu-list-category-link-title elements")
        return _fallback_result()
    logger.info(f'Found {len(category_titles)} category titles')

    # Collect every service category up front.  NOTE: an entry is created for
    # each one here, so categories cut off by MAX_CATEGORIES below still appear
    # in the result with empty lists (unchanged from the previous behaviour).
    categories: Dict[str, CategoryData] = {}
    categories_to_process: List[Tuple[str, Tag]] = []
    for category_el in category_titles:
        category_name = category_el.get_text(strip=True)
        if category_name.lower() in _SKIPPED_CATEGORIES:
            logger.debug(f'Skipping category: {category_name}')
            continue
        logger.debug(f'Will process category: {category_name}')
        categories_to_process.append((category_name, category_el))
        categories[category_name] = {'resources': [], 'data_sources': []}

    # Optional cap on the number of categories (for testing/development)
    max_categories = int(os.environ.get('MAX_CATEGORIES', '999'))
    if len(categories_to_process) > max_categories:
        logger.info(
            f'Limiting to {max_categories} categories (from {len(categories_to_process)})'
        )
        categories_to_process = categories_to_process[:max_categories]

    total = len(categories_to_process)
    logger.info(f'Processing {total} categories with click interaction')

    for category_idx, (category_name, category_el) in enumerate(categories_to_process):
        try:
            # Prefer the href of the enclosing <a> as the click target.
            href = None
            parent_a = category_el.parent
            if parent_a is not None and parent_a.name == 'a':
                href = parent_a.get('href')

            logger.info(f'[{category_idx + 1}/{total}] Clicking on category: {category_name}')

            # The consent banner can reappear and intercept clicks.
            try:
                if await page.query_selector('#consent-banner'):
                    logger.info('Cookie consent banner detected again, removing via JavaScript')
                    await page.evaluate(_REMOVE_CONSENT_BANNER_JS)
            except Exception as banner_error:
                logger.debug(f'Ignoring error in extra banner check: {banner_error}')

            await _click_category(page, category_name, href)

            # Wait briefly for content to load
            await asyncio.sleep(0.3)

            try:
                parsed = _parse_category_items(await page.content(), category_name)
                if parsed is None:
                    continue
                resources, data_sources = parsed
                categories[category_name]['resources'].extend(resources)
                categories[category_name]['data_sources'].extend(data_sources)
                logger.info(
                    f'Category {category_name}: found {len(resources)} resources, {len(data_sources)} data sources'
                )
            except Exception as extract_error:
                logger.error(f'Error extracting resources for {category_name}: {extract_error}')

        except Exception as click_error:
            logger.warning(f'Error interacting with category {category_name}: {click_error}')

    # Count statistics for logging
    service_count = len(categories)
    resource_count = sum(len(cat['resources']) for cat in categories.values())
    data_source_count = sum(len(cat['data_sources']) for cat in categories.values())
    duration = time.time() - start_time
    logger.info(
        f'Extracted {service_count} service categories with {resource_count} resources and {data_source_count} data sources in {duration:.2f} seconds'
    )

    if service_count > 0:
        return cast(ProviderResult, {'categories': categories, 'version': provider_version})
    logger.warning('No categories found, using fallback data')
    return _fallback_result()


async def fetch_aws_provider_page() -> ProviderResult:
    """Fetch the AWS provider documentation page using Playwright.

    A headless Chromium browser renders the JavaScript-driven Terraform
    Registry site; each service category in the navigation menu is then
    clicked open and its resources and data sources are collected.

    Pre-defined fallback data is returned instead when:
    - the USE_PLAYWRIGHT environment variable is set to 0/false/no,
    - Playwright is not installed,
    - navigation fails or no navigation menu / category can be found, or
    - any unexpected error occurs during scraping.

    Returns:
        A dictionary containing:
        - 'categories': AWS service categories with their resources and data sources
        - 'version': AWS provider version string (e.g., "5.91.0"), or 'unknown'
          when it could not be determined; never None.
    """
    if not USE_PLAYWRIGHT or async_playwright is None:
        if async_playwright is None:
            logger.info('Playwright is not installed; using pre-defined resource structure')
        else:
            logger.info(
                'Skipping Playwright and using pre-defined resource structure (USE_PLAYWRIGHT=0)'
            )
        return _fallback_result()

    logger.info('Playwright is available and will be used to scrape the AWS provider docs')
    logger.info('Starting browser to extract AWS provider resources structure')
    start_time = time.time()

    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(
                headless=True,
                args=['--disable-dev-shm-usage', '--no-sandbox', '--disable-setuid-sandbox'],
            )
            try:
                return await _scrape_provider(browser, start_time)
            finally:
                # Close on every path; a failing close must not discard the result.
                try:
                    await browser.close()
                except Exception as close_error:
                    logger.warning(f'Error closing browser: {close_error}')
    except Exception as e:
        logger.error(f'Error extracting AWS provider resources: {str(e)}')
        # Return fallback data in case of error
        return _fallback_result()
780
+
781
+
782
def get_fallback_resource_data() -> Dict[str, CategoryData]:
    """Return a small built-in catalogue of AWS provider resources.

    This is the data handed back when scraping produced no categories or
    raised an error. Each category maps to a ``'resources'`` list and a
    ``'data_sources'`` list; every entry is a dict with ``'name'``, ``'url'``
    and ``'type'`` keys, in that order.

    Returns:
        A dictionary of category name to its pre-defined resources and data
        sources.
    """
    logger.warning('Using pre-defined resource structure as fallback')

    docs_root = 'https://registry.terraform.io/providers/hashicorp/aws/latest/docs'
    name_prefix = 'aws_'

    def describe(name: str, kind: str) -> Dict[str, str]:
        # The documentation slug is the Terraform name without its 'aws_' prefix;
        # resources and data sources live under different URL sections.
        section = 'resources' if kind == 'resource' else 'data-sources'
        slug = name[len(name_prefix):]
        return {'name': name, 'url': f'{docs_root}/{section}/{slug}', 'type': kind}

    # (category, resource names, data source names) in the order they are emitted.
    catalogue = (
        (
            'ACM (Certificate Manager)',
            ('aws_acm_certificate', 'aws_acm_certificate_validation'),
            ('aws_acm_certificate',),
        ),
        (
            'API Gateway',
            (
                'aws_api_gateway_account',
                'aws_api_gateway_api_key',
                'aws_api_gateway_authorizer',
            ),
            ('aws_api_gateway_api_key',),
        ),
        (
            'AMP (Managed Prometheus)',
            ('aws_prometheus_workspace', 'aws_prometheus_alert_manager_definition'),
            ('aws_prometheus_workspace',),
        ),
        (
            'CloudWatch',
            ('aws_cloudwatch_metric_alarm', 'aws_cloudwatch_log_group'),
            ('aws_cloudwatch_log_group',),
        ),
        (
            'EC2',
            ('aws_instance', 'aws_security_group', 'aws_vpc', 'aws_subnet'),
            ('aws_instance', 'aws_vpc'),
        ),
        (
            'IAM',
            ('aws_iam_role', 'aws_iam_policy', 'aws_iam_user'),
            ('aws_iam_role', 'aws_iam_policy'),
        ),
        (
            'Lambda',
            ('aws_lambda_function', 'aws_lambda_permission'),
            ('aws_lambda_function',),
        ),
        (
            'S3',
            ('aws_s3_bucket', 'aws_s3_bucket_policy'),
            ('aws_s3_bucket', 'aws_s3_object'),
        ),
        (
            'DynamoDB',
            ('aws_dynamodb_table', 'aws_dynamodb_table_item'),
            ('aws_dynamodb_table',),
        ),
        (
            'Route53',
            ('aws_route53_zone', 'aws_route53_record'),
            ('aws_route53_zone',),
        ),
        (
            'SNS',
            ('aws_sns_topic', 'aws_sns_topic_subscription'),
            ('aws_sns_topic',),
        ),
        (
            'SQS',
            ('aws_sqs_queue', 'aws_sqs_queue_policy'),
            ('aws_sqs_queue',),
        ),
    )

    fallback = {
        category: {
            'resources': [describe(name, 'resource') for name in resource_names],
            'data_sources': [describe(name, 'data_source') for name in data_source_names],
        }
        for category, resource_names, data_source_names in catalogue
    }

    return cast(Dict[str, CategoryData], fallback)
1082
+
1083
+
1084
def parse_arguments():
    """Build the command line interface and parse ``sys.argv``.

    Options:
        --max-categories: integer cap on the number of categories; the
            default of 999 is reported as "all".
        --output: destination path for the generated markdown.
        --no-fallback: flag that disables the use of fallback data.

    Returns:
        The ``argparse.Namespace`` holding ``max_categories``, ``output`` and
        ``no_fallback``.
    """
    cli = argparse.ArgumentParser(
        description='Generate AWS provider resources markdown for the Terraform Expert MCP server.'
    )

    cli.add_argument(
        '--max-categories',
        type=int,
        default=999,
        help='Limit to N categories (default: all)',
    )
    cli.add_argument(
        '--output',
        type=Path,
        default=DEFAULT_OUTPUT_PATH,
        help=f'Output file path (default: {DEFAULT_OUTPUT_PATH})',
    )
    cli.add_argument(
        '--no-fallback',
        action='store_true',
        help="Don't use fallback data if scraping fails",
    )

    parsed = cli.parse_args()
    return parsed
1107
+
1108
+
1109
async def main():
    """Generate the AWS provider resources markdown file.

    Parses the command line, publishes the chosen options through environment
    variables, awaits ``fetch_aws_provider_page()``, renders the returned
    categories as markdown and writes the result to ``args.output``.

    Returns:
        0 when the file was written, 1 when any step raised (the error message
        is printed to stderr).
    """
    start_time = datetime.now()

    args = parse_arguments()

    print('Generating AWS provider resources markdown...')
    print(f'Output path: {args.output}')
    # 999 is the --max-categories default and stands for "no limit".
    print(f'Max categories: {args.max_categories if args.max_categories < 999 else "all"}')

    # NOTE(review): these variables are presumably read by the fetch step;
    # confirm against fetch_aws_provider_page.
    os.environ['MAX_CATEGORIES'] = str(args.max_categories)

    if args.no_fallback:
        os.environ['USE_PLAYWRIGHT'] = '1'
        print('Using live scraping without fallback')

    try:
        result = await fetch_aws_provider_page()

        if isinstance(result, dict) and 'categories' in result and 'version' in result:
            categories = result['categories']
            provider_version = result.get('version', 'unknown')
        else:
            # Older API shape: the result is the categories mapping itself.
            categories = result
            provider_version = 'unknown'

        sorted_categories = sorted(categories.keys())

        total_resources = sum(len(cat['resources']) for cat in categories.values())
        total_data_sources = sum(len(cat['data_sources']) for cat in categories.values())

        print(
            f'Found {len(categories)} categories, {total_resources} resources, and {total_data_sources} data sources'
        )

        # Document header.
        markdown = []
        markdown.append('# AWS Provider Resources Listing')
        markdown.append(f'\nAWS Provider Version: {provider_version}')
        markdown.append(f'\nLast updated: {datetime.now().strftime("%B %d, %Y %H:%M:%S")}')
        markdown.append(
            f'\nFound {total_resources} resources and {total_data_sources} data sources across {len(categories)} AWS service categories.\n'
        )

        # One section per category, in alphabetical order.
        for category in sorted_categories:
            cat_data = categories[category]
            # Parentheses are stripped from the heading text.
            sanitized_heading = category.replace('(', '').replace(')', '')

            markdown.append(f'## {sanitized_heading}')

            resource_count = len(cat_data['resources'])
            data_source_count = len(cat_data['data_sources'])

            markdown.append(
                f'\n*{resource_count} resources and {data_source_count} data sources*\n'
            )

            if cat_data['resources']:
                markdown.append('### Resources')
                for resource in sorted(cat_data['resources'], key=lambda x: x['name']):
                    markdown.append(f'- [{resource["name"]}]({resource["url"]})')

            if cat_data['data_sources']:
                markdown.append('\n### Data Sources')
                for data_source in sorted(cat_data['data_sources'], key=lambda x: x['name']):
                    markdown.append(f'- [{data_source["name"]}]({data_source["url"]})')

            markdown.append('')  # Blank line between categories

        # Footer with generation metadata.
        duration = datetime.now() - start_time
        markdown.append('---')
        markdown.append(
            '*This document was generated automatically by the AWS Provider Resources Generator script.*'
        )
        markdown.append(f'*Generation time: {duration.total_seconds():.2f} seconds*')

        args.output.parent.mkdir(parents=True, exist_ok=True)

        # Write as UTF-8 explicitly so the output does not depend on the
        # platform's default locale encoding.
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write('\n'.join(markdown))

        print(f'Successfully generated markdown file at: {args.output}')
        print(f'Generation completed in {duration.total_seconds():.2f} seconds')
        return 0

    except Exception as e:
        # Top-level boundary of the script: report the failure and signal it
        # through the exit code instead of a traceback.
        print(f'Error generating AWS provider resources: {str(e)}', file=sys.stderr)
        return 1
1221
+
1222
+
1223
if __name__ == '__main__':
    # Run the async entry point and hand its return value to the shell.
    exit_code = asyncio.run(main())
    sys.exit(exit_code)