awslabs.terraform-mcp-server 1.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. awslabs/__init__.py +17 -0
  2. awslabs/terraform_mcp_server/__init__.py +17 -0
  3. awslabs/terraform_mcp_server/impl/resources/__init__.py +25 -0
  4. awslabs/terraform_mcp_server/impl/resources/terraform_aws_provider_resources_listing.py +66 -0
  5. awslabs/terraform_mcp_server/impl/resources/terraform_awscc_provider_resources_listing.py +69 -0
  6. awslabs/terraform_mcp_server/impl/tools/__init__.py +33 -0
  7. awslabs/terraform_mcp_server/impl/tools/execute_terraform_command.py +223 -0
  8. awslabs/terraform_mcp_server/impl/tools/execute_terragrunt_command.py +320 -0
  9. awslabs/terraform_mcp_server/impl/tools/run_checkov_scan.py +376 -0
  10. awslabs/terraform_mcp_server/impl/tools/search_aws_provider_docs.py +691 -0
  11. awslabs/terraform_mcp_server/impl/tools/search_awscc_provider_docs.py +641 -0
  12. awslabs/terraform_mcp_server/impl/tools/search_specific_aws_ia_modules.py +458 -0
  13. awslabs/terraform_mcp_server/impl/tools/search_user_provided_module.py +349 -0
  14. awslabs/terraform_mcp_server/impl/tools/utils.py +572 -0
  15. awslabs/terraform_mcp_server/models/__init__.py +49 -0
  16. awslabs/terraform_mcp_server/models/models.py +381 -0
  17. awslabs/terraform_mcp_server/scripts/generate_aws_provider_resources.py +1240 -0
  18. awslabs/terraform_mcp_server/scripts/generate_awscc_provider_resources.py +1039 -0
  19. awslabs/terraform_mcp_server/scripts/scrape_aws_terraform_best_practices.py +143 -0
  20. awslabs/terraform_mcp_server/server.py +440 -0
  21. awslabs/terraform_mcp_server/static/AWSCC_PROVIDER_RESOURCES.md +3125 -0
  22. awslabs/terraform_mcp_server/static/AWS_PROVIDER_RESOURCES.md +3833 -0
  23. awslabs/terraform_mcp_server/static/AWS_TERRAFORM_BEST_PRACTICES.md +2523 -0
  24. awslabs/terraform_mcp_server/static/MCP_INSTRUCTIONS.md +142 -0
  25. awslabs/terraform_mcp_server/static/TERRAFORM_WORKFLOW_GUIDE.md +330 -0
  26. awslabs/terraform_mcp_server/static/__init__.py +38 -0
  27. awslabs_terraform_mcp_server-1.0.14.dist-info/METADATA +166 -0
  28. awslabs_terraform_mcp_server-1.0.14.dist-info/RECORD +30 -0
  29. awslabs_terraform_mcp_server-1.0.14.dist-info/WHEEL +4 -0
  30. awslabs_terraform_mcp_server-1.0.14.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,1240 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Script to generate AWS provider resources markdown for the Terraform Expert MCP server.
16
+
17
+ This script scrapes the Terraform AWS provider documentation using Playwright
18
+ and generates a comprehensive markdown file listing all AWS service categories,
19
+ resources, and data sources.
20
+
21
+ The generated markdown is saved to the static directory for use by the MCP server.
22
+
23
+ Usage:
24
+ python generate_aws_provider_resources.py [--max-categories N] [--output PATH]
25
+
26
+ Options:
27
+ --max-categories N Limit to N categories (default: all)
28
+ --output PATH Output file path (default: awslabs/terraform_mcp_server/static/AWS_PROVIDER_RESOURCES.md)
29
+ --no-fallback Don't use fallback data if scraping fails
30
+ """
31
+
32
+ import argparse
33
+ import asyncio
34
+ import os
35
+ import re
36
+ import sys
37
+ import tempfile
38
+ import time
39
+ from bs4 import BeautifulSoup, Tag
40
+ from bs4.element import PageElement, ResultSet
41
+ from bs4.filter import SoupStrainer
42
+ from datetime import datetime
43
+ from loguru import logger
44
+ from pathlib import Path
45
+ from typing import Any, Dict, List, Optional, Tuple, TypedDict, TypeVar, cast
46
+
47
+
48
+ ## Playwright optional import
49
+ try:
50
+ from playwright.async_api import async_playwright
51
+ except ImportError:
52
+ # Playwright is optional, we'll use fallback data if it's not available
53
+ async_playwright = None
54
+
55
# Put the repository root (three levels above this script's directory) on
# sys.path so that imports from terraform_mcp_server resolve when the script
# is executed directly.
script_dir = Path(__file__).resolve().parent
repo_root = script_dir.parent.parent.parent
sys.path.insert(0, str(repo_root))


# Route all log output to stderr with backtraces and variable diagnostics enabled
_LOG_FORMAT = '<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>'
logger.configure(
    handlers=[
        dict(
            sink=sys.stderr,
            backtrace=True,
            diagnose=True,
            format=_LOG_FORMAT,
        )
    ]
)

# Scraping with Playwright is on by default; setting USE_PLAYWRIGHT to anything
# other than 1/true/yes (case-insensitive) goes straight to the fallback data.
USE_PLAYWRIGHT = os.environ.get('USE_PLAYWRIGHT', '1').lower() in {'1', 'true', 'yes'}
# Page timeout in milliseconds, kept short so a broken run fails quickly
NAVIGATION_TIMEOUT = 20000  # 20 seconds
# Where the generated markdown is written unless another path is supplied
DEFAULT_OUTPUT_PATH = repo_root.joinpath(
    'awslabs', 'terraform_mcp_server', 'static', 'AWS_PROVIDER_RESOURCES.md'
)
# Entry page of the AWS provider documentation on the Terraform Registry
AWS_PROVIDER_URL = 'https://registry.terraform.io/providers/hashicorp/aws/latest/docs'
83
+
84
+
85
+ # Define TypedDict classes for the structures used in the script
86
class ResourceItem(TypedDict):
    """A single Terraform resource or data source entry.

    Keys:
        name: Identifier of the entry, e.g. 'aws_acm_certificate'.
        url: Link to the entry's documentation page.
        type: Kind of entry, either 'resource' or 'data_source'.
    """

    name: str
    url: str
    type: str
98
+
99
+
100
class CategoryData(TypedDict):
    """The resources and data sources that belong to one service category.

    Keys:
        resources: ResourceItem entries for the category's Terraform resources.
        data_sources: ResourceItem entries for the category's Terraform data sources.
    """

    resources: List[ResourceItem]
    data_sources: List[ResourceItem]
110
+
111
+
112
class ProviderResult(TypedDict):
    """The outcome of fetching the AWS provider documentation structure.

    Keys:
        categories: Maps each AWS service category name to its resources and data sources.
        version: The AWS provider version string, e.g. "5.91.0".
    """

    categories: Dict[str, CategoryData]
    version: str
122
+
123
+
124
# Type helpers for BeautifulSoup
# Generic type variable available to the typed helper functions below.
# NOTE(review): T is not referenced anywhere in the visible part of this file;
# confirm it is used further down before keeping it.
T = TypeVar('T')
126
+
127
+
128
def ensure_tag(element: Optional[PageElement]) -> Optional[Tag]:
    """Narrow a parsed element to a Tag.

    Args:
        element: Any BeautifulSoup page element, or None.

    Returns:
        The same object when it is a Tag, otherwise None.
    """
    return element if isinstance(element, Tag) else None
133
+
134
+
135
def safe_find(element: Any, *args: Any, **kwargs: Any) -> Optional[Tag]:
    """Run ``find`` on an element only when it is a Tag.

    Args:
        element: The object to search in; anything that is not a Tag yields None.
        *args: Positional arguments forwarded to ``Tag.find``.
        **kwargs: Keyword arguments forwarded to ``Tag.find``.

    Returns:
        The first match when it is itself a Tag, otherwise None.
    """
    if isinstance(element, Tag):
        match = element.find(*args, **kwargs)
        return ensure_tag(match)
    return None
141
+
142
+
143
def safe_find_all(element: Any, *args: Any, **kwargs: Any) -> ResultSet:
    """Run ``find_all`` on an element only when it is a Tag.

    Args:
        element: The object to search in.
        *args: Positional arguments forwarded to ``Tag.find_all``.
        **kwargs: Keyword arguments forwarded to ``Tag.find_all``.

    Returns:
        The matches found, or an empty ResultSet when ``element`` is not a Tag.
    """
    if isinstance(element, Tag):
        return element.find_all(*args, **kwargs)
    return ResultSet(SoupStrainer(), [])
148
+
149
+
150
def safe_get_text(element: Any, strip: bool = False) -> str:
    """Return the text of an element without assuming its type.

    Args:
        element: Any object; objects exposing ``get_text`` are asked for their text.
        strip: Forwarded to ``get_text`` as its ``strip`` argument.

    Returns:
        ``element.get_text(strip=strip)`` when that method exists, an empty
        string for None, and ``str(element)`` for everything else.
    """
    if element is None:
        return ''
    if not hasattr(element, 'get_text'):
        return str(element)
    return element.get_text(strip=strip)
155
+
156
+
157
# Base URL used to turn relative documentation links into absolute ones
_REGISTRY_BASE_URL = 'https://registry.terraform.io'

# CSS selector of the cookie consent banner that can cover the navigation
_CONSENT_BANNER_SELECTOR = '#consent-banner'

# Class shared by category titles and by the "Resources"/"Data Sources" headings
_CATEGORY_TITLE_CLASS = 'menu-list-category-link-title'

# Navigation entries that are not AWS service categories
_SKIPPED_CATEGORIES = ('guides', 'functions', 'aws provider')

# Candidate selectors for the navigation menu, in order of preference
_MENU_SELECTORS = [
    '.provider-docs-menu-content',
    'nav',
    '.docs-nav',
    'aside',
    'ul.nav',
    'div[role="navigation"]',
]

# JavaScript that removes the cookie consent banner from the DOM
_REMOVE_CONSENT_BANNER_JS = """() => {
    const banner = document.getElementById('consent-banner');
    if (banner) banner.remove();
    return true;
}"""

# JavaScript that removes common overlay elements that may intercept clicks
_REMOVE_OVERLAYS_JS = """() => {
    // Remove common overlay patterns
    document.querySelectorAll('[id*="banner"],[id*="overlay"],[id*="popup"],[class*="banner"],[class*="overlay"],[class*="popup"]')
        .forEach(el => el.remove());
    return true;
}"""

# JavaScript that reads the version from the version dropdown button
_VERSION_DROPDOWN_JS = """
() => {
    const versionEl = document.querySelector('.version-dropdown button span');
    return versionEl ? versionEl.innerText.trim() : null;
}
"""

# JavaScript that tries several selectors that might contain the version text
_VERSION_ALTERNATIVES_JS = """
() => {
    // Try different selectors that might contain the version
    const selectors = [
        '.version-dropdown button span',
        '.dropdown-trigger button span',
        'span:contains("Version")'
    ];
    for (const selector of selectors) {
        try {
            const el = document.querySelector(selector);
            if (el && el.innerText.includes('Version')) {
                return el.innerText.trim();
            }
        } catch (e) {}
    }
    return null;
}
"""


def _fallback_result() -> ProviderResult:
    """Build the result returned whenever live scraping is skipped or fails."""
    return cast(ProviderResult, {'categories': get_fallback_resource_data(), 'version': 'unknown'})


def _normalize_version_text(raw: Any) -> Optional[str]:
    """Reduce text read from the page to a version string, or None if there is no text.

    The first run of digits and dots is returned when present; otherwise the
    stripped text itself is returned.
    """
    if not isinstance(raw, str):
        return None
    text = raw.strip()
    if not text:
        return None
    version_match = re.search(r'([0-9.]+)', text)
    return version_match.group(1) if version_match else text


async def _extract_provider_version(page: Any) -> str:
    """Read the provider version from the rendered page; 'unknown' when it cannot be read."""
    try:
        logger.info('Attempting to extract AWS provider version')
        version_element = await page.query_selector(
            'body > div.provider-view > div.provider-nav > nav.bread-crumbs.is-light > div > div > ul > li:nth-child(4) > span'
        )
        if version_element:
            version_text = await version_element.inner_text()
            logger.debug(f'Found version element with text: {version_text}')

            version_match = re.search(r'Version\s+([0-9.]+)', version_text)
            if version_match:
                version = version_match.group(1)  # e.g., "5.91.0"
                logger.info(f'Extracted AWS provider version: {version}')
                return version

            logger.debug("Regex pattern didn't match, trying JavaScript approach")
            version = _normalize_version_text(await page.evaluate(_VERSION_DROPDOWN_JS))
            if version:
                logger.info(f'Extracted AWS provider version via JavaScript: {version}')
                return version
        else:
            logger.debug('Specific version selector not found, trying alternative selectors')
            version = _normalize_version_text(await page.evaluate(_VERSION_ALTERNATIVES_JS))
            if version:
                logger.info(
                    f'Extracted AWS provider version via alternative selector: {version}'
                )
                return version
    except Exception as version_error:
        logger.warning(f'Error extracting AWS provider version: {version_error}')
    # Never hand back None: the result's 'version' field is documented as a string
    return 'unknown'


async def _dismiss_consent_banner(page: Any) -> None:
    """Dismiss the cookie consent banner by clicking, falling back to removing it via JavaScript."""
    logger.info('Checking for cookie consent banner')
    try:
        if not await page.query_selector(_CONSENT_BANNER_SELECTOR):
            return
        logger.info('Cookie consent banner detected, attempting to dismiss')

        dismiss_button_selectors = [
            'button.hds-button:has-text("Dismiss")',
            'button.hds-button .hds-button__text:has-text("Dismiss")',
            'button.hds-button--color-primary',
        ]
        for selector in dismiss_button_selectors:
            try:
                button = await page.query_selector(selector)
                if not button:
                    continue
                logger.info(f'Found dismiss button with selector: {selector}')
                await button.click()
                logger.info('Clicked the dismiss button')

                # Give the banner a moment to disappear before checking again
                await asyncio.sleep(1)
                if not await page.query_selector(_CONSENT_BANNER_SELECTOR):
                    logger.info('Banner successfully dismissed')
                    break
            except Exception as button_error:
                logger.warning(f'Failed to click button {selector}: {button_error}')

        # If clicking did not work, remove the banner element directly
        if await page.query_selector(_CONSENT_BANNER_SELECTOR):
            logger.info('Attempting to remove banner via JavaScript')
            try:
                await page.evaluate(_REMOVE_CONSENT_BANNER_JS)
                logger.info('Removed banner using JavaScript')
            except Exception as js_error:
                logger.warning(f'Failed to remove banner via JavaScript: {js_error}')
    except Exception as banner_error:
        logger.warning(f'Error handling consent banner: {banner_error}')


async def _remove_reappeared_banner(page: Any) -> None:
    """Best-effort removal of a consent banner that showed up again between clicks."""
    try:
        if await page.query_selector(_CONSENT_BANNER_SELECTOR):
            logger.info('Cookie consent banner detected again, removing via JavaScript')
            await page.evaluate(_REMOVE_CONSENT_BANNER_JS)
    except Exception as banner_error:
        # This extra check is optional, so a failure here must not stop the scrape
        logger.debug(f'Ignoring error in extra consent banner check: {banner_error}')


async def _wait_for_navigation_menu(page: Any) -> None:
    """Wait until one of the known navigation selectors appears, trying each briefly."""
    for selector in _MENU_SELECTORS:
        try:
            logger.info(f'Trying to locate element with selector: {selector}')
            await page.wait_for_selector(selector, timeout=5000)
            logger.info(f'Found element with selector: {selector}')
            return
        except Exception as se:
            logger.warning(f"Selector '{selector}' not found: {se}")


def _save_debug_html(content: str) -> None:
    """Write the rendered HTML to a uniquely named temporary file for debugging."""
    with tempfile.NamedTemporaryFile(
        prefix='terraform_aws_debug_playwright_',
        suffix='.html',
        mode='w',
        encoding='utf-8',
        delete=False,
    ) as temp_file:
        temp_file.write(content)
        temp_file.flush()
        debug_file_path = temp_file.name
    logger.debug(f'Saved rendered HTML content to {debug_file_path}')


def _locate_menu(soup: BeautifulSoup) -> Optional[Tag]:
    """Find the navigation menu element in the parsed page, or None when nothing fits."""
    menu_content = soup.select_one('.provider-docs-menu-content')
    if menu_content:
        return menu_content
    logger.warning("Couldn't find the .provider-docs-menu-content element, trying alternatives")

    for selector in _MENU_SELECTORS:
        menu_content = soup.select_one(selector)
        if menu_content:
            logger.info(f'Found menu content with selector: {selector}')
            return menu_content
    logger.warning("Still couldn't find navigation using standard selectors")

    # Last resort: treat the element holding the most links (more than 10) as the menu
    potential_menus: List[Tuple[Tag, int]] = []
    for elem in soup.find_all(['div', 'nav', 'ul']):
        if isinstance(elem, Tag):
            link_count = len(elem.find_all('a'))
            if link_count > 10:
                potential_menus.append((elem, link_count))
    if not potential_menus:
        return None
    best_menu, best_count = max(potential_menus, key=lambda candidate: candidate[1])
    logger.info(f'Using element with {best_count} links as menu')
    return best_menu


async def _click_category(page: Any, category_name: str, href: Any) -> None:
    """Click a category in the navigation, retrying up to three times.

    The href selector is tried first when an href is known, then the visible
    text. Overlays are removed between attempts. The error of the final failed
    attempt is re-raised.
    """
    max_attempts = 3
    escaped_name = category_name.replace("'", "\\'")
    text_selector = f"text='{escaped_name}'"

    for attempt in range(1, max_attempts + 1):
        try:
            if href:
                try:
                    selector = f"a[href='{href}']"
                    await page.click(selector, timeout=8000)
                    logger.debug(f'Clicked category using href selector: {selector}')
                    return
                except Exception as click_error:
                    logger.warning(f'Failed to click using href, trying text: {click_error}')
            await page.click(text_selector, timeout=8000)
            return
        except Exception as click_error:
            logger.warning(f'Click attempt {attempt} failed for {category_name}: {click_error}')
            if attempt >= max_attempts:
                logger.error(
                    f'Failed to click category {category_name} after {max_attempts} attempts'
                )
                raise

            # Try removing any overlays before the next attempt
            try:
                await page.evaluate(_REMOVE_OVERLAYS_JS)
                await asyncio.sleep(0.5)  # Brief pause between attempts
            except Exception as overlay_error:
                logger.debug(f'Ignoring error while removing overlays: {overlay_error}')


def _find_section_list(category_menu_list: Tag, section_title: str) -> Optional[Tag]:
    """Return the ul.menu-list under the heading named section_title, or None if absent."""
    for span in category_menu_list.find_all('span', class_=_CATEGORY_TITLE_CLASS):
        if span.get_text(strip=True) == section_title:
            # The nearest enclosing <li> holds both the heading and its link list
            return safe_find(span.find_parent('li'), 'ul', attrs={'class': 'menu-list'})
    return None


def _collect_section_items(section: Optional[Tag], item_type: str) -> List[ResourceItem]:
    """Turn the links of one section into items tagged with item_type."""
    items: List[ResourceItem] = []
    if section is None:
        return items

    for entry in safe_find_all(section, 'li', class_='menu-list-link'):
        link = safe_find(entry, 'a')
        if link is None:
            continue
        href = link.attrs.get('href')
        if not href:
            continue
        link_text = safe_get_text(link, strip=True)
        if not link_text:
            continue

        # Complete the URL if it's a relative path
        full_url = (
            f'{_REGISTRY_BASE_URL}{href}'
            if isinstance(href, str) and href.startswith('/')
            else href
        )
        items.append(cast(ResourceItem, {'name': link_text, 'url': full_url, 'type': item_type}))
    return items


async def _scrape_category(
    page: Any, category_name: str, category_el: Tag, entry: CategoryData
) -> None:
    """Expand one category in the browser and append its items to entry.

    Click failures propagate to the caller; extraction failures are logged here.
    """
    # Prefer the href of the enclosing <a> as a precise click target
    href = None
    parent_a = category_el.parent
    if parent_a and parent_a.name == 'a':
        href = parent_a.get('href')

    await _remove_reappeared_banner(page)
    await _click_category(page, category_name, href)

    # Wait briefly for the expanded content to load
    await asyncio.sleep(0.3)

    try:
        # Re-parse the page because the DOM changes after the click
        current_soup = BeautifulSoup(await page.content(), 'html.parser')

        clicked_category_span = None
        for span in current_soup.find_all('span', class_=_CATEGORY_TITLE_CLASS):
            if span.get_text(strip=True) == category_name:
                clicked_category_span = span
                break
        if not clicked_category_span:
            logger.warning(f'Could not find clicked category {category_name} in updated DOM')
            return

        # The parent <li> contains all content for this category
        parent_li = clicked_category_span.find_parent('li')
        if not parent_li:
            logger.warning(f'Could not find parent LI for category {category_name}')
            return

        category_menu_list = safe_find(parent_li, 'ul', attrs={'class': 'menu-list'})
        if not category_menu_list:
            logger.warning(f'Could not find menu-list for category {category_name}')
            return

        resources = _collect_section_items(
            _find_section_list(category_menu_list, 'Resources'), 'resource'
        )
        entry['resources'].extend(resources)

        data_sources = _collect_section_items(
            _find_section_list(category_menu_list, 'Data Sources'), 'data_source'
        )
        entry['data_sources'].extend(data_sources)

        logger.info(
            f'Category {category_name}: found {len(resources)} resources, {len(data_sources)} data sources'
        )
    except Exception as extract_error:
        logger.error(f'Error extracting resources for {category_name}: {extract_error}')


async def _close_browser(browser: Any) -> None:
    """Close the browser, logging a failure instead of discarding scraped data over it."""
    try:
        await browser.close()
    except Exception as close_error:
        logger.warning(f'Error closing browser: {close_error}')


async def fetch_aws_provider_page() -> ProviderResult:
    """Fetch the AWS provider documentation structure using Playwright.

    A headless Chromium browser renders the JavaScript-driven Terraform
    Registry page. Each service category in the navigation is then clicked so
    that its resources and data sources can be read from the expanded menu.

    Pre-defined fallback data (with version 'unknown') is returned when:
        - USE_PLAYWRIGHT is disabled or Playwright is not importable
        - navigation fails, or no menu or category titles can be found
        - no categories are left to process
        - any unexpected error occurs while scraping

    The MAX_CATEGORIES environment variable (default 999) limits how many
    categories are processed; categories beyond the limit are left out of the
    result entirely.

    Returns:
        A dictionary containing:
            - 'categories': mapping of category name to its resources and data sources
            - 'version': provider version string (e.g. "5.91.0"), or 'unknown'
              when it could not be read from the page
    """
    # Check if we should skip Playwright and use fallback data directly
    if not USE_PLAYWRIGHT or async_playwright is None:
        logger.info(
            'Skipping Playwright and using pre-defined resource structure (USE_PLAYWRIGHT=0)'
        )
        return _fallback_result()

    logger.info('Playwright is available and will be used to scrape the AWS provider docs')
    logger.info('Starting browser to extract AWS provider resources structure')
    start_time = time.time()
    categories: Dict[str, CategoryData] = {}

    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(
                headless=True,
                args=['--disable-dev-shm-usage', '--no-sandbox', '--disable-setuid-sandbox'],
            )
            try:
                context = await browser.new_context(
                    viewport={'width': 1280, 'height': 800},
                    user_agent='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                )
                page = await context.new_page()
                page.set_default_timeout(NAVIGATION_TIMEOUT)

                logger.info(
                    f'Navigating to Terraform AWS provider documentation (timeout: {NAVIGATION_TIMEOUT}ms)'
                )
                try:
                    # 'domcontentloaded' is used instead of 'networkidle'
                    await page.goto(AWS_PROVIDER_URL, wait_until='domcontentloaded')
                    logger.info('Basic page loaded successfully')
                except Exception as nav_error:
                    logger.error(f'Error during navigation: {nav_error}')
                    return _fallback_result()

                logger.info('Waiting for page to render completely')
                # Small fixed delay to let JavaScript finish rendering
                await asyncio.sleep(2)

                provider_version = await _extract_provider_version(page)
                await _dismiss_consent_banner(page)
                await _wait_for_navigation_menu(page)

                logger.info('Extracting page content')
                content = await page.content()
                _save_debug_html(content)

                soup: BeautifulSoup = BeautifulSoup(content, 'html.parser')
                menu_content = _locate_menu(soup)
                if not menu_content:
                    logger.error("Couldn't find any navigation element, using fallback data")
                    return _fallback_result()

                category_titles = menu_content.select(f'.{_CATEGORY_TITLE_CLASS}')
                if not category_titles:
                    logger.error("Couldn't find any .menu-list-category-link-title elements")
                    return _fallback_result()
                logger.info(f'Found {len(category_titles)} category titles')

                # Collect the service categories, skipping non-service entries
                categories_to_process = []
                for category_el in category_titles:
                    category_name = category_el.get_text(strip=True)
                    if category_name.lower() in _SKIPPED_CATEGORIES:
                        logger.debug(f'Skipping category: {category_name}')
                        continue
                    logger.debug(f'Will process category: {category_name}')
                    categories_to_process.append((category_name, category_el))

                # Optionally process fewer categories (for testing/development)
                max_categories = int(os.environ.get('MAX_CATEGORIES', '999'))
                if len(categories_to_process) > max_categories:
                    logger.info(
                        f'Limiting to {max_categories} categories (from {len(categories_to_process)})'
                    )
                    categories_to_process = categories_to_process[:max_categories]

                # Create entries only after truncation so that categories beyond
                # the limit do not show up as empty sections in the result
                for category_name, _ in categories_to_process:
                    categories[category_name] = {'resources': [], 'data_sources': []}

                logger.info(
                    f'Processing {len(categories_to_process)} categories with click interaction'
                )
                for category_idx, (category_name, category_el) in enumerate(
                    categories_to_process
                ):
                    logger.info(
                        f'[{category_idx + 1}/{len(categories_to_process)}] Clicking on category: {category_name}'
                    )
                    try:
                        await _scrape_category(
                            page, category_name, category_el, categories[category_name]
                        )
                    except Exception as click_error:
                        # A category that cannot be opened is skipped; the rest go on
                        logger.warning(
                            f'Error interacting with category {category_name}: {click_error}'
                        )
            finally:
                await _close_browser(browser)

            # Count statistics for logging
            service_count = len(categories)
            resource_count = sum(len(cat['resources']) for cat in categories.values())
            data_source_count = sum(len(cat['data_sources']) for cat in categories.values())

            duration = time.time() - start_time
            logger.info(
                f'Extracted {service_count} service categories with {resource_count} resources and {data_source_count} data sources in {duration:.2f} seconds'
            )

            if service_count > 0:
                return {'categories': categories, 'version': provider_version}
            logger.warning('No categories found, using fallback data')
            return _fallback_result()

    except Exception as e:
        logger.error(f'Error extracting AWS provider resources: {str(e)}')
        # Return fallback data in case of error
        return _fallback_result()
796
+
797
+
798
def get_fallback_resource_data() -> Dict[str, CategoryData]:
    """Build the static catalogue returned when live scraping yields no data.

    The catalogue is declared compactly as, per service category, the names of
    its resources and of its data sources. Each entry's registry URL is derived
    from its name by dropping the leading ``aws_`` and appending the remainder
    to the ``resources`` or ``data-sources`` documentation path.

    Returns:
        A dictionary keyed by service category label. Each value holds a
        ``resources`` list and a ``data_sources`` list whose items are dicts
        with ``name``, ``url`` and ``type`` keys.
    """
    logger.warning('Using pre-defined resource structure as fallback')

    docs_root = 'https://registry.terraform.io/providers/hashicorp/aws/latest/docs'
    prefix_len = len('aws_')

    def describe(names, url_segment, kind):
        # One {'name', 'url', 'type'} dict per name, in the order given.
        entries = []
        for name in names:
            slug = name[prefix_len:]
            entries.append(
                {
                    'name': name,
                    'url': f'{docs_root}/{url_segment}/{slug}',
                    'type': kind,
                }
            )
        return entries

    # (category label, resource names, data source names) -- order is significant
    # because it determines the insertion order of the returned dictionary.
    catalogue = (
        (
            'ACM (Certificate Manager)',
            ('aws_acm_certificate', 'aws_acm_certificate_validation'),
            ('aws_acm_certificate',),
        ),
        (
            'API Gateway',
            (
                'aws_api_gateway_account',
                'aws_api_gateway_api_key',
                'aws_api_gateway_authorizer',
            ),
            ('aws_api_gateway_api_key',),
        ),
        (
            'AMP (Managed Prometheus)',
            ('aws_prometheus_workspace', 'aws_prometheus_alert_manager_definition'),
            ('aws_prometheus_workspace',),
        ),
        (
            'CloudWatch',
            ('aws_cloudwatch_metric_alarm', 'aws_cloudwatch_log_group'),
            ('aws_cloudwatch_log_group',),
        ),
        (
            'EC2',
            ('aws_instance', 'aws_security_group', 'aws_vpc', 'aws_subnet'),
            ('aws_instance', 'aws_vpc'),
        ),
        (
            'IAM',
            ('aws_iam_role', 'aws_iam_policy', 'aws_iam_user'),
            ('aws_iam_role', 'aws_iam_policy'),
        ),
        (
            'Lambda',
            ('aws_lambda_function', 'aws_lambda_permission'),
            ('aws_lambda_function',),
        ),
        (
            'S3',
            ('aws_s3_bucket', 'aws_s3_bucket_policy'),
            ('aws_s3_bucket', 'aws_s3_object'),
        ),
        (
            'DynamoDB',
            ('aws_dynamodb_table', 'aws_dynamodb_table_item'),
            ('aws_dynamodb_table',),
        ),
        (
            'Route53',
            ('aws_route53_zone', 'aws_route53_record'),
            ('aws_route53_zone',),
        ),
        (
            'SNS',
            ('aws_sns_topic', 'aws_sns_topic_subscription'),
            ('aws_sns_topic',),
        ),
        (
            'SQS',
            ('aws_sqs_queue', 'aws_sqs_queue_policy'),
            ('aws_sqs_queue',),
        ),
    )

    categories: Dict[str, CategoryData] = {}
    for label, resource_names, data_source_names in catalogue:
        categories[label] = cast(
            CategoryData,
            {
                'resources': describe(resource_names, 'resources', 'resource'),
                'data_sources': describe(data_source_names, 'data-sources', 'data_source'),
            },
        )

    return categories
1098
+
1099
+
1100
def parse_arguments():
    """Define the generator script's command line options and parse them.

    Options:
        --max-categories: integer cap on categories; defaults to 999.
        --output: destination path, converted to ``Path``; defaults to
            ``DEFAULT_OUTPUT_PATH``.
        --no-fallback: boolean flag, false unless given.

    Returns:
        The ``argparse.Namespace`` produced by parsing the process arguments.
    """
    cli = argparse.ArgumentParser(
        description='Generate AWS provider resources markdown for the Terraform Expert MCP server.'
    )

    # Declared in the order they should appear in --help output.
    option_specs = (
        (
            '--max-categories',
            {
                'type': int,
                'default': 999,
                'help': 'Limit to N categories (default: all)',
            },
        ),
        (
            '--output',
            {
                'type': Path,
                'default': DEFAULT_OUTPUT_PATH,
                'help': f'Output file path (default: {DEFAULT_OUTPUT_PATH})',
            },
        ),
        (
            '--no-fallback',
            {
                'action': 'store_true',
                'help': "Don't use fallback data if scraping fails",
            },
        ),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)

    return cli.parse_args()
1123
+
1124
+
1125
async def main():
    """Generate the AWS provider resources markdown file.

    Parses the command line, publishes the chosen options through environment
    variables, awaits ``fetch_aws_provider_page()``, renders the returned
    categories as markdown and writes the document to the requested path.

    Returns:
        0 once the file has been written; 1 if any exception is raised after
        argument parsing (the message is printed to stderr).
    """
    started_at = datetime.now()

    args = parse_arguments()

    print('Generating AWS provider resources markdown...')
    print(f'Output path: {args.output}')
    # 999 is the parser's default and is reported as "all".
    limit_label = args.max_categories if args.max_categories < 999 else 'all'
    print(f'Max categories: {limit_label}')

    # Options are handed over via the environment
    # (presumably read by the fetch routine -- confirm against its implementation).
    os.environ['MAX_CATEGORIES'] = str(args.max_categories)
    if args.no_fallback:
        os.environ['USE_PLAYWRIGHT'] = '1'
        print('Using live scraping without fallback')

    try:
        fetched = await fetch_aws_provider_page()

        # Current result shape is {'categories': ..., 'version': ...}; anything
        # else is treated as a bare categories mapping from an older API.
        has_envelope = (
            isinstance(fetched, dict) and 'categories' in fetched and 'version' in fetched
        )
        if not has_envelope:
            categories, provider_version = fetched, 'unknown'
        else:
            categories = fetched['categories']
            provider_version = fetched.get('version', 'unknown')

        ordered_names = sorted(categories.keys())

        total_resources = sum(len(entry['resources']) for entry in categories.values())
        total_data_sources = sum(len(entry['data_sources']) for entry in categories.values())

        print(
            f'Found {len(categories)} categories, {total_resources} resources, and {total_data_sources} data sources'
        )

        # Document header. No table of contents is emitted.
        lines = [
            '# AWS Provider Resources Listing',
            f'\nAWS Provider Version: {provider_version}',
            f'\nLast updated: {datetime.now().strftime("%B %d, %Y %H:%M:%S")}',
            f'\nFound {total_resources} resources and {total_data_sources} data sources across {len(categories)} AWS service categories.\n',
        ]

        def link_lines(items):
            # Markdown bullet links, ordered by item name.
            ordered = sorted(items, key=lambda item: item['name'])
            return [f'- [{item["name"]}]({item["url"]})' for item in ordered]

        for name in ordered_names:
            section = categories[name]

            # Parentheses are stripped from the heading text.
            heading = name.replace('(', '').replace(')', '')
            lines.append(f'## {heading}')

            resource_count = len(section['resources'])
            data_source_count = len(section['data_sources'])
            lines.append(f'\n*{resource_count} resources and {data_source_count} data sources*\n')

            if section['resources']:
                lines.append('### Resources')
                lines.extend(link_lines(section['resources']))

            if section['data_sources']:
                lines.append('\n### Data Sources')
                lines.extend(link_lines(section['data_sources']))

            # Blank line separating consecutive categories.
            lines.append('')

        # Footer with generation metadata.
        elapsed = datetime.now() - started_at
        lines.extend(
            [
                '---',
                '*This document was generated automatically by the AWS Provider Resources Generator script.*',
                f'*Generation time: {elapsed.total_seconds():.2f} seconds*',
            ]
        )

        args.output.parent.mkdir(parents=True, exist_ok=True)
        with args.output.open('w', encoding='utf-8') as handle:
            handle.write('\n'.join(lines))

        print(f'Successfully generated markdown file at: {args.output}')
        print(f'Generation completed in {elapsed.total_seconds():.2f} seconds')
        return 0

    except Exception as err:
        print(f'Error generating AWS provider resources: {str(err)}', file=sys.stderr)
        return 1
1237
+
1238
+
1239
# Script entry point: run the async main() and use its return value as the
# process exit status.
if __name__ == '__main__':
    exit_status = asyncio.run(main())
    sys.exit(exit_status)