amplify-excel-migrator 1.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
migrator.py ADDED
@@ -0,0 +1,437 @@
1
+ import argparse
2
+ import json
3
+ import logging
4
+ import re
5
+ import sys
6
+ from getpass import getpass
7
+ from pathlib import Path
8
+ from typing import Dict, Any
9
+
10
+ import pandas as pd
11
+
12
+ from amplify_client import AmplifyClient
13
+ from model_field_parser import ModelFieldParser
14
+
15
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
16
+ logger = logging.getLogger(__name__)
17
+
18
+ CONFIG_DIR = Path.home() / ".amplify-migrator"
19
+ CONFIG_FILE = CONFIG_DIR / "config.json"
20
+
21
+
22
+ class ExcelToAmplifyMigrator:
23
+ def __init__(self, excel_file_path: str):
24
+ self.model_field_parser = ModelFieldParser()
25
+ self.excel_file_path = excel_file_path
26
+ self.amplify_client = None
27
+
28
+ def init_client(
29
+ self,
30
+ api_endpoint: str,
31
+ region: str,
32
+ user_pool_id: str,
33
+ is_aws_admin: bool = False,
34
+ client_id: str = None,
35
+ username: str = None,
36
+ aws_profile: str = None,
37
+ ):
38
+
39
+ self.amplify_client = AmplifyClient(
40
+ api_endpoint=api_endpoint,
41
+ user_pool_id=user_pool_id,
42
+ region=region,
43
+ client_id=client_id,
44
+ )
45
+
46
+ try:
47
+ self.amplify_client.init_cognito_client(
48
+ is_aws_admin=is_aws_admin, username=username, aws_profile=aws_profile
49
+ )
50
+
51
+ except RuntimeError or Exception:
52
+ sys.exit(1)
53
+
54
+ def authenticate(self, username: str, password: str) -> bool:
55
+ return self.amplify_client.authenticate(username, password)
56
+
57
+ def run(self):
58
+ all_sheets = self.read_excel()
59
+
60
+ total_success = 0
61
+ total_failed = 0
62
+ failed_records_by_sheet = {}
63
+
64
+ for sheet_name, df in all_sheets.items():
65
+ logger.info(f"Processing {sheet_name} sheet with {len(df)} rows")
66
+ success, failed, failed_records = self.process_sheet(df, sheet_name)
67
+ total_success += success
68
+ total_failed += failed
69
+
70
+ if failed_records:
71
+ failed_records_by_sheet[sheet_name] = failed_records
72
+
73
+ print("\n" + "=" * 60)
74
+ print("MIGRATION SUMMARY")
75
+ print("=" * 60)
76
+ print(f"šŸ“Š Sheets processed: {len(all_sheets)}")
77
+ print(f"āœ… Total successful: {total_success}")
78
+ print(f"āŒ Total failed: {total_failed}")
79
+ print(
80
+ f"šŸ“ˆ Success rate: {(total_success / (total_success + total_failed) * 100):.1f}%"
81
+ if (total_success + total_failed) > 0
82
+ else "šŸ“ˆ Success rate: N/A"
83
+ )
84
+
85
+ if failed_records_by_sheet:
86
+ print("\n" + "=" * 60)
87
+ print("FAILED RECORDS DETAILS")
88
+ print("=" * 60)
89
+
90
+ for sheet_name, failed_records in failed_records_by_sheet.items():
91
+ print(f"\nšŸ“„ {sheet_name}:")
92
+ print("-" * 60)
93
+ for record in failed_records:
94
+ primary_field_value = record.get("primary_field_value", "Unknown")
95
+ error = record.get("error", "Unknown error")
96
+ row_number = record.get("row_number")
97
+
98
+ if row_number:
99
+ print(f" • Row {row_number}: {primary_field_value}")
100
+ else:
101
+ print(f" • Record: {primary_field_value}")
102
+ print(f" Error: {error}")
103
+
104
+ print("\n" + "=" * 60)
105
+ else:
106
+ print("\n✨ No failed records!")
107
+
108
+ print("=" * 60)
109
+
110
+ def read_excel(self) -> Dict[str, Any]:
111
+ logger.info(f"Reading Excel file: {self.excel_file_path}")
112
+ all_sheets = pd.read_excel(self.excel_file_path, sheet_name=None)
113
+ logger.info(f"Loaded {len(all_sheets)} sheets from Excel")
114
+ return all_sheets
115
+
116
+ def process_sheet(self, df: pd.DataFrame, sheet_name: str) -> tuple[int, int, list[Dict]]:
117
+ parsed_model_structure = self.get_parsed_model_structure(sheet_name)
118
+ primary_field, _, _ = self.amplify_client.get_primary_field_name(sheet_name, parsed_model_structure)
119
+ records, failed_parsing = self.transform_rows_to_records(df, parsed_model_structure, primary_field)
120
+ total_failed = len(failed_parsing)
121
+
122
+ confirm = input(f"\nUpload {len(records)} records of {sheet_name} to Amplify? (yes/no): ")
123
+ if confirm.lower() != "yes":
124
+ logger.info(f"Upload cancelled for {sheet_name} sheet")
125
+ return 0, total_failed, failed_parsing
126
+
127
+ success_count, upload_error_count, failed_uploads = self.amplify_client.upload(
128
+ records, sheet_name, parsed_model_structure
129
+ )
130
+
131
+ all_failed_records = failed_parsing + failed_uploads
132
+ total_failed = len(all_failed_records)
133
+
134
+ print(f"=== Upload of Excel sheet: {sheet_name} Complete ===")
135
+ print(f"āœ… Success: {success_count}")
136
+ print(f"āŒ Failed: {total_failed} (Parsing: {len(failed_parsing)}, Upload: {upload_error_count})")
137
+ print(f"šŸ“Š Total: {len(df)}")
138
+
139
+ return success_count, total_failed, all_failed_records
140
+
141
+ def transform_rows_to_records(
142
+ self, df: pd.DataFrame, parsed_model_structure: Dict[str, Any], primary_field: str
143
+ ) -> tuple[list[Any], list[Dict]]:
144
+ records = []
145
+ failed_parsing = []
146
+ row_count = 0
147
+ df.columns = [self.to_camel_case(c) for c in df.columns]
148
+
149
+ fk_lookup_cache = {}
150
+ if self.amplify_client:
151
+ logger.info("šŸš€ Pre-fetching foreign key lookups...")
152
+ fk_lookup_cache = self.amplify_client.build_foreign_key_lookups(df, parsed_model_structure)
153
+
154
+ for row_tuple in df.itertuples(index=False, name="Row"):
155
+ row_count += 1
156
+ row_dict = {col: getattr(row_tuple, col) for col in df.columns}
157
+ try:
158
+ record = self.transform_row_to_record(row_dict, parsed_model_structure, fk_lookup_cache)
159
+ if record:
160
+ records.append(record)
161
+ except Exception as e:
162
+ primary_field_value = row_dict.get(primary_field, f"Row {row_count}")
163
+ error_msg = str(e)
164
+ logger.error(f"Error transforming row {row_count} ({primary_field}={primary_field_value}): {error_msg}")
165
+ failed_parsing.append(
166
+ {
167
+ "primary_field": primary_field,
168
+ "primary_field_value": primary_field_value,
169
+ "error": f"Parsing error: {error_msg}",
170
+ "row_number": row_count,
171
+ }
172
+ )
173
+
174
+ logger.info(f"Prepared {len(records)} records for upload")
175
+ if failed_parsing:
176
+ logger.warning(f"Failed to parse {len(failed_parsing)} rows")
177
+
178
+ return records, failed_parsing
179
+
180
+ def get_parsed_model_structure(self, sheet_name: str) -> Dict[str, Any]:
181
+ model_structure = self.amplify_client.get_model_structure(sheet_name)
182
+ return self.model_field_parser.parse_model_structure(model_structure)
183
+
184
+ def transform_row_to_record(
185
+ self, row_dict: Dict, parsed_model_structure: Dict[str, Any], fk_lookup_cache: Dict[str, Dict[str, str]]
186
+ ) -> dict[Any, Any] | None:
187
+ """Transform a DataFrame row to Amplify model format"""
188
+
189
+ model_record = {}
190
+
191
+ for field in parsed_model_structure["fields"]:
192
+ input = self.parse_input(row_dict, field, parsed_model_structure, fk_lookup_cache)
193
+ if input:
194
+ model_record[field["name"]] = input
195
+
196
+ return model_record
197
+
198
+ def parse_input(
199
+ self,
200
+ row_dict: Dict,
201
+ field: Dict[str, Any],
202
+ parsed_model_structure: Dict[str, Any],
203
+ fk_lookup_cache: Dict[str, Dict[str, str]],
204
+ ) -> Any | None:
205
+ field_name = field["name"][:-2] if field["is_id"] else field["name"]
206
+
207
+ if field_name not in row_dict or pd.isna(row_dict[field_name]):
208
+ if field["is_required"]:
209
+ raise ValueError(f"Required field '{field_name}' is missing")
210
+ return None
211
+
212
+ value = self.model_field_parser.clean_input(row_dict[field_name])
213
+
214
+ if field["is_id"]:
215
+ if "related_model" in field:
216
+ related_model = field["related_model"]
217
+ else:
218
+ related_model = (temp := field["name"][:-2])[0].upper() + temp[1:]
219
+
220
+ if related_model in fk_lookup_cache:
221
+ lookup_dict = fk_lookup_cache[related_model]["lookup"]
222
+ record_id = lookup_dict.get(str(value))
223
+
224
+ if record_id:
225
+ return record_id
226
+ elif field["is_required"]:
227
+ raise ValueError(f"{related_model}: {value} does not exist")
228
+ return None
229
+ else:
230
+ logger.warning(f"No pre-fetched data for {related_model}, falling back to API call")
231
+ record = self.amplify_client.get_record(
232
+ related_model, parsed_model_structure=parsed_model_structure, value=value
233
+ )
234
+ if record and record.get("id"):
235
+ return record["id"]
236
+ elif field["is_required"]:
237
+ raise ValueError(f"{related_model}: {value} does not exist")
238
+ return None
239
+ else:
240
+ return self.model_field_parser.parse_field_input(field, field_name, value)
241
+
242
+ def _parse_custom_type_array(self, row: pd.Series, field: Dict[str, Any]) -> Any:
243
+ field_name = field["name"]
244
+
245
+ if field_name in row.index and pd.notna(row[field_name]):
246
+ value = row[field_name]
247
+ if isinstance(value, str) and value.strip().startswith(("[", "{")):
248
+ try:
249
+ return json.loads(value)
250
+ except json.JSONDecodeError:
251
+ logger.warning(f"Failed to parse JSON for '{field_name}', trying column-based parsing")
252
+
253
+ custom_type_name = field["type"]
254
+ parsed_custom_type = self.get_parsed_model_structure(custom_type_name)
255
+ custom_type_fields = parsed_custom_type["fields"]
256
+
257
+ return self.model_field_parser.build_custom_type_from_columns(row, custom_type_fields, custom_type_name)
258
+
259
+ @staticmethod
260
+ def to_camel_case(s: str) -> str:
261
+ # Handle PascalCase
262
+ s_with_spaces = re.sub(r"(?<!^)(?=[A-Z])", " ", s)
263
+
264
+ parts = re.split(r"[\s_\-]+", s_with_spaces.strip())
265
+ return parts[0].lower() + "".join(word.capitalize() for word in parts[1:])
266
+
267
+
268
def get_config_value(prompt: str, default: str = "", secret: bool = False) -> str:
    """Prompt the user for a value; return stripped input or `default` when blank.

    When `secret` is True the input is read without echo via getpass.
    """
    suffix = f" [{default}]: " if default else ": "
    full_prompt = f"{prompt}{suffix}"

    raw = getpass(full_prompt) if secret else input(full_prompt)

    stripped = raw.strip()
    return stripped if stripped else default
280
+
281
+
282
def save_config(config: Dict[str, str]) -> None:
    """Persist `config` as JSON to CONFIG_FILE, omitting password entries."""
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)

    secret_keys = ("password", "ADMIN_PASSWORD")
    sanitized = {key: value for key, value in config.items() if key not in secret_keys}

    with open(CONFIG_FILE, "w") as f:
        json.dump(sanitized, f, indent=2)

    logger.info(f"āœ… Configuration saved to {CONFIG_FILE}")
291
+
292
+
293
def load_cached_config() -> Dict[str, str]:
    """Return the cached configuration dict, or {} when missing or unreadable."""
    if not CONFIG_FILE.exists():
        return {}

    try:
        with CONFIG_FILE.open("r") as f:
            return json.load(f)
    except Exception as e:
        # Best-effort: a corrupt cache just means the user is re-prompted.
        logger.warning(f"Failed to load cached config: {e}")
        return {}
303
+
304
+
305
def get_cached_or_prompt(key: str, prompt: str, cached_config: Dict, default: str = "", secret: bool = False) -> str:
    """Return the cached value for `key`, or prompt the user for it.

    BUG FIX: the original returned the cached value whenever the key was
    present, even when it was None or "" (cmd_config can persist nulls for
    prompts left blank), which then propagated None into client setup.
    A falsy cached value now falls through to the interactive prompt.
    """
    cached_value = cached_config.get(key)
    if cached_value:
        return cached_value

    return get_config_value(prompt, default, secret)
310
+
311
+
312
def cmd_show(args=None):
    """Print the cached configuration (or a hint to create one)."""
    print(
        """
╔════════════════════════════════════════════════════╗
ā•‘ Amplify Migrator - Current Configuration ā•‘
ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•
"""
    )

    cached_config = load_cached_config()

    # Guard clause: nothing to show without a saved configuration.
    if not cached_config:
        print("\nāŒ No configuration found!")
        print("šŸ’” Run 'amplify-migrator config' first to set up your configuration.")
        return

    display_rows = (
        ("Excel file path", "excel_path"),
        ("API endpoint", "api_endpoint"),
        ("AWS Region", "region"),
        ("User Pool ID", "user_pool_id"),
        ("Client ID", "client_id"),
        ("Admin Username", "username"),
    )

    print("\nšŸ“‹ Cached Configuration:")
    print("-" * 54)
    for label, key in display_rows:
        print(f"{label}: {cached_config.get(key, 'N/A')}")
    print("-" * 54)
    print(f"\nšŸ“ Config location: {CONFIG_FILE}")
    print("šŸ’” Run 'amplify-migrator config' to update configuration.")
339
+
340
+
341
def cmd_config(args=None):
    """Interactively collect configuration values and save them to disk."""
    print(
        """
╔════════════════════════════════════════════════════╗
ā•‘ Amplify Migrator - Configuration Setup ā•‘
ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•
"""
    )

    cached_config = load_cached_config()

    # Cached values become the defaults shown in each prompt.
    prompt_spec = (
        ("excel_path", "Excel file path"),
        ("api_endpoint", "AWS Amplify API endpoint"),
        ("region", "AWS Region"),
        ("user_pool_id", "Cognito User Pool ID"),
        ("client_id", "Cognito Client ID"),
        ("username", "Admin Username"),
    )
    config = {key: get_config_value(label, cached_config.get(key)) for key, label in prompt_spec}

    save_config(config)
    print("\nāœ… Configuration saved successfully!")
    print("šŸ’” You can now run 'amplify-migrator migrate' to start the migration.")
364
+
365
+
366
def cmd_migrate(args=None):
    """Run the full migration: gather settings, authenticate, then upload."""
    print(
        """
╔════════════════════════════════════════════════════╗
ā•‘ Migrator Tool for Amplify ā•‘
╠════════════════════════════════════════════════════╣
ā•‘ This tool requires admin privileges to execute ā•‘
ā•šā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•ā•
"""
    )

    cached_config = load_cached_config()

    # A saved configuration is mandatory; bail out with a hint otherwise.
    if not cached_config:
        print("\nāŒ No configuration found!")
        print("šŸ’” Run 'amplify-migrator config' first to set up your configuration.")
        sys.exit(1)

    excel_path = get_cached_or_prompt("excel_path", "Excel file path", cached_config, "data.xlsx")
    api_endpoint = get_cached_or_prompt("api_endpoint", "AWS Amplify API endpoint", cached_config)
    region = get_cached_or_prompt("region", "AWS Region", cached_config, "us-east-1")
    user_pool_id = get_cached_or_prompt("user_pool_id", "Cognito User Pool ID", cached_config)
    client_id = get_cached_or_prompt("client_id", "Cognito Client ID", cached_config)
    username = get_cached_or_prompt("username", "Admin Username", cached_config)

    # The password is never cached; always prompt without echo.
    print("\nšŸ” Authentication:")
    print("-" * 54)
    password = get_config_value("Admin Password", secret=True)

    migrator = ExcelToAmplifyMigrator(excel_path)
    migrator.init_client(api_endpoint, region, user_pool_id, client_id=client_id, username=username)
    if not migrator.authenticate(username, password):
        return

    migrator.run()
401
+
402
+
403
def main():
    """CLI entry point: dispatch the config/show/migrate subcommands."""
    parser = argparse.ArgumentParser(
        description="Amplify Excel Migrator - Migrate Excel data to AWS Amplify GraphQL API",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # Register each subcommand with its handler in one pass.
    for name, help_text, handler in (
        ("config", "Configure the migration tool", cmd_config),
        ("show", "Show current configuration", cmd_show),
        ("migrate", "Run the migration", cmd_migrate),
    ):
        subparsers.add_parser(name, help=help_text).set_defaults(func=handler)

    args = parser.parse_args()

    # No subcommand given: show usage and exit non-zero.
    if args.command is None:
        parser.print_help()
        sys.exit(1)

    args.func(args)
427
+
428
+
429
if __name__ == "__main__":
    # BUG FIX: the released package unconditionally did
    #   sys.argv = ["migrator.py", "migrate"]
    # here — a debugging leftover that clobbered whatever command the user
    # actually passed. For IDE debugging, temporarily uncomment one of:
    # sys.argv = ["migrator.py", "config"]   # Test config command
    # sys.argv = ["migrator.py", "show"]     # Test show command
    # sys.argv = ["migrator.py", "migrate"]  # Test migrate command

    main()