amplify-excel-migrator 1.1.5__py3-none-any.whl → 1.2.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amplify_excel_migrator/__init__.py +17 -0
- amplify_excel_migrator/auth/__init__.py +6 -0
- amplify_excel_migrator/auth/cognito_auth.py +306 -0
- amplify_excel_migrator/auth/provider.py +42 -0
- amplify_excel_migrator/cli/__init__.py +5 -0
- amplify_excel_migrator/cli/commands.py +165 -0
- amplify_excel_migrator/client.py +47 -0
- amplify_excel_migrator/core/__init__.py +5 -0
- amplify_excel_migrator/core/config.py +98 -0
- amplify_excel_migrator/data/__init__.py +7 -0
- amplify_excel_migrator/data/excel_reader.py +23 -0
- amplify_excel_migrator/data/transformer.py +119 -0
- amplify_excel_migrator/data/validator.py +48 -0
- amplify_excel_migrator/graphql/__init__.py +8 -0
- amplify_excel_migrator/graphql/client.py +137 -0
- amplify_excel_migrator/graphql/executor.py +405 -0
- amplify_excel_migrator/graphql/mutation_builder.py +80 -0
- amplify_excel_migrator/graphql/query_builder.py +194 -0
- amplify_excel_migrator/migration/__init__.py +8 -0
- amplify_excel_migrator/migration/batch_uploader.py +23 -0
- amplify_excel_migrator/migration/failure_tracker.py +92 -0
- amplify_excel_migrator/migration/orchestrator.py +143 -0
- amplify_excel_migrator/migration/progress_reporter.py +57 -0
- amplify_excel_migrator/schema/__init__.py +6 -0
- model_field_parser.py → amplify_excel_migrator/schema/field_parser.py +100 -22
- amplify_excel_migrator/schema/introspector.py +95 -0
- {amplify_excel_migrator-1.1.5.dist-info → amplify_excel_migrator-1.2.15.dist-info}/METADATA +121 -26
- amplify_excel_migrator-1.2.15.dist-info/RECORD +40 -0
- amplify_excel_migrator-1.2.15.dist-info/entry_points.txt +2 -0
- amplify_excel_migrator-1.2.15.dist-info/top_level.txt +2 -0
- tests/__init__.py +1 -0
- tests/test_cli_commands.py +292 -0
- tests/test_client.py +187 -0
- tests/test_cognito_auth.py +363 -0
- tests/test_config_manager.py +347 -0
- tests/test_field_parser.py +615 -0
- tests/test_mutation_builder.py +391 -0
- tests/test_query_builder.py +384 -0
- amplify_client.py +0 -941
- amplify_excel_migrator-1.1.5.dist-info/RECORD +0 -9
- amplify_excel_migrator-1.1.5.dist-info/entry_points.txt +0 -2
- amplify_excel_migrator-1.1.5.dist-info/top_level.txt +0 -3
- migrator.py +0 -437
- {amplify_excel_migrator-1.1.5.dist-info → amplify_excel_migrator-1.2.15.dist-info}/WHEEL +0 -0
- {amplify_excel_migrator-1.1.5.dist-info → amplify_excel_migrator-1.2.15.dist-info}/licenses/LICENSE +0 -0

--- /dev/null
+++ b/amplify_excel_migrator/migration/batch_uploader.py
@@ -0,0 +1,23 @@
+"""Handles batch uploading of records to Amplify."""
+
+import logging
+from typing import Dict, List, Tuple, Any
+
+logger = logging.getLogger(__name__)
+
+
+class BatchUploader:
+    def __init__(self, amplify_client):
+        self.amplify_client = amplify_client
+
+    def upload_records(
+        self, records: List[Dict], sheet_name: str, parsed_model_structure: Dict[str, Any]
+    ) -> Tuple[int, int, List[Dict]]:
+        if not records:
+            return 0, 0, []
+
+        success_count, upload_error_count, failed_uploads = self.amplify_client.upload(
+            records, sheet_name, parsed_model_structure
+        )
+
+        return success_count, upload_error_count, failed_uploads
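
Below is a minimal, hypothetical sketch of how the new BatchUploader is driven. Only the upload() call that the wrapper delegates to is assumed on the client; the stub class and the example records are invented for illustration.

from amplify_excel_migrator.migration import BatchUploader


class StubAmplifyClient:
    """Stand-in client: only the upload() signature used by BatchUploader is assumed."""

    def upload(self, records, sheet_name, parsed_model_structure):
        return len(records), 0, []  # (success_count, upload_error_count, failed_uploads)


uploader = BatchUploader(amplify_client=StubAmplifyClient())
success, errors, failed = uploader.upload_records(
    records=[{"name": "Acme Ltd"}],
    sheet_name="Company",
    parsed_model_structure={},
)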

--- /dev/null
+++ b/amplify_excel_migrator/migration/failure_tracker.py
@@ -0,0 +1,92 @@
+"""Tracks and manages failed records during migration."""
+
+import logging
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, List, Optional
+
+import pandas as pd
+
+logger = logging.getLogger(__name__)
+
+
+class FailureTracker:
+    def __init__(self):
+        self._failures_by_sheet: Dict[str, List[Dict]] = {}
+        self._current_sheet: Optional[str] = None
+
+    def set_current_sheet(self, sheet_name: str) -> None:
+        self._current_sheet = sheet_name
+        if sheet_name not in self._failures_by_sheet:
+            self._failures_by_sheet[sheet_name] = []
+
+    def record_failure(
+        self,
+        primary_field: str,
+        primary_field_value: str,
+        error: str,
+        original_row: Optional[Dict] = None,
+    ) -> None:
+        if self._current_sheet is None:
+            raise RuntimeError("No current sheet set. Call set_current_sheet() first.")
+
+        failure_record = {
+            "primary_field": primary_field,
+            "primary_field_value": primary_field_value,
+            "error": error,
+        }
+
+        if original_row is not None:
+            failure_record["original_row"] = original_row
+
+        self._failures_by_sheet[self._current_sheet].append(failure_record)
+
+    def get_failures(self, sheet_name: Optional[str] = None) -> List[Dict]:
+        if sheet_name:
+            return self._failures_by_sheet.get(sheet_name, [])
+        return [failure for failures in self._failures_by_sheet.values() for failure in failures]
+
+    def get_failures_by_sheet(self) -> Dict[str, List[Dict]]:
+        return self._failures_by_sheet.copy()
+
+    def get_total_failure_count(self) -> int:
+        return sum(len(failures) for failures in self._failures_by_sheet.values())
+
+    def has_failures(self) -> bool:
+        return any(len(failures) > 0 for failures in self._failures_by_sheet.values())
+
+    def export_to_excel(self, original_excel_path: str) -> Optional[str]:
+        if not self.has_failures():
+            return None
+
+        input_path = Path(original_excel_path)
+        base_name = input_path.stem
+        if "_failed_records_" in base_name:
+            base_name = base_name.split("_failed_records_")[0]
+
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        output_filename = f"{base_name}_failed_records_{timestamp}.xlsx"
+        output_path = input_path.parent / output_filename
+
+        logger.info(f"Writing failed records to {output_path}")
+
+        with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
+            for sheet_name, failed_records in self._failures_by_sheet.items():
+                if not failed_records:
+                    continue
+
+                rows_data = []
+                for record in failed_records:
+                    row_data = record.get("original_row", {}).copy()
+                    row_data["ERROR"] = record["error"]
+                    rows_data.append(row_data)
+
+                df = pd.DataFrame(rows_data)
+                df.to_excel(writer, sheet_name=sheet_name, index=False)
+
+        logger.info(f"Successfully wrote failed records to {output_path}")
+        return str(output_path)
+
+    def clear(self) -> None:
+        self._failures_by_sheet.clear()
+        self._current_sheet = None
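
A short usage sketch of the FailureTracker API above; the sheet name, row values, and workbook path are invented for illustration.

from amplify_excel_migrator.migration import FailureTracker

tracker = FailureTracker()
tracker.set_current_sheet("Company")  # required before record_failure()

tracker.record_failure(
    primary_field="name",
    primary_field_value="Acme Ltd",
    error="Upload failed: duplicate record",
    original_row={"name": "Acme Ltd", "city": "Berlin"},
)

assert tracker.has_failures()
assert tracker.get_total_failure_count() == 1

# Writes <stem>_failed_records_<timestamp>.xlsx next to the source workbook
# and returns its path (needs pandas + openpyxl installed).
exported_path = tracker.export_to_excel("data/companies.xlsx")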

--- /dev/null
+++ b/amplify_excel_migrator/migration/orchestrator.py
@@ -0,0 +1,143 @@
+"""Main migration orchestrator that coordinates the entire migration process."""
+
+import logging
+from pathlib import Path
+from typing import Dict, Any, Optional
+
+import pandas as pd
+
+from amplify_excel_migrator.client import AmplifyClient
+from amplify_excel_migrator.data import ExcelReader, DataTransformer
+from amplify_excel_migrator.schema import FieldParser
+from amplify_excel_migrator.migration import FailureTracker, ProgressReporter, BatchUploader
+from amplify_excel_migrator.core import ConfigManager
+
+logger = logging.getLogger(__name__)
+
+
+class MigrationOrchestrator:
+    def __init__(
+        self,
+        excel_reader: ExcelReader,
+        data_transformer: DataTransformer,
+        amplify_client: AmplifyClient,
+        failure_tracker: FailureTracker,
+        progress_reporter: ProgressReporter,
+        batch_uploader: BatchUploader,
+        field_parser: FieldParser,
+    ):
+        self.excel_reader = excel_reader
+        self.data_transformer = data_transformer
+        self.amplify_client = amplify_client
+        self.failure_tracker = failure_tracker
+        self.progress_reporter = progress_reporter
+        self.batch_uploader = batch_uploader
+        self.field_parser = field_parser
+
+    def run(self) -> int:
+        all_sheets = self.excel_reader.read_all_sheets()
+
+        total_success = 0
+
+        for sheet_name, df in all_sheets.items():
+            logger.info(f"Processing {sheet_name} sheet with {len(df)} rows")
+            total_success += self.process_sheet(df, sheet_name)
+
+        self._display_summary(len(all_sheets), total_success)
+
+        return total_success
+
+    def process_sheet(self, df: pd.DataFrame, sheet_name: str) -> int:
+        self.failure_tracker.set_current_sheet(sheet_name)
+
+        parsed_model_structure = self._get_parsed_model_structure(sheet_name)
+
+        records, row_dict_by_primary = self._transform_rows_to_records(df, parsed_model_structure, sheet_name)
+
+        confirm = input(f"\nUpload {len(records)} records of {sheet_name} to Amplify? (yes/no): ")
+        if confirm.lower() != "yes":
+            logger.info(f"Upload cancelled for {sheet_name} sheet")
+            return 0
+
+        success_count, upload_error_count, failed_uploads = self.batch_uploader.upload_records(
+            records, sheet_name, parsed_model_structure
+        )
+
+        for failed_upload in failed_uploads:
+            primary_value = str(failed_upload["primary_field_value"])
+            original_row = row_dict_by_primary.get(primary_value, {})
+
+            self.failure_tracker.record_failure(
+                primary_field=failed_upload["primary_field"],
+                primary_field_value=failed_upload["primary_field_value"],
+                error=failed_upload["error"],
+                original_row=original_row,
+            )
+
+        failures = self.failure_tracker.get_failures(sheet_name)
+        parsing_failures = len(failures) - upload_error_count
+
+        self.progress_reporter.print_sheet_result(
+            sheet_name=sheet_name,
+            success_count=success_count,
+            total_rows=len(df),
+            parsing_failures=parsing_failures,
+            upload_failures=upload_error_count,
+        )
+
+        return success_count
+
+    def _transform_rows_to_records(
+        self,
+        df: pd.DataFrame,
+        parsed_model_structure: Dict[str, Any],
+        sheet_name: str,
+    ) -> tuple[list[Any], Dict[str, Dict]]:
+        df.columns = [self.data_transformer.to_camel_case(c) for c in df.columns]
+        primary_field, _, _ = self.amplify_client.get_primary_field_name(sheet_name, parsed_model_structure)
+
+        fk_lookup_cache = {}
+        if self.amplify_client:
+            logger.info("🚀 Pre-fetching foreign key lookups...")
+            fk_lookup_cache = self.amplify_client.build_foreign_key_lookups(df, parsed_model_structure)
+
+        records, row_dict_by_primary, failed_rows = self.data_transformer.transform_rows_to_records(
+            df, parsed_model_structure, primary_field, fk_lookup_cache
+        )
+
+        for failed_row in failed_rows:
+            self.failure_tracker.record_failure(
+                primary_field=failed_row["primary_field"],
+                primary_field_value=failed_row["primary_field_value"],
+                error=failed_row["error"],
+                original_row=failed_row["original_row"],
+            )
+
+        return records, row_dict_by_primary
+
+    def _get_parsed_model_structure(self, sheet_name: str) -> Dict[str, Any]:
+        model_structure = self.amplify_client.get_model_structure(sheet_name)
+        return self.field_parser.parse_model_structure(model_structure)
+
+    def _display_summary(self, sheets_processed: int, total_success: int) -> None:
+        failures_by_sheet = self.failure_tracker.get_failures_by_sheet()
+
+        self.progress_reporter.print_migration_summary(sheets_processed, total_success, failures_by_sheet)
+
+        if self.failure_tracker.has_failures():
+            export_confirm = input("\nExport failed records to Excel? (yes/no): ")
+            if export_confirm.lower() == "yes":
+                failed_records_file = self.failure_tracker.export_to_excel(self.excel_reader.file_path)
+                if failed_records_file:
+                    print(f"📁 Failed records exported to: {failed_records_file}")
+                    print("=" * 60)
+
+                    update_config = input("\nUpdate config to use this failed records file for next run? (yes/no): ")
+                    if update_config.lower() == "yes":
+                        config_manager = ConfigManager()
+                        config_manager.update({"excel_path": failed_records_file})
+                        print(f"✅ Config updated! Next 'migrate' will use: {Path(failed_records_file).name}")
+                        print("=" * 60)
+            else:
+                print("Failed records export skipped.")
+                print("=" * 60)
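
How the pieces above are wired together end to end, as a hedged sketch: FailureTracker, ProgressReporter, BatchUploader, and FieldParser are constructed exactly as their code in this diff allows, while the AmplifyClient, ExcelReader, and DataTransformer constructors are not shown here, so their arguments are assumptions.

from amplify_excel_migrator.client import AmplifyClient
from amplify_excel_migrator.data import ExcelReader, DataTransformer
from amplify_excel_migrator.schema import FieldParser
from amplify_excel_migrator.migration import FailureTracker, ProgressReporter, BatchUploader
from amplify_excel_migrator.migration.orchestrator import MigrationOrchestrator

client = AmplifyClient()  # assumed constructor; not part of this diff
orchestrator = MigrationOrchestrator(
    excel_reader=ExcelReader("data/companies.xlsx"),  # assumed to take the workbook path
    data_transformer=DataTransformer(),               # assumed no-arg constructor
    amplify_client=client,
    failure_tracker=FailureTracker(),
    progress_reporter=ProgressReporter(),
    batch_uploader=BatchUploader(client),
    field_parser=FieldParser(),
)

# Reads every sheet, prompts before each upload, prints a summary,
# and optionally exports failed rows back to a new Excel file.
total_uploaded = orchestrator.run()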

--- /dev/null
+++ b/amplify_excel_migrator/migration/progress_reporter.py
@@ -0,0 +1,57 @@
+"""Handles progress and summary reporting during migration."""
+
+from typing import Dict, List
+
+
+class ProgressReporter:
+    @staticmethod
+    def print_sheet_result(
+        sheet_name: str, success_count: int, total_rows: int, parsing_failures: int, upload_failures: int
+    ) -> None:
+        print(f"=== Upload of Excel sheet: {sheet_name} Complete ===")
+        print(f"✅ Success: {success_count}")
+        total_failures = parsing_failures + upload_failures
+        print(f"❌ Failed: {total_failures} (Parsing: {parsing_failures}, Upload: {upload_failures})")
+        print(f"📊 Total: {total_rows}")
+
+    @staticmethod
+    def print_migration_summary(
+        sheets_processed: int, total_success: int, failures_by_sheet: Dict[str, List[Dict]]
+    ) -> None:
+        total_failed = sum(len(failures) for failures in failures_by_sheet.values())
+
+        print("\n" + "=" * 60)
+        print("MIGRATION SUMMARY")
+        print("=" * 60)
+        print(f"📊 Sheets processed: {sheets_processed}")
+        print(f"✅ Total successful: {total_success}")
+        print(f"❌ Total failed: {total_failed}")
+
+        if (total_success + total_failed) > 0:
+            success_rate = (total_success / (total_success + total_failed)) * 100
+            print(f"📈 Success rate: {success_rate:.1f}%")
+        else:
+            print("📈 Success rate: N/A")
+
+        if total_failed > 0:
+            print("\n" + "=" * 60)
+            print("FAILED RECORDS DETAILS")
+            print("=" * 60)
+
+            for sheet_name, failed_records in failures_by_sheet.items():
+                if not failed_records:
+                    continue
+
+                print(f"\n📄 {sheet_name}:")
+                print("-" * 60)
+                for record in failed_records:
+                    primary_field_value = record.get("primary_field_value", "Unknown")
+                    error = record.get("error", "Unknown error")
+                    print(f" • Record: {primary_field_value}")
+                    print(f" Error: {error}")
+
+            print("\n" + "=" * 60)
+        else:
+            print("\n✨ No failed records!")
+
+        print("=" * 60)
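
Both reporter methods are static and take plain values, so they can be exercised directly; the counts below are illustrative.

from amplify_excel_migrator.migration import ProgressReporter

ProgressReporter.print_sheet_result(
    sheet_name="Company",
    success_count=48,
    total_rows=50,
    parsing_failures=1,
    upload_failures=1,
)

ProgressReporter.print_migration_summary(
    sheets_processed=3,
    total_success=140,
    failures_by_sheet={
        "Company": [{"primary_field_value": "Acme Ltd", "error": "Upload failed: duplicate record"}],
    },
)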

--- a/model_field_parser.py
+++ b/amplify_excel_migrator/schema/field_parser.py
@@ -8,7 +8,7 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(
 logger = logging.getLogger(__name__)
 
 
-class ModelFieldParser:
+class FieldParser:
     """Parse GraphQL model fields from introspection results"""
 
     def __init__(self):

@@ -220,29 +220,47 @@ class ModelFieldParser:
 
         return custom_type_objects
 
-    def
-
-
+    def _convert_single_value(
+        self,
+        field: Dict[str, Any],
+        field_name: str,
+        input_value: Any,
+        use_dash_notation: bool = False,
+        index: int = None,
+    ) -> Any:
+        if field["type"] in ["Int", "Integer"]:
+            if use_dash_notation:
                 parsed_value = self.parse_number_dash_notation(input_value)
-                return int(parsed_value)
-
-
-
-
-
-
-
-
-
-
-
-
-        elif
-            return
-        elif field["type"] == "AWSDate" or field["type"] == "AWSDateTime":
-            return self.parse_date(input_value)
+                return int(parsed_value)
+            return int(input_value)
+        elif field["type"] == "Float":
+            if use_dash_notation:
+                parsed_value = self.parse_number_dash_notation(input_value)
+                return float(parsed_value)
+            return float(input_value)
+        elif field["type"] == "Boolean":
+            if isinstance(input_value, bool):
+                return input_value
+            input_str = str(input_value).strip().lower()
+            if input_str in ["true", "1", "v", "y", "yes"]:
+                return True
+            elif input_str in ["false", "0", "n", "x", "no"]:
+                return False
             else:
-
+                context = f"array '{field_name}[{index}]'" if index is not None else f"field '{field_name}'"
+                logger.error(f"Invalid Boolean value for {context}: {input_value}")
+                return None
+        elif field.get("is_enum", False):
+            return str(input_value).strip().replace(" ", "_").upper()
+        elif field["type"] in ["AWSDate", "AWSDateTime"]:
+            return self.parse_date(input_value)
+        else:
+            return str(input_value).strip()
+
+    def parse_field_input(self, field: Dict[str, Any], field_name: str, input_value: Any) -> Any:
+        """Parse a single field value from Excel"""
+        try:
+            return self._convert_single_value(field, field_name, input_value, use_dash_notation=True)
         except (ValueError, TypeError) as e:
             logger.warning(
                 f"Failed to parse field '{field_name}' with value '{input_value}' (type: {type(input_value).__name__}) "
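
To illustrate the dispatch in _convert_single_value, a hedged sketch using hand-built field dicts with only the keys the code above reads ("type", optionally "is_enum"); real parsed fields carry more keys, and numeric values are routed through parse_number_dash_notation, which is not shown in this hunk. The hunk that follows continues amplify_excel_migrator/schema/field_parser.py.

from amplify_excel_migrator.schema import FieldParser

parser = FieldParser()

# Booleans accept true/1/v/y/yes and false/0/n/x/no, case-insensitively.
parser.parse_field_input({"type": "Boolean"}, "isActive", "Yes")  # -> True

# Enum values are normalised to UPPER_SNAKE_CASE.
parser.parse_field_input({"type": "String", "is_enum": True}, "status", "in review")  # -> "IN_REVIEW"

# Int/Float values pass through parse_number_dash_notation before int()/float().
parser.parse_field_input({"type": "Int"}, "employeeCount", "250")

# Unparseable values are expected to be caught, logged, and returned as None.
parser.parse_field_input({"type": "Int"}, "employeeCount", "not a number")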

@@ -251,6 +269,66 @@ class ModelFieldParser:
             )
             return None
 
+    def parse_scalar_array(self, field: Dict[str, Any], field_name: str, input_value: Any) -> list | None:
+        """
+        Parse scalar array from Excel cell supporting multiple formats:
+        - JSON: ["value1", "value2", "value3"]
+        - Semicolon: value1; value2; value3
+        - Comma: value1, value2, value3
+        - Space: value1 value2 value3
+        """
+        if pd.isna(input_value):
+            return None
+
+        input_str = str(input_value).strip()
+        if not input_str:
+            return None
+
+        if input_str.startswith("[") and input_str.endswith("]"):
+            try:
+                import json
+
+                parsed_json = json.loads(input_str)
+                if isinstance(parsed_json, list):
+                    return self._convert_array_elements(field, field_name, parsed_json)
+            except json.JSONDecodeError:
+                logger.warning(f"Failed to parse JSON array for field '{field_name}': {input_str}")
+
+        if ";" in input_str:
+            values = [v.strip() for v in input_str.split(";") if v.strip()]
+            return self._convert_array_elements(field, field_name, values)
+
+        if "," in input_str:
+            values = [v.strip() for v in input_str.split(",") if v.strip()]
+            return self._convert_array_elements(field, field_name, values)
+
+        values = [v.strip() for v in input_str.split() if v.strip()]
+        if len(values) > 1:
+            return self._convert_array_elements(field, field_name, values)
+
+        return self._convert_array_elements(field, field_name, [input_str])
+
+    def _convert_array_elements(self, field: Dict[str, Any], field_name: str, values: list) -> list:
+        converted = []
+        for i, value in enumerate(values):
+            cleaned_value = self.clean_input(value)
+
+            if not cleaned_value or (isinstance(cleaned_value, str) and not cleaned_value.strip()):
+                continue
+
+            try:
+                result = self._convert_single_value(field, field_name, cleaned_value, use_dash_notation=False, index=i)
+                if result is not None:
+                    converted.append(result)
+            except (ValueError, TypeError) as e:
+                logger.warning(
+                    f"Failed to convert array element '{field_name}[{i}]' with value '{cleaned_value}' "
+                    f"to type '{field['type']}': {e}"
+                )
+                continue
+
+        return converted if converted else None
+
     @staticmethod
     def parse_number_dash_notation(input_value: Any) -> int | float:
         """
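
The cell formats listed in the docstring can be exercised directly; a hedged sketch assuming clean_input passes plain strings through unchanged (clean_input itself is not part of this diff).

from amplify_excel_migrator.schema import FieldParser

parser = FieldParser()
string_field = {"type": "String"}

# All four formats are expected to normalise to ["alpha", "beta", "gamma"]:
parser.parse_scalar_array(string_field, "tags", '["alpha", "beta", "gamma"]')  # JSON
parser.parse_scalar_array(string_field, "tags", "alpha; beta; gamma")          # semicolon-separated
parser.parse_scalar_array(string_field, "tags", "alpha, beta, gamma")          # comma-separated
parser.parse_scalar_array(string_field, "tags", "alpha beta gamma")            # space-separated

# Empty or NaN cells return None rather than an empty list.
parser.parse_scalar_array(string_field, "tags", "")  # -> None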

--- /dev/null
+++ b/amplify_excel_migrator/schema/introspector.py
@@ -0,0 +1,95 @@
+"""Schema introspection for GraphQL models."""
+
+import logging
+from typing import Dict, Any, Optional
+
+import inflect
+
+from amplify_excel_migrator.graphql import GraphQLClient
+from amplify_excel_migrator.graphql.query_builder import QueryBuilder
+
+logger = logging.getLogger(__name__)
+
+
+class SchemaIntrospector:
+    def __init__(self, client: GraphQLClient):
+        self.client = client
+
+    def get_model_structure(self, model_type: str) -> Dict[str, Any]:
+        query = QueryBuilder.build_introspection_query(model_type)
+        response = self.client.request(query)
+
+        if response and "data" in response and "__type" in response["data"]:
+            return response["data"]["__type"]
+
+        return {}
+
+    def get_primary_field_name(self, model_name: str, parsed_model_structure: Dict[str, Any]) -> tuple[str, bool, str]:
+        secondary_index = self._get_secondary_index(model_name)
+        if secondary_index:
+            field_type = "String"
+            for field in parsed_model_structure["fields"]:
+                if field["name"] == secondary_index:
+                    field_type = field["type"]
+                    break
+            return secondary_index, True, field_type
+
+        for field in parsed_model_structure["fields"]:
+            if field["is_required"] and field["is_scalar"] and field["name"] != "id":
+                return field["name"], False, field["type"]
+
+        logger.error("No suitable primary field found (required scalar field other than id)")
+        return "", False, "String"
+
+    def _get_secondary_index(self, model_name: str) -> str:
+        query_structure = self.get_model_structure("Query")
+        if not query_structure:
+            logger.error("Query type not found in schema")
+            return ""
+
+        query_fields = query_structure["fields"]
+        pattern = f"{model_name}By"
+
+        for query in query_fields:
+            query_name = query["name"]
+            if pattern in query_name:
+                pattern_index = query_name.index(pattern)
+                field_name = query_name[pattern_index + len(pattern) :]
+                return field_name[0].lower() + field_name[1:] if field_name else ""
+
+        return ""
+
+    def get_list_query_name(self, model_name: str) -> Optional[str]:
+        query_structure = self.get_model_structure("Query")
+        if not query_structure:
+            logger.error("Query type not found in schema")
+            return f"list{model_name}s"
+
+        query_fields = query_structure["fields"]
+        p = inflect.engine()
+
+        candidates = [f"list{model_name}"]
+        capitals = [i for i, c in enumerate(model_name) if c.isupper()]
+
+        if len(capitals) > 1:
+            last_word_start = capitals[-1]
+            prefix = model_name[:last_word_start]
+            last_word = model_name[last_word_start:]
+
+            last_word_plural = str(p.plural(last_word.lower()))
+            last_word_plural_cap = last_word_plural[0].upper() + last_word_plural[1:] if last_word_plural else ""
+
+            pascal_plural = f"{prefix}{last_word_plural_cap}"
+            candidates.append(f"list{pascal_plural}")
+
+        full_plural = str(p.plural(model_name.lower()))
+        full_plural_cap = full_plural[0].upper() + full_plural[1:] if full_plural else ""
+        candidates.append(f"list{full_plural_cap}")
+
+        for query in query_fields:
+            query_name = query["name"]
+            if query_name in candidates and "By" not in query_name:
+                return query_name
+
+        logger.error(f"No list query found for model {model_name}, tried: {candidates}")
+        return None
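
A sketch of the introspector's list-query fallback for a hypothetical CompanyBranch model; the GraphQLClient constructor arguments are assumptions (only SchemaIntrospector(client) is shown in this diff), and the exact plural forms depend on inflect.

from amplify_excel_migrator.graphql import GraphQLClient
from amplify_excel_migrator.schema.introspector import SchemaIntrospector

# Constructor arguments below are assumed, not taken from this diff.
client = GraphQLClient("https://example.appsync-api.eu-west-1.amazonaws.com/graphql", "example-api-key")
introspector = SchemaIntrospector(client)

# For "CompanyBranch" the candidate names are built roughly as
#   ["listCompanyBranch", "listCompanyBranches", "listCompanybranches"]
# and the first one present among the schema's Query fields (and not containing "By") is returned.
list_query = introspector.get_list_query_name("CompanyBranch")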