amplify_excel_migrator-1.1.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
model_field_parser.py ADDED
@@ -0,0 +1,314 @@
+ from typing import Dict, Any
+ import logging
+ import pandas as pd
+ import unicodedata
+ from datetime import datetime
+
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+ logger = logging.getLogger(__name__)
+
+
+ class ModelFieldParser:
+     """Parse GraphQL model fields from introspection results"""
+
+     def __init__(self):
+         self.scalar_types = {
+             "String",
+             "Int",
+             "Float",
+             "Boolean",
+             "AWSDate",
+             "AWSTime",
+             "AWSDateTime",
+             "AWSTimestamp",
+             "AWSEmail",
+             "AWSJSON",
+             "AWSURL",
+             "AWSPhone",
+             "AWSIPAddress",
+         }
+         self.metadata_fields = {"id", "createdAt", "updatedAt", "owner"}
+
+     def parse_model_structure(self, introspection_result: Dict) -> Dict[str, Any]:
+         if not introspection_result:
+             logger.error("Empty introspection result received")
+             raise ValueError("Introspection result cannot be empty")
+
+         if "data" in introspection_result and "__type" in introspection_result["data"]:
+             type_data = introspection_result["data"]["__type"]
+         else:
+             type_data = introspection_result
+
+         model_info = {
+             "name": type_data.get("name"),
+             "kind": type_data.get("kind"),
+             "description": type_data.get("description"),
+             "fields": [],
+         }
+
+         relationships = {}
+         relationship_field_names = set()
+
+         if type_data.get("fields"):
+             all_field_names = {field.get("name") for field in type_data["fields"]}
+
+             for field in type_data["fields"]:
+                 rel_info = self._extract_relationship_info(field)
+                 if rel_info:
+                     relationships[rel_info["foreign_key"]] = rel_info["target_model"]
+                     if rel_info["foreign_key"] in all_field_names:
+                         relationship_field_names.add(field.get("name"))
+
+             for field in type_data["fields"]:
+                 if field.get("name") in relationship_field_names:
+                     continue
+
+                 parsed_field = self._parse_field(field)
+                 if parsed_field:
+                     if parsed_field["name"] in relationships:
+                         parsed_field["related_model"] = relationships[parsed_field["name"]]
+                     model_info["fields"].append(parsed_field)
+
+         return model_info
+
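+     # Illustrative sketch of the expected introspection shape (names below are
+     # hypothetical, not from the package):
+     #   parser = ModelFieldParser()
+     #   result = {"data": {"__type": {"name": "Todo", "kind": "OBJECT", "fields": [
+     #       {"name": "title", "type": {"kind": "SCALAR", "name": "String"}},
+     #   ]}}}
+     #   parser.parse_model_structure(result)
+     #   # -> {"name": "Todo", "kind": "OBJECT", "description": None,
+     #   #     "fields": [{"name": "title", "type": "String", "is_scalar": True, ...}]}
+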
+     def _extract_relationship_info(self, field: Dict) -> Dict[str, str] | None:
+         base_type = self._get_base_type_name(field.get("type", {}))
+         type_kind = self._get_type_kind(field.get("type", {}))
+         field_name = field.get("name", "")
+
+         if type_kind != "OBJECT" or "Connection" in base_type or field_name in self.metadata_fields:
+             return None
+
+         inferred_foreign_key = f"{field_name}Id"
+         return {"target_model": base_type, "foreign_key": inferred_foreign_key}
+
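+     # For example (illustrative values), a field
+     #   {"name": "author", "type": {"kind": "OBJECT", "name": "Author"}}
+     # yields {"target_model": "Author", "foreign_key": "authorId"}.
+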
+     def _parse_field(self, field: Dict) -> Dict[str, Any]:
+         base_type = self._get_base_type_name(field.get("type", {}))
+         type_kind = self._get_type_kind(field.get("type", {}))
+
+         if "Connection" in base_type or field.get("name") in self.metadata_fields or type_kind == "INTERFACE":
+             return {}
+
+         field_info = {
+             "name": field.get("name"),
+             "description": field.get("description"),
+             "type": base_type,
+             "is_required": self._is_required_field(field.get("type", {})),
+             "is_list": self._is_list_type(field.get("type", {})),
+             "is_scalar": base_type in self.scalar_types,
+             "is_id": base_type == "ID",
+             # Use the unwrapped kind so NON_NULL-wrapped enums are still detected
+             "is_enum": type_kind == "ENUM",
+             "is_custom_type": type_kind == "OBJECT",
+         }
+
+         return field_info
+
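+     # Connection and metadata fields are skipped; e.g. (illustrative)
+     #   _parse_field({"name": "comments", "type": {"kind": "OBJECT", "name": "ModelCommentConnection"}})
+     # returns {}, which the truthiness check in parse_model_structure filters out.
+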
+     def _get_base_type_name(self, type_obj: Dict) -> str:
+         """
+         Get the base type name, unwrapping NON_NULL and LIST wrappers
+         """
+
+         if not type_obj:
+             return "Unknown"
+
+         if type_obj.get("name"):
+             return type_obj["name"]
+
+         if type_obj.get("ofType"):
+             return self._get_base_type_name(type_obj["ofType"])
+
+         return "Unknown"
+
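+     # A doubly wrapped type unwraps to its innermost name, e.g. (illustrative):
+     #   {"kind": "NON_NULL", "name": None,
+     #    "ofType": {"kind": "LIST", "name": None,
+     #               "ofType": {"kind": "SCALAR", "name": "String"}}}
+     # -> "String"; _get_type_kind on the same object -> "SCALAR".
+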
+     def _get_type_kind(self, type_obj: Dict) -> str:
+         if not type_obj:
+             return "UNKNOWN"
+
+         if type_obj.get("kind") in ["NON_NULL", "LIST"] and type_obj.get("ofType"):
+             return self._get_type_kind(type_obj["ofType"])
+
+         return type_obj.get("kind", "UNKNOWN")
+
+     @staticmethod
+     def _is_required_field(type_obj: Dict) -> bool:
+         return bool(type_obj) and type_obj.get("kind") == "NON_NULL"
+
+     def _is_list_type(self, type_obj: Dict) -> bool:
+         if not type_obj:
+             return False
+
+         if type_obj.get("kind") == "LIST":
+             return True
+
+         if type_obj.get("ofType"):
+             return self._is_list_type(type_obj["ofType"])
+
+         return False
+
+     def build_custom_type_from_columns(self, row: pd.Series, custom_type_fields: list, custom_type_name: str) -> list | None:
+         """Build custom type objects from Excel columns, handling multi-value fields"""
+
+         field_values, max_count = self._collect_custom_type_fields_values(row, custom_type_fields)
+
+         custom_type_objects = self._build_custom_type_objects(
+             row, custom_type_fields, custom_type_name, field_values, max_count
+         )
+
+         return custom_type_objects if custom_type_objects else None
+
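+     # Illustrative (hypothetical row and field dicts): a row with street "Main St"
+     # and phone "111-222", with both fields declared as
+     #   {"name": ..., "type": "String", "is_required": False, "is_enum": False},
+     # expands the dash-separated value into two grouped objects:
+     #   [{"street": "Main St", "phone": "111"}, {"street": "Main St", "phone": "222"}]
+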
+     @staticmethod
+     def _collect_custom_type_fields_values(row: pd.Series, custom_type_fields: list) -> tuple[Dict[str, list], int]:
+         field_values = {}
+         max_count = 1
+
+         for custom_field in custom_type_fields:
+             custom_field_name = custom_field["name"]
+             if custom_field_name in row.index and pd.notna(row[custom_field_name]):
+                 value = row[custom_field_name]
+
+                 if isinstance(value, str) and "-" in value:
+                     parts = [p.strip() for p in value.split("-") if p.strip()]
+                     if len(parts) > 1:
+                         field_values[custom_field_name] = parts
+                         max_count = max(max_count, len(parts))
+                     else:
+                         field_values[custom_field_name] = [None]
+                 else:
+                     field_values[custom_field_name] = [value]
+             else:
+                 field_values[custom_field_name] = [None]
+
+         return field_values, max_count
+
+     def _build_custom_type_objects(
+         self,
+         row: pd.Series,
+         custom_type_fields: list,
+         custom_type_name: str,
+         field_values: Dict[str, list],
+         max_count: int,
+     ) -> list:
+         custom_type_objects = []
+
+         for i in range(max_count):
+             obj = {}
+
+             for custom_field in custom_type_fields:
+                 custom_field_name = custom_field["name"]
+                 values_list = field_values.get(custom_field_name, [None])
+
+                 if i < len(values_list):
+                     value = values_list[i]
+                 elif len(values_list) == 1:
+                     value = values_list[0]
+                 else:
+                     value = None
+
+                 if value is None or pd.isna(value):
+                     if custom_field["is_required"]:
+                         raise ValueError(
+                             f"Required field '{custom_field_name}' is missing in custom type '{custom_type_name}' "
+                             f"for row {row.name}, group {i + 1}"
+                         )
+                     continue
+
+                 parsed_value = self.parse_field_input(custom_field, custom_field_name, value)
+                 if parsed_value is not None:
+                     obj[custom_field_name] = parsed_value
+
+             if obj:
+                 custom_type_objects.append(obj)
+
+         return custom_type_objects
+
+     def parse_field_input(self, field: Dict[str, Any], field_name: str, input_value: Any) -> Any:
+         try:
+             if field["type"] in ["Int", "Integer", "Float"]:
+                 parsed_value = self.parse_number_dash_notation(input_value)
+                 return int(parsed_value) if field["type"] in ["Int", "Integer"] else float(parsed_value)
+             elif field["type"] == "Boolean":
+                 if isinstance(input_value, bool):
+                     return input_value
+                 if str(input_value).strip().lower() in ["true", "1", "v", "y", "yes"]:
+                     return True
+                 elif str(input_value).strip().lower() in ["false", "0", "n", "x", "no"]:
+                     return False
+                 else:
+                     logger.error(f"Invalid Boolean value for field '{field_name}': {input_value}")
+                     return None
+             elif field["is_enum"]:
+                 return str(input_value).strip().replace(" ", "_").upper()
+             elif field["type"] in ["AWSDate", "AWSDateTime"]:
+                 return self.parse_date(input_value)
+             else:
+                 return str(input_value).strip()
+         except (ValueError, TypeError) as e:
+             logger.warning(
+                 f"Failed to parse field '{field_name}' with value '{input_value}' (type: {type(input_value).__name__}) "
+                 f"for field type '{field['type']}': {e}",
+                 exc_info=True,
+             )
+             return None
+
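+     # Illustrative conversions (hypothetical field dicts):
+     #   parse_field_input({"type": "Int", "is_enum": False}, "qty", "2-2")        -> 4
+     #   parse_field_input({"type": "Boolean", "is_enum": False}, "active", "Yes") -> True
+     #   parse_field_input({"type": "Status", "is_enum": True}, "status", "in progress") -> "IN_PROGRESS"
+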
+     @staticmethod
+     def parse_number_dash_notation(input_value: Any) -> int | float:
+         """
+         Parse number-dash notation like "2-2" to its sum (4).
+         Handles cases where pandas auto-converted "2-2" to a datetime.
+
+         Examples:
+             - "2-2" -> 4
+             - "2-2-2" -> 6
+             - datetime(2025, 2, 2) -> 4 (sums month and day)
+         """
+
+         if isinstance(input_value, (pd.Timestamp, datetime)):
+             input_value = str(input_value)
+
+         # Handle datetime strings that pandas created from "2-2" -> "2025-02-02 00:00:00"
+         if isinstance(input_value, str) and " " in input_value and ":" in input_value:
+             date_part = input_value.split(" ")[0]
+             if "-" in date_part:
+                 parts = date_part.split("-")
+                 if len(parts) == 3:
+                     # The year is pandas padding; only month and day carry the original digits
+                     return int(parts[1]) + int(parts[2])
+                 return sum(int(p) for p in parts)
+
+         if isinstance(input_value, str) and "-" in input_value:
+             return sum(int(p.strip()) for p in input_value.split("-") if p.strip())
+
+         return input_value
+
+     @staticmethod
+     def clean_input(input_value: Any) -> Any:
+         if isinstance(input_value, str):
+             input_value = input_value.strip()
+
+             # Drop control/format characters (e.g. zero-width spaces) but keep newlines and tabs
+             input_value = "".join(
+                 char
+                 for char in input_value
+                 if unicodedata.category(char) not in ("Cf", "Cc") or char in ("\n", "\r", "\t")
+             )
+
+         return input_value
+
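+     # E.g. clean_input("  a\u200bb  ") -> "ab": the zero-width space (category Cf) is removed.
+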
+     @staticmethod
+     def parse_date(input_value: Any) -> str | None:
+         if isinstance(input_value, (pd.Timestamp, datetime)):
+             return input_value.date().isoformat()
+
+         input_str = str(input_value).strip()
+
+         try:
+             return pd.to_datetime(input_str, format="%d/%m/%Y").date().isoformat()
+         except (ValueError, OverflowError):
+             try:
+                 return pd.to_datetime(input_str, format="%d-%m-%Y").date().isoformat()
+             except (ValueError, OverflowError):
+                 try:
+                     return pd.to_datetime(input_str).date().isoformat()
+                 except (ValueError, OverflowError) as e:
+                     logger.error(f"Failed to parse date '{input_value}': {e}")
+                     return None
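+
+     # Day-first formats are tried before pandas' default inference, e.g.:
+     #   parse_date("31/12/2024") -> "2024-12-31"
+     #   parse_date("31-12-2024") -> "2024-12-31"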