amplify-excel-migrator 1.1.5__py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
- amplify_client.py +941 -0
- amplify_excel_migrator-1.1.5.dist-info/METADATA +219 -0
- amplify_excel_migrator-1.1.5.dist-info/RECORD +9 -0
- amplify_excel_migrator-1.1.5.dist-info/WHEEL +5 -0
- amplify_excel_migrator-1.1.5.dist-info/entry_points.txt +2 -0
- amplify_excel_migrator-1.1.5.dist-info/licenses/LICENSE +21 -0
- amplify_excel_migrator-1.1.5.dist-info/top_level.txt +3 -0
- migrator.py +437 -0
- model_field_parser.py +314 -0
model_field_parser.py
ADDED
@@ -0,0 +1,314 @@
from typing import Dict, Any
import logging
import pandas as pd
import unicodedata
from datetime import datetime

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)


class ModelFieldParser:
    """Parse GraphQL model fields from introspection results"""

    def __init__(self):
        self.scalar_types = {
            "String",
            "Int",
            "Float",
            "Boolean",
            "AWSDate",
            "AWSTime",
            "AWSDateTime",
            "AWSTimestamp",
            "AWSEmail",
            "AWSJSON",
            "AWSURL",
            "AWSPhone",
            "AWSIPAddress",
        }
        self.metadata_fields = {"id", "createdAt", "updatedAt", "owner"}

    def parse_model_structure(self, introspection_result: Dict) -> Dict[str, Any]:
        if not introspection_result:
            logger.error("Empty introspection result received", exc_info=True)
            raise ValueError("Introspection result cannot be empty")

        if "data" in introspection_result and "__type" in introspection_result["data"]:
            type_data = introspection_result["data"]["__type"]
        else:
            type_data = introspection_result

        model_info = {
            "name": type_data.get("name"),
            "kind": type_data.get("kind"),
            "description": type_data.get("description"),
            "fields": [],
        }

        relationships = {}
        relationship_field_names = set()

        if type_data.get("fields"):
            all_field_names = {field.get("name") for field in type_data["fields"]}

            # First pass: record relationship fields and the foreign keys they imply
            for field in type_data["fields"]:
                rel_info = self._extract_relationship_info(field)
                if rel_info:
                    relationships[rel_info["foreign_key"]] = rel_info["target_model"]
                    if rel_info["foreign_key"] in all_field_names:
                        relationship_field_names.add(field.get("name"))

            # Second pass: parse the remaining fields, tagging foreign keys with their target model
            for field in type_data["fields"]:
                if field.get("name") in relationship_field_names:
                    continue

                parsed_field = self._parse_field(field)
                if parsed_field:
                    if parsed_field["name"] in relationships:
                        parsed_field["related_model"] = relationships[parsed_field["name"]]
                    model_info["fields"].append(parsed_field)

        return model_info

    def _extract_relationship_info(self, field: Dict) -> Dict[str, str] | None:
        base_type = self._get_base_type_name(field.get("type", {}))
        type_kind = self._get_type_kind(field.get("type", {}))
        field_name = field.get("name", "")

        if type_kind != "OBJECT" or "Connection" in base_type or field_name in self.metadata_fields:
            return None

        inferred_foreign_key = f"{field_name}Id"
        return {"target_model": base_type, "foreign_key": inferred_foreign_key}

    def _parse_field(self, field: Dict) -> Dict[str, Any]:
        base_type = self._get_base_type_name(field.get("type", {}))
        type_kind = self._get_type_kind(field.get("type", {}))

        if "Connection" in base_type or field.get("name") in self.metadata_fields or type_kind == "INTERFACE":
            return {}

        field_info = {
            "name": field.get("name"),
            "description": field.get("description"),
            "type": base_type,
            "is_required": self._is_required_field(field.get("type", {})),
            "is_list": self._is_list_type(field.get("type", {})),
            "is_scalar": base_type in self.scalar_types,
            "is_id": base_type == "ID",
            "is_enum": field.get("type", {}).get("kind") == "ENUM",
            "is_custom_type": type_kind == "OBJECT",
        }

        return field_info

    def _get_base_type_name(self, type_obj: Dict) -> str:
        """Get the base type name, unwrapping NON_NULL and LIST wrappers."""
        if not type_obj:
            return "Unknown"

        if type_obj.get("name"):
            return type_obj["name"]

        if type_obj.get("ofType"):
            return self._get_base_type_name(type_obj["ofType"])

        return "Unknown"

    def _get_type_kind(self, type_obj: Dict) -> str:
        if not type_obj:
            return "UNKNOWN"

        if type_obj.get("kind") in ["NON_NULL", "LIST"] and type_obj.get("ofType"):
            return self._get_type_kind(type_obj["ofType"])

        return type_obj.get("kind", "UNKNOWN")

    @staticmethod
    def _is_required_field(type_obj: Dict) -> bool:
        return bool(type_obj) and type_obj.get("kind") == "NON_NULL"

    def _is_list_type(self, type_obj: Dict) -> bool:
        if not type_obj:
            return False

        if type_obj.get("kind") == "LIST":
            return True

        if type_obj.get("ofType"):
            return self._is_list_type(type_obj["ofType"])

        return False

    def build_custom_type_from_columns(self, row: pd.Series, custom_type_fields: list, custom_type_name: str) -> list | None:
        """Build custom type objects from Excel columns, handling multi-value fields"""
        field_values, max_count = self._collect_custom_type_fields_values(row, custom_type_fields)

        custom_type_objects = self._build_custom_type_objects(
            row, custom_type_fields, custom_type_name, field_values, max_count
        )

        return custom_type_objects if custom_type_objects else None

    @staticmethod
    def _collect_custom_type_fields_values(row: pd.Series, custom_type_fields: list) -> tuple[Dict[str, list], int]:
        field_values = {}
        max_count = 1

        for custom_field in custom_type_fields:
            custom_field_name = custom_field["name"]
            if custom_field_name in row.index and pd.notna(row[custom_field_name]):
                value = row[custom_field_name]

                # Dash-separated cells ("a-b-c") expand into one value per custom type object
                if isinstance(value, str) and "-" in str(value):
                    parts = [p.strip() for p in str(value).split("-") if p.strip()]
                    if len(parts) > 1:
                        field_values[custom_field_name] = parts
                        max_count = max(max_count, len(parts))
                    else:
                        field_values[custom_field_name] = [None]
                else:
                    field_values[custom_field_name] = [value]
            else:
                field_values[custom_field_name] = [None]

        return field_values, max_count

    def _build_custom_type_objects(
        self,
        row: pd.Series,
        custom_type_fields: list,
        custom_type_name: str,
        field_values: Dict[str, list],
        max_count: int,
    ) -> list:
        custom_type_objects = []

        for i in range(max_count):
            obj = {}

            for custom_field in custom_type_fields:
                custom_field_name = custom_field["name"]
                values_list = field_values.get(custom_field_name, [None])

                if i < len(values_list):
                    value = values_list[i]
                elif len(values_list) == 1:
                    # Single-valued fields are repeated across every generated object
                    value = values_list[0]
                else:
                    value = None

                if value is None or pd.isna(value):
                    if custom_field["is_required"]:
                        raise ValueError(
                            f"Required field '{custom_field_name}' is missing in custom type '{custom_type_name}' "
                            f"for row {row.name}, group {i + 1}"
                        )
                    continue

                parsed_value = self.parse_field_input(custom_field, custom_field_name, value)
                if parsed_value is not None:
                    obj[custom_field_name] = parsed_value

            if obj:
                custom_type_objects.append(obj)

        return custom_type_objects

    def parse_field_input(self, field: Dict[str, Any], field_name: str, input_value: Any) -> Any:
        try:
            if field["type"] in ["Int", "Integer", "Float"]:
                parsed_value = self.parse_number_dash_notation(input_value)
                return int(parsed_value) if field["type"] in ["Int", "Integer"] else float(parsed_value)
            elif field["type"] == "Boolean":
                if isinstance(input_value, bool):
                    return input_value
                if str(input_value).strip().lower() in ["true", "1", "v", "y", "yes"]:
                    return True
                elif str(input_value).strip().lower() in ["false", "0", "n", "x", "no"]:
                    return False
                else:
                    logger.error(f"Invalid Boolean value for field '{field_name}': {input_value}")
                    return None
            elif field["is_enum"]:
                return str(input_value).strip().replace(" ", "_").upper()
            elif field["type"] in ["AWSDate", "AWSDateTime"]:
                return self.parse_date(input_value)
            else:
                return str(input_value).strip()
        except (ValueError, TypeError) as e:
            logger.warning(
                f"Failed to parse field '{field_name}' with value '{input_value}' (type: {type(input_value).__name__}) "
                f"for field type '{field['type']}': {e}",
                exc_info=True,
            )
            return None

    @staticmethod
    def parse_number_dash_notation(input_value: Any) -> int | float:
        """
        Parse number-dash notation like "2-2" to its sum (4).
        Handles cases where pandas auto-converted "2-2" to a datetime.

        Examples:
            - "2-2" -> 4
            - "2-2-2" -> 6
            - datetime(2025, 2, 2) -> 4 (extracts month and day)
        """
        if isinstance(input_value, (pd.Timestamp, datetime)):
            input_value = str(input_value)

        # Handle datetime strings that pandas created from "2-2" -> "2025-02-02 00:00:00"
        if isinstance(input_value, str) and " " in input_value and ":" in input_value:
            date_part = input_value.split(" ")[0]
            if "-" in date_part:
                parts = date_part.split("-")
                if len(parts) == 3:
                    # Skip the year pandas injected; sum only month and day
                    return sum([int(parts[1]), int(parts[2])])
                else:
                    return sum([int(p) for p in parts])

        if isinstance(input_value, str) and "-" in str(input_value):
            return sum([int(p.strip()) for p in str(input_value).split("-") if p.strip()])

        return input_value

    @staticmethod
    def clean_input(input_value: Any) -> Any:
        if isinstance(input_value, str):
            input_value = input_value.strip()

            # Drop control and invisible formatting characters, keeping common whitespace
            input_value = "".join(
                char
                for char in input_value
                if unicodedata.category(char) not in ("Cf", "Cc") or char in ("\n", "\r", "\t")
            )

        return input_value

    @staticmethod
    def parse_date(input: Any) -> str | None:
        if isinstance(input, (pd.Timestamp, datetime)):
            return input.date().isoformat()

        input_str = str(input).strip()

        # Try day-first formats before falling back to pandas' default parser
        try:
            return pd.to_datetime(input_str, format="%d/%m/%Y").date().isoformat()
        except (ValueError, OverflowError):
            try:
                return pd.to_datetime(input_str, format="%d-%m-%Y").date().isoformat()
            except (ValueError, OverflowError):
                try:
                    return pd.to_datetime(input_str).date().isoformat()
                except (ValueError, OverflowError) as e:
                    logger.error(f"Failed to parse date '{input}': {e}")
                    return None
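
For orientation, here is a minimal usage sketch (not part of the released wheel). It assumes the module is importable as model_field_parser, as the package's top-level layout suggests, and the introspection payload and field names below are hypothetical stand-ins for what a GraphQL __type query would return.

    from model_field_parser import ModelFieldParser

    # Hypothetical introspection result for illustration; real payloads come from a __type query.
    introspection_result = {
        "data": {
            "__type": {
                "name": "Customer",
                "kind": "OBJECT",
                "fields": [
                    {"name": "id", "type": {"kind": "NON_NULL", "name": None, "ofType": {"kind": "SCALAR", "name": "ID"}}},
                    {"name": "email", "type": {"kind": "SCALAR", "name": "AWSEmail"}},
                    {"name": "visits", "type": {"kind": "SCALAR", "name": "Int"}},
                ],
            }
        }
    }

    parser = ModelFieldParser()
    model_info = parser.parse_model_structure(introspection_result)
    # model_info["fields"] now lists the parsed scalar fields; "id" is skipped as a metadata field.

    # Scalar coercion helpers can be exercised directly:
    visits_field = {"type": "Int", "is_enum": False}
    print(parser.parse_field_input(visits_field, "visits", "2-2"))  # 4 (dash notation summed)
    print(parser.parse_date("31/12/2024"))                          # "2024-12-31"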