ai-parrot 0.8.3__cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ai-parrot might be problematic. Click here for more details.

Files changed (128):
  1. ai_parrot-0.8.3.dist-info/LICENSE +21 -0
  2. ai_parrot-0.8.3.dist-info/METADATA +306 -0
  3. ai_parrot-0.8.3.dist-info/RECORD +128 -0
  4. ai_parrot-0.8.3.dist-info/WHEEL +6 -0
  5. ai_parrot-0.8.3.dist-info/top_level.txt +2 -0
  6. parrot/__init__.py +30 -0
  7. parrot/bots/__init__.py +5 -0
  8. parrot/bots/abstract.py +1115 -0
  9. parrot/bots/agent.py +492 -0
  10. parrot/bots/basic.py +9 -0
  11. parrot/bots/bose.py +17 -0
  12. parrot/bots/chatbot.py +271 -0
  13. parrot/bots/cody.py +17 -0
  14. parrot/bots/copilot.py +117 -0
  15. parrot/bots/data.py +730 -0
  16. parrot/bots/dataframe.py +103 -0
  17. parrot/bots/hrbot.py +15 -0
  18. parrot/bots/interfaces/__init__.py +1 -0
  19. parrot/bots/interfaces/retrievers.py +12 -0
  20. parrot/bots/notebook.py +619 -0
  21. parrot/bots/odoo.py +17 -0
  22. parrot/bots/prompts/__init__.py +41 -0
  23. parrot/bots/prompts/agents.py +91 -0
  24. parrot/bots/prompts/data.py +214 -0
  25. parrot/bots/retrievals/__init__.py +1 -0
  26. parrot/bots/retrievals/constitutional.py +19 -0
  27. parrot/bots/retrievals/multi.py +122 -0
  28. parrot/bots/retrievals/retrieval.py +610 -0
  29. parrot/bots/tools/__init__.py +7 -0
  30. parrot/bots/tools/eda.py +325 -0
  31. parrot/bots/tools/pdf.py +50 -0
  32. parrot/bots/tools/plot.py +48 -0
  33. parrot/bots/troc.py +16 -0
  34. parrot/conf.py +170 -0
  35. parrot/crew/__init__.py +3 -0
  36. parrot/crew/tools/__init__.py +22 -0
  37. parrot/crew/tools/bing.py +13 -0
  38. parrot/crew/tools/config.py +43 -0
  39. parrot/crew/tools/duckgo.py +62 -0
  40. parrot/crew/tools/file.py +24 -0
  41. parrot/crew/tools/google.py +168 -0
  42. parrot/crew/tools/gtrends.py +16 -0
  43. parrot/crew/tools/md2pdf.py +25 -0
  44. parrot/crew/tools/rag.py +42 -0
  45. parrot/crew/tools/search.py +32 -0
  46. parrot/crew/tools/url.py +21 -0
  47. parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
  48. parrot/handlers/__init__.py +4 -0
  49. parrot/handlers/agents.py +292 -0
  50. parrot/handlers/bots.py +196 -0
  51. parrot/handlers/chat.py +192 -0
  52. parrot/interfaces/__init__.py +6 -0
  53. parrot/interfaces/database.py +27 -0
  54. parrot/interfaces/http.py +805 -0
  55. parrot/interfaces/images/__init__.py +0 -0
  56. parrot/interfaces/images/plugins/__init__.py +18 -0
  57. parrot/interfaces/images/plugins/abstract.py +58 -0
  58. parrot/interfaces/images/plugins/exif.py +709 -0
  59. parrot/interfaces/images/plugins/hash.py +52 -0
  60. parrot/interfaces/images/plugins/vision.py +104 -0
  61. parrot/interfaces/images/plugins/yolo.py +66 -0
  62. parrot/interfaces/images/plugins/zerodetect.py +197 -0
  63. parrot/llms/__init__.py +1 -0
  64. parrot/llms/abstract.py +69 -0
  65. parrot/llms/anthropic.py +58 -0
  66. parrot/llms/gemma.py +15 -0
  67. parrot/llms/google.py +44 -0
  68. parrot/llms/groq.py +67 -0
  69. parrot/llms/hf.py +45 -0
  70. parrot/llms/openai.py +61 -0
  71. parrot/llms/pipes.py +114 -0
  72. parrot/llms/vertex.py +89 -0
  73. parrot/loaders/__init__.py +9 -0
  74. parrot/loaders/abstract.py +628 -0
  75. parrot/loaders/files/__init__.py +0 -0
  76. parrot/loaders/files/abstract.py +39 -0
  77. parrot/loaders/files/text.py +63 -0
  78. parrot/loaders/txt.py +26 -0
  79. parrot/manager.py +333 -0
  80. parrot/models.py +504 -0
  81. parrot/py.typed +0 -0
  82. parrot/stores/__init__.py +11 -0
  83. parrot/stores/abstract.py +248 -0
  84. parrot/stores/chroma.py +188 -0
  85. parrot/stores/duck.py +162 -0
  86. parrot/stores/embeddings/__init__.py +10 -0
  87. parrot/stores/embeddings/abstract.py +46 -0
  88. parrot/stores/embeddings/base.py +52 -0
  89. parrot/stores/embeddings/bge.py +20 -0
  90. parrot/stores/embeddings/fastembed.py +17 -0
  91. parrot/stores/embeddings/google.py +18 -0
  92. parrot/stores/embeddings/huggingface.py +20 -0
  93. parrot/stores/embeddings/ollama.py +14 -0
  94. parrot/stores/embeddings/openai.py +26 -0
  95. parrot/stores/embeddings/transformers.py +21 -0
  96. parrot/stores/embeddings/vertexai.py +17 -0
  97. parrot/stores/empty.py +10 -0
  98. parrot/stores/faiss.py +160 -0
  99. parrot/stores/milvus.py +397 -0
  100. parrot/stores/postgres.py +653 -0
  101. parrot/stores/qdrant.py +170 -0
  102. parrot/tools/__init__.py +23 -0
  103. parrot/tools/abstract.py +68 -0
  104. parrot/tools/asknews.py +33 -0
  105. parrot/tools/basic.py +51 -0
  106. parrot/tools/bby.py +359 -0
  107. parrot/tools/bing.py +13 -0
  108. parrot/tools/docx.py +343 -0
  109. parrot/tools/duck.py +62 -0
  110. parrot/tools/execute.py +56 -0
  111. parrot/tools/gamma.py +28 -0
  112. parrot/tools/google.py +170 -0
  113. parrot/tools/gvoice.py +301 -0
  114. parrot/tools/results.py +278 -0
  115. parrot/tools/stack.py +27 -0
  116. parrot/tools/weather.py +70 -0
  117. parrot/tools/wikipedia.py +58 -0
  118. parrot/tools/zipcode.py +198 -0
  119. parrot/utils/__init__.py +2 -0
  120. parrot/utils/parsers/__init__.py +5 -0
  121. parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
  122. parrot/utils/toml.py +11 -0
  123. parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
  124. parrot/utils/uv.py +11 -0
  125. parrot/version.py +10 -0
  126. resources/users/__init__.py +5 -0
  127. resources/users/handlers.py +13 -0
  128. resources/users/models.py +205 -0
@@ -0,0 +1,709 @@
1
+ from collections.abc import Mapping, Sequence
2
+ from typing import Any, Dict, Optional
3
+ import re
4
+ import struct
5
+ from io import BytesIO
6
+ from PIL import Image, ExifTags, PngImagePlugin
7
+ from PIL.ExifTags import TAGS, GPSTAGS, IFD
8
+ from PIL import TiffImagePlugin
9
+ from PIL.TiffImagePlugin import IFDRational
10
+ from libxmp import XMPFiles, consts
11
+ from pillow_heif import register_heif_opener
12
+ from .abstract import ImagePlugin
13
+ import base64
14
+
15
+
16
+ register_heif_opener() # ADD HEIF support
17
+
18
+
19
def _json_safe(obj):
    """Recursively convert *obj* into a structure made of JSON-friendly types.

    Conversions applied:

    * ``IFDRational`` -> ``float``
    * ``bytes`` / ``bytearray`` -> ``str`` (undecodable bytes are replaced)
    * NUL characters are stripped from every resulting string
    * mappings -> ``dict`` (values converted, keys left untouched)
    * any other non-string sequence -> ``list``

    Every other object is returned unchanged.
    """
    if isinstance(obj, IFDRational):
        return float(obj)

    if isinstance(obj, (bytes, bytearray)):
        # bytearray previously fell through unconverted even though the
        # contract is "no bytes"; treat it exactly like bytes.
        return obj.decode(errors="replace").replace('\x00', '')

    if isinstance(obj, str):
        return obj.replace('\x00', '')

    if isinstance(obj, Mapping):
        return {key: _json_safe(value) for key, value in obj.items()}

    # str / bytes / bytearray were all handled above, so any remaining
    # Sequence is a genuine container.
    if isinstance(obj, Sequence):
        return [_json_safe(item) for item in obj]

    return obj
40
+
41
+
42
def _make_serialisable(val):
    """Convert a single EXIF value to a plain Python type.

    ``bytes`` are decoded to ``str`` (undecodable bytes are replaced) and
    ``IFDRational`` becomes ``float``; anything else is returned as is.
    Containers are not descended into.
    """
    if isinstance(val, bytes):
        return val.decode(errors="replace")
    return float(val) if isinstance(val, IFDRational) else val
48
+
49
+ def get_xmp_modify_date(image, path: Optional[str] = None) -> str | None:
50
+ # 1) Try to grab the raw XMP packet from the JPEG APP1 segment
51
+ raw_xmp = image.info.get("XML:com.adobe.xmp")
52
+ if raw_xmp:
53
+ # 2) Feed it to XMPFiles via a buffer
54
+ xmpfile = XMPFiles(buffer=raw_xmp)
55
+ else:
56
+ # fallback: let XMPFiles pull directly from the file
57
+ # xmpfile = XMPFiles(file_path=path)
58
+ return None
59
+
60
+ xmp = xmpfile.get_xmp()
61
+ if not xmp:
62
+ return None
63
+
64
+ # 3) Common XMP namespaces & properties for modification history:
65
+ # - consts.XMP_NS_XMP / "ModifyDate"
66
+ modify = xmp.get_property(consts.XMP_NS_XMP, "ModifyDate")
67
+
68
+ xmpfile.close_file()
69
+
70
+ return modify
71
+
72
+
73
+ class EXIFPlugin(ImagePlugin):
74
+ """
75
+ EXIFPlugin is a plugin for extracting EXIF data from images.
76
+ It extends the ImagePlugin class and implements the analyze method to extract EXIF data.
77
+ """
78
+ column_name: str = "exif_data"
79
+
80
def __init__(self, *args, **kwargs):
    """Record the ``extract_geoloc`` option, then run the parent initialiser.

    ``extract_geoloc`` is read from *kwargs* (default ``False``) without being
    removed, so the parent class still receives it.
    """
    geoloc_requested = kwargs.get("extract_geoloc", False)
    self.extract_geoloc: bool = geoloc_requested
    super().__init__(*args, **kwargs)
83
+
84
def convert_to_degrees(self, value):
    """
    Convert a GPS (degrees, minutes, seconds) triple to decimal degrees.

    Args:
        value: Sequence whose first three items are the degree, minute and
            second components. Each component may be a plain number, an
            object exposing ``num``/``den`` or ``numerator``/``denominator``,
            or a ``(numerator, denominator)`` pair.
    Returns:
        Decimal degrees as a float, or None when fewer than three components
        are present, a component is None, or conversion fails.
    """
    def to_float(component):
        # Normalise every rational flavour to (num, den) so the
        # zero-denominator guard applies to all of them, not only to
        # objects that happen to expose ``num``/``den``.
        if isinstance(component, (tuple, list)) and len(component) == 2:
            num, den = component
        elif hasattr(component, "num") and hasattr(component, "den"):
            num, den = component.num, component.den
        elif hasattr(component, "numerator") and hasattr(component, "denominator"):
            num, den = component.numerator, component.denominator
        else:
            return float(component)
        if den == 0:
            # Undefined rational: contribute nothing rather than NaN/inf.
            return 0.0
        return float(num) / float(den)

    try:
        # Ensure all three components exist
        if len(value) < 3 or None in value:
            self.logger.warning(f"Invalid GPS value format: {value}")
            return None

        degrees = to_float(value[0])
        minutes = to_float(value[1])
        seconds = to_float(value[2])

        return degrees + (minutes / 60.0) + (seconds / 3600.0)
    except Exception as e:
        self.logger.debug(f"Error converting GPS value to degrees: {e}")
        return None
113
+
114
def extract_gps_datetime(self, exif: dict):
    """
    Build a ``{"datetime", "latitude", "longitude"}`` summary from EXIF data.

    Args:
        exif: Decoded EXIF dictionary. ``GPSInfo`` is expected to be a
            mapping keyed by GPS tag names; the timestamp comes from
            ``DateTimeOriginal`` or, failing that, ``DateTime``.
    Returns:
        Dictionary with the three keys above. Coordinates stay None unless
        latitude, longitude and both reference letters are all present;
        "S" and "W" references yield negative values.
    """
    summary = {
        "datetime": exif.get("DateTimeOriginal") or exif.get("DateTime"),
        "latitude": None,
        "longitude": None,
    }

    gps = exif.get("GPSInfo", {})
    if not gps:
        return summary

    lat = gps.get("GPSLatitude")
    lat_ref = gps.get("GPSLatitudeRef")
    lon = gps.get("GPSLongitude")
    lon_ref = gps.get("GPSLongitudeRef")
    if not (lat and lat_ref and lon and lon_ref):
        return summary

    latitude = self.convert_to_degrees(lat)
    longitude = self.convert_to_degrees(lon)

    # The hemisphere letter is applied only when conversion produced a number.
    if latitude is not None and lat_ref == "S":
        latitude = -latitude
    if longitude is not None and lon_ref == "W":
        longitude = -longitude

    summary["latitude"] = latitude
    summary["longitude"] = longitude
    return summary
146
+
147
async def extract_iptc_data(self, image) -> dict:
    """
    Extract IPTC metadata from an image.

    Sources are consulted in this order: ``image.info['photoshop']`` or
    ``image.info['iptc']``, ``image.info['IPTCDigest']``, the JPEG APP13
    segment list, TIFF tags, PNG text chunks and finally an ``IPTCDigest``
    attribute inside the XMP packet.

    Args:
        image: The PIL Image object.
    Returns:
        Dictionary of IPTC data (made JSON-safe), or an empty dict if no
        IPTC data exists or an error occurs.
    """
    try:
        iptc_data = {}
        info = image.info

        # Try to get IPTC data from image.info
        if 'photoshop' in info:
            iptc_data = self._parse_photoshop_data(info['photoshop'])
        elif 'iptc' in info:
            iptc = info['iptc']
            if isinstance(iptc, bytes):
                iptc_data.update(self._parse_iptc_data(iptc))
            elif isinstance(iptc, dict):
                iptc_data.update(iptc)

        # Check for IPTCDigest directly
        if 'IPTCDigest' in info:
            iptc_data['IPTCDigest'] = info['IPTCDigest']

        # For JPEG images, try to get IPTC from APP13 segment directly
        if not iptc_data and hasattr(image, 'applist'):
            for segment, content in image.applist:
                if segment == 'APP13' and b'Photoshop 3.0' in content:
                    iptc_data = self._parse_photoshop_data(content)
                    break

        # For TIFF, check for IPTC data in specific tags
        if not iptc_data and hasattr(image, 'tag_v2'):
            # 33723 is the IPTC tag in TIFF
            if 33723 in image.tag_v2:
                iptc_raw = image.tag_v2[33723]
                if isinstance(iptc_raw, bytes):
                    iptc_data.update(self._parse_iptc_data(iptc_raw))

            # Additional tags that might contain IPTC-related data
            for tag in (700, 33723, 34377):
                if tag in image.tag_v2:
                    tag_name = TAGS.get(tag, f"Tag_{tag}")
                    iptc_data[tag_name] = _make_serialisable(image.tag_v2[tag])

        # For PNG, try iTXt or tEXt chunks that might contain IPTC.
        # 'IPTCDigest' itself starts with 'IPTC', so one test covers it.
        if not iptc_data and hasattr(image, 'text'):
            for key, value in image.text.items():
                if key.startswith('IPTC') or key == 'XML:com.adobe.xmp':
                    iptc_data[key] = value

        # For XMP metadata in any image format
        if 'XML:com.adobe.xmp' in info:
            xmp_data = info['XML:com.adobe.xmp']
            if isinstance(xmp_data, (bytes, bytearray)):
                # A raw packet may arrive as bytes; decode it so the digest
                # lookup below is not silently skipped.
                xmp_data = bytes(xmp_data).decode('utf-8', errors='replace')
            if isinstance(xmp_data, str) and 'IPTCDigest' in xmp_data:
                # Simple pattern matching for IPTCDigest in XMP
                match = re.search(r'IPTCDigest="([^"]+)"', xmp_data)
                if match:
                    iptc_data['IPTCDigest'] = match.group(1)

        return _json_safe(iptc_data) if iptc_data else {}
    except Exception as e:
        self.logger.error(f'Error extracting IPTC data: {e}')
        return {}
223
+
224
def _parse_photoshop_data(self, data) -> dict:
    """
    Parse Photoshop image-resource data to extract IPTC metadata.

    Resource IDs follow the Adobe image-resource table: 0x0404 is the
    IPTC-NAA record and 0x0425 is the caption digest (reported here as
    ``IPTCDigest``). 0x040F is the ICC profile and is ignored.

    Args:
        data: Raw Photoshop data from the APP13 segment (bytes), or a
            dictionary keyed by resource id / name.
    Returns:
        Dictionary of extracted IPTC data; empty on unsupported input or
        on error.
    """
    iptc_resource = 0x0404
    digest_resource = 0x0425
    iptc_data = {}
    try:
        # Handle the case where data is already a dictionary
        if isinstance(data, dict):
            if 'IPTCDigest' in data:
                iptc_data['IPTCDigest'] = data['IPTCDigest']
            elif isinstance(data.get(digest_resource), bytes):
                iptc_data['IPTCDigest'] = data[digest_resource].hex()

            if 'IPTC' in data or iptc_resource in data:
                iptc_block = data.get('IPTC', data.get(iptc_resource, b''))
                if isinstance(iptc_block, bytes):
                    iptc_data.update(self._parse_iptc_data(iptc_block))

            return iptc_data

        if not isinstance(data, bytes):
            self.logger.debug(f"Expected bytes for Photoshop data, got {type(data)}")
            return {}

        # Find the first Photoshop resource marker
        offset = data.find(b'8BIM')
        if offset < 0:
            return {}

        io_data = BytesIO(data)
        io_data.seek(offset)

        while True:
            try:
                if io_data.read(4) != b'8BIM':
                    break

                # Resource identifier (2 bytes)
                resource_id = int.from_bytes(io_data.read(2), byteorder='big')

                # Skip name: Pascal string whose length byte + text is
                # padded to an even total.
                name_len = io_data.read(1)[0]
                io_data.read(name_len + (1 if name_len % 2 == 0 else 0))

                # Resource data, padded to an even size
                size = int.from_bytes(io_data.read(4), byteorder='big')
                padded_size = size + (size % 2)
                resource_data = io_data.read(padded_size)[:size]

                if resource_id == iptc_resource:
                    iptc_data.update(self._parse_iptc_data(resource_data))
                elif resource_id == digest_resource:
                    iptc_data['IPTCDigest'] = resource_data.hex()

            except Exception as e:
                self.logger.debug(f"Error parsing Photoshop resource block: {e}")
                break

        return iptc_data
    except Exception as e:
        self.logger.debug(f"Error parsing Photoshop data: {e}")
        return {}
302
+
303
def _parse_iptc_data(self, data: bytes) -> dict:
    """
    Parse raw IPTC (IIM) data bytes.

    Each DataSet is ``0x1C, record, dataset, length`` followed by the value.
    The length is a 2-byte big-endian count; when its high bit is set, the
    remaining 15 bits give the size of an extended length field that follows.
    Bytes that do not start a DataSet are skipped one at a time.

    Args:
        data: Raw IPTC data bytes.
    Returns:
        Dictionary of extracted IPTC fields, keyed by a known field name or
        ``IPTC_<record>:<dataset>``. Values are UTF-8 decoded with
        replacement. A repeated DataSet keeps its last value. Empty on error.
    """
    known_fields = {
        "2:5": "ObjectName",
        "2:25": "Keywords",
        "2:80": "By-line",
        "2:105": "Headline",
        "2:110": "Credit",
        "2:115": "Source",
        "2:120": "Caption-Abstract",
        "2:122": "Writer-Editor",
    }
    iptc_data = {}
    try:
        total = len(data)
        i = 0
        while i < total:
            # A DataSet header needs the marker plus four more bytes.
            if data[i] != 0x1C or i + 5 > total:
                i += 1
                continue

            record = data[i + 1]
            dataset = data[i + 2]
            length = int.from_bytes(data[i + 3:i + 5], byteorder='big')
            i += 5

            if length & 0x8000:
                # Extended DataSet: low 15 bits = size of the length field.
                length_octets = length & 0x7FFF
                if i + length_octets > total:
                    break
                length = int.from_bytes(data[i:i + length_octets], byteorder='big')
                i += length_octets

            # Stop on a truncated value
            if i + length > total:
                break

            key = f"{record}:{dataset}"
            field_name = known_fields.get(key, f"IPTC_{key}")
            iptc_data[field_name] = data[i:i + length].decode('utf-8', errors='replace')
            i += length

        return iptc_data
    except Exception as e:
        self.logger.debug(f"Error parsing IPTC data: {e}")
        return {}
372
+
373
def _extract_apple_gps_from_mime(self, mime_data: bytes, exif_data: Dict) -> None:
    """
    Extract GPS data from Apple's MIME metadata in HEIF files.

    Heuristic: look for ``CNTH`` followed within 4-32 bytes by a ``lat...``
    and a ``lon...`` identifier, and read the 8 bytes after each identifier
    as a big-endian double. Values outside the valid coordinate range are
    ignored.

    Args:
        mime_data: MIME metadata bytes
        exif_data: Dictionary to update in place. On success
            ``exif_data["GPSInfo"]`` receives ``GPSLatitude`` and
            ``GPSLongitude`` as ``(magnitude, 0, 0)`` triples plus the
            matching ``GPSLatitudeRef`` / ``GPSLongitudeRef`` letters.
    """
    try:
        lat_match = re.search(b'CNTH.{4,32}?lat[a-z]*', mime_data, re.DOTALL)
        lon_match = re.search(b'CNTH.{4,32}?lon[a-z]*', mime_data, re.DOTALL)
        if not (lat_match and lon_match):
            return

        lat_pos = lat_match.end()
        lon_pos = lon_match.end()

        # Ensure we have enough bytes to extract both doubles
        if len(mime_data) < max(lat_pos, lon_pos) + 8:
            return

        latitude = struct.unpack_from('>d', mime_data, lat_pos)[0]
        longitude = struct.unpack_from('>d', mime_data, lon_pos)[0]

        # Only use if values seem reasonable (NaN fails both comparisons)
        if not (-90 <= latitude <= 90 and -180 <= longitude <= 180):
            return

        gps = exif_data.setdefault("GPSInfo", {})
        # Store magnitudes only: the sign is carried by the Ref letter, and
        # consumers negate S/W values. Keeping the sign here as well would
        # flip southern/western coordinates back to positive.
        gps["GPSLatitude"] = (abs(latitude), 0, 0)
        gps["GPSLongitude"] = (abs(longitude), 0, 0)
        gps["GPSLatitudeRef"] = "N" if latitude >= 0 else "S"
        gps["GPSLongitudeRef"] = "E" if longitude >= 0 else "W"
    except Exception as e:
        self.logger.debug(f"Error extracting GPS from Apple MIME data: {e}")
417
+
418
def _extract_gps_from_apple_makernote(self, maker_note: str) -> Optional[Dict]:
    """
    Guess GPS coordinates from the text of an Apple MakerNote.

    Every decimal number in the string is collected, and the first pair of
    neighbouring numbers that fits the latitude/longitude ranges is returned.

    Args:
        maker_note: Apple MakerNote string
    Returns:
        ``{"latitude": ..., "longitude": ...}`` for the first plausible
        pair, None otherwise (including on error).
    """
    try:
        decimals = re.findall(r'([-+]?\d+\.\d+)', maker_note)

        # Walk neighbouring numbers; zip yields nothing for fewer than two.
        for first, second in zip(decimals, decimals[1:]):
            try:
                lat = float(first)
                lon = float(second)
            except ValueError:
                continue
            if -90 <= lat <= 90 and -180 <= lon <= 180:
                return {
                    "latitude": lat,
                    "longitude": lon
                }

        # A binary property list would need a dedicated parser; only report it.
        if b'bplist' in maker_note.encode('utf-8', errors='ignore'):
            self.logger.debug("Found binary plist in MakerNote, specialized parsing needed")

        return None
    except Exception as e:
        self.logger.debug(f"Error extracting GPS from Apple MakerNote: {e}")
        return None
460
+
461
async def extract_exif_heif(self, heif_image) -> Optional[Dict]:
    """
    Extract EXIF data from a HEIF/HEIC image object.

    Args:
        heif_image: HEIF image object whose ``metadata`` attribute is an
            iterable (or None) of items exposing ``type`` and ``data``.
    Returns:
        JSON-safe dictionary of EXIF data, or None if no EXIF data exists
        or an error occurs. When ``self.extract_geoloc`` is set and
        coordinates are found, they are added under ``gps_info``.
    """
    try:
        exif_data = {}

        for metadata in heif_image.metadata or []:
            if metadata.type == 'Exif':
                exif_bytes = metadata.data
                if exif_bytes and len(exif_bytes) > 8:
                    try:
                        payload = bytes(exif_bytes)
                        # The TIFF header sits behind a short, variable
                        # prefix, so locate the byte-order mark instead of
                        # assuming a fixed 8-byte skip.
                        starts = [
                            pos for pos in (
                                payload.find(b'II*\x00', 0, 32),
                                payload.find(b'MM\x00*', 0, 32),
                            ) if pos >= 0
                        ]
                        if not starts:
                            raise ValueError("TIFF header not found in HEIF Exif payload")

                        # Parse the embedded TIFF directly. Image.open()
                        # rewinds the stream (so a prior seek is lost), and
                        # _getexif() is not provided by the TIFF plugin.
                        parsed = Image.Exif()
                        parsed.load(payload[min(starts):])

                        # Base IFD plus the Exif sub-IFD (dates, MakerNote...)
                        merged = dict(parsed)
                        merged.update(parsed.get_ifd(IFD.Exif))
                        for tag, value in merged.items():
                            decoded = TAGS.get(tag, tag)
                            if decoded != "GPSInfo":
                                exif_data[decoded] = _make_serialisable(value)

                        gps_ifd = parsed.get_ifd(IFD.GPSInfo)
                        if gps_ifd:
                            exif_data["GPSInfo"] = {
                                GPSTAGS.get(t, t): v for t, v in gps_ifd.items()
                            }
                    except Exception as e:
                        self.logger.debug(f"Error processing HEIF EXIF data: {e}")

            # Apple HEIF files may store GPS in 'mime' type metadata with 'CNTH' format
            elif metadata.type == 'mime':
                try:
                    mime_data = metadata.data
                    if b'CNTH' in mime_data:
                        self._extract_apple_gps_from_mime(mime_data, exif_data)
                except Exception as e:
                    self.logger.debug(f"Error processing Apple MIME metadata: {e}")

        # Extract GPS datetime if available and requested
        if self.extract_geoloc:
            # First try standard GPSInfo
            if "GPSInfo" in exif_data:
                gps_datetime = self.extract_gps_datetime(exif_data)
                if gps_datetime.get("latitude") is not None and gps_datetime.get("longitude") is not None:
                    exif_data['gps_info'] = gps_datetime

            # If no GPS found yet, try Apple's MakerNote for GPS data
            has_valid_gps = (
                'gps_info' in exif_data
                and exif_data['gps_info'].get('latitude') is not None
            )
            if not has_valid_gps and 'MakerNote' in exif_data:
                apple_gps = self._extract_gps_from_apple_makernote(exif_data['MakerNote'])
                if apple_gps:
                    exif_data['gps_info'] = {
                        "datetime": exif_data.get("DateTimeOriginal") or exif_data.get("DateTime"),
                        "latitude": apple_gps.get("latitude"),
                        "longitude": apple_gps.get("longitude")
                    }

        return _json_safe(exif_data) if exif_data else None

    except Exception as e:
        self.logger.error(f'Error extracting HEIF EXIF data: {e}')
        return None
544
+
545
async def extract_exif_data(self, image) -> dict:
    """
    Extract EXIF data from the image file object.

    Three sources are tried in order: ``image.getexif()``; the ``tag_v2`` /
    ``tag`` attributes; and finally the entries of ``image.info``. An XMP
    ``ModifyDate`` is added first when one can be read.

    Args:
        image: The PIL Image object.
    Returns:
        JSON-safe dictionary of EXIF data, or an empty dict if no EXIF data
        exists or an error occurs. With ``self.extract_geoloc`` set and a
        GPS mapping present, a ``gps_info`` summary is included.
    """
    try:
        exif = {}
        # Check Modify Date (if any):
        try:
            modify_date = get_xmp_modify_date(image)
            if modify_date:
                exif["ModifyDate"] = modify_date
        except Exception as e:
            self.logger.debug(f"Error getting XMP ModifyDate: {e}")

        if hasattr(image, 'getexif'):
            exif_data = image.getexif()
            if exif_data:
                for tag, value in exif_data.items():
                    if tag not in ExifTags.TAGS:
                        continue
                    decoded = TAGS.get(tag, tag)
                    if decoded == "GPSInfo" and isinstance(value, Mapping):
                        # Already-resolved GPS directory: name its tags.
                        exif["GPSInfo"] = {
                            GPSTAGS.get(t, t): v for t, v in value.items()
                        }
                    elif decoded == "UserComment" and isinstance(value, str):
                        try:
                            # Try to decode base64 UserComment
                            exif[decoded] = base64.b64decode(value).decode('utf-8', errors='replace')
                        except Exception:
                            # If decoding fails, use original value
                            exif[decoded] = _make_serialisable(value)
                    else:
                        # For GPSInfo this may be a bare integer offset; it
                        # is replaced below once the GPS IFD is read.
                        exif[decoded] = _make_serialisable(value)

                # Aperture, shutter, flash, lens, tz offset, etc.
                # Unknown tags keep their numeric id instead of raising.
                for key, val in exif_data.get_ifd(0x8769).items():
                    exif[TAGS.get(key, key)] = _make_serialisable(val)

                for ifd_id in IFD:
                    try:
                        ifd = exif_data.get_ifd(ifd_id)
                        is_gps = ifd_id == IFD.GPSInfo
                        resolve = GPSTAGS if is_gps else TAGS
                        for k, v in ifd.items():
                            tag = resolve.get(k, k)
                            try:
                                exif[tag] = _make_serialisable(v)
                            except Exception:
                                exif[tag] = v
                        if is_gps and ifd:
                            # Also expose the GPS directory as a mapping so
                            # coordinate extraction can read it.
                            exif["GPSInfo"] = {
                                GPSTAGS.get(k, k): v for k, v in ifd.items()
                            }
                    except KeyError:
                        pass
        elif hasattr(image, 'tag') and hasattr(image, 'tag_v2'):
            # Objects that store data in tag and tag_v2 attributes;
            # tag_v2 is read first.
            for tag, value in image.tag_v2.items():
                tag_name = TAGS.get(tag, tag)
                if tag_name == "GPSInfo":
                    # GPS data might be in a nested IFD
                    if isinstance(value, dict):
                        exif["GPSInfo"] = {
                            GPSTAGS.get(gps_tag, gps_tag): gps_value
                            for gps_tag, gps_value in value.items()
                        }
                else:
                    exif[tag_name] = _make_serialisable(value)

            # Fall back to tag if needed
            if not exif and hasattr(image, 'tag'):
                for tag, value in image.tag.items():
                    exif[TAGS.get(tag, tag)] = _make_serialisable(value)

        else:
            # For other formats, try to extract directly from image.info
            for key, value in image.info.items():
                if key.startswith('exif'):
                    # Keys like 'exif' or 'exif_ifd'
                    if isinstance(value, dict):
                        exif.update(value)
                    elif isinstance(value, bytes):
                        # Try to parse bytes as EXIF data
                        try:
                            exif_image = TiffImagePlugin.TiffImageFile(BytesIO(value))
                            if hasattr(exif_image, 'tag_v2'):
                                for tag, val in exif_image.tag_v2.items():
                                    exif[TAGS.get(tag, tag)] = _make_serialisable(val)
                        except Exception as e:
                            self.logger.debug(f"Error parsing EXIF bytes: {e}")
                else:
                    # Add other metadata
                    exif[key] = _make_serialisable(value)

        # Extract GPS datetime if available. Require a mapping: a bare
        # integer offset cannot be queried for coordinates.
        if self.extract_geoloc and isinstance(exif.get("GPSInfo"), Mapping):
            gps_datetime = self.extract_gps_datetime(exif)
            if gps_datetime:
                exif['gps_info'] = gps_datetime

        return _json_safe(exif) if exif else {}
    except (AttributeError, KeyError) as e:
        self.logger.debug(f'Error extracting PIL EXIF data: {e}')
        return {}
    except Exception as e:
        self.logger.error(f'Unexpected error extracting PIL EXIF data: {e}')
        return {}
664
+
665
async def analyze(self, image: Optional["Image.Image"] = None, heif: Any = None, **kwargs) -> dict:
    """
    Extract EXIF (and IPTC) data from the given image.

    Results are merged in the order HEIF EXIF, PIL EXIF, PIL IPTC; on a key
    collision the later source overwrites the earlier one.

    :param image: PIL Image object (optional)
    :param heif: HEIF image object (optional)
    :return: Dictionary containing the merged metadata; empty on error or
        when neither input is supplied
    """
    try:
        exif_data = {}

        # Process HEIF image if provided (runs first)
        if heif is not None:
            try:
                heif_exif = await self.extract_exif_heif(heif)
                if heif_exif:
                    exif_data.update(heif_exif)
            except Exception as e:
                self.logger.error(f"Error extracting EXIF from HEIF image: {e}")

        # Process PIL image if provided
        if image is not None:
            try:
                pil_exif = await self.extract_exif_data(image)
                if pil_exif:
                    exif_data.update(pil_exif)
            except Exception as e:
                self.logger.error(f"Error extracting EXIF from PIL image: {e}")

            # IPTC is read from the PIL image too, so it only makes sense
            # when one was supplied; without an image the lookup can only
            # fail and log an error.
            try:
                pil_iptc = await self.extract_iptc_data(image)
                if pil_iptc:
                    exif_data.update(pil_iptc)
            except Exception as e:
                self.logger.error(
                    f"Error extracting IPTC data from PIL image: {e}"
                )

        return exif_data
    except Exception as e:
        self.logger.error(f"Error in EXIF analysis: {str(e)}")
        return {}