ai-parrot 0.8.3__cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ai-parrot might be problematic. Click here for more details.
- ai_parrot-0.8.3.dist-info/LICENSE +21 -0
- ai_parrot-0.8.3.dist-info/METADATA +306 -0
- ai_parrot-0.8.3.dist-info/RECORD +128 -0
- ai_parrot-0.8.3.dist-info/WHEEL +6 -0
- ai_parrot-0.8.3.dist-info/top_level.txt +2 -0
- parrot/__init__.py +30 -0
- parrot/bots/__init__.py +5 -0
- parrot/bots/abstract.py +1115 -0
- parrot/bots/agent.py +492 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/bose.py +17 -0
- parrot/bots/chatbot.py +271 -0
- parrot/bots/cody.py +17 -0
- parrot/bots/copilot.py +117 -0
- parrot/bots/data.py +730 -0
- parrot/bots/dataframe.py +103 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/interfaces/__init__.py +1 -0
- parrot/bots/interfaces/retrievers.py +12 -0
- parrot/bots/notebook.py +619 -0
- parrot/bots/odoo.py +17 -0
- parrot/bots/prompts/__init__.py +41 -0
- parrot/bots/prompts/agents.py +91 -0
- parrot/bots/prompts/data.py +214 -0
- parrot/bots/retrievals/__init__.py +1 -0
- parrot/bots/retrievals/constitutional.py +19 -0
- parrot/bots/retrievals/multi.py +122 -0
- parrot/bots/retrievals/retrieval.py +610 -0
- parrot/bots/tools/__init__.py +7 -0
- parrot/bots/tools/eda.py +325 -0
- parrot/bots/tools/pdf.py +50 -0
- parrot/bots/tools/plot.py +48 -0
- parrot/bots/troc.py +16 -0
- parrot/conf.py +170 -0
- parrot/crew/__init__.py +3 -0
- parrot/crew/tools/__init__.py +22 -0
- parrot/crew/tools/bing.py +13 -0
- parrot/crew/tools/config.py +43 -0
- parrot/crew/tools/duckgo.py +62 -0
- parrot/crew/tools/file.py +24 -0
- parrot/crew/tools/google.py +168 -0
- parrot/crew/tools/gtrends.py +16 -0
- parrot/crew/tools/md2pdf.py +25 -0
- parrot/crew/tools/rag.py +42 -0
- parrot/crew/tools/search.py +32 -0
- parrot/crew/tools/url.py +21 -0
- parrot/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agents.py +292 -0
- parrot/handlers/bots.py +196 -0
- parrot/handlers/chat.py +192 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/http.py +805 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +18 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/exif.py +709 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/llms/__init__.py +1 -0
- parrot/llms/abstract.py +69 -0
- parrot/llms/anthropic.py +58 -0
- parrot/llms/gemma.py +15 -0
- parrot/llms/google.py +44 -0
- parrot/llms/groq.py +67 -0
- parrot/llms/hf.py +45 -0
- parrot/llms/openai.py +61 -0
- parrot/llms/pipes.py +114 -0
- parrot/llms/vertex.py +89 -0
- parrot/loaders/__init__.py +9 -0
- parrot/loaders/abstract.py +628 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/txt.py +26 -0
- parrot/manager.py +333 -0
- parrot/models.py +504 -0
- parrot/py.typed +0 -0
- parrot/stores/__init__.py +11 -0
- parrot/stores/abstract.py +248 -0
- parrot/stores/chroma.py +188 -0
- parrot/stores/duck.py +162 -0
- parrot/stores/embeddings/__init__.py +10 -0
- parrot/stores/embeddings/abstract.py +46 -0
- parrot/stores/embeddings/base.py +52 -0
- parrot/stores/embeddings/bge.py +20 -0
- parrot/stores/embeddings/fastembed.py +17 -0
- parrot/stores/embeddings/google.py +18 -0
- parrot/stores/embeddings/huggingface.py +20 -0
- parrot/stores/embeddings/ollama.py +14 -0
- parrot/stores/embeddings/openai.py +26 -0
- parrot/stores/embeddings/transformers.py +21 -0
- parrot/stores/embeddings/vertexai.py +17 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss.py +160 -0
- parrot/stores/milvus.py +397 -0
- parrot/stores/postgres.py +653 -0
- parrot/stores/qdrant.py +170 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +68 -0
- parrot/tools/asknews.py +33 -0
- parrot/tools/basic.py +51 -0
- parrot/tools/bby.py +359 -0
- parrot/tools/bing.py +13 -0
- parrot/tools/docx.py +343 -0
- parrot/tools/duck.py +62 -0
- parrot/tools/execute.py +56 -0
- parrot/tools/gamma.py +28 -0
- parrot/tools/google.py +170 -0
- parrot/tools/gvoice.py +301 -0
- parrot/tools/results.py +278 -0
- parrot/tools/stack.py +27 -0
- parrot/tools/weather.py +70 -0
- parrot/tools/wikipedia.py +58 -0
- parrot/tools/zipcode.py +198 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpython-312-x86_64-linux-gnu.so +0 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- resources/users/__init__.py +5 -0
- resources/users/handlers.py +13 -0
- resources/users/models.py +205 -0
|
@@ -0,0 +1,709 @@
|
|
|
1
|
+
from collections.abc import Mapping, Sequence
|
|
2
|
+
from typing import Any, Dict, Optional
|
|
3
|
+
import re
|
|
4
|
+
import struct
|
|
5
|
+
from io import BytesIO
|
|
6
|
+
from PIL import Image, ExifTags, PngImagePlugin
|
|
7
|
+
from PIL.ExifTags import TAGS, GPSTAGS, IFD
|
|
8
|
+
from PIL import TiffImagePlugin
|
|
9
|
+
from PIL.TiffImagePlugin import IFDRational
|
|
10
|
+
from libxmp import XMPFiles, consts
|
|
11
|
+
from pillow_heif import register_heif_opener
|
|
12
|
+
from .abstract import ImagePlugin
|
|
13
|
+
import base64
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
register_heif_opener() # ADD HEIF support
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _json_safe(obj):
|
|
20
|
+
"""Return a structure containing only JSON‑serialisable scalar types,
|
|
21
|
+
no IFDRational, no bytes, and **no NUL characters**."""
|
|
22
|
+
if isinstance(obj, IFDRational):
|
|
23
|
+
return float(obj)
|
|
24
|
+
|
|
25
|
+
if isinstance(obj, bytes):
|
|
26
|
+
# bytes -> str *and* strip embedded NULs
|
|
27
|
+
return obj.decode(errors="replace").replace('\x00', '')
|
|
28
|
+
|
|
29
|
+
if isinstance(obj, str):
|
|
30
|
+
# Remove NUL chars from normal strings too
|
|
31
|
+
return obj.replace('\x00', '')
|
|
32
|
+
|
|
33
|
+
if isinstance(obj, Mapping):
|
|
34
|
+
return {k: _json_safe(v) for k, v in obj.items()}
|
|
35
|
+
|
|
36
|
+
if isinstance(obj, Sequence) and not isinstance(obj, (str, bytes, bytearray)):
|
|
37
|
+
return [_json_safe(v) for v in obj]
|
|
38
|
+
|
|
39
|
+
return obj
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _make_serialisable(val):
    """Convert a single metadata value to a plain Python scalar.

    ``IFDRational`` becomes ``float`` and ``bytes`` becomes ``str`` (with
    undecodable bytes replaced). The conversion is shallow: containers and
    every other type are returned untouched.
    """
    if isinstance(val, IFDRational):
        converted = float(val)
    elif isinstance(val, bytes):
        converted = val.decode(errors="replace")
    else:
        converted = val
    return converted
|
|
48
|
+
|
|
49
|
+
def get_xmp_modify_date(image, path: Optional[str] = None) -> str | None:
|
|
50
|
+
# 1) Try to grab the raw XMP packet from the JPEG APP1 segment
|
|
51
|
+
raw_xmp = image.info.get("XML:com.adobe.xmp")
|
|
52
|
+
if raw_xmp:
|
|
53
|
+
# 2) Feed it to XMPFiles via a buffer
|
|
54
|
+
xmpfile = XMPFiles(buffer=raw_xmp)
|
|
55
|
+
else:
|
|
56
|
+
# fallback: let XMPFiles pull directly from the file
|
|
57
|
+
# xmpfile = XMPFiles(file_path=path)
|
|
58
|
+
return None
|
|
59
|
+
|
|
60
|
+
xmp = xmpfile.get_xmp()
|
|
61
|
+
if not xmp:
|
|
62
|
+
return None
|
|
63
|
+
|
|
64
|
+
# 3) Common XMP namespaces & properties for modification history:
|
|
65
|
+
# - consts.XMP_NS_XMP / "ModifyDate"
|
|
66
|
+
modify = xmp.get_property(consts.XMP_NS_XMP, "ModifyDate")
|
|
67
|
+
|
|
68
|
+
xmpfile.close_file()
|
|
69
|
+
|
|
70
|
+
return modify
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class EXIFPlugin(ImagePlugin):
|
|
74
|
+
"""
|
|
75
|
+
EXIFPlugin is a plugin for extracting EXIF data from images.
|
|
76
|
+
It extends the ImagePlugin class and implements the analyze method to extract EXIF data.
|
|
77
|
+
"""
|
|
78
|
+
column_name: str = "exif_data"
|
|
79
|
+
|
|
80
|
+
def __init__(self, *args, **kwargs):
    """Initialise the plugin.

    The optional ``extract_geoloc`` keyword (default ``False``) is stored on
    the instance; all positional and keyword arguments are then forwarded
    unchanged to the parent initialiser.
    """
    # Peek at the flag without popping it: kwargs is passed on as received.
    geoloc_requested = kwargs.get("extract_geoloc", False)
    self.extract_geoloc: bool = geoloc_requested
    super().__init__(*args, **kwargs)
|
|
83
|
+
|
|
84
|
+
def convert_to_degrees(self, value):
    """
    Convert a GPS (degrees, minutes, seconds) triple to decimal degrees.

    Each component may be a plain number, an object exposing ``num``/``den``
    or ``numerator``/``denominator``, or a ``(numerator, denominator)`` pair.
    A component with a zero denominator, or a ``None`` component, counts
    as ``0.0``.

    Args:
        value: Sequence with at least three components.
    Returns:
        Decimal degrees as ``float``, or ``None`` when the value is
        malformed, cannot be converted, or the result is not finite.
    """
    def to_float(component):
        if component is None:
            return 0.0
        if isinstance(component, (tuple, list)) and len(component) == 2:
            num, den = component
        elif hasattr(component, "num") and hasattr(component, "den"):
            num, den = component.num, component.den
        elif hasattr(component, "numerator") and hasattr(component, "denominator"):
            num, den = component.numerator, component.denominator
        else:
            # Handle non-rational values
            return float(component)
        # Prevent division by zero
        if den == 0:
            return 0.0
        return float(num) / float(den)

    try:
        # Ensure all three components exist
        if len(value) < 3 or None in value:
            self.logger.warning(f"Invalid GPS value format: {value}")
            return None

        d, m, s = (to_float(part) for part in value[:3])
        degrees = d + (m / 60.0) + (s / 3600.0)

        # NaN / infinity is not a usable coordinate.
        if not float("-inf") < degrees < float("inf"):
            self.logger.debug(f"Error converting GPS value to degrees: non-finite result for {value}")
            return None
        return degrees
    except Exception as e:
        self.logger.debug(f"Error converting GPS value to degrees: {e}")
        return None
|
|
113
|
+
|
|
114
|
+
def extract_gps_datetime(self, exif: dict):
    """
    Build a ``{"datetime", "latitude", "longitude"}`` summary from EXIF data.

    Args:
        exif: Decoded EXIF dictionary. GPS values are read from the mapping
            stored under ``"GPSInfo"``; the timestamp is taken from
            ``"DateTimeOriginal"`` and falls back to ``"DateTime"``.
    Returns:
        Dictionary with the three keys above. Latitude and longitude are
        signed decimal degrees (south and west negative) or ``None`` when
        they are missing or cannot be converted.
    """
    def hemisphere(ref):
        # Refs may arrive as bytes, lower case or padded with NUL/space.
        if isinstance(ref, bytes):
            ref = ref.decode(errors="replace")
        return str(ref).strip("\x00 \t\r\n").upper()[:1]

    gps = exif.get("GPSInfo")
    if not isinstance(gps, Mapping):
        # Anything that is not a mapping (None, a raw IFD offset) means
        # "no GPS data".
        gps = {}
    timestamp = exif.get("DateTimeOriginal") or exif.get("DateTime")

    latitude = longitude = None

    lat = gps.get("GPSLatitude")
    lat_ref = gps.get("GPSLatitudeRef")
    lon = gps.get("GPSLongitude")
    lon_ref = gps.get("GPSLongitudeRef")

    if lat and lat_ref and lon and lon_ref:
        # Convert coordinates to degrees
        latitude = self.convert_to_degrees(lat)
        longitude = self.convert_to_degrees(lon)

        # Apply the reference direction only if conversion succeeded.
        # -abs() keeps an already-negative value negative instead of
        # flipping it back to positive.
        if latitude is not None and hemisphere(lat_ref) == "S":
            latitude = -abs(latitude)

        if longitude is not None and hemisphere(lon_ref) == "W":
            longitude = -abs(longitude)

    return {
        "datetime": timestamp,
        "latitude": latitude,
        "longitude": longitude
    }
|
|
146
|
+
|
|
147
|
+
async def extract_iptc_data(self, image) -> dict:
    """
    Extract IPTC metadata from an image.

    Sources are tried in this order; the later ones only run while nothing
    has been found yet: ``image.info['photoshop']`` / ``image.info['iptc']``,
    JPEG ``applist`` APP13 segments, TIFF ``tag_v2`` entries and PNG ``text``
    chunks. An ``IPTCDigest`` found in ``image.info`` or inside the XMP
    packet is always recorded.

    Args:
        image: The PIL Image object (``None`` yields an empty result).
    Returns:
        JSON-safe dictionary of IPTC data or empty dict if no IPTC data exists.
    """
    if image is None:
        # Nothing to inspect; avoid a spurious error log.
        return {}

    try:
        info = getattr(image, 'info', None) or {}
        iptc_data = {}

        # Try to get IPTC data from image.info
        if 'photoshop' in info:
            iptc_data = self._parse_photoshop_data(info['photoshop'])
        elif 'iptc' in info:
            iptc = info['iptc']
            if isinstance(iptc, bytes):
                iptc_data.update(self._parse_iptc_data(iptc))
            elif isinstance(iptc, dict):
                iptc_data.update(iptc)

        # Check for IPTCDigest directly
        if 'IPTCDigest' in info:
            iptc_data['IPTCDigest'] = info['IPTCDigest']

        # For JPEG images, try to get IPTC from APP13 segment directly
        if not iptc_data and hasattr(image, 'applist'):
            for segment, content in image.applist:
                if segment == 'APP13' and b'Photoshop 3.0' in content:
                    iptc_data = self._parse_photoshop_data(content)
                    break

        # For TIFF, check for IPTC data in specific tags
        if not iptc_data and hasattr(image, 'tag_v2'):
            # 33723 is the IPTC tag in TIFF
            if 33723 in image.tag_v2:
                iptc_raw = image.tag_v2[33723]
                if isinstance(iptc_raw, bytes):
                    iptc_data.update(self._parse_iptc_data(iptc_raw))

            # Keep the raw content of tags that might contain IPTC data
            for tag in (700, 33723, 34377):
                if tag in image.tag_v2:
                    tag_name = TAGS.get(tag, f"Tag_{tag}")
                    iptc_data[tag_name] = _make_serialisable(image.tag_v2[tag])

        # For PNG, try to get iTXt or tEXt chunks that might contain IPTC.
        # The 'IPTC' prefix test also covers the 'IPTCDigest' key.
        if not iptc_data and hasattr(image, 'text'):
            for key, value in image.text.items():
                if key.startswith('IPTC') or key == 'XML:com.adobe.xmp':
                    iptc_data[key] = value

        # For XMP metadata in any image format
        xmp_data = info.get('XML:com.adobe.xmp')
        if isinstance(xmp_data, (bytes, bytearray)):
            # The packet may be stored as raw bytes; decode before matching.
            xmp_data = bytes(xmp_data).decode('utf-8', errors='replace')
        if isinstance(xmp_data, str) and 'IPTCDigest' in xmp_data:
            # Simple pattern matching for IPTCDigest in XMP
            match = re.search(r'IPTCDigest="([^"]+)"', xmp_data)
            if match:
                iptc_data['IPTCDigest'] = match.group(1)

        return _json_safe(iptc_data) if iptc_data else {}
    except Exception as e:
        self.logger.error(f'Error extracting IPTC data: {e}')
        return {}
|
|
223
|
+
|
|
224
|
+
def _parse_photoshop_data(self, data) -> dict:
|
|
225
|
+
"""
|
|
226
|
+
Parse Photoshop data block to extract IPTC metadata.
|
|
227
|
+
|
|
228
|
+
Args:
|
|
229
|
+
data: Raw Photoshop data (bytes or dict) from APP13 segment.
|
|
230
|
+
Returns:
|
|
231
|
+
Dictionary of extracted IPTC data.
|
|
232
|
+
"""
|
|
233
|
+
iptc_data = {}
|
|
234
|
+
try:
|
|
235
|
+
# Handle the case where data is already a dictionary
|
|
236
|
+
if isinstance(data, dict):
|
|
237
|
+
# If it's a dictionary, check for IPTCDigest key directly
|
|
238
|
+
if 'IPTCDigest' in data:
|
|
239
|
+
iptc_data['IPTCDigest'] = data['IPTCDigest']
|
|
240
|
+
|
|
241
|
+
# Check for IPTC data
|
|
242
|
+
if 'IPTC' in data or 1028 in data: # 1028 (0x0404) is the IPTC identifier
|
|
243
|
+
iptc_block = data.get('IPTC', data.get(1028, b''))
|
|
244
|
+
if isinstance(iptc_block, bytes):
|
|
245
|
+
iptc_records = self._parse_iptc_data(iptc_block)
|
|
246
|
+
iptc_data.update(iptc_records)
|
|
247
|
+
|
|
248
|
+
return iptc_data
|
|
249
|
+
|
|
250
|
+
# If it's bytes, proceed with the original implementation
|
|
251
|
+
if not isinstance(data, bytes):
|
|
252
|
+
self.logger.debug(f"Expected bytes for Photoshop data, got {type(data)}")
|
|
253
|
+
return {}
|
|
254
|
+
|
|
255
|
+
# Find Photoshop resource markers
|
|
256
|
+
offset = data.find(b'8BIM')
|
|
257
|
+
if offset < 0:
|
|
258
|
+
return {}
|
|
259
|
+
|
|
260
|
+
io_data = BytesIO(data)
|
|
261
|
+
io_data.seek(offset)
|
|
262
|
+
|
|
263
|
+
while True:
|
|
264
|
+
# Try to read a Photoshop resource block
|
|
265
|
+
try:
|
|
266
|
+
signature = io_data.read(4)
|
|
267
|
+
if signature != b'8BIM':
|
|
268
|
+
break
|
|
269
|
+
|
|
270
|
+
# Resource identifier (2 bytes)
|
|
271
|
+
resource_id = int.from_bytes(io_data.read(2), byteorder='big')
|
|
272
|
+
|
|
273
|
+
# Skip name: Pascal string padded to even length
|
|
274
|
+
name_len = io_data.read(1)[0]
|
|
275
|
+
name_bytes_to_read = name_len + (1 if name_len % 2 == 0 else 0)
|
|
276
|
+
io_data.read(name_bytes_to_read)
|
|
277
|
+
|
|
278
|
+
# Resource data
|
|
279
|
+
size = int.from_bytes(io_data.read(4), byteorder='big')
|
|
280
|
+
padded_size = size + (1 if size % 2 == 1 else 0)
|
|
281
|
+
|
|
282
|
+
resource_data = io_data.read(padded_size)[:size] # Trim padding if present
|
|
283
|
+
|
|
284
|
+
# Process specific resource types
|
|
285
|
+
if resource_id == 0x0404: # IPTC-NAA record (0x0404)
|
|
286
|
+
iptc_records = self._parse_iptc_data(resource_data)
|
|
287
|
+
iptc_data.update(iptc_records)
|
|
288
|
+
elif resource_id == 0x040F: # IPTCDigest (0x040F)
|
|
289
|
+
iptc_data['IPTCDigest'] = resource_data.hex()
|
|
290
|
+
elif resource_id == 0x0425: # EXIF data (1045)
|
|
291
|
+
# Already handled by the EXIF extraction but could process here if needed
|
|
292
|
+
pass
|
|
293
|
+
|
|
294
|
+
except Exception as e:
|
|
295
|
+
self.logger.debug(f"Error parsing Photoshop resource block: {e}")
|
|
296
|
+
break
|
|
297
|
+
|
|
298
|
+
return iptc_data
|
|
299
|
+
except Exception as e:
|
|
300
|
+
self.logger.debug(f"Error parsing Photoshop data: {e}")
|
|
301
|
+
return {}
|
|
302
|
+
|
|
303
|
+
def _parse_iptc_data(self, data: bytes) -> dict:
|
|
304
|
+
"""
|
|
305
|
+
Parse raw IPTC data bytes.
|
|
306
|
+
|
|
307
|
+
Args:
|
|
308
|
+
data: Raw IPTC data bytes.
|
|
309
|
+
Returns:
|
|
310
|
+
Dictionary of extracted IPTC fields.
|
|
311
|
+
"""
|
|
312
|
+
iptc_data = {}
|
|
313
|
+
try:
|
|
314
|
+
# IPTC marker (0x1C) followed by record number (1 byte) and dataset number (1 byte)
|
|
315
|
+
i = 0
|
|
316
|
+
while i < len(data):
|
|
317
|
+
# Look for IPTC marker
|
|
318
|
+
if i + 4 <= len(data) and data[i] == 0x1C:
|
|
319
|
+
record = data[i+1]
|
|
320
|
+
dataset = data[i+2]
|
|
321
|
+
|
|
322
|
+
# Length of the data field (can be 1, 2, or 4 bytes)
|
|
323
|
+
if data[i+3] & 0x80: # Check if the high bit is set
|
|
324
|
+
# Extended length - 4 bytes
|
|
325
|
+
if i + 8 <= len(data):
|
|
326
|
+
length = int.from_bytes(data[i+4:i+8], byteorder='big')
|
|
327
|
+
i += 8
|
|
328
|
+
else:
|
|
329
|
+
break
|
|
330
|
+
else:
|
|
331
|
+
# Standard length - 1 byte
|
|
332
|
+
length = data[i+3]
|
|
333
|
+
i += 4
|
|
334
|
+
|
|
335
|
+
# Check if we have enough data
|
|
336
|
+
if i + length <= len(data):
|
|
337
|
+
field_data = data[i:i+length]
|
|
338
|
+
|
|
339
|
+
# Convert to string if possible
|
|
340
|
+
try:
|
|
341
|
+
field_value = field_data.decode('utf-8', errors='replace')
|
|
342
|
+
except UnicodeDecodeError:
|
|
343
|
+
field_value = field_data.hex()
|
|
344
|
+
|
|
345
|
+
# Map record:dataset to meaningful names - simplified example
|
|
346
|
+
key = f"{record}:{dataset}"
|
|
347
|
+
# Known IPTC fields
|
|
348
|
+
iptc_fields = {
|
|
349
|
+
"2:5": "ObjectName",
|
|
350
|
+
"2:25": "Keywords",
|
|
351
|
+
"2:80": "By-line",
|
|
352
|
+
"2:105": "Headline",
|
|
353
|
+
"2:110": "Credit",
|
|
354
|
+
"2:115": "Source",
|
|
355
|
+
"2:120": "Caption-Abstract",
|
|
356
|
+
"2:122": "Writer-Editor",
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
field_name = iptc_fields.get(key, f"IPTC_{key}")
|
|
360
|
+
iptc_data[field_name] = field_value
|
|
361
|
+
|
|
362
|
+
i += length
|
|
363
|
+
else:
|
|
364
|
+
break
|
|
365
|
+
else:
|
|
366
|
+
i += 1
|
|
367
|
+
|
|
368
|
+
return iptc_data
|
|
369
|
+
except Exception as e:
|
|
370
|
+
self.logger.debug(f"Error parsing IPTC data: {e}")
|
|
371
|
+
return {}
|
|
372
|
+
|
|
373
|
+
def _extract_apple_gps_from_mime(self, mime_data: bytes, exif_data: Dict) -> None:
|
|
374
|
+
"""
|
|
375
|
+
Extract GPS data from Apple's MIME metadata in HEIF files.
|
|
376
|
+
|
|
377
|
+
Args:
|
|
378
|
+
mime_data: MIME metadata bytes
|
|
379
|
+
exif_data: Dictionary to update with GPS data
|
|
380
|
+
"""
|
|
381
|
+
try:
|
|
382
|
+
# Apple stores GPS in a complex binary format
|
|
383
|
+
# We'll search for specific patterns indicating GPS data
|
|
384
|
+
# Look for patterns that might indicate GPS coordinates
|
|
385
|
+
# Apple often stores these as 8-byte IEEE-754 double-precision values
|
|
386
|
+
lat_pattern = re.compile(b'CNTH.{4,32}?lat[a-z]*', re.DOTALL)
|
|
387
|
+
lon_pattern = re.compile(b'CNTH.{4,32}?lon[a-z]*', re.DOTALL)
|
|
388
|
+
|
|
389
|
+
lat_match = lat_pattern.search(mime_data)
|
|
390
|
+
lon_match = lon_pattern.search(mime_data)
|
|
391
|
+
|
|
392
|
+
if lat_match and lon_match:
|
|
393
|
+
# Try to find the 8-byte double values after the identifiers
|
|
394
|
+
lat_pos = lat_match.end()
|
|
395
|
+
lon_pos = lon_match.end()
|
|
396
|
+
|
|
397
|
+
# Ensure we have enough bytes to extract the doubles
|
|
398
|
+
if len(mime_data) >= lat_pos + 8 and len(mime_data) >= lon_pos + 8:
|
|
399
|
+
try:
|
|
400
|
+
latitude = struct.unpack('>d', mime_data[lat_pos:lat_pos + 8])[0]
|
|
401
|
+
longitude = struct.unpack('>d', mime_data[lon_pos:lon_pos + 8])[0]
|
|
402
|
+
|
|
403
|
+
# Only use if values seem reasonable
|
|
404
|
+
if -90 <= latitude <= 90 and -180 <= longitude <= 180:
|
|
405
|
+
if "GPSInfo" not in exif_data:
|
|
406
|
+
exif_data["GPSInfo"] = {}
|
|
407
|
+
|
|
408
|
+
exif_data["GPSInfo"]["GPSLatitude"] = (latitude, 0, 0)
|
|
409
|
+
exif_data["GPSInfo"]["GPSLongitude"] = (longitude, 0, 0)
|
|
410
|
+
exif_data["GPSInfo"]["GPSLatitudeRef"] = "N" if latitude >= 0 else "S"
|
|
411
|
+
exif_data["GPSInfo"]["GPSLongitudeRef"] = "E" if longitude >= 0 else "W"
|
|
412
|
+
except Exception:
|
|
413
|
+
# Silently fail if unpacking doesn't work
|
|
414
|
+
pass
|
|
415
|
+
except Exception as e:
|
|
416
|
+
self.logger.debug(f"Error extracting GPS from Apple MIME data: {e}")
|
|
417
|
+
|
|
418
|
+
def _extract_gps_from_apple_makernote(self, maker_note: str) -> Optional[Dict]:
|
|
419
|
+
"""
|
|
420
|
+
Extract GPS data from Apple's MakerNote field in EXIF data.
|
|
421
|
+
|
|
422
|
+
Args:
|
|
423
|
+
maker_note: Apple MakerNote string
|
|
424
|
+
Returns:
|
|
425
|
+
Dictionary with latitude and longitude if found, None otherwise
|
|
426
|
+
"""
|
|
427
|
+
try:
|
|
428
|
+
# Apple MakerNote often contains GPS coordinates in a specific format
|
|
429
|
+
# Look for patterns like decimal numbers that could be coordinates
|
|
430
|
+
coord_pattern = re.compile(r'([-+]?\d+\.\d+)')
|
|
431
|
+
matches = coord_pattern.findall(maker_note)
|
|
432
|
+
|
|
433
|
+
if len(matches) >= 2:
|
|
434
|
+
# Try pairs of numbers to see if they could be valid coordinates
|
|
435
|
+
for i in range(len(matches) - 1):
|
|
436
|
+
try:
|
|
437
|
+
lat = float(matches[i])
|
|
438
|
+
lon = float(matches[i + 1])
|
|
439
|
+
|
|
440
|
+
# Check if values are in a reasonable range for coordinates
|
|
441
|
+
if -90 <= lat <= 90 and -180 <= lon <= 180:
|
|
442
|
+
return {
|
|
443
|
+
"latitude": lat,
|
|
444
|
+
"longitude": lon
|
|
445
|
+
}
|
|
446
|
+
except ValueError:
|
|
447
|
+
continue
|
|
448
|
+
|
|
449
|
+
# Search for binary data that might contain GPS info
|
|
450
|
+
if b'bplist' in maker_note.encode('utf-8', errors='ignore'):
|
|
451
|
+
# Apple sometimes stores GPS in binary property lists within MakerNote
|
|
452
|
+
# This is a complex binary format that would require a specialized parser
|
|
453
|
+
# For now, we'll just log that we found a binary plist
|
|
454
|
+
self.logger.debug("Found binary plist in MakerNote, specialized parsing needed")
|
|
455
|
+
|
|
456
|
+
return None
|
|
457
|
+
except Exception as e:
|
|
458
|
+
self.logger.debug(f"Error extracting GPS from Apple MakerNote: {e}")
|
|
459
|
+
return None
|
|
460
|
+
|
|
461
|
+
async def extract_exif_heif(self, heif_image) -> Optional[Dict]:
    """
    Extract EXIF data from the metadata items of a HEIF/HEIC image object.

    Args:
        heif_image: Object exposing ``metadata``, an iterable (or None) of
            items with ``type`` and ``data`` attributes.
    Returns:
        JSON-safe dictionary of EXIF data or None if no EXIF data exists
    """
    try:
        exif_data = {}

        for metadata in heif_image.metadata or []:
            if metadata.type == 'Exif':
                exif_bytes = metadata.data
                if exif_bytes and len(exif_bytes) > 8:
                    try:
                        raw = bytes(exif_bytes)
                        # The payload may carry an offset field and/or an
                        # "Exif\0\0" marker before the TIFF structure, so
                        # find the TIFF byte-order header instead of
                        # assuming a fixed offset.
                        starts = [
                            pos for pos in (raw.find(b'II*\x00', 0, 32), raw.find(b'MM\x00*', 0, 32))
                            if pos >= 0
                        ]
                        if not starts:
                            raise ValueError("no TIFF header found in HEIF Exif item")

                        # The blob is bare EXIF, not an image container, so
                        # it is parsed with Image.Exif, not Image.open().
                        parsed = Image.Exif()
                        parsed.load(raw[min(starts):])

                        # Base IFD (the GPS entry there is only a pointer)
                        for tag, value in parsed.items():
                            if tag == IFD.GPSInfo:
                                continue
                            exif_data[TAGS.get(tag, tag)] = _make_serialisable(value)

                        # Exif sub-IFD: DateTimeOriginal, MakerNote, ...
                        for tag, value in parsed.get_ifd(IFD.Exif).items():
                            exif_data[TAGS.get(tag, tag)] = _make_serialisable(value)

                        # GPS IFD, kept raw for convert_to_degrees()
                        gps_ifd = parsed.get_ifd(IFD.GPSInfo)
                        if gps_ifd:
                            exif_data["GPSInfo"] = {
                                GPSTAGS.get(t, t): v for t, v in gps_ifd.items()
                            }
                    except Exception as e:
                        self.logger.debug(f"Error processing HEIF EXIF data: {e}")

            # Apple HEIF files may store GPS in 'mime' type metadata with 'CNTH' format
            elif metadata.type == 'mime':
                try:
                    mime_data = metadata.data
                    if b'CNTH' in mime_data:
                        self._extract_apple_gps_from_mime(mime_data, exif_data)
                except Exception as e:
                    self.logger.debug(f"Error processing Apple MIME metadata: {e}")

        # Extract GPS datetime if available and requested
        if self.extract_geoloc:
            # First try standard GPSInfo
            if "GPSInfo" in exif_data:
                gps_datetime = self.extract_gps_datetime(exif_data)
                if gps_datetime.get("latitude") is not None and gps_datetime.get("longitude") is not None:
                    exif_data['gps_info'] = gps_datetime

            # If no GPS found yet, try the MakerNote heuristic
            gps_summary = exif_data.get('gps_info')
            has_valid_gps = bool(gps_summary) and gps_summary.get('latitude') is not None

            if not has_valid_gps and 'MakerNote' in exif_data:
                apple_gps = self._extract_gps_from_apple_makernote(exif_data['MakerNote'])
                if apple_gps:
                    exif_data['gps_info'] = {
                        "datetime": exif_data.get("DateTimeOriginal") or exif_data.get("DateTime"),
                        "latitude": apple_gps.get("latitude"),
                        "longitude": apple_gps.get("longitude")
                    }

        return _json_safe(exif_data) if exif_data else None

    except Exception as e:
        self.logger.error(f'Error extracting HEIF EXIF data: {e}')
        return None
|
|
544
|
+
|
|
545
|
+
async def extract_exif_data(self, image) -> dict:
    """
    Extract EXIF data from the image object.

    Three strategies are tried in order: ``image.getexif()`` (base IFD plus
    every sub-IFD listed in ``IFD``), the ``tag_v2`` / ``tag`` attributes,
    and finally the entries of ``image.info``. When geolocation is enabled
    and GPS data was found, a ``gps_info`` summary is added.

    Args:
        image: The PIL Image object.
    Returns:
        JSON-safe dictionary of EXIF data or empty dict if no EXIF data
        exists or extraction fails.
    """
    try:
        exif = {}
        # Check Modify Date (if any):
        try:
            modify_date = get_xmp_modify_date(image)
            if modify_date:
                exif["ModifyDate"] = modify_date
        except Exception as e:
            self.logger.debug(f"Error getting XMP ModifyDate: {e}")

        if hasattr(image, 'getexif'):
            exif_data = image.getexif()
            if exif_data:
                for tag, value in exif_data.items():
                    if tag not in ExifTags.TAGS:
                        continue
                    decoded = TAGS.get(tag, tag)

                    if decoded == "GPSInfo":
                        # In the base IFD this entry is usually just a
                        # pointer (an int); only a real mapping is used.
                        # The GPS IFD itself is read in the loop below.
                        if isinstance(value, Mapping):
                            exif["GPSInfo"] = {
                                GPSTAGS.get(t, t): v for t, v in value.items()
                            }
                        continue

                    if decoded == "UserComment" and isinstance(value, str):
                        try:
                            # Try to decode base64 UserComment
                            exif[decoded] = base64.b64decode(value).decode('utf-8', errors='replace')
                        except Exception:
                            # If decoding fails, use original value
                            exif[decoded] = _make_serialisable(value)
                    else:
                        exif[decoded] = _make_serialisable(value)

                # Sub-IFDs: Exif (aperture, shutter, flash, lens, tz
                # offset, ...), GPS, maker note, interop, thumbnail.
                for ifd_id in IFD:
                    try:
                        ifd = exif_data.get_ifd(ifd_id)
                    except KeyError:
                        continue

                    if ifd_id == IFD.GPSInfo:
                        resolve = GPSTAGS
                        if ifd:
                            # Raw values are kept here for convert_to_degrees().
                            exif["GPSInfo"] = {resolve.get(k, k): v for k, v in ifd.items()}
                    else:
                        resolve = TAGS

                    # Unknown tags keep their numeric id as key.
                    for k, v in ifd.items():
                        exif[resolve.get(k, k)] = _make_serialisable(v)

        elif hasattr(image, 'tag') and hasattr(image, 'tag_v2'):
            # For TIFF images which store data in tag and tag_v2 attributes
            # Extract from tag_v2 first (more detailed)
            gps_info = {}
            for tag, value in image.tag_v2.items():
                tag_name = TAGS.get(tag, tag)
                if tag_name == "GPSInfo":
                    # For TIFF images, GPS data might be in a nested IFD
                    if isinstance(value, dict):
                        for gps_tag, gps_value in value.items():
                            gps_info[GPSTAGS.get(gps_tag, gps_tag)] = gps_value
                        exif["GPSInfo"] = gps_info
                else:
                    exif[tag_name] = _make_serialisable(value)

            # Fall back to tag if needed
            if not exif and hasattr(image, 'tag'):
                for tag, value in image.tag.items():
                    exif[TAGS.get(tag, tag)] = _make_serialisable(value)

        else:
            # For other formats, try to extract directly from image.info
            for key, value in image.info.items():
                if key.startswith('exif'):
                    # Some formats store EXIF data with keys like 'exif' or 'exif_ifd'
                    if isinstance(value, dict):
                        exif.update(value)
                    elif isinstance(value, bytes):
                        # Raw EXIF blobs may start with an "Exif\0\0" marker
                        # in front of the TIFF structure.
                        payload = value[6:] if value.startswith(b"Exif\x00\x00") else value
                        try:
                            exif_image = TiffImagePlugin.TiffImageFile(BytesIO(payload))
                            if hasattr(exif_image, 'tag_v2'):
                                for tag, val in exif_image.tag_v2.items():
                                    exif[TAGS.get(tag, tag)] = _make_serialisable(val)
                        except Exception as e:
                            self.logger.debug(f"Error parsing EXIF bytes: {e}")
                else:
                    # Add other metadata
                    exif[key] = _make_serialisable(value)

        # Extract GPS datetime if available
        if self.extract_geoloc and isinstance(exif.get("GPSInfo"), Mapping):
            gps_datetime = self.extract_gps_datetime(exif)
            if gps_datetime:
                exif['gps_info'] = gps_datetime

        return _json_safe(exif) if exif else {}
    except (AttributeError, KeyError) as e:
        self.logger.debug(f'Error extracting PIL EXIF data: {e}')
        return {}
    except Exception as e:
        self.logger.error(f'Unexpected error extracting PIL EXIF data: {e}')
        return {}
|
|
664
|
+
|
|
665
|
+
async def analyze(self, image: Optional[Image.Image] = None, heif: Any = None, **kwargs) -> dict:
    """
    Extract EXIF and IPTC metadata from the given image.

    The HEIF object, when given, is processed first. Results from the PIL
    image are merged afterwards, so on a key clash the PIL EXIF value and
    then the IPTC value replace the earlier one.

    :param image: PIL Image object (optional)
    :param heif: HEIF image object (optional)
    :return: Dictionary containing the merged metadata; empty on failure
    """
    try:
        exif_data = {}

        # Process HEIF image first if provided
        if heif is not None:
            try:
                heif_exif = await self.extract_exif_heif(heif)
                if heif_exif:
                    exif_data.update(heif_exif)
            except Exception as e:
                self.logger.error(f"Error extracting EXIF from HEIF image: {e}")

        # Process PIL image if provided. Both steps need an actual image,
        # so nothing is attempted (and nothing is logged) without one.
        if image is not None:
            try:
                pil_exif = await self.extract_exif_data(image)
                if pil_exif:
                    exif_data.update(pil_exif)
            except Exception as e:
                self.logger.error(f"Error extracting EXIF from PIL image: {e}")

            # Extract IPTC data
            try:
                pil_iptc = await self.extract_iptc_data(image)
                if pil_iptc:
                    exif_data.update(pil_iptc)
            except Exception as e:
                self.logger.error(
                    f"Error extracting IPTC data from PIL image: {e}"
                )

        return exif_data
    except Exception as e:
        self.logger.error(f"Error in EXIF analysis: {str(e)}")
        return {}
|