c2pa-python 0.25.0__tar.gz → 0.26.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {c2pa_python-0.25.0/src/c2pa_python.egg-info → c2pa_python-0.26.0}/PKG-INFO +1 -1
  2. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/pyproject.toml +1 -1
  3. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/src/c2pa/c2pa.py +160 -1
  4. {c2pa_python-0.25.0 → c2pa_python-0.26.0/src/c2pa_python.egg-info}/PKG-INFO +1 -1
  5. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/tests/test_unit_tests.py +236 -0
  6. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/tests/test_unit_tests_threaded.py +150 -0
  7. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/LICENSE-APACHE +0 -0
  8. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/LICENSE-MIT +0 -0
  9. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/MANIFEST.in +0 -0
  10. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/README.md +0 -0
  11. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/requirements.txt +0 -0
  12. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/scripts/download_artifacts.py +0 -0
  13. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/setup.cfg +0 -0
  14. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/setup.py +0 -0
  15. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/src/c2pa/__init__.py +0 -0
  16. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/src/c2pa/build.py +0 -0
  17. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/src/c2pa/lib.py +0 -0
  18. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/src/c2pa_python.egg-info/SOURCES.txt +0 -0
  19. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/src/c2pa_python.egg-info/dependency_links.txt +0 -0
  20. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/src/c2pa_python.egg-info/entry_points.txt +0 -0
  21. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/src/c2pa_python.egg-info/requires.txt +0 -0
  22. {c2pa_python-0.25.0 → c2pa_python-0.26.0}/src/c2pa_python.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: c2pa-python
3
- Version: 0.25.0
3
+ Version: 0.26.0
4
4
  Summary: Python bindings for the C2PA Content Authenticity Initiative (CAI) library
5
5
  Author-email: Gavin Peacock <gvnpeacock@adobe.com>, Tania Mathern <mathern@adobe.com>
6
6
  Maintainer-email: Gavin Peacock <gpeacock@adobe.com>
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "c2pa-python"
7
- version = "0.25.0"
7
+ version = "0.26.0"
8
8
  requires-python = ">=3.10"
9
9
  description = "Python bindings for the C2PA Content Authenticity Initiative (CAI) library"
10
10
  readme = { file = "README.md", content-type = "text/markdown" }
@@ -1358,6 +1358,10 @@ class Reader:
1358
1358
  # we may have opened ourselves, and that we need to close later
1359
1359
  self._backing_file = None
1360
1360
 
1361
+ # Caches for manifest JSON string and parsed data
1362
+ self._manifest_json_str_cache = None
1363
+ self._manifest_data_cache = None
1364
+
1361
1365
  if stream is None:
1362
1366
  # If we don't get a stream as param:
1363
1367
  # Create a stream from the file path in format_or_path
@@ -1600,6 +1604,33 @@ class Reader:
1600
1604
  # Ensure we don't raise exceptions during cleanup
1601
1605
  pass
1602
1606
 
1607
+ def _get_cached_manifest_data(self) -> Optional[dict]:
1608
+ """Get the cached manifest data, fetching and parsing if not cached.
1609
+
1610
+ Returns:
1611
+ A dictionary containing the parsed manifest data, or None if
1612
+ JSON parsing fails
1613
+
1614
+ Raises:
1615
+ C2paError: If there was an error getting the JSON
1616
+ """
1617
+ if self._manifest_data_cache is None:
1618
+ if self._manifest_json_str_cache is None:
1619
+ self._manifest_json_str_cache = self.json()
1620
+
1621
+ try:
1622
+ self._manifest_data_cache = json.loads(
1623
+ self._manifest_json_str_cache
1624
+ )
1625
+ except json.JSONDecodeError:
1626
+ # Reset both caches so a later call can reattempt the read
1627
+ self._manifest_data_cache = None
1628
+ self._manifest_json_str_cache = None
1629
+ # Failed to parse manifest JSON
1630
+ return None
1631
+
1632
+ return self._manifest_data_cache
1633
+
1603
1634
  def close(self):
1604
1635
  """Release the reader resources.
1605
1636
 
@@ -1620,6 +1651,9 @@ class Reader:
1620
1651
  Reader._ERROR_MESSAGES['cleanup_error'].format(
1621
1652
  str(e)))
1622
1653
  finally:
1654
+ # Clear the cache when closing
1655
+ self._manifest_json_str_cache = None
1656
+ self._manifest_data_cache = None
1623
1657
  self._closed = True
1624
1658
 
1625
1659
  def json(self) -> str:
@@ -1634,6 +1668,10 @@ class Reader:
1634
1668
 
1635
1669
  self._ensure_valid_state()
1636
1670
 
1671
+ # Return cached result if available
1672
+ if self._manifest_json_str_cache is not None:
1673
+ return self._manifest_json_str_cache
1674
+
1637
1675
  result = _lib.c2pa_reader_json(self._reader)
1638
1676
 
1639
1677
  if result is None:
@@ -1642,7 +1680,128 @@ class Reader:
1642
1680
  raise C2paError(error)
1643
1681
  raise C2paError("Error during manifest parsing in Reader")
1644
1682
 
1645
- return _convert_to_py_string(result)
1683
+ # Cache the result and return it
1684
+ self._manifest_json_str_cache = _convert_to_py_string(result)
1685
+ return self._manifest_json_str_cache
1686
+
1687
+ def get_active_manifest(self) -> Optional[dict]:
1688
+ """Get the active manifest from the manifest store.
1689
+
1690
+ This method retrieves the full manifest JSON and extracts the active
1691
+ manifest based on the active_manifest key.
1692
+
1693
+ Returns:
1694
+ A dictionary containing the active manifest data, including claims,
1695
+ assertions, ingredients, and signature information, or None if no
1696
+ manifest is found or if there was an error parsing the JSON.
1697
+
1698
+ Raises:
1699
+ KeyError: If the active_manifest or manifests key is missing from the JSON, or the active manifest is not in the manifest store
1700
+ """
1701
+ try:
1702
+ # Get cached manifest data
1703
+ manifest_data = self._get_cached_manifest_data()
1704
+ if manifest_data is None:
1705
+ # raise C2paError("Failed to parse manifest JSON")
1706
+ return None
1707
+
1708
+ # Get the active manifest id/label
1709
+ if "active_manifest" not in manifest_data:
1710
+ raise KeyError("No 'active_manifest' key found")
1711
+
1712
+ active_manifest_id = manifest_data["active_manifest"]
1713
+
1714
+ # Retrieve the active manifest data using manifest id/label
1715
+ if "manifests" not in manifest_data:
1716
+ raise KeyError("No 'manifests' key found in manifest data")
1717
+
1718
+ manifests = manifest_data["manifests"]
1719
+ if active_manifest_id not in manifests:
1720
+ raise KeyError("Active manifest not found in manifest store")
1721
+
1722
+ return manifests[active_manifest_id]
1723
+ except C2paError.ManifestNotFound:
1724
+ return None
1725
+
1726
+ def get_manifest(self, label: str) -> Optional[dict]:
1727
+ """Get a specific manifest from the manifest store by its label.
1728
+
1729
+ This method retrieves the manifest JSON and extracts the manifest
1730
+ that corresponds to the provided manifest label/ID.
1731
+
1732
+ Args:
1733
+ label: The manifest label/ID to look up in the manifest store
1734
+
1735
+ Returns:
1736
+ A dictionary containing the manifest data for the specified label,
1737
+ or None if no manifest is found or if there was an error parsing
1738
+ the JSON.
1739
+
1740
+ Raises:
1741
+ KeyError: If the manifests key is missing from the JSON, or the label is not found in the manifest store
1742
+ """
1743
+ try:
1744
+ # Get cached manifest data
1745
+ manifest_data = self._get_cached_manifest_data()
1746
+ if manifest_data is None:
1747
+ # raise C2paError("Failed to parse manifest JSON")
1748
+ return None
1749
+
1750
+ if "manifests" not in manifest_data:
1751
+ raise KeyError("No 'manifests' key found in manifest data")
1752
+
1753
+ manifests = manifest_data["manifests"]
1754
+ if label not in manifests:
1755
+ raise KeyError(f"Manifest {label} not found in manifest store")
1756
+
1757
+ return manifests[label]
1758
+ except C2paError.ManifestNotFound:
1759
+ return None
1760
+
1761
+ def get_validation_state(self) -> Optional[str]:
1762
+ """Get the validation state of the manifest store.
1763
+
1764
+ This method retrieves the full manifest JSON and extracts the
1765
+ validation_state field, which indicates the overall validation
1766
+ status of the C2PA manifest.
1767
+
1768
+ Returns:
1769
+ The validation state as a string,
1770
+ or None if the validation_state field is not present or if no
1771
+ manifest is found or if there was an error parsing the JSON.
1772
+ """
1773
+ try:
1774
+ # Get cached manifest data
1775
+ manifest_data = self._get_cached_manifest_data()
1776
+ if manifest_data is None:
1777
+ return None
1778
+
1779
+ return manifest_data.get("validation_state")
1780
+ except C2paError.ManifestNotFound:
1781
+ return None
1782
+
1783
+ def get_validation_results(self) -> Optional[dict]:
1784
+ """Get the validation results of the manifest store.
1785
+
1786
+ This method retrieves the full manifest JSON and extracts
1787
+ the validation_results object, which contains detailed
1788
+ validation information.
1789
+
1790
+ Returns:
1791
+ The validation results as a dictionary containing
1792
+ validation details, or None if the validation_results
1793
+ field is not present or if no manifest is found or if
1794
+ there was an error parsing the JSON.
1795
+ """
1796
+ try:
1797
+ # Get cached manifest data
1798
+ manifest_data = self._get_cached_manifest_data()
1799
+ if manifest_data is None:
1800
+ return None
1801
+
1802
+ return manifest_data.get("validation_results")
1803
+ except C2paError.ManifestNotFound:
1804
+ return None
1646
1805
 
1647
1806
  def resource_to_stream(self, uri: str, stream: Any) -> int:
1648
1807
  """Write a resource to a stream.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: c2pa-python
3
- Version: 0.25.0
3
+ Version: 0.26.0
4
4
  Summary: Python bindings for the C2PA Content Authenticity Initiative (CAI) library
5
5
  Author-email: Gavin Peacock <gvnpeacock@adobe.com>, Tania Mathern <mathern@adobe.com>
6
6
  Maintainer-email: Gavin Peacock <gpeacock@adobe.com>
@@ -64,6 +64,72 @@ class TestReader(unittest.TestCase):
64
64
  json_data = reader.json()
65
65
  self.assertIn(DEFAULT_TEST_FILE_NAME, json_data)
66
66
 
67
+ def test_get_active_manifest(self):
68
+ with open(self.testPath, "rb") as file:
69
+ reader = Reader("image/jpeg", file)
70
+ active_manifest = reader.get_active_manifest()
71
+
72
+ # Check the returned manifest label/key
73
+ expected_label = "contentauth:urn:uuid:c85a2b90-f1a0-4aa4-b17f-f938b475804e"
74
+ self.assertEqual(active_manifest["label"], expected_label)
75
+
76
+ def test_get_manifest(self):
77
+ with open(self.testPath, "rb") as file:
78
+ reader = Reader("image/jpeg", file)
79
+
80
+ # Test getting manifest by the specific label
81
+ label = "contentauth:urn:uuid:c85a2b90-f1a0-4aa4-b17f-f938b475804e"
82
+ manifest = reader.get_manifest(label)
83
+ self.assertEqual(manifest["label"], label)
84
+
85
+ # It should be the active manifest too, so cross-check
86
+ active_manifest = reader.get_active_manifest()
87
+ self.assertEqual(manifest, active_manifest)
88
+
89
+ def test_stream_get_non_active_manifest_by_label(self):
90
+ video_path = os.path.join(FIXTURES_DIR, "video1.mp4")
91
+ with open(video_path, "rb") as file:
92
+ reader = Reader("video/mp4", file)
93
+
94
+ non_active_label = "urn:uuid:54281c07-ad34-430e-bea5-112a18facf0b"
95
+ non_active_manifest = reader.get_manifest(non_active_label)
96
+ self.assertEqual(non_active_manifest["label"], non_active_label)
97
+
98
+ # Verify it's not the active manifest
99
+ # (that test case has only one other manifest that is not the active manifest)
100
+ active_manifest = reader.get_active_manifest()
101
+ self.assertNotEqual(non_active_manifest, active_manifest)
102
+ self.assertNotEqual(non_active_manifest["label"], active_manifest["label"])
103
+
104
+ def test_stream_get_non_active_manifest_by_label_not_found(self):
105
+ video_path = os.path.join(FIXTURES_DIR, "video1.mp4")
106
+ with open(video_path, "rb") as file:
107
+ reader = Reader("video/mp4", file)
108
+
109
+ # Try to get a manifest with a label that clearly doesn't exist...
110
+ non_existing_label = "urn:uuid:clearly-not-existing"
111
+ with self.assertRaises(KeyError):
112
+ reader.get_manifest(non_existing_label)
113
+
114
+ def test_stream_read_get_validation_state(self):
115
+ with open(self.testPath, "rb") as file:
116
+ reader = Reader("image/jpeg", file)
117
+ validation_state = reader.get_validation_state()
118
+ self.assertIsNotNone(validation_state)
119
+ self.assertEqual(validation_state, "Valid")
120
+
121
+ def test_stream_read_get_validation_results(self):
122
+ with open(self.testPath, "rb") as file:
123
+ reader = Reader("image/jpeg", file)
124
+ validation_results = reader.get_validation_results()
125
+
126
+ self.assertIsNotNone(validation_results)
127
+ self.assertIsInstance(validation_results, dict)
128
+
129
+ self.assertIn("activeManifest", validation_results)
130
+ active_manifest_results = validation_results["activeManifest"]
131
+ self.assertIsInstance(active_manifest_results, dict)
132
+
67
133
  def test_reader_detects_unsupported_mimetype_on_stream(self):
68
134
  with open(self.testPath, "rb") as file:
69
135
  with self.assertRaises(Error.NotSupported):
@@ -270,6 +336,115 @@ class TestReader(unittest.TestCase):
270
336
  except Exception as e:
271
337
  self.fail(f"Failed to read metadata from {filename}: {str(e)}")
272
338
 
339
+ def test_read_cached_all_files(self):
340
+ """Test reading C2PA metadata with cache functionality from all files in the fixtures/files-for-reading-tests directory"""
341
+ reading_dir = os.path.join(self.data_dir, "files-for-reading-tests")
342
+
343
+ # Map of file extensions to MIME types
344
+ mime_types = {
345
+ '.jpg': 'image/jpeg',
346
+ '.jpeg': 'image/jpeg',
347
+ '.png': 'image/png',
348
+ '.gif': 'image/gif',
349
+ '.webp': 'image/webp',
350
+ '.heic': 'image/heic',
351
+ '.heif': 'image/heif',
352
+ '.avif': 'image/avif',
353
+ '.tif': 'image/tiff',
354
+ '.tiff': 'image/tiff',
355
+ '.mp4': 'video/mp4',
356
+ '.avi': 'video/x-msvideo',
357
+ '.mp3': 'audio/mpeg',
358
+ '.m4a': 'audio/mp4',
359
+ '.wav': 'audio/wav',
360
+ '.pdf': 'application/pdf',
361
+ }
362
+
363
+ # Skip system files
364
+ skip_files = {
365
+ '.DS_Store'
366
+ }
367
+
368
+ for filename in os.listdir(reading_dir):
369
+ if filename in skip_files:
370
+ continue
371
+
372
+ file_path = os.path.join(reading_dir, filename)
373
+ if not os.path.isfile(file_path):
374
+ continue
375
+
376
+ # Get file extension and corresponding MIME type
377
+ _, ext = os.path.splitext(filename)
378
+ ext = ext.lower()
379
+ if ext not in mime_types:
380
+ continue
381
+
382
+ mime_type = mime_types[ext]
383
+
384
+ try:
385
+ with open(file_path, "rb") as file:
386
+ reader = Reader(mime_type, file)
387
+
388
+ # Test 1: Verify cache variables are initially None
389
+ self.assertIsNone(reader._manifest_json_str_cache, f"JSON cache should be None initially for {filename}")
390
+ self.assertIsNone(reader._manifest_data_cache, f"Manifest data cache should be None initially for {filename}")
391
+
392
+ # Test 2: Multiple calls to json() should return the same result and use cache
393
+ json_data_1 = reader.json()
394
+ self.assertIsNotNone(reader._manifest_json_str_cache, f"JSON cache not set after first json() call for {filename}")
395
+ self.assertEqual(json_data_1, reader._manifest_json_str_cache, f"JSON cache doesn't match return value for {filename}")
396
+
397
+ json_data_2 = reader.json()
398
+ self.assertEqual(json_data_1, json_data_2, f"JSON inconsistency for {filename}")
399
+ self.assertIsInstance(json_data_1, str)
400
+
401
+ # Test 3: Test methods that use the cache
402
+ try:
403
+ # Test get_active_manifest() which uses _get_cached_manifest_data()
404
+ active_manifest = reader.get_active_manifest()
405
+ self.assertIsInstance(active_manifest, dict, f"Active manifest not dict for {filename}")
406
+
407
+ # Test 4: Verify cache is set after calling cache-using methods
408
+ self.assertIsNotNone(reader._manifest_json_str_cache, f"JSON cache not set after get_active_manifest for {filename}")
409
+ self.assertIsNotNone(reader._manifest_data_cache, f"Manifest data cache not set after get_active_manifest for {filename}")
410
+
411
+ # Test 5: Multiple calls to cache-using methods should return the same result
412
+ active_manifest_2 = reader.get_active_manifest()
413
+ self.assertEqual(active_manifest, active_manifest_2, f"Active manifest cache inconsistency for {filename}")
414
+
415
+ # Test get_validation_state() which uses the cache
416
+ validation_state = reader.get_validation_state()
417
+ # validation_state can be None, so just check it doesn't crash
418
+
419
+ # Test get_validation_results() which uses the cache
420
+ validation_results = reader.get_validation_results()
421
+ # validation_results can be None, so just check it doesn't crash
422
+
423
+ # Test 6: Multiple calls to validation methods should return the same result
424
+ validation_state_2 = reader.get_validation_state()
425
+ self.assertEqual(validation_state, validation_state_2, f"Validation state cache inconsistency for {filename}")
426
+
427
+ validation_results_2 = reader.get_validation_results()
428
+ self.assertEqual(validation_results, validation_results_2, f"Validation results cache inconsistency for {filename}")
429
+
430
+ except KeyError as e:
431
+ # Some files might not have active manifests or validation data
432
+ # This is expected for some test files, so we'll skip cache testing for those
433
+ pass
434
+
435
+ # Test 7: Verify the manifest contains expected fields
436
+ manifest = json.loads(json_data_1)
437
+ self.assertIn("manifests", manifest)
438
+ self.assertIn("active_manifest", manifest)
439
+
440
+ # Test 8: Test cache clearing on close
441
+ reader.close()
442
+ self.assertIsNone(reader._manifest_json_str_cache, f"JSON cache not cleared for {filename}")
443
+ self.assertIsNone(reader._manifest_data_cache, f"Manifest data cache not cleared for {filename}")
444
+
445
+ except Exception as e:
446
+ self.fail(f"Failed to read cached metadata from {filename}: {str(e)}")
447
+
273
448
  def test_reader_context_manager_with_exception(self):
274
449
  """Test Reader state after exception in context manager."""
275
450
  try:
@@ -430,6 +605,67 @@ class TestReader(unittest.TestCase):
430
605
  self.assertEqual(remote_url, "https://cai-manifests.adobe.com/manifests/adobe-urn-uuid-5f37e182-3687-462e-a7fb-573462780391")
431
606
  self.assertFalse(reader.is_embedded())
432
607
 
608
+ def test_stream_read_and_parse_cached(self):
609
+ """Test reading and parsing with cache verification by repeating operations multiple times"""
610
+ with open(self.testPath, "rb") as file:
611
+ reader = Reader("image/jpeg", file)
612
+
613
+ # Verify cache starts as None
614
+ self.assertIsNone(reader._manifest_json_str_cache, "JSON cache should be None initially")
615
+ self.assertIsNone(reader._manifest_data_cache, "Manifest data cache should be None initially")
616
+
617
+ # First operation - should populate cache
618
+ manifest_store_1 = json.loads(reader.json())
619
+ title_1 = manifest_store_1["manifests"][manifest_store_1["active_manifest"]]["title"]
620
+ self.assertEqual(title_1, DEFAULT_TEST_FILE_NAME)
621
+
622
+ # Verify cache is populated after first json() call
623
+ self.assertIsNotNone(reader._manifest_json_str_cache, "JSON cache should be set after first json() call")
624
+ self.assertEqual(manifest_store_1, json.loads(reader._manifest_json_str_cache), "Cached JSON should match parsed result")
625
+
626
+ # Repeat the same operation multiple times to verify cache usage
627
+ for i in range(5):
628
+ manifest_store = json.loads(reader.json())
629
+ title = manifest_store["manifests"][manifest_store["active_manifest"]]["title"]
630
+ self.assertEqual(title, DEFAULT_TEST_FILE_NAME, f"Title should be consistent on iteration {i+1}")
631
+
632
+ # Verify cache is still populated and consistent
633
+ self.assertIsNotNone(reader._manifest_json_str_cache, f"JSON cache should remain set on iteration {i+1}")
634
+ self.assertEqual(manifest_store, json.loads(reader._manifest_json_str_cache), f"Cached JSON should match parsed result on iteration {i+1}")
635
+
636
+ # Test methods that use the cache
637
+ # Test get_active_manifest() which uses _get_cached_manifest_data()
638
+ active_manifest_1 = reader.get_active_manifest()
639
+ self.assertIsInstance(active_manifest_1, dict, "Active manifest should be a dict")
640
+
641
+ # Verify manifest data cache is populated
642
+ self.assertIsNotNone(reader._manifest_data_cache, "Manifest data cache should be set after get_active_manifest()")
643
+
644
+ # Repeat get_active_manifest() multiple times to verify cache usage
645
+ for i in range(3):
646
+ active_manifest = reader.get_active_manifest()
647
+ self.assertEqual(active_manifest_1, active_manifest, f"Active manifest should be consistent on iteration {i+1}")
648
+
649
+ # Verify cache remains populated
650
+ self.assertIsNotNone(reader._manifest_data_cache, f"Manifest data cache should remain set on iteration {i+1}")
651
+
652
+ # Test get_validation_state() and get_validation_results() with cache
653
+ validation_state_1 = reader.get_validation_state()
654
+ validation_results_1 = reader.get_validation_results()
655
+
656
+ # Repeat validation methods to verify cache usage
657
+ for i in range(3):
658
+ validation_state = reader.get_validation_state()
659
+ validation_results = reader.get_validation_results()
660
+
661
+ self.assertEqual(validation_state_1, validation_state, f"Validation state should be consistent on iteration {i+1}")
662
+ self.assertEqual(validation_results_1, validation_results, f"Validation results should be consistent on iteration {i+1}")
663
+
664
+ # Verify cache clearing on close
665
+ reader.close()
666
+ self.assertIsNone(reader._manifest_json_str_cache, "JSON cache should be cleared on close")
667
+ self.assertIsNone(reader._manifest_data_cache, "Manifest data cache should be cleared on close")
668
+
433
669
  # TODO: Unskip once fixed configuration to read data is clarified
434
670
  # def test_read_cawg_data_file(self):
435
671
  # """Test reading C2PA metadata from C_with_CAWG_data.jpg file."""
@@ -165,6 +165,156 @@ class TestReaderWithThreads(unittest.TestCase):
165
165
  if errors:
166
166
  self.fail("\n".join(errors))
167
167
 
168
+ def test_read_cached_all_files(self):
169
+ """Test reading C2PA metadata with cache functionality from all files in the fixtures/files-for-reading-tests directory using multithreading"""
170
+ reading_dir = os.path.join(self.data_dir, "files-for-reading-tests")
171
+
172
+ # Map of file extensions to MIME types
173
+ mime_types = {
174
+ '.jpg': 'image/jpeg',
175
+ '.jpeg': 'image/jpeg',
176
+ '.png': 'image/png',
177
+ '.gif': 'image/gif',
178
+ '.webp': 'image/webp',
179
+ '.heic': 'image/heic',
180
+ '.heif': 'image/heif',
181
+ '.avif': 'image/avif',
182
+ '.tif': 'image/tiff',
183
+ '.tiff': 'image/tiff',
184
+ '.mp4': 'video/mp4',
185
+ '.avi': 'video/x-msvideo',
186
+ '.mp3': 'audio/mpeg',
187
+ '.m4a': 'audio/mp4',
188
+ '.wav': 'audio/wav',
189
+ '.pdf': 'application/pdf',
190
+ }
191
+
192
+ # Skip system files
193
+ skip_files = {
194
+ '.DS_Store'
195
+ }
196
+
197
+ def process_file_with_cache(filename):
198
+ if filename in skip_files:
199
+ return None
200
+
201
+ file_path = os.path.join(reading_dir, filename)
202
+ if not os.path.isfile(file_path):
203
+ return None
204
+
205
+ # Get file extension and corresponding MIME type
206
+ _, ext = os.path.splitext(filename)
207
+ ext = ext.lower()
208
+ if ext not in mime_types:
209
+ return None
210
+
211
+ mime_type = mime_types[ext]
212
+
213
+ try:
214
+ with open(file_path, "rb") as file:
215
+ reader = Reader(mime_type, file)
216
+
217
+ # Test 1: Verify cache variables are initially None
218
+ if reader._manifest_json_str_cache is not None:
219
+ return f"JSON cache should be None initially for {filename}"
220
+ if reader._manifest_data_cache is not None:
221
+ return f"Manifest data cache should be None initially for {filename}"
222
+
223
+ # Test 2: Multiple calls to json() should return the same result and use cache
224
+ json_data_1 = reader.json()
225
+ if reader._manifest_json_str_cache is None:
226
+ return f"JSON cache not set after first json() call for {filename}"
227
+ if json_data_1 != reader._manifest_json_str_cache:
228
+ return f"JSON cache doesn't match return value for {filename}"
229
+
230
+ json_data_2 = reader.json()
231
+ if json_data_1 != json_data_2:
232
+ return f"JSON inconsistency for {filename}"
233
+ if not isinstance(json_data_1, str):
234
+ return f"JSON data is not a string for {filename}"
235
+
236
+ # Test 3: Test methods that use the cache
237
+ try:
238
+ # Test get_active_manifest() which uses _get_cached_manifest_data()
239
+ active_manifest = reader.get_active_manifest()
240
+ if not isinstance(active_manifest, dict):
241
+ return f"Active manifest not dict for {filename}"
242
+
243
+ # Test 4: Verify cache is set after calling cache-using methods
244
+ if reader._manifest_json_str_cache is None:
245
+ return f"JSON cache not set after get_active_manifest for {filename}"
246
+ if reader._manifest_data_cache is None:
247
+ return f"Manifest data cache not set after get_active_manifest for {filename}"
248
+
249
+ # Test 5: Multiple calls to cache-using methods should return the same result
250
+ active_manifest_2 = reader.get_active_manifest()
251
+ if active_manifest != active_manifest_2:
252
+ return f"Active manifest cache inconsistency for {filename}"
253
+
254
+ # Test get_validation_state() which uses the cache
255
+ validation_state = reader.get_validation_state()
256
+ # validation_state can be None, so just check it doesn't crash
257
+
258
+ # Test get_validation_results() which uses the cache
259
+ validation_results = reader.get_validation_results()
260
+ # validation_results can be None, so just check it doesn't crash
261
+
262
+ # Test 6: Multiple calls to validation methods should return the same result
263
+ validation_state_2 = reader.get_validation_state()
264
+ if validation_state != validation_state_2:
265
+ return f"Validation state cache inconsistency for {filename}"
266
+
267
+ validation_results_2 = reader.get_validation_results()
268
+ if validation_results != validation_results_2:
269
+ return f"Validation results cache inconsistency for {filename}"
270
+
271
+ except KeyError:
272
+ # Some files might not have active manifests or validation data
273
+ # This is expected for some test files, so we'll skip cache testing for those
274
+ pass
275
+
276
+ # Test 7: Verify the manifest contains expected fields
277
+ manifest = json.loads(json_data_1)
278
+ if "manifests" not in manifest:
279
+ return f"Missing 'manifests' key in {filename}"
280
+ if "active_manifest" not in manifest:
281
+ return f"Missing 'active_manifest' key in {filename}"
282
+
283
+ # Test 8: Test cache clearing on close
284
+ reader.close()
285
+ if reader._manifest_json_str_cache is not None:
286
+ return f"JSON cache not cleared for {filename}"
287
+ if reader._manifest_data_cache is not None:
288
+ return f"Manifest data cache not cleared for {filename}"
289
+
290
+ return None # Success case returns None
291
+
292
+ except Exception as e:
293
+ return f"Failed to read cached metadata from {filename}: {str(e)}"
294
+
295
+ # Create a thread pool with 6 workers
296
+ with concurrent.futures.ThreadPoolExecutor(max_workers=6) as executor:
297
+ # Submit all files to the thread pool
298
+ future_to_file = {
299
+ executor.submit(process_file_with_cache, filename): filename
300
+ for filename in os.listdir(reading_dir)
301
+ }
302
+
303
+ # Collect results as they complete
304
+ errors = []
305
+ for future in concurrent.futures.as_completed(future_to_file):
306
+ filename = future_to_file[future]
307
+ try:
308
+ error = future.result()
309
+ if error:
310
+ errors.append(error)
311
+ except Exception as e:
312
+ errors.append(
313
+ f"Unexpected error processing {filename}: {str(e)}")
314
+
315
+ # If any errors occurred, fail the test with all error messages
316
+ if errors:
317
+ self.fail("\n".join(errors))
168
318
 
169
319
  class TestBuilderWithThreads(unittest.TestCase):
170
320
  def setUp(self):
File without changes
File without changes
File without changes
File without changes
File without changes