regscale-cli 6.18.0.0__py3-none-any.whl → 6.19.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of regscale-cli has been flagged as potentially problematic.

Files changed (47)
  1. regscale/__init__.py +1 -1
  2. regscale/integrations/api_paginator.py +932 -0
  3. regscale/integrations/api_paginator_example.py +348 -0
  4. regscale/integrations/commercial/__init__.py +11 -10
  5. regscale/integrations/commercial/{qualys.py → qualys/__init__.py} +756 -105
  6. regscale/integrations/commercial/qualys/scanner.py +1051 -0
  7. regscale/integrations/commercial/qualys/variables.py +21 -0
  8. regscale/integrations/commercial/sicura/api.py +1 -0
  9. regscale/integrations/commercial/stigv2/click_commands.py +36 -8
  10. regscale/integrations/commercial/stigv2/stig_integration.py +63 -9
  11. regscale/integrations/commercial/tenablev2/__init__.py +9 -0
  12. regscale/integrations/commercial/tenablev2/authenticate.py +23 -2
  13. regscale/integrations/commercial/tenablev2/commands.py +779 -0
  14. regscale/integrations/commercial/tenablev2/jsonl_scanner.py +1999 -0
  15. regscale/integrations/commercial/tenablev2/sc_scanner.py +600 -0
  16. regscale/integrations/commercial/tenablev2/scanner.py +7 -5
  17. regscale/integrations/commercial/tenablev2/utils.py +21 -4
  18. regscale/integrations/commercial/tenablev2/variables.py +4 -0
  19. regscale/integrations/jsonl_scanner_integration.py +523 -142
  20. regscale/integrations/scanner_integration.py +102 -26
  21. regscale/integrations/transformer/__init__.py +17 -0
  22. regscale/integrations/transformer/data_transformer.py +445 -0
  23. regscale/integrations/transformer/mappings/__init__.py +8 -0
  24. regscale/integrations/variables.py +2 -0
  25. regscale/models/__init__.py +5 -2
  26. regscale/models/integration_models/cisa_kev_data.json +6 -6
  27. regscale/models/integration_models/synqly_models/capabilities.json +1 -1
  28. regscale/models/regscale_models/asset.py +5 -2
  29. regscale/models/regscale_models/file.py +5 -2
  30. regscale/models/regscale_models/group.py +2 -1
  31. regscale/models/regscale_models/user_group.py +1 -1
  32. regscale/regscale.py +3 -1
  33. {regscale_cli-6.18.0.0.dist-info → regscale_cli-6.19.0.1.dist-info}/METADATA +1 -1
  34. {regscale_cli-6.18.0.0.dist-info → regscale_cli-6.19.0.1.dist-info}/RECORD +46 -30
  35. tests/regscale/core/test_version.py +22 -0
  36. tests/regscale/integrations/__init__.py +0 -0
  37. tests/regscale/integrations/test_api_paginator.py +597 -0
  38. tests/regscale/integrations/test_integration_mapping.py +60 -0
  39. tests/regscale/integrations/test_issue_creation.py +317 -0
  40. tests/regscale/integrations/test_issue_due_date.py +46 -0
  41. tests/regscale/integrations/transformer/__init__.py +0 -0
  42. tests/regscale/integrations/transformer/test_data_transformer.py +850 -0
  43. regscale/integrations/commercial/tenablev2/click.py +0 -1641
  44. {regscale_cli-6.18.0.0.dist-info → regscale_cli-6.19.0.1.dist-info}/LICENSE +0 -0
  45. {regscale_cli-6.18.0.0.dist-info → regscale_cli-6.19.0.1.dist-info}/WHEEL +0 -0
  46. {regscale_cli-6.18.0.0.dist-info → regscale_cli-6.19.0.1.dist-info}/entry_points.txt +0 -0
  47. {regscale_cli-6.18.0.0.dist-info → regscale_cli-6.19.0.1.dist-info}/top_level.txt +0 -0
regscale/integrations/jsonl_scanner_integration.py
@@ -55,6 +55,10 @@ class JSONLScannerIntegration(ScannerIntegration):
         """
         logger.info("Initializing JSONLScannerIntegration")
         self.plan_id = kwargs.get("plan_id", None)
+
+        # Pass vulnerability creation option to parent class
+        self.vulnerability_creation = kwargs.get("vulnerability_creation", None)
+
         # plan_id is required for all integrations
         super().__init__(**kwargs)
         # Extract S3-related kwargs
@@ -76,7 +80,8 @@ class JSONLScannerIntegration(ScannerIntegration):
         self.mapping = self._load_mapping() if not self.disable_mapping else None

         self.set_scan_date(kwargs.get("scan_date", get_current_datetime()))
-        # Initialize parent class
+
+        self.existing_assets = {}

         self.s3_client = None
         if self.s3_bucket and not self.read_files_only:
@@ -290,10 +295,7 @@ class JSONLScannerIntegration(ScannerIntegration):

         with open(output_file, "a") as output_f:
             for file_data in self.find_valid_files(file_path):
-                if isinstance(file_data, tuple) and len(file_data) >= 2:
-                    file, data = file_data[0], file_data[1]
-                else:
-                    file, data = file_data, None
+                file, data = self._extract_file_and_data(file_data)

                 file_str = str(file)
                 if file_str in processed_files:
@@ -301,23 +303,64 @@ class JSONLScannerIntegration(ScannerIntegration):

                 processed_files.add(file_str)

-                try:
-                    logger.info(f"Processing file: {file}")
-                    if item_type == "asset":
-                        self._process_asset_file(file, data, output_f, existing_items)
-                        new_items_count += 1
-                        total_items_count += 1
-                    else:
-                        findings_count = self._process_finding_file(file, data, output_f, existing_items)
-                        new_items_count += findings_count
-                        total_items_count += findings_count
+                items_added = self._process_file_by_type(file, data, output_f, existing_items, item_type)
+                new_items_count += items_added
+                total_items_count += items_added

-                except Exception as e:
-                    logger.error(f"Error processing file {file}: {str(e)}")
+        self._log_items_added(new_items_count, item_type, output_file)
+        return total_items_count
+
+    def _extract_file_and_data(self, file_data: Any) -> Tuple[Union[Path, str], Optional[Dict[str, Any]]]:
+        """
+        Extract file path and data from file_data which might be a tuple or a single value.
+
+        :param Any file_data: File data from find_valid_files
+        :return: Tuple of (file_path, file_data)
+        :rtype: Tuple[Union[Path, str], Optional[Dict[str, Any]]]
+        """
+        if isinstance(file_data, tuple) and len(file_data) >= 2:
+            return file_data[0], file_data[1]
+        return file_data, None

+    def _process_file_by_type(
+        self,
+        file: Union[Path, str],
+        data: Optional[Dict[str, Any]],
+        output_f: Any,
+        existing_items: Dict[str, bool],
+        item_type: str,
+    ) -> int:
+        """
+        Process a file based on the item type (asset or finding).
+
+        :param Union[Path, str] file: File path
+        :param Optional[Dict[str, Any]] data: File data
+        :param Any output_f: Output file handle
+        :param Dict[str, bool] existing_items: Dictionary of existing item keys
+        :param str item_type: Type of items to process ('asset' or 'finding')
+        :return: Number of items added
+        :rtype: int
+        """
+        try:
+            logger.info(f"Processing file: {file}")
+            if item_type == "asset":
+                return self._process_asset_file(file, data, output_f, existing_items)
+            else:
+                return self._process_finding_file(file, data, output_f, existing_items)
+        except Exception as e:
+            logger.error(f"Error processing file {file}: {str(e)}")
+            return 0
+
+    def _log_items_added(self, new_items_count: int, item_type: str, output_file: str) -> None:
+        """
+        Log information about the number of items added.
+
+        :param int new_items_count: Number of new items added
+        :param str item_type: Type of items processed ('asset' or 'finding')
+        :param str output_file: Path to the output file
+        """
         item_type_label = "assets" if item_type == "asset" else "findings"
         logger.info(f"Added {new_items_count} new {item_type_label} to {output_file}")
-        return total_items_count

     def _process_asset_file(self, file, data, output_f, existing_items):
         """
@@ -350,8 +393,7 @@ class JSONLScannerIntegration(ScannerIntegration):
             logger.debug(f"Asset with identifier {key} already exists, skipping")
             return 0

-        output_f.write(json.dumps(dataclasses.asdict(mapped_asset)) + "\n")
-        output_f.flush()
+        self._write_item(output_f, mapped_asset)
         existing_items[key] = True
         return 1

@@ -392,8 +434,7 @@ class JSONLScannerIntegration(ScannerIntegration):
                 logger.debug(f"Finding with key {key} already exists, skipping")
                 continue

-            output_f.write(json.dumps(dataclasses.asdict(mapped_finding)) + "\n")
-            output_f.flush()
+            self._write_item(output_f, mapped_finding)
             existing_items[key] = True
             findings_in_file += 1

@@ -416,20 +457,54 @@ class JSONLScannerIntegration(ScannerIntegration):

     def _yield_items_from_jsonl(self, jsonl_file: str, item_class: Type[ItemType]) -> Iterator[ItemType]:
         """
-        Read items from JSONL file and yield them one by one.
+        Read items from JSONL file and yield them one by one with optimizations for large files.
+
+        This method automatically selects an appropriate processing strategy based on file size:
+        - Small files (<100MB): Simple line-by-line processing
+        - Medium files (100MB-500MB): Batch processing with increased buffer size
+        - Large files (>500MB): Parallel processing with multiprocessing

         :param str jsonl_file: Path to JSONL file containing items
         :param Type[ItemType] item_class: Class to convert dictionary items to (IntegrationAsset or IntegrationFinding)
         :yields: Items one at a time
         :rtype: Iterator[ItemType]
         """
+        # Standard library imports should be at the module level, but these are only needed here
+        # and having them at the top would create unnecessary dependencies for small files
+
         if not os.path.exists(jsonl_file):
             logger.warning(f"JSONL file {jsonl_file} does not exist")
             return

-        logger.info(f"Reading items from {jsonl_file}")
+        # Check file size to determine best strategy
+        file_size = os.path.getsize(jsonl_file)
+        file_size_mb = file_size / (1024 * 1024)
+        logger.info(f"Reading items from {jsonl_file} (size: {file_size_mb:.2f} MB)")
+
+        # Select processing strategy based on file size
+        if file_size < 100 * 1024 * 1024:  # < 100MB
+            yield from self._process_small_file(jsonl_file, item_class)
+        elif file_size < 500 * 1024 * 1024:  # 100MB-500MB
+            yield from self._process_medium_file(jsonl_file, item_class)
+        else:  # > 500MB
+            yield from self._process_large_file(jsonl_file, item_class)
+
+        logger.info(f"Finished reading items from {jsonl_file}")
+
+    def _process_small_file(self, jsonl_file: str, item_class: Type[ItemType]) -> Iterator[ItemType]:
+        """
+        Process a small JSONL file (<100MB) using line-by-line processing.
+
+        :param str jsonl_file: Path to JSONL file
+        :param Type[ItemType] item_class: Class to convert dictionary items to
+        :yields: Items one at a time
+        :rtype: Iterator[ItemType]
+        """
         with open(jsonl_file, "r") as f:
             for line_number, line in enumerate(f, 1):
+                if not line.strip():  # Skip empty lines
+                    continue
+
                 try:
                     item_dict = json.loads(line.strip())
                     yield item_class(**item_dict)
@@ -438,6 +513,95 @@ class JSONLScannerIntegration(ScannerIntegration):
                 except Exception as e:
                     logger.error(f"Error processing line {line_number} in {jsonl_file}: {str(e)}")

+    def _process_batch(self, batch: List[str], item_class: Type[ItemType]) -> List[ItemType]:
+        """
+        Process a batch of lines into item objects.
+
+        :param List[str] batch: List of JSON lines to process
+        :param Type[ItemType] item_class: Class to convert dictionary items to
+        :return: List of processed items
+        :rtype: List[ItemType]
+        """
+        results = []
+        for line in batch:
+            if not line.strip():  # Skip empty lines
+                continue
+
+            try:
+                item_dict = json.loads(line.strip())
+                results.append(item_class(**item_dict))
+            except json.JSONDecodeError:
+                logger.warning("Could not parse line in batch")
+            except Exception as e:
+                logger.error(f"Error processing line in batch: {str(e)}")
+        return results
+
+    def _process_medium_file(self, jsonl_file: str, item_class: Type[ItemType]) -> Iterator[ItemType]:
+        """
+        Process a medium-sized JSONL file (100MB-500MB) using batch processing.
+
+        :param str jsonl_file: Path to JSONL file
+        :param Type[ItemType] item_class: Class to convert dictionary items to
+        :yields: Items one at a time
+        :rtype: Iterator[ItemType]
+        """
+        batch_size = 10000  # Process 10,000 lines at a time
+        buffer_size = 10 * 1024 * 1024  # 10MB buffer
+
+        with open(jsonl_file, "r", buffering=buffer_size) as f:
+            batch = []
+
+            for line in f:
+                batch.append(line)
+
+                if len(batch) >= batch_size:
+                    for item in self._process_batch(batch, item_class):
+                        yield item
+                    batch = []
+
+            # Process any remaining lines
+            if batch:
+                for item in self._process_batch(batch, item_class):
+                    yield item
+
+    def _process_large_file(self, jsonl_file: str, item_class: Type[ItemType]) -> Iterator[ItemType]:
+        """
+        Process a large JSONL file (>500MB) using parallel processing.
+
+        :param str jsonl_file: Path to JSONL file
+        :param Type[ItemType] item_class: Class to convert dictionary items to
+        :yields: Items one at a time
+        :rtype: Iterator[ItemType]
+        """
+        from concurrent.futures import ProcessPoolExecutor
+        from functools import partial
+
+        max_workers = min(os.cpu_count() or 4, 8)
+        batch_size = 10000  # Process 10,000 lines at a time
+        buffer_size = 10 * 1024 * 1024  # 10MB buffer
+
+        logger.info(f"Processing large file with {max_workers} workers, batch size: {batch_size}")
+
+        with open(jsonl_file, "r", buffering=buffer_size) as f:
+            batch = []
+            process_func = partial(self._process_batch, item_class=item_class)
+
+            with ProcessPoolExecutor(max_workers=max_workers) as executor:
+                for line in f:
+                    batch.append(line)
+
+                    if len(batch) >= batch_size:
+                        future = executor.submit(process_func, batch)
+                        batch = []
+                        # Yield results as they complete
+                        for item in future.result():
+                            yield item
+
+                # Process any remaining lines
+                if batch:
+                    for item in executor.submit(process_func, batch).result():
+                        yield item
+
     def _process_files(
         self,
         file_path: Union[str, Path],
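
The three size-dependent readers above are easier to follow outside the class. Below is a minimal, self-contained sketch of the same batched-reading idea (fixed-size batches, plus a process pool for very large files), assuming nothing from regscale-cli; the function names, the use of a single 500MB threshold, and the findings.jsonl input file are illustrative only.

import itertools
import json
import os
from concurrent.futures import ProcessPoolExecutor
from typing import Any, Dict, Iterator, List


def parse_batch(lines: List[str]) -> List[Dict[str, Any]]:
    """Parse one batch of JSON lines, skipping blank or malformed records."""
    records = []
    for line in lines:
        line = line.strip()
        if not line:
            continue
        try:
            records.append(json.loads(line))
        except json.JSONDecodeError:
            pass  # a real implementation would log and count these
    return records


def read_jsonl(path: str, batch_size: int = 10_000) -> Iterator[Dict[str, Any]]:
    """Yield records from a JSONL file, using worker processes for very large files."""
    large_file = os.path.getsize(path) > 500 * 1024 * 1024  # same 500MB cut-off as the diff above
    with open(path, "r", buffering=10 * 1024 * 1024) as f:
        batches = iter(lambda: list(itertools.islice(f, batch_size)), [])
        if not large_file:
            for batch in batches:
                yield from parse_batch(batch)
            return
        # For very large files, parse each batch in a separate process. Submitting a
        # batch and immediately waiting on it keeps ordering simple, at the cost of
        # limited overlap between reading and parsing (mirroring the approach in the diff).
        with ProcessPoolExecutor(max_workers=min(os.cpu_count() or 4, 8)) as pool:
            for batch in batches:
                yield from pool.submit(parse_batch, batch).result()


if __name__ == "__main__":
    for record in read_jsonl("findings.jsonl"):  # hypothetical input file
        print(record)
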
@@ -590,11 +754,52 @@ class JSONLScannerIntegration(ScannerIntegration):
         """
         findings_data = self._get_findings_data_from_file(data)
         logger.info(f"Found {len(findings_data)} findings in file: {file}")
+        self.existing_assets = existing_assets
+        asset_id = self._get_asset_id_from_assets()
+        findings_added = self._process_finding_items(findings_data, asset_id, data, findings_file, tracker)
+
+        if findings_added > 0:
+            logger.info(f"Added {findings_added} new findings from file {file}")
+
+    def _get_asset_id_from_assets(self) -> str:
+        """
+        Get the first asset ID from existing assets, or 'unknown' if none exist.
+
+        :return: The first asset ID found or 'unknown'
+        :rtype: str
+        """
+        return list(self.existing_assets.keys())[0] if self.existing_assets else "unknown"
+
+    def _process_finding_items(
+        self,
+        findings_data: List[Dict[str, Any]],
+        asset_id: str,
+        data: Optional[Dict[str, Any]],
+        findings_file: Any,
+        tracker: "CountTracker",
+    ) -> int:
+        """
+        Process individual finding items and write them to the findings file.
+
+        :param List[Dict[str, Any]] findings_data: List of findings data
+        :param str asset_id: Asset ID to associate with findings
+        :param Optional[Dict[str, Any]] data: Source data from the file
+        :param Any findings_file: Open file handle for writing findings
+        :param CountTracker tracker: Tracker for finding counts
+        :return: Number of findings added
+        :rtype: int
+        """
         findings_added = 0

-        asset_id = list(existing_assets.keys())[0] if existing_assets else "unknown"
+        # Create a default asset_id to use only if absolutely necessary
+        default_asset_id = self._get_asset_id_from_assets()
+
+        # Process each finding individually
         for finding_item in findings_data:
-            finding = self.parse_finding(asset_id, data, finding_item)
+            # Let the parse_finding implementation determine the correct asset_identifier
+            # This relies on subclasses implementing parse_finding to extract the right asset ID
+            # from the finding_item directly
+            finding = self.parse_finding(default_asset_id, data, finding_item)
             finding_dict = dataclasses.asdict(finding)
             mapped_finding = self._map_item(finding_dict, "finding_mapping", IntegrationFinding)
             self._validate_fields(mapped_finding, self.required_finding_fields)
@@ -605,12 +810,47 @@ class JSONLScannerIntegration(ScannerIntegration):
                 tracker.existing[finding_key] = True
                 tracker.new_count += 1
                 tracker.total_count += 1
+
+            if self._process_single_finding(finding_item, asset_id, data, findings_file, tracker):
                 findings_added += 1
-            else:
-                logger.debug(f"Finding with key {finding_key} already exists, skipping")

-        if findings_added > 0:
-            logger.info(f"Added {findings_added} new findings from file {file}")
+        return findings_added
+
+    def _process_single_finding(
+        self,
+        finding_item: Dict[str, Any],
+        asset_id: str,
+        data: Optional[Dict[str, Any]],
+        findings_file: Any,
+        tracker: "CountTracker",
+    ) -> bool:
+        """
+        Process a single finding item and write it if it's new.
+
+        :param Dict[str, Any] finding_item: Finding data
+        :param str asset_id: Asset ID to associate with the finding
+        :param Optional[Dict[str, Any]] data: Source data from the file
+        :param Any findings_file: Open file handle for writing findings
+        :param CountTracker tracker: Tracker for finding counts
+        :return: True if the finding was added, False otherwise
+        :rtype: bool
+        """
+        finding = self.parse_finding(asset_id, data, finding_item)
+        finding_dict = dataclasses.asdict(finding)
+        mapped_finding = self._map_item(finding_dict, "finding_mapping", IntegrationFinding)
+        self._validate_fields(mapped_finding, self.required_finding_fields)
+
+        finding_key = self._get_item_key(dataclasses.asdict(mapped_finding), "finding")
+
+        if finding_key in tracker.existing:
+            logger.debug(f"Finding with key {finding_key} already exists, skipping")
+            return False
+
+        self._write_item(findings_file, mapped_finding)
+        tracker.existing[finding_key] = True
+        tracker.new_count += 1
+        tracker.total_count += 1
+        return True

     def _map_item(self, item_dict: Dict[str, Any], mapping_key: str, item_class: Type) -> Any:
         """
@@ -629,16 +869,116 @@ class JSONLScannerIntegration(ScannerIntegration):
             return item_class(**mapped_dict)
         return item_class(**item_dict)

-    def _write_item(self, file_handle: Any, item: Any) -> None:
+    def _write_item(self, file_handle_or_path: Any, item: Any) -> None:
         """
-        Write an item to the specified file handle.
+        Write an item to a JSONL file.

-        :param Any file_handle: Open file handle to write to
-        :param Any item: Item object to write (IntegrationAsset or IntegrationFinding)
-        :rtype: None
+        :param Any file_handle_or_path: Open file handle or file path to write to
+        :param Any item: Item to write (IntegrationAsset or IntegrationFinding)
+        """
+        try:
+            item_dict = self._convert_item_to_dict(item)
+            item_dict = self._ensure_serializable(item_dict)
+            self._write_dict_to_file(file_handle_or_path, item_dict)
+        except Exception as e:
+            logger.error(f"Error writing item: {str(e)}")
+            logger.debug(f"Problem item: {str(item)}")
+            self._write_fallback_record(file_handle_or_path, item, e)
+
+    def _convert_item_to_dict(self, item: Any) -> Dict[str, Any]:
         """
-        file_handle.write(json.dumps(dataclasses.asdict(item)) + "\n")
-        file_handle.flush()
+        Convert an item to a dictionary using the most appropriate method.
+
+        :param Any item: Item to convert
+        :return: Dictionary representation of the item
+        :rtype: Dict[str, Any]
+        """
+        if dataclasses.is_dataclass(item):
+            return dataclasses.asdict(item)
+
+        if hasattr(item, "to_dict") and callable(item.to_dict):
+            return item.to_dict()
+
+        if hasattr(item, "__dict__"):
+            return item.__dict__
+
+        if isinstance(item, dict):
+            return item
+
+        return {"value": str(item)}
+
+    def _write_dict_to_file(self, file_handle_or_path: Any, item_dict: Dict[str, Any]) -> None:
+        """
+        Write a dictionary to a file as JSON.
+
+        :param Any file_handle_or_path: Open file handle or file path
+        :param Dict[str, Any] item_dict: Dictionary to write
+        """
+        json_line = json.dumps(item_dict) + "\n"
+
+        if self._is_file_handle(file_handle_or_path):
+            file_handle_or_path.write(json_line)
+            file_handle_or_path.flush()
+        else:
+            with open(file_handle_or_path, "a") as f:
+                f.write(json_line)
+
+    def _is_file_handle(self, file_handle_or_path: Any) -> bool:
+        """
+        Check if the given object is a file handle.
+
+        :param Any file_handle_or_path: Object to check
+        :return: True if it's a file handle, False otherwise
+        :rtype: bool
+        """
+        return hasattr(file_handle_or_path, "write") and callable(file_handle_or_path.write)
+
+    def _write_fallback_record(self, file_handle_or_path: Any, item: Any, error: Exception) -> None:
+        """
+        Write a simplified fallback record when normal serialization fails.
+
+        :param Any file_handle_or_path: Open file handle or file path
+        :param Any item: Original item that failed to serialize
+        :param Exception error: The exception that occurred
+        """
+        try:
+            simplified = {
+                "error": "Failed to serialize original item",
+                "item_type": str(type(item)),
+                "error_message": str(error),
+            }
+
+            if hasattr(item, "__str__"):
+                simplified["item_string"] = str(item)
+
+            self._write_dict_to_file(file_handle_or_path, simplified)
+            logger.warning("Wrote simplified version of item after serialization error")
+        except Exception as e2:
+            logger.error(f"Failed to write simplified item: {str(e2)}")
+
+    def _ensure_serializable(self, obj: Any) -> Any:
+        """
+        Ensure all values in an object are JSON serializable.
+
+        :param Any obj: Object to make serializable
+        :return: Serializable object
+        """
+        if isinstance(obj, (str, int, float, bool, type(None))):
+            return obj
+        elif isinstance(obj, datetime):
+            return obj.isoformat()
+        elif isinstance(obj, dict):
+            return {k: self._ensure_serializable(v) for k, v in obj.items()}
+        elif isinstance(obj, list):
+            return [self._ensure_serializable(i) for i in obj]
+        elif dataclasses.is_dataclass(obj):
+            return self._ensure_serializable(dataclasses.asdict(obj))
+        elif hasattr(obj, "to_dict") and callable(obj.to_dict):
+            return self._ensure_serializable(obj.to_dict())
+        elif hasattr(obj, "__dict__"):
+            return self._ensure_serializable(obj.__dict__)
+        else:
+            return str(obj)

     def _log_processing_results(self, new_count: int, output_file: str, item_type: str) -> None:
         """
@@ -684,38 +1024,16 @@ class JSONLScannerIntegration(ScannerIntegration):
         :yields: Iterator[IntegrationAsset]
         """
         logger.info("Starting fetch_assets")
-        file_path = kwargs.get("file_path", self.file_path)
-        empty_file = kwargs.get("empty_file", True)
-        process_together = kwargs.get("process_together", False)
-        use_jsonl_file = kwargs.get("use_jsonl_file", False)
-
-        self.create_artifacts_dir()
-
-        if use_jsonl_file:
-            logger.info(f"Using existing JSONL file: {self.ASSETS_FILE}")
-            total_assets = sum(1 for _ in open(self.ASSETS_FILE, "r")) if os.path.exists(self.ASSETS_FILE) else 0
-            self.num_assets_to_process = total_assets
-            logger.info(f"Found {total_assets} assets in existing JSONL file")
-        else:
-            file_path = self._validate_file_path(file_path)
-            if process_together:
-                total_assets, _ = self._process_files(
-                    file_path,
-                    self.ASSETS_FILE,
-                    self.FINDINGS_FILE,
-                    empty_assets_file=empty_file,
-                    empty_findings_file=False,
-                )
-                self.num_assets_to_process = total_assets
-            else:
-                total_assets = self._write_items_to_jsonl(file_path, self.ASSETS_FILE, "asset", empty_file=empty_file)
-                self.num_assets_to_process = total_assets
-            logger.info(f"Total assets to process: {total_assets}")

-        for asset in self._yield_items_from_jsonl(self.ASSETS_FILE, IntegrationAsset):
-            yield asset
-
-        logger.info(f"Assets read from JSONL complete. Total assets identified: {self.num_assets_to_process}")
+        return self._fetch_items(
+            "asset",
+            self.ASSETS_FILE,
+            IntegrationAsset,
+            kwargs.get("file_path", self.file_path),
+            kwargs.get("empty_file", True),
+            kwargs.get("process_together", False),
+            kwargs.get("use_jsonl_file", False),
+        )

     def fetch_findings(self, *args: Any, **kwargs: Any) -> Iterator[IntegrationFinding]:
         """
@@ -730,40 +1048,151 @@ class JSONLScannerIntegration(ScannerIntegration):
         :yields: Iterator[IntegrationFinding]
         """
         logger.info("Starting fetch_findings")
-        file_path = kwargs.get("file_path", self.file_path)
-        empty_file = kwargs.get("empty_file", True)
-        process_together = kwargs.get("process_together", False)
-        use_jsonl_file = kwargs.get("use_jsonl_file", False)

+        return self._fetch_items(
+            "finding",
+            self.FINDINGS_FILE,
+            IntegrationFinding,
+            kwargs.get("file_path", self.file_path),
+            kwargs.get("empty_file", True),
+            kwargs.get("process_together", False),
+            kwargs.get("use_jsonl_file", False),
+        )
+
+    def _fetch_items(
+        self,
+        item_type: str,
+        jsonl_file: str,
+        item_class: Type[ItemType],
+        file_path: Optional[str] = None,
+        empty_file: bool = True,
+        process_together: bool = False,
+        use_jsonl_file: bool = False,
+    ) -> Iterator[ItemType]:
+        """
+        Common method to fetch assets or findings from processed source files.
+
+        :param str item_type: Type of items to fetch ('asset' or 'finding')
+        :param str jsonl_file: Path to the JSONL file containing items
+        :param Type[ItemType] item_class: Class to convert dictionary items to
+        :param Optional[str] file_path: Path to source file or directory
+        :param bool empty_file: Whether to empty the output file before writing
+        :param bool process_together: Whether to process assets and findings together
+        :param bool use_jsonl_file: Whether to use an existing JSONL file
+        :yields: Iterator[ItemType]
+        :rtype: Iterator[ItemType]
+        """
         self.create_artifacts_dir()
+        is_asset = item_type == "asset"
+        counter_attr = "num_assets_to_process" if is_asset else "num_findings_to_process"

         if use_jsonl_file:
-            logger.info(f"Using existing JSONL file: {self.FINDINGS_FILE}")
-            total_findings = sum(1 for _ in open(self.FINDINGS_FILE, "r")) if os.path.exists(self.FINDINGS_FILE) else 0
-            self.num_findings_to_process = total_findings
-            logger.info(f"Found {total_findings} findings in existing JSONL file")
+            logger.info(f"Using existing JSONL file: {jsonl_file}")
+            total_items = sum(1 for _ in open(jsonl_file, "r")) if os.path.exists(jsonl_file) else 0
+            setattr(self, counter_attr, total_items)
+            logger.info(f"Found {total_items} {item_type}s in existing JSONL file")
         else:
             file_path = self._validate_file_path(file_path)
-            if process_together:
-                _, total_findings = self._process_files(
-                    file_path,
-                    self.ASSETS_FILE,
-                    self.FINDINGS_FILE,
-                    empty_assets_file=False,
-                    empty_findings_file=empty_file,
-                )
-                self.num_findings_to_process = total_findings
-            else:
-                total_findings = self._write_items_to_jsonl(
-                    file_path, self.FINDINGS_FILE, "finding", empty_file=empty_file
-                )
-                self.num_findings_to_process = total_findings
-            logger.info(f"Total findings to process: {total_findings}")
+            total_items = self._process_source_files(
+                file_path, jsonl_file, item_type, empty_file, process_together, counter_attr
+            )
+            logger.info(f"Total {item_type}s to process: {total_items}")
+
+        # Yield items from the JSONL file
+        for item in self._yield_items_from_jsonl(jsonl_file, item_class):
+            yield item
+
+        logger.info(
+            f"{item_type.capitalize()}s read from JSONL complete. Total {item_type}s identified: {getattr(self, counter_attr)}"
+        )
+
+    def _process_source_files(
+        self,
+        file_path: str,
+        jsonl_file: str,
+        item_type: str,
+        empty_file: bool,
+        process_together: bool,
+        counter_attr: str,
+    ) -> int:
+        """
+        Process source files and return the total count of items.
+
+        :param str file_path: Path to source file or directory
+        :param str jsonl_file: Path to the JSONL file to write
+        :param str item_type: Type of items to process ('asset' or 'finding')
+        :param bool empty_file: Whether to empty output files
+        :param bool process_together: Whether to process assets and findings together
+        :param str counter_attr: Attribute name for storing the count
+        :return: Total count of items
+        :rtype: int
+        """
+        is_asset = item_type == "asset"
+
+        if process_together:
+            # Handle joint processing of assets and findings
+            asset_count, finding_count = self._process_files(
+                file_path,
+                self.ASSETS_FILE,
+                self.FINDINGS_FILE,
+                empty_assets_file=empty_file if is_asset else False,
+                empty_findings_file=empty_file if not is_asset else False,
+            )
+            total_items = asset_count if is_asset else finding_count
+        else:
+            # Process just one type
+            total_items = self._write_items_to_jsonl(file_path, jsonl_file, item_type, empty_file=empty_file)

-        for finding in self._yield_items_from_jsonl(self.FINDINGS_FILE, IntegrationFinding):
-            yield finding
+        setattr(self, counter_attr, total_items)
+        return total_items

-        logger.info(f"Findings read from JSONL complete. Total findings identified: {self.num_findings_to_process}")
+    def parse_asset(self, file_path: Union[Path, str], data: Dict[str, Any]) -> IntegrationAsset:
+        """
+        Parse a single asset from source data.
+
+        Subclasses must implement this method to parse assets from their specific file format.
+
+        :param Union[Path, str] file_path: Path to the file containing the asset data
+        :param Dict[str, Any] data: The parsed data
+        :return: IntegrationAsset object
+        :rtype: IntegrationAsset
+        """
+        raise NotImplementedError("Subclasses must implement parse_asset")
+
+    def parse_finding(self, asset_identifier: str, data: Dict[str, Any], item: Dict[str, Any]) -> IntegrationFinding:
+        """Parse a single finding from source data.
+
+        Subclasses must implement this method to parse findings from their specific file format.
+
+        :param str asset_identifier: The identifier of the asset this finding belongs to
+        :param Dict[str, Any] data: The asset data
+        :param Dict[str, Any] item: The finding data
+        :return: IntegrationFinding object
+        :rtype: IntegrationFinding
+        """
+        raise NotImplementedError("Subclasses must implement parse_finding")
+
+    def is_valid_file(self, data: Any, file_path: Union[Path, str]) -> Tuple[bool, Optional[Dict[str, Any]]]:
+        """
+        Check if the provided data is valid for processing.
+
+        This default implementation ensures the data is a non-empty dictionary.
+        Subclasses should override this to implement specific validation logic.
+
+        :param Any data: Data parsed from the file to validate
+        :param Union[Path, str] file_path: Path to the file being processed
+        :return: Tuple of (is_valid, data) where is_valid indicates validity and data is the validated content or None
+        :rtype: Tuple[bool, Optional[Dict[str, Any]]]
+        """
+        if not isinstance(data, dict):
+            logger.warning(f"Data is not a dictionary for file {file_path}, skipping")
+            return False, None
+
+        if not data:
+            logger.warning(f"Data is an empty dictionary for file {file_path}, skipping")
+            return False, None
+
+        return True, data

     def fetch_assets_and_findings(
         self, file_path: str = None, empty_files: bool = True
@@ -923,51 +1352,3 @@ class JSONLScannerIntegration(ScannerIntegration):
             is_valid, validated_data = self.is_valid_file(data, file)
             if is_valid and validated_data is not None:
                 yield file, validated_data
-
-    def parse_asset(self, file_path: Union[Path, str], data: Dict[str, Any]) -> IntegrationAsset:
-        """
-        Parse a single asset from source data.
-
-        Subclasses must implement this method to parse assets from their specific file format.
-
-        :param Union[Path, str] file_path: Path to the file containing the asset data
-        :param Dict[str, Any] data: The parsed data
-        :return: IntegrationAsset object
-        :rtype: IntegrationAsset
-        """
-        raise NotImplementedError("Subclasses must implement parse_asset")
-
-    def parse_finding(self, asset_identifier: str, data: Dict[str, Any], item: Dict[str, Any]) -> IntegrationFinding:
-        """Parse a single finding from source data.
-
-        Subclasses must implement this method to parse findings from their specific file format.
-
-        :param str asset_identifier: The identifier of the asset this finding belongs to
-        :param Dict[str, Any] data: The asset data
-        :param Dict[str, Any] item: The finding data
-        :return: IntegrationFinding object
-        :rtype: IntegrationFinding
-        """
-        raise NotImplementedError("Subclasses must implement parse_finding")
-
-    def is_valid_file(self, data: Any, file_path: Union[Path, str]) -> Tuple[bool, Optional[Dict[str, Any]]]:
-        """
-        Check if the provided data is valid for processing.
-
-        This default implementation ensures the data is a non-empty dictionary.
-        Subclasses should override this to implement specific validation logic.
-
-        :param Any data: Data parsed from the file to validate
-        :param Union[Path, str] file_path: Path to the file being processed
-        :return: Tuple of (is_valid, data) where is_valid indicates validity and data is the validated content or None
-        :rtype: Tuple[bool, Optional[Dict[str, Any]]]
-        """
-        if not isinstance(data, dict):
-            logger.warning(f"Data is not a dictionary for file {file_path}, skipping")
-            return False, None
-
-        if not data:
-            logger.warning(f"Data is an empty dictionary for file {file_path}, skipping")
-            return False, None
-
-        return True, data