PyPI - codescanai - Versions diffs - 0.1.0__tar.gz - Mend

codescanai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

codescanai-0.1.0/LICENSE +21 -0
codescanai-0.1.0/MANIFEST.in +4 -0
codescanai-0.1.0/PKG-INFO +152 -0
codescanai-0.1.0/README.md +128 -0
codescanai-0.1.0/codescanai.egg-info/PKG-INFO +152 -0
codescanai-0.1.0/codescanai.egg-info/SOURCES.txt +25 -0
codescanai-0.1.0/codescanai.egg-info/dependency_links.txt +1 -0
codescanai-0.1.0/codescanai.egg-info/entry_points.txt +2 -0
codescanai-0.1.0/codescanai.egg-info/requires.txt +10 -0
codescanai-0.1.0/codescanai.egg-info/top_level.txt +1 -0
codescanai-0.1.0/core/__init__.py +0 -0
codescanai-0.1.0/core/code_scanner/__init__.py +0 -0
codescanai-0.1.0/core/code_scanner/code_scanner.py +80 -0
codescanai-0.1.0/core/providers/__init__.py +0 -0
codescanai-0.1.0/core/providers/base_ai_provider.py +20 -0
codescanai-0.1.0/core/providers/custom_ai_provider.py +57 -0
codescanai-0.1.0/core/providers/google_gemini_ai_provider.py +39 -0
codescanai-0.1.0/core/providers/open_ai_provider.py +46 -0
codescanai-0.1.0/core/runner.py +32 -0
codescanai-0.1.0/core/utils/__init__.py +0 -0
codescanai-0.1.0/core/utils/argument_parser.py +62 -0
codescanai-0.1.0/core/utils/code_summary_extractor.py +51 -0
codescanai-0.1.0/core/utils/file_extractor.py +93 -0
codescanai-0.1.0/core/utils/provider_creator.py +39 -0
codescanai-0.1.0/pyproject.toml +37 -0
codescanai-0.1.0/requirements.txt +5 -0
codescanai-0.1.0/setup.cfg +4 -0

codescanai-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024 codescan-ai
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

codescanai-0.1.0/MANIFEST.in ADDED Viewed

@@ -0,0 +1,4 @@
+include requirements.txt
+include README.md
+include LICENSE
+include CHANGELOG

codescanai-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,152 @@
+Metadata-Version: 2.1
+Name: codescanai
+Version: 0.1.0
+Summary: A CLI tool that scans your codebases for security vulnerabilities powered by powerful AI models.
+Author-email: Caleb Abhulimhen <calebabhulimhen@gmail.com>
+License: MIT
+Project-URL: Homepage, https://github.com/codescan-ai/codescan
+Keywords: code scanning,cli,github action,security,vulnerabilities check
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: openai>=1.40.6
+Requires-Dist: PyGithub
+Requires-Dist: requests
+Requires-Dist: google-generativeai
+Requires-Dist: ipython
+Provides-Extra: dev
+Requires-Dist: pylint; extra == "dev"
+Requires-Dist: black; extra == "dev"
+Requires-Dist: isort; extra == "dev"
+# CodeScanAI
+CodeScanAI utilizes a variety of AI models, including OpenAI, Gemini, and custom self-hosted AI servers, to scan your codebase for bad development practices. It is currently configured to catch potential security vulnerabilities, but will be extended to other use cases in the future.
+It has been designed to enable  seamless integration into CI/CD pipelines like GitHub Actions, or can be used via a simple command. CodeScanAI enables developers to automatically detect potential security issues in their code throughout the development process. [Try it out today](#getting-started)!
+## Features
+- **Support for Multiple AI Models:**
+  - **OpenAI Integration:**  Utilize OpenAI's advanced models, such as GPT-4, to scan your code and identify potential security vulnerabilities, OR
+  - **Gemini Integration:** Tap into Gemini's expertise to analyze your code for security risks, OR
+  - **Custom AI Server Integration:** Connect with self-hosted or private AI servers for security scans, offering fully customizable and self-managed AI solutions.
+- **CI/CD Integration:**
+  - Seamlessly integrate the CLI tool into GitHub Actions for automated security vulnerability scanning on every pull request.
+  - Supports targeted scans on specific branches or changes within a repository.
+- **Flexible Scanning Options:**
+  - **Full Directory Scans:** Perform a comprehensive security analysis by scanning all files within a directory.
+  - **Changes Only Scan:** Only scan those files that have changed since the last scan.
+  - **PR-Specific Scans:** Target files modified in a specific pull request to optimize the scanning process and reduce overhead.
+## Getting Started
+### Prerequisites
+- Python 3.10 or higher
+- API keys for the supported AI models:
+  - OpenAI API key
+  - Gemini API key
+  - Access to a custom AI server (host, port, and optional token)
+- Set an environment variable for your API key(s).
+```bash
+export OPENAI_API_KEY='your_openai_api_key'
+```
+```bash
+export GEMINI_API_KEY='your_gemini_api_key'
+```
+### Installation
+#### Option 1: Install via pip
+You can install the tool directly from the repository using pip:
+```bash
+pip install codescanai
+```
+This will allow you to use the `codescanai` command directly in your terminal.
+#### Option 2: Clone the Repository
+If you prefer to clone the repository and install the dependencies manually:
+```bash
+git clone https://github.com/codescan-ai/codescan.git
+cd codescan
+pip install -r requirements.txt
+```
+### Usage
+#### Scanning files in  your current directory
+```bash
+codescanai --provider openai
+```
+OR if you're cloning the repository,
+```bash
+python3 -m core.runner --provider openai
+```
+#### Scanning with a Custom AI Server
+To scan code using a custom AI server:
+```bash
+guardai --provider custom --host http://localhost --port 5000 --token your_token --directory path/to/your/code
+```
+### Supported arguments
+| name           | description                                               | required | default        |
+| -------------- | --------------------------------------------------------- | -------- | -------------- |
+| `provider`     | <p>AI provider</p>                                        | `true`   | `""`           |
+| `model`        | <p>AI model to use</p>                                    | `false`  | `""`           |
+| `directory`    | <p>Directory to scan</p>                                  | `false`  | `.`            |
+| `changes_only` | <p>Scan only changed files</p>                            | `false`  | `false`        |
+| `repo`         | <p>GitHub repository</p>                                  | `false`  | `""`           |
+| `pr_number`    | <p>Pull request number</p>                                | `false`  | `""`           |
+| `github_token` | <p>GitHub API token</p>                                   | `false`  | `""`           |
+| `host`         | <p>Custom AI server host</p>                              | `false`  | `""`           |
+| `port`         | <p>Custom AI server port</p>                              | `false`  | `""`           |
+| `token`        | <p>Token for authenticating with the custom AI server</p> | `false`  | `""`           |
+| `endpoint`     | <p>API endpoint for the custom server</p>                 | `false`  | `/api/v1/scan` |
+### Supported AI Providers
+- **OpenAI:** Utilizes GPT models for in-depth security analysis.
+- **Gemini:** Delivers strong security insights through Gemini's advanced capabilities.
+- **Custom:** Connects with self-hosted or private AI servers for fully customizable solutions.
+### Limitations
+- **Large number of files:** We currently do not support a scalable way to scan a large number of files in a single run. Depending on the capacity of your AI Provider, you might run into a `rate_limit_exceeded` error. To work around this, you can create a custom solution that breaks down the number of files for each run.
+## Future Work
+- **Batch Processing:** To address the limitation above, a future version will implement batch processing for a large number of files.
+- **Caching Implementation:** A caching mechanism to store results of previously scanned files, reducing the number of API calls and optimizing performance.
+- **Expanded Git Provider Support:** The tool is currently integrated with GitHub for PR-based scanning, future plans include extending support to other Git providers like GitLab, Bitbucket, and Azure Repos.
+- **Expanded Development tools:** This will be a plan to expand this tool to be accessible in other development environments. For example, as a VSCode extension.
+## Contributing
+Contributions are welcome! Please fork the repository and submit a pull request with your improvements.
+## License
+This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.

codescanai-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,128 @@
+# CodeScanAI
+CodeScanAI utilizes a variety of AI models, including OpenAI, Gemini, and custom self-hosted AI servers, to scan your codebase for bad development practices. It is currently configured to catch potential security vulnerabilities, but will be extended to other use cases in the future.
+It has been designed to enable  seamless integration into CI/CD pipelines like GitHub Actions, or can be used via a simple command. CodeScanAI enables developers to automatically detect potential security issues in their code throughout the development process. [Try it out today](#getting-started)!
+## Features
+- **Support for Multiple AI Models:**
+  - **OpenAI Integration:**  Utilize OpenAI's advanced models, such as GPT-4, to scan your code and identify potential security vulnerabilities, OR
+  - **Gemini Integration:** Tap into Gemini's expertise to analyze your code for security risks, OR
+  - **Custom AI Server Integration:** Connect with self-hosted or private AI servers for security scans, offering fully customizable and self-managed AI solutions.
+- **CI/CD Integration:**
+  - Seamlessly integrate the CLI tool into GitHub Actions for automated security vulnerability scanning on every pull request.
+  - Supports targeted scans on specific branches or changes within a repository.
+- **Flexible Scanning Options:**
+  - **Full Directory Scans:** Perform a comprehensive security analysis by scanning all files within a directory.
+  - **Changes Only Scan:** Only scan those files that have changed since the last scan.
+  - **PR-Specific Scans:** Target files modified in a specific pull request to optimize the scanning process and reduce overhead.
+## Getting Started
+### Prerequisites
+- Python 3.10 or higher
+- API keys for the supported AI models:
+  - OpenAI API key
+  - Gemini API key
+  - Access to a custom AI server (host, port, and optional token)
+- Set an environment variable for your API key(s).
+```bash
+export OPENAI_API_KEY='your_openai_api_key'
+```
+```bash
+export GEMINI_API_KEY='your_gemini_api_key'
+```
+### Installation
+#### Option 1: Install via pip
+You can install the tool directly from the repository using pip:
+```bash
+pip install codescanai
+```
+This will allow you to use the `codescanai` command directly in your terminal.
+#### Option 2: Clone the Repository
+If you prefer to clone the repository and install the dependencies manually:
+```bash
+git clone https://github.com/codescan-ai/codescan.git
+cd codescan
+pip install -r requirements.txt
+```
+### Usage
+#### Scanning files in  your current directory
+```bash
+codescanai --provider openai
+```
+OR if you're cloning the repository,
+```bash
+python3 -m core.runner --provider openai
+```
+#### Scanning with a Custom AI Server
+To scan code using a custom AI server:
+```bash
+guardai --provider custom --host http://localhost --port 5000 --token your_token --directory path/to/your/code
+```
+### Supported arguments
+| name           | description                                               | required | default        |
+| -------------- | --------------------------------------------------------- | -------- | -------------- |
+| `provider`     | <p>AI provider</p>                                        | `true`   | `""`           |
+| `model`        | <p>AI model to use</p>                                    | `false`  | `""`           |
+| `directory`    | <p>Directory to scan</p>                                  | `false`  | `.`            |
+| `changes_only` | <p>Scan only changed files</p>                            | `false`  | `false`        |
+| `repo`         | <p>GitHub repository</p>                                  | `false`  | `""`           |
+| `pr_number`    | <p>Pull request number</p>                                | `false`  | `""`           |
+| `github_token` | <p>GitHub API token</p>                                   | `false`  | `""`           |
+| `host`         | <p>Custom AI server host</p>                              | `false`  | `""`           |
+| `port`         | <p>Custom AI server port</p>                              | `false`  | `""`           |
+| `token`        | <p>Token for authenticating with the custom AI server</p> | `false`  | `""`           |
+| `endpoint`     | <p>API endpoint for the custom server</p>                 | `false`  | `/api/v1/scan` |
+### Supported AI Providers
+- **OpenAI:** Utilizes GPT models for in-depth security analysis.
+- **Gemini:** Delivers strong security insights through Gemini's advanced capabilities.
+- **Custom:** Connects with self-hosted or private AI servers for fully customizable solutions.
+### Limitations
+- **Large number of files:** We currently do not support a scalable way to scan a large number of files in a single run. Depending on the capacity of your AI Provider, you might run into a `rate_limit_exceeded` error. To work around this, you can create a custom solution that breaks down the number of files for each run.
+## Future Work
+- **Batch Processing:** To address the limitation above, a future version will implement batch processing for a large number of files.
+- **Caching Implementation:** A caching mechanism to store results of previously scanned files, reducing the number of API calls and optimizing performance.
+- **Expanded Git Provider Support:** The tool is currently integrated with GitHub for PR-based scanning, future plans include extending support to other Git providers like GitLab, Bitbucket, and Azure Repos.
+- **Expanded Development tools:** This will be a plan to expand this tool to be accessible in other development environments. For example, as a VSCode extension.
+## Contributing
+Contributions are welcome! Please fork the repository and submit a pull request with your improvements.
+## License
+This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.

codescanai-0.1.0/codescanai.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,152 @@
+Metadata-Version: 2.1
+Name: codescanai
+Version: 0.1.0
+Summary: A CLI tool that scans your codebases for security vulnerabilities powered by powerful AI models.
+Author-email: Caleb Abhulimhen <calebabhulimhen@gmail.com>
+License: MIT
+Project-URL: Homepage, https://github.com/codescan-ai/codescan
+Keywords: code scanning,cli,github action,security,vulnerabilities check
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: openai>=1.40.6
+Requires-Dist: PyGithub
+Requires-Dist: requests
+Requires-Dist: google-generativeai
+Requires-Dist: ipython
+Provides-Extra: dev
+Requires-Dist: pylint; extra == "dev"
+Requires-Dist: black; extra == "dev"
+Requires-Dist: isort; extra == "dev"
+# CodeScanAI
+CodeScanAI utilizes a variety of AI models, including OpenAI, Gemini, and custom self-hosted AI servers, to scan your codebase for bad development practices. It is currently configured to catch potential security vulnerabilities, but will be extended to other use cases in the future.
+It has been designed to enable  seamless integration into CI/CD pipelines like GitHub Actions, or can be used via a simple command. CodeScanAI enables developers to automatically detect potential security issues in their code throughout the development process. [Try it out today](#getting-started)!
+## Features
+- **Support for Multiple AI Models:**
+  - **OpenAI Integration:**  Utilize OpenAI's advanced models, such as GPT-4, to scan your code and identify potential security vulnerabilities, OR
+  - **Gemini Integration:** Tap into Gemini's expertise to analyze your code for security risks, OR
+  - **Custom AI Server Integration:** Connect with self-hosted or private AI servers for security scans, offering fully customizable and self-managed AI solutions.
+- **CI/CD Integration:**
+  - Seamlessly integrate the CLI tool into GitHub Actions for automated security vulnerability scanning on every pull request.
+  - Supports targeted scans on specific branches or changes within a repository.
+- **Flexible Scanning Options:**
+  - **Full Directory Scans:** Perform a comprehensive security analysis by scanning all files within a directory.
+  - **Changes Only Scan:** Only scan those files that have changed since the last scan.
+  - **PR-Specific Scans:** Target files modified in a specific pull request to optimize the scanning process and reduce overhead.
+## Getting Started
+### Prerequisites
+- Python 3.10 or higher
+- API keys for the supported AI models:
+  - OpenAI API key
+  - Gemini API key
+  - Access to a custom AI server (host, port, and optional token)
+- Set an environment variable for your API key(s).
+```bash
+export OPENAI_API_KEY='your_openai_api_key'
+```
+```bash
+export GEMINI_API_KEY='your_gemini_api_key'
+```
+### Installation
+#### Option 1: Install via pip
+You can install the tool directly from the repository using pip:
+```bash
+pip install codescanai
+```
+This will allow you to use the `codescanai` command directly in your terminal.
+#### Option 2: Clone the Repository
+If you prefer to clone the repository and install the dependencies manually:
+```bash
+git clone https://github.com/codescan-ai/codescan.git
+cd codescan
+pip install -r requirements.txt
+```
+### Usage
+#### Scanning files in  your current directory
+```bash
+codescanai --provider openai
+```
+OR if you're cloning the repository,
+```bash
+python3 -m core.runner --provider openai
+```
+#### Scanning with a Custom AI Server
+To scan code using a custom AI server:
+```bash
+guardai --provider custom --host http://localhost --port 5000 --token your_token --directory path/to/your/code
+```
+### Supported arguments
+| name           | description                                               | required | default        |
+| -------------- | --------------------------------------------------------- | -------- | -------------- |
+| `provider`     | <p>AI provider</p>                                        | `true`   | `""`           |
+| `model`        | <p>AI model to use</p>                                    | `false`  | `""`           |
+| `directory`    | <p>Directory to scan</p>                                  | `false`  | `.`            |
+| `changes_only` | <p>Scan only changed files</p>                            | `false`  | `false`        |
+| `repo`         | <p>GitHub repository</p>                                  | `false`  | `""`           |
+| `pr_number`    | <p>Pull request number</p>                                | `false`  | `""`           |
+| `github_token` | <p>GitHub API token</p>                                   | `false`  | `""`           |
+| `host`         | <p>Custom AI server host</p>                              | `false`  | `""`           |
+| `port`         | <p>Custom AI server port</p>                              | `false`  | `""`           |
+| `token`        | <p>Token for authenticating with the custom AI server</p> | `false`  | `""`           |
+| `endpoint`     | <p>API endpoint for the custom server</p>                 | `false`  | `/api/v1/scan` |
+### Supported AI Providers
+- **OpenAI:** Utilizes GPT models for in-depth security analysis.
+- **Gemini:** Delivers strong security insights through Gemini's advanced capabilities.
+- **Custom:** Connects with self-hosted or private AI servers for fully customizable solutions.
+### Limitations
+- **Large number of files:** We currently do not support a scalable way to scan a large number of files in a single run. Depending on the capacity of your AI Provider, you might run into a `rate_limit_exceeded` error. To work around this, you can create a custom solution that breaks down the number of files for each run.
+## Future Work
+- **Batch Processing:** To address the limitation above, a future version will implement batch processing for a large number of files.
+- **Caching Implementation:** A caching mechanism to store results of previously scanned files, reducing the number of API calls and optimizing performance.
+- **Expanded Git Provider Support:** The tool is currently integrated with GitHub for PR-based scanning, future plans include extending support to other Git providers like GitLab, Bitbucket, and Azure Repos.
+- **Expanded Development tools:** This will be a plan to expand this tool to be accessible in other development environments. For example, as a VSCode extension.
+## Contributing
+Contributions are welcome! Please fork the repository and submit a pull request with your improvements.
+## License
+This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.

codescanai-0.1.0/codescanai.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,25 @@
+LICENSE
+MANIFEST.in
+README.md
+pyproject.toml
+requirements.txt
+codescanai.egg-info/PKG-INFO
+codescanai.egg-info/SOURCES.txt
+codescanai.egg-info/dependency_links.txt
+codescanai.egg-info/entry_points.txt
+codescanai.egg-info/requires.txt
+codescanai.egg-info/top_level.txt
+core/__init__.py
+core/runner.py
+core/code_scanner/__init__.py
+core/code_scanner/code_scanner.py
+core/providers/__init__.py
+core/providers/base_ai_provider.py
+core/providers/custom_ai_provider.py
+core/providers/google_gemini_ai_provider.py
+core/providers/open_ai_provider.py
+core/utils/__init__.py
+core/utils/argument_parser.py
+core/utils/code_summary_extractor.py
+core/utils/file_extractor.py
+core/utils/provider_creator.py

codescanai-0.1.0/codescanai.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

codescanai-0.1.0/codescanai.egg-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ guardai = core.runner:main

codescanai-0.1.0/codescanai.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,10 @@
+openai>=1.40.6
+PyGithub
+requests
+google-generativeai
+ipython
+[dev]
+pylint
+black
+isort

codescanai-0.1.0/codescanai.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ core

codescanai-0.1.0/core/__init__.py ADDED Viewed

File without changes

codescanai-0.1.0/core/code_scanner/__init__.py ADDED Viewed

File without changes

codescanai-0.1.0/core/code_scanner/code_scanner.py ADDED Viewed

@@ -0,0 +1,80 @@
+"""
+This module defines a class that scans/analyse code based on the input args using AI providers.
+This is the brain of this application and all core logic will be referenced here.
+"""
+import logging
+import os
+from core.utils.code_summary_extractor import (
+    generate_code_summary,
+    read_files_and_extract_code_summary,
+)
+from core.utils.file_extractor import get_changed_files_in_pr, get_changed_files_in_repo
+from core.utils.provider_creator import init_provider
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+class CodeScanner:
+    """
+    This class defines the logic for scanning source code based on the context provided in **args**
+    """
+    def __init__(self, args) -> None:
+        """
+        Stores the parsed arguments and builds the AI provider from them.
+        """
+        self.args = args
+        self.provider = init_provider(
+            args.provider, args.model, args.host, args.port, args.token, args.endpoint
+        )
+    def scan(self):
+        """
+        Scans the code based on the provided arguments and AI client.
+        Dispatches to a changes-only scan when ``args.changes_only`` is truthy,
+        otherwise scans every file under ``args.directory``.
+        """
+        if self.args.changes_only:
+            # Only scan new changes. This is supported in Git repositories only(for now).
+            return self._scan_changes()
+        return self._scan_files()
+    def _scan_changes(self):
+        """
+        Scans only the files that have been changed in the specified directory or PR.
+        The PR file list is used when both a repository and a PR number were supplied;
+        otherwise the changed files of ``args.directory`` are used. A ``ValueError``
+        raised while collecting the files is logged and returned as its message.
+        """
+        try:
+            if self._is_repo_valid() and self._is_pr_number_valid():
+                changed_files = get_changed_files_in_pr(
+                    self.args.repo, self.args.pr_number, self.args.github_token
+                )
+            else:
+                changed_files = get_changed_files_in_repo(self.args.directory)
+        except ValueError as e:
+            logging.error(e)
+            return str(e)
+        if not changed_files:
+            logging.info("No changes detected in the directory.")
+            return "No changes detected in the directory."
+        code_summary = generate_code_summary(self.args.directory, changed_files)
+        return self.provider.scan_code(code_summary)
+    def _scan_files(self):
+        """
+        Scans all files in the specified directory.
+        Walks ``args.directory`` recursively and sends the summary of every file
+        found to the provider.
+        """
+        file_paths = []
+        for root, _, files in os.walk(self.args.directory):
+            for file in files:
+                file_paths.append(os.path.join(root, file))
+        code_summary = read_files_and_extract_code_summary(file_paths)
+        return self.provider.scan_code(code_summary)
+    def _is_repo_valid(self):
+        """Returns True when a non-empty repository name was supplied."""
+        # Truthiness instead of len(): an unset option (None) means "no repo"
+        # rather than a TypeError that _scan_changes would not catch.
+        return bool(self.args.repo)
+    def _is_pr_number_valid(self):
+        """Returns True when a positive pull request number was supplied."""
+        pr_number = self.args.pr_number
+        # Reject falsy values (None, "", 0) before comparing, so an unset
+        # option does not raise TypeError on the `> 0` comparison.
+        return bool(pr_number) and pr_number > 0

codescanai-0.1.0/core/providers/__init__.py ADDED Viewed

File without changes

codescanai-0.1.0/core/providers/base_ai_provider.py ADDED Viewed

@@ -0,0 +1,20 @@
+"""
+This module defines an abstract class to represent an AI provider.
+Currently supported AI providers will implement this class.
+"""
+class BaseAIProvider:
+    """Common interface that every concrete AI provider overrides."""
+    def __init__(self):
+        """Refuses direct construction; concrete providers define their own initializer."""
+        message = (
+            "BaseAIProvider is an abstract class and cannot be instantiated directly."
+        )
+        raise NotImplementedError(message)
+    def scan_code(self, code_summary):
+        """Placeholder for the provider-specific scan of ``code_summary``; always raises here."""
+        message = "Each AI provider must implement the `scan_code` method."
+        raise NotImplementedError(message)

codescanai-0.1.0/core/providers/custom_ai_provider.py ADDED Viewed

@@ -0,0 +1,57 @@
+"""
+This module defines a CustomAI Provider, and implement the BaseAIProvider abstract class.
+With this users can connect to their locally hosted AI provider.
+"""
+import requests
+from core.providers.base_ai_provider import BaseAIProvider
+class CustomAIProvider(BaseAIProvider):
+    """Provider for interacting with a custom AI server."""
+    def __init__(self, model, host, port, token=None, endpoint="/api/v1/scan"):
+        """
+        Initializes the custom AI provider with the given parameters.
+        The request URL is assembled as ``{host}:{port}{endpoint}``, so ``host``
+        has to include the scheme (e.g. ``http://localhost``).
+        """
+        self.model = model
+        self.host = host
+        self.port = port
+        self.token = token
+        self.endpoint = endpoint
+        self.base_url = f"{host}:{port}{endpoint}"
+    def scan_code(self, code_summary):
+        """
+        Scans the code using the custom AI server.
+        Posts the review prompt followed by ``code_summary`` to ``self.base_url`` and
+        returns the ``message.content`` field of the JSON reply. Returns
+        "No response content." when that field is missing, and an error description
+        when the request fails or the reply is not valid JSON.
+        """
+        # Only send an Authorization header when a token was supplied; an empty
+        # header value is not a usable credential.
+        headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
+        payload = {
+            "model": self.model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": """You are an experienced application security specialist, entrusted with the task of
+                    carefully reviewing the following code for potential security vulnerabilities. Your objective
+                    is to conduct a comprehensive analysis, identifying any weak points that could be exploited
+                    by malicious actors. Once identified, provide clear and actionable recommendations to
+                    mitigate these risks and strengthen the overall security posture of the application.
+                    Focus on issues that could compromise the integrity, confidentiality, or availability
+                    of the system, and ensure that your suggestions are practical and implementable.
+                    Here is the code you need to review:
+                    """
+                    + code_summary,
+                },
+            ],
+        }
+        try:
+            response = requests.post(
+                self.base_url, json=payload, headers=headers, timeout=120
+            )
+            response.raise_for_status()
+            body = response.json()
+        # ValueError covers requests versions whose JSON decode error is not a RequestException.
+        except (requests.exceptions.RequestException, ValueError) as e:
+            return f"Error occurred while connecting to the server: {e}"
+        # Tolerate replies whose top level or "message" entry is not an object.
+        message = body.get("message") if isinstance(body, dict) else None
+        if not isinstance(message, dict):
+            return "No response content."
+        return message.get("content", "No response content.")

codescanai-0.1.0/core/providers/google_gemini_ai_provider.py ADDED Viewed

@@ -0,0 +1,39 @@
+"""
+This module defines the GoogleGemini AI Provider.
+This is one of the supported AI Providers, and implement the BaseAIProvider abstract class.
+"""
+import os
+import google.generativeai as genai
+from core.providers.base_ai_provider import BaseAIProvider
+class GoogleGeminiAIProvider(BaseAIProvider):
+    """AI provider backed by the Google Generative AI (Gemini) API."""
+    def __init__(self, model):
+        """Reads GEMINI_API_KEY from the environment and prepares the named model."""
+        key = os.environ.get("GEMINI_API_KEY")
+        self.api_key = key
+        if key is None or key == "":
+            raise ValueError("Gemini API key is not set in the environment.")
+        genai.configure(api_key=key)
+        self.model = genai.GenerativeModel(model)
+    def scan_code(self, code_summary):
+        """Sends the review prompt plus ``code_summary`` to Gemini; returns the reply text or an error string."""
+        try:
+            prompt = (
+                """You are a specialist in application security, known for your ability to
+                analyze complex codebases and uncover hidden vulnerabilities. You will be
+                presented with the full code of an application. Your mission is to conduct
+                a thorough security review, identifying potential weaknesses and offering
+                actionable recommendations for improvement. Prioritize the most significant
+                security risks that could compromise the integrity of the application.
+                Here is the code:"""
+                + code_summary
+            )
+            reply = self.model.generate_content(prompt)
+            return reply.text
+        except Exception as e:  # pylint: disable=W0718
+            return f"Error occurred: {e}"

codescanai-0.1.0/core/providers/open_ai_provider.py ADDED Viewed

@@ -0,0 +1,46 @@
+"""
+This module defines the OpenAI Provider.
+This is one of the supported AI Providers, and implements the BaseAIProvider abstract class.
+"""
+import os
+import openai
+from core.providers.base_ai_provider import BaseAIProvider
+class OpenAIProvider(BaseAIProvider):
+    """Provider that interacts with the OpenAI API."""
+    def __init__(self, model):
+        """Initializes the OpenAIProvider with the given model."""
+        self.api_key = os.getenv("OPENAI_API_KEY")
+        if not self.api_key:
+            raise ValueError("OpenAI API key is not set in the environment.")
+        self.client = openai.OpenAI(api_key=self.api_key)
+        self.model = model
+    def scan_code(self, code_summary):
+        """Scans the code using OpenAI."""
+        try:
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": """You are an expert in software security analysis,
+                    adept at identifying and explaining potential vulnerabilities in code. You will be
+                    given complete code snippets from various applications. Your task is to analyze
+                    the provided code, pinpoint potential security risks, and offer clear suggestions
+                    for enhancing the application's security posture. Focus on the critical issues that
+                    could impact the overall security of the application.""",
+                    },
+                    {"role": "user", "content": code_summary},
+                ],
+            )
+            return response.choices[0].message.content
+        except Exception as e:  # pylint: disable=W0718
+            return f"Error occurred: {e}"

codescanai-0.1.0/core/runner.py ADDED Viewed

@@ -0,0 +1,32 @@
+"""
+This is the runner of the codescan-ai CLI tool.
+"""
+from IPython.display import display_markdown
+from core.code_scanner.code_scanner import CodeScanner
+from core.utils.argument_parser import parse_arguments
+def format_as_markdown(result):
+    """
+    Formats the scan result as Markdown.
+    """
+    output = "## Code Security Analysis Results\n"
+    output += result
+    return output
+def main():
+    """
+    Main entry point for the CLI. Parses arguments, calls the centralized CodeScanner
+    (which performs the scanning by using the AI provider in *args),
+    and displays the results.
+    """
+    args = parse_arguments()
+    scan_result = CodeScanner(args).scan()
+    display_markdown(format_as_markdown(scan_result))
+if __name__ == "__main__":
+    main()

codescanai-0.1.0/core/utils/__init__.py ADDED Viewed

File without changes

codescanai-0.1.0/core/utils/argument_parser.py ADDED Viewed

@@ -0,0 +1,62 @@
+"""
+This module provides util methods for understanding and parsing the arguments sent by user in the CLI.
+"""
+import argparse
+def parse_arguments():
+    """
+    Parses command-line arguments for the AI-based code scanner.
+    """
+    parser = argparse.ArgumentParser(
+        description="A CLI tool for powered by GenAI to access vulnerability of codebases and provide suggestions."
+    )
+    parser.add_argument(
+        "--provider",
+        type=str,
+        required=True,
+        choices=["openai", "gemini", "custom"],
+        help="Select the AI provider",
+    )
+    parser.add_argument(
+        "--directory",
+        type=str,
+        default=".",
+        help="Directory to scan (defaults to root)",
+    )
+    parser.add_argument(
+        "--model",
+        type=str,
+        help="AI model to use (optional, defaults vary by provider. See [gemini: gemini-pro, openai: gpt-4o-mini])",
+    )
+    parser.add_argument(
+        "--changes_only",
+        action="store_true",
+        help="Scan only changed files in a git repository",
+    )
+    # Additional arguments for PR scanning
+    parser.add_argument(
+        "--repo", type=str, help="GitHub repository in the format 'owner/repo'"
+    )
+    parser.add_argument("--pr_number", type=int, help="Pull request number")
+    parser.add_argument("--github_token", help="GitHub API token")
+    # Additional arguments for custom provider
+    parser.add_argument(
+        "--host", type=str, help="Custom AI server host (e.g., http://localhost)"
+    )
+    parser.add_argument("--port", type=int, help="Custom AI server port (e.g., 5000)")
+    parser.add_argument(
+        "--token", type=str, help="Token for authenticating with the custom AI server"
+    )
+    parser.add_argument(
+        "--endpoint",
+        type=str,
+        default="/api/v1/scan",
+        help="API endpoint for the custom server",
+    )
+    return parser.parse_args()

codescanai-0.1.0/core/utils/code_summary_extractor.py ADDED Viewed

@@ -0,0 +1,51 @@
+"""
+This module provides util methods for extracting code summaries from a list of files.
+"""
+import logging
+import os
+# Configure root logging when this module is imported: INFO level,
+# with each record rendered as "<time> - <level> - <message>".
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+def read_files_and_extract_code_summary(file_paths):
+    """
+    Reads the content of the given files and generates a code summary.
+    Skips files that cannot be decoded as text.
+    Parameters:
+        file_path (list[string]): The list of filenames to extract code from.
+    Returns:
+        string: summary of code extracted from the input files.
+    """
+    code_summary = ""
+    for file_path in file_paths:
+        if os.path.isfile(file_path):
+            try:
+                with open(file_path, "r", encoding="utf-8") as file:
+                    logging.info("Reading: %s", file_path)
+                    code_summary += f"\n\nFile: {os.path.basename(file_path)}\n"
+                    code_summary += file.read()
+            except (UnicodeDecodeError, IOError) as e:
+                logging.warning("Skipping file %s: %s", file_path, e)
+        else:
+            logging.warning("Skipped %s: Not a valid file.", file_path)
+    return code_summary
+def generate_code_summary(directory, changed_files):
+    """
+    Generates a summary of the code from the changed files.
+    Parameters:
+        directory (string) : The path to the directory.
+        changed_files (list[string]): The list of filenames to extract code from.
+    Returns:
+        string: summary of code extracted from the input files.
+    """
+    file_paths = [os.path.join(directory, file) for file in changed_files]
+    return read_files_and_extract_code_summary(file_paths)

codescanai-0.1.0/core/utils/file_extractor.py ADDED Viewed

@@ -0,0 +1,93 @@
+"""
+This module contains utilities for checking
+if a directory is a Git repository, retrieving changed files from local repositories
+or GitHub pull requests.
+"""
+import logging
+import os
+import subprocess
+from github import Github
+# Configure root logging when this module is imported: INFO level,
+# with each record rendered as "<time> - <level> - <message>".
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+def is_git_repo(directory):
+    """
+    Checks if the directory is a valid Git repository.
+    Parameters:
+        directory (string): The path to the directory.
+    Returns:
+        bool: Representing if the directory is a Git repository.
+    """
+    try:
+        subprocess.check_output(
+            ["git", "-C", directory, "rev-parse", "--is-inside-work-tree"],
+            stderr=subprocess.STDOUT,
+        )
+        return True
+    except subprocess.CalledProcessError:
+        logging.error("Directory is not a valid Git repository: %s", directory)
+        return False
+def get_changed_files_in_pr(repo_name, pr_number, github_token):
+    """
+    Returns a list of files that have been changed in the specified pull request.
+    Parameters:
+        repo_name (string): The name of the repository.
+        pr_number (int): The number representing the specified pull request.
+        github_token(string): Your github token.
+    Returns:
+        list[string]: A list of all changed filenames in the pull request.
+    """
+    if not github_token:
+        logging.error("GitHub token is required for scanning PR changes.")
+        raise ValueError("GitHub token is required for scanning PR changes.")
+    files = Github(github_token).get_repo(repo_name).get_pull(pr_number).get_files()
+    changed_files = [file.filename for file in files]
+    logging.info(
+        "Fetched %d changed files from PR #%d in %s repository.",
+        len(changed_files),
+        pr_number,
+        repo_name,
+    )
+    return changed_files
+def get_changed_files_in_repo(directory):
+    """
+    Returns a list of files that have been changed locally.
+    Parameters:
+        directory (string): The path to the directory.
+    Returns:
+        list[string]: A list of all changed filenames in the directory.
+    """
+    if not is_git_repo(directory):
+        logging.error("Directory is not a valid Git repository: %s", directory)
+        raise ValueError("Directory is not a valid Git repository.")
+    changed_files = []
+    try:
+        os.chdir(directory)
+        result = subprocess.check_output(["git", "diff", "--name-only"], text=True)
+        if result.strip():
+            changed_files = result.strip().split("\n")
+            logging.info(
+                "Found %d changed files in local repository", len(changed_files)
+            )
+    except subprocess.CalledProcessError as e:
+        logging.error("Error getting changed files: %s", e)
+    return changed_files

codescanai-0.1.0/core/utils/provider_creator.py ADDED Viewed

@@ -0,0 +1,39 @@
+"""
+This module provides util methods used for initializing an AIProvider based on the user args.
+"""
+from core.providers.custom_ai_provider import CustomAIProvider
+from core.providers.google_gemini_ai_provider import GoogleGeminiAIProvider
+from core.providers.open_ai_provider import OpenAIProvider
+# Maps each supported provider name to the class instantiated for it.
+PROVIDERS = {
+    "openai": OpenAIProvider,
+    "gemini": GoogleGeminiAIProvider,
+    "custom": CustomAIProvider,
+}
+# Model name used when the caller supplies none; "custom" has no entry here.
+DEFAULT_MODELS = {
+    "openai": "gpt-4o-mini",
+    "gemini": "gemini-pro",
+}
+def init_provider(provider, model, host=None, port=None, token=None, endpoint=None):
+    """
+    Initializes and returns the appropriate AI client based on the provider.
+    """
+    if provider == "custom":
+        client_params = {
+            "model": model,
+            "host": host,
+            "port": port,
+            "token": token,
+            "endpoint": endpoint,
+        }
+    else:
+        client_params = {
+            "model": model if model else DEFAULT_MODELS[provider],
+        }
+    return PROVIDERS[provider](**client_params)

codescanai-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,37 @@
+[build-system]
+requires = ["setuptools>=61.0.0", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "codescanai"
+version = "0.1.0"
+description = "A CLI tool that scans your codebases for security vulnerabilities powered by powerful AI models."
+readme = "README.md"
+authors = [{ name = "Caleb Abhulimhen", email = "calebabhulimhen@gmail.com" }]
+license = { text = "MIT" }
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent"
+]
+keywords = ["code scanning",  "cli", "github action", "security", "vulnerabilities check"]
+dependencies = [
+    "openai>=1.40.6",
+    "PyGithub",
+    "requests",
+    "google-generativeai",
+    "ipython"
+]
+requires-python = ">=3.10"
+[project.optional-dependencies]
+dev = ["pylint", "black", "isort"]
+[project.urls]
+Homepage = "https://github.com/codescan-ai/codescan"
+[project.scripts]
+guardai = "core.runner:main"
+[tool.setuptools.packages.find]
+include = ["core", "core.*"]

codescanai-0.1.0/requirements.txt ADDED Viewed

@@ -0,0 +1,5 @@
+openai==1.42.0
+PyGithub==2.4.0
+requests==2.32.3
+google-generativeai==0.7.2
+ipython==8.26.0

codescanai-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0