codescanai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 codescan-ai
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,4 @@
1
+ include requirements.txt
2
+ include README.md
3
+ include LICENSE
4
+ include CHANGELOG
@@ -0,0 +1,152 @@
1
+ Metadata-Version: 2.1
2
+ Name: codescanai
3
+ Version: 0.1.0
4
+ Summary: A CLI tool that scans your codebases for security vulnerabilities powered by powerful AI models.
5
+ Author-email: Caleb Abhulimhen <calebabhulimhen@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/codescan-ai/codescan
8
+ Keywords: code scanning,cli,github action,security,vulnerabilities check
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: openai>=1.40.6
16
+ Requires-Dist: PyGithub
17
+ Requires-Dist: requests
18
+ Requires-Dist: google-generativeai
19
+ Requires-Dist: ipython
20
+ Provides-Extra: dev
21
+ Requires-Dist: pylint; extra == "dev"
22
+ Requires-Dist: black; extra == "dev"
23
+ Requires-Dist: isort; extra == "dev"
24
+
25
+ # CodeScanAI
26
+
27
+ CodeScanAI utilizes a variety of AI models, including OpenAI, Gemini, and custom self-hosted AI servers, to scan your codebase for bad development practices. It is currently configured to catch potential security vulnerabilities, but will be extended to other use cases in the future.
28
+
29
+ It has been designed to enable seamless integration into CI/CD pipelines like GitHub Actions, or can be used via a simple command. CodeScanAI enables developers to automatically detect potential security issues in their code throughout the development process. [Try it out today](#getting-started)!
30
+
31
+ ## Features
32
+
33
+ - **Support for Multiple AI Models:**
34
+
35
+ - **OpenAI Integration:** Utilize OpenAI's advanced models, such as GPT-4, to scan your code and identify potential security vulnerabilities, OR
36
+ - **Gemini Integration:** Tap into Gemini's expertise to analyze your code for security risks, OR
37
+ - **Custom AI Server Integration:** Connect with self-hosted or private AI servers for security scans, offering fully customizable and self-managed AI solutions.
38
+
39
+ - **CI/CD Integration:**
40
+
41
+ - Seamlessly integrate the CLI tool into GitHub Actions for automated security vulnerability scanning on every pull request.
42
+ - Supports targeted scans on specific branches or changes within a repository.
43
+
44
+ - **Flexible Scanning Options:**
45
+ - **Full Directory Scans:** Perform a comprehensive security analysis by scanning all files within a directory.
46
+ - **Changes Only Scan:** Only scan those files that have changed since the last scan.
47
+ - **PR-Specific Scans:** Target files modified in a specific pull request to optimize the scanning process and reduce overhead.
48
+
49
+ ## Getting Started
50
+
51
+ ### Prerequisites
52
+
53
+ - Python 3.10 or higher
54
+ - API keys for the supported AI models:
55
+ - OpenAI API key
56
+ - Gemini API key
57
+ - Access to a custom AI server (host, port, and optional token)
58
+ - Set an environment variable for your API key(s).
59
+
60
+ ```bash
61
+ export OPENAI_API_KEY='your_openai_api_key'
62
+ ```
63
+
64
+ ```bash
65
+ export GEMINI_API_KEY='your_gemini_api_key'
66
+ ```
67
+
68
+ ### Installation
69
+
70
+ #### Option 1: Install via pip
71
+
72
+ You can install the tool directly from the repository using pip:
73
+
74
+ ```bash
75
+ pip install codescanai
76
+ ```
77
+
78
+ This will allow you to use the `codescanai` command directly in your terminal.
79
+
80
+ #### Option 2: Clone the Repository
81
+
82
+ If you prefer to clone the repository and install the dependencies manually:
83
+
84
+ ```bash
85
+ git clone https://github.com/codescan-ai/codescan.git
86
+ cd codescan
87
+ pip install -r requirements.txt
88
+ ```
89
+
90
+ ### Usage
91
+
92
+ #### Scanning files in your current directory
93
+
94
+ ```bash
95
+ codescanai --provider openai
96
+ ```
97
+ OR if you're cloning the repository,
98
+ ```bash
99
+ python3 -m core.runner --provider openai
100
+ ```
101
+
102
+ #### Scanning with a Custom AI Server
103
+
104
+ To scan code using a custom AI server:
105
+
106
+ ```bash
107
+ guardai --provider custom --host http://localhost --port 5000 --token your_token --directory path/to/your/code
108
+ ```
109
+
110
+ ### Supported arguments
111
+
112
+ | name | description | required | default |
113
+ | -------------- | --------------------------------------------------------- | -------- | -------------- |
114
+ | `provider` | <p>AI provider</p> | `true` | `""` |
115
+ | `model` | <p>AI model to use</p> | `false` | `""` |
116
+ | `directory` | <p>Directory to scan</p> | `false` | `.` |
117
+ | `changes_only` | <p>Scan only changed files</p> | `false` | `false` |
118
+ | `repo` | <p>GitHub repository</p> | `false` | `""` |
119
+ | `pr_number` | <p>Pull request number</p> | `false` | `""` |
120
+ | `github_token` | <p>GitHub API token</p> | `false` | `""` |
121
+ | `host` | <p>Custom AI server host</p> | `false` | `""` |
122
+ | `port` | <p>Custom AI server port</p> | `false` | `""` |
123
+ | `token` | <p>Token for authenticating with the custom AI server</p> | `false` | `""` |
124
+ | `endpoint` | <p>API endpoint for the custom server</p> | `false` | `/api/v1/scan` |
125
+
126
+ ### Supported AI Providers
127
+
128
+ - **OpenAI:** Utilizes GPT models for in-depth security analysis.
129
+ - **Gemini:** Delivers strong security insights through Gemini's advanced capabilities.
130
+ - **Custom:** Connects with self-hosted or private AI servers for fully customizable solutions.
131
+
132
+ ### Limitations
133
+
134
+ - **Large number of files:** We currently do not support a scalable way to scan a large number of files in a single run. Depending on the capacity of your AI Provider, you might run into a `rate_limit_exceeded` error. To work around this, you can create a custom solution that breaks down the number of files for each run.
135
+
136
+ ## Future Work
137
+
138
+ - **Batch Processing:** To address the limitation above, a future version will implement batch processing for large numbers of files.
139
+
140
+ - **Caching Implementation:** A caching mechanism to store results of previously scanned files, reducing the number of API calls and optimizing performance.
141
+
142
+ - **Expanded Git Provider Support:** The tool is currently integrated with GitHub for PR-based scanning, future plans include extending support to other Git providers like GitLab, Bitbucket, and Azure Repos.
143
+
144
+ - **Expanded Development tools:** This will be a plan to expand this tool to be accessible in other development environments. For example, as a VSCode extension.
145
+
146
+ ## Contributing
147
+
148
+ Contributions are welcome! Please fork the repository and submit a pull request with your improvements.
149
+
150
+ ## License
151
+
152
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,128 @@
1
+ # CodeScanAI
2
+
3
+ CodeScanAI utilizes a variety of AI models, including OpenAI, Gemini, and custom self-hosted AI servers, to scan your codebase for bad development practices. It is currently configured to catch potential security vulnerabilities, but will be extended to other use cases in the future.
4
+
5
+ It has been designed to enable seamless integration into CI/CD pipelines like GitHub Actions, or can be used via a simple command. CodeScanAI enables developers to automatically detect potential security issues in their code throughout the development process. [Try it out today](#getting-started)!
6
+
7
+ ## Features
8
+
9
+ - **Support for Multiple AI Models:**
10
+
11
+ - **OpenAI Integration:** Utilize OpenAI's advanced models, such as GPT-4, to scan your code and identify potential security vulnerabilities, OR
12
+ - **Gemini Integration:** Tap into Gemini's expertise to analyze your code for security risks, OR
13
+ - **Custom AI Server Integration:** Connect with self-hosted or private AI servers for security scans, offering fully customizable and self-managed AI solutions.
14
+
15
+ - **CI/CD Integration:**
16
+
17
+ - Seamlessly integrate the CLI tool into GitHub Actions for automated security vulnerability scanning on every pull request.
18
+ - Supports targeted scans on specific branches or changes within a repository.
19
+
20
+ - **Flexible Scanning Options:**
21
+ - **Full Directory Scans:** Perform a comprehensive security analysis by scanning all files within a directory.
22
+ - **Changes Only Scan:** Only scan those files that have changed since the last scan.
23
+ - **PR-Specific Scans:** Target files modified in a specific pull request to optimize the scanning process and reduce overhead.
24
+
25
+ ## Getting Started
26
+
27
+ ### Prerequisites
28
+
29
+ - Python 3.10 or higher
30
+ - API keys for the supported AI models:
31
+ - OpenAI API key
32
+ - Gemini API key
33
+ - Access to a custom AI server (host, port, and optional token)
34
+ - Set an environment variable for your API key(s).
35
+
36
+ ```bash
37
+ export OPENAI_API_KEY='your_openai_api_key'
38
+ ```
39
+
40
+ ```bash
41
+ export GEMINI_API_KEY='your_gemini_api_key'
42
+ ```
43
+
44
+ ### Installation
45
+
46
+ #### Option 1: Install via pip
47
+
48
+ You can install the tool directly from the repository using pip:
49
+
50
+ ```bash
51
+ pip install codescanai
52
+ ```
53
+
54
+ This will allow you to use the `codescanai` command directly in your terminal.
55
+
56
+ #### Option 2: Clone the Repository
57
+
58
+ If you prefer to clone the repository and install the dependencies manually:
59
+
60
+ ```bash
61
+ git clone https://github.com/codescan-ai/codescan.git
62
+ cd codescan
63
+ pip install -r requirements.txt
64
+ ```
65
+
66
+ ### Usage
67
+
68
+ #### Scanning files in your current directory
69
+
70
+ ```bash
71
+ codescanai --provider openai
72
+ ```
73
+ OR if you're cloning the repository,
74
+ ```bash
75
+ python3 -m core.runner --provider openai
76
+ ```
77
+
78
+ #### Scanning with a Custom AI Server
79
+
80
+ To scan code using a custom AI server:
81
+
82
+ ```bash
83
+ guardai --provider custom --host http://localhost --port 5000 --token your_token --directory path/to/your/code
84
+ ```
85
+
86
+ ### Supported arguments
87
+
88
+ | name | description | required | default |
89
+ | -------------- | --------------------------------------------------------- | -------- | -------------- |
90
+ | `provider` | <p>AI provider</p> | `true` | `""` |
91
+ | `model` | <p>AI model to use</p> | `false` | `""` |
92
+ | `directory` | <p>Directory to scan</p> | `false` | `.` |
93
+ | `changes_only` | <p>Scan only changed files</p> | `false` | `false` |
94
+ | `repo` | <p>GitHub repository</p> | `false` | `""` |
95
+ | `pr_number` | <p>Pull request number</p> | `false` | `""` |
96
+ | `github_token` | <p>GitHub API token</p> | `false` | `""` |
97
+ | `host` | <p>Custom AI server host</p> | `false` | `""` |
98
+ | `port` | <p>Custom AI server port</p> | `false` | `""` |
99
+ | `token` | <p>Token for authenticating with the custom AI server</p> | `false` | `""` |
100
+ | `endpoint` | <p>API endpoint for the custom server</p> | `false` | `/api/v1/scan` |
101
+
102
+ ### Supported AI Providers
103
+
104
+ - **OpenAI:** Utilizes GPT models for in-depth security analysis.
105
+ - **Gemini:** Delivers strong security insights through Gemini's advanced capabilities.
106
+ - **Custom:** Connects with self-hosted or private AI servers for fully customizable solutions.
107
+
108
+ ### Limitations
109
+
110
+ - **Large number of files:** We currently do not support a scalable way to scan a large number of files in a single run. Depending on the capacity of your AI Provider, you might run into a `rate_limit_exceeded` error. To work around this, you can create a custom solution that breaks down the number of files for each run.
111
+
112
+ ## Future Work
113
+
114
+ - **Batch Processing:** To address the limitation above, a future version will implement batch processing for large numbers of files.
115
+
116
+ - **Caching Implementation:** A caching mechanism to store results of previously scanned files, reducing the number of API calls and optimizing performance.
117
+
118
+ - **Expanded Git Provider Support:** The tool is currently integrated with GitHub for PR-based scanning, future plans include extending support to other Git providers like GitLab, Bitbucket, and Azure Repos.
119
+
120
+ - **Expanded Development tools:** This will be a plan to expand this tool to be accessible in other development environments. For example, as a VSCode extension.
121
+
122
+ ## Contributing
123
+
124
+ Contributions are welcome! Please fork the repository and submit a pull request with your improvements.
125
+
126
+ ## License
127
+
128
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,152 @@
1
+ Metadata-Version: 2.1
2
+ Name: codescanai
3
+ Version: 0.1.0
4
+ Summary: A CLI tool that scans your codebases for security vulnerabilities powered by powerful AI models.
5
+ Author-email: Caleb Abhulimhen <calebabhulimhen@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/codescan-ai/codescan
8
+ Keywords: code scanning,cli,github action,security,vulnerabilities check
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: openai>=1.40.6
16
+ Requires-Dist: PyGithub
17
+ Requires-Dist: requests
18
+ Requires-Dist: google-generativeai
19
+ Requires-Dist: ipython
20
+ Provides-Extra: dev
21
+ Requires-Dist: pylint; extra == "dev"
22
+ Requires-Dist: black; extra == "dev"
23
+ Requires-Dist: isort; extra == "dev"
24
+
25
+ # CodeScanAI
26
+
27
+ CodeScanAI utilizes a variety of AI models, including OpenAI, Gemini, and custom self-hosted AI servers, to scan your codebase for bad development practices. It is currently configured to catch potential security vulnerabilities, but will be extended to other use cases in the future.
28
+
29
+ It has been designed to enable seamless integration into CI/CD pipelines like GitHub Actions, or can be used via a simple command. CodeScanAI enables developers to automatically detect potential security issues in their code throughout the development process. [Try it out today](#getting-started)!
30
+
31
+ ## Features
32
+
33
+ - **Support for Multiple AI Models:**
34
+
35
+ - **OpenAI Integration:** Utilize OpenAI's advanced models, such as GPT-4, to scan your code and identify potential security vulnerabilities, OR
36
+ - **Gemini Integration:** Tap into Gemini's expertise to analyze your code for security risks, OR
37
+ - **Custom AI Server Integration:** Connect with self-hosted or private AI servers for security scans, offering fully customizable and self-managed AI solutions.
38
+
39
+ - **CI/CD Integration:**
40
+
41
+ - Seamlessly integrate the CLI tool into GitHub Actions for automated security vulnerability scanning on every pull request.
42
+ - Supports targeted scans on specific branches or changes within a repository.
43
+
44
+ - **Flexible Scanning Options:**
45
+ - **Full Directory Scans:** Perform a comprehensive security analysis by scanning all files within a directory.
46
+ - **Changes Only Scan:** Only scan those files that have changed since the last scan.
47
+ - **PR-Specific Scans:** Target files modified in a specific pull request to optimize the scanning process and reduce overhead.
48
+
49
+ ## Getting Started
50
+
51
+ ### Prerequisites
52
+
53
+ - Python 3.10 or higher
54
+ - API keys for the supported AI models:
55
+ - OpenAI API key
56
+ - Gemini API key
57
+ - Access to a custom AI server (host, port, and optional token)
58
+ - Set an environment variable for your API key(s).
59
+
60
+ ```bash
61
+ export OPENAI_API_KEY='your_openai_api_key'
62
+ ```
63
+
64
+ ```bash
65
+ export GEMINI_API_KEY='your_gemini_api_key'
66
+ ```
67
+
68
+ ### Installation
69
+
70
+ #### Option 1: Install via pip
71
+
72
+ You can install the tool directly from the repository using pip:
73
+
74
+ ```bash
75
+ pip install codescanai
76
+ ```
77
+
78
+ This will allow you to use the `codescanai` command directly in your terminal.
79
+
80
+ #### Option 2: Clone the Repository
81
+
82
+ If you prefer to clone the repository and install the dependencies manually:
83
+
84
+ ```bash
85
+ git clone https://github.com/codescan-ai/codescan.git
86
+ cd codescan
87
+ pip install -r requirements.txt
88
+ ```
89
+
90
+ ### Usage
91
+
92
+ #### Scanning files in your current directory
93
+
94
+ ```bash
95
+ codescanai --provider openai
96
+ ```
97
+ OR if you're cloning the repository,
98
+ ```bash
99
+ python3 -m core.runner --provider openai
100
+ ```
101
+
102
+ #### Scanning with a Custom AI Server
103
+
104
+ To scan code using a custom AI server:
105
+
106
+ ```bash
107
+ guardai --provider custom --host http://localhost --port 5000 --token your_token --directory path/to/your/code
108
+ ```
109
+
110
+ ### Supported arguments
111
+
112
+ | name | description | required | default |
113
+ | -------------- | --------------------------------------------------------- | -------- | -------------- |
114
+ | `provider` | <p>AI provider</p> | `true` | `""` |
115
+ | `model` | <p>AI model to use</p> | `false` | `""` |
116
+ | `directory` | <p>Directory to scan</p> | `false` | `.` |
117
+ | `changes_only` | <p>Scan only changed files</p> | `false` | `false` |
118
+ | `repo` | <p>GitHub repository</p> | `false` | `""` |
119
+ | `pr_number` | <p>Pull request number</p> | `false` | `""` |
120
+ | `github_token` | <p>GitHub API token</p> | `false` | `""` |
121
+ | `host` | <p>Custom AI server host</p> | `false` | `""` |
122
+ | `port` | <p>Custom AI server port</p> | `false` | `""` |
123
+ | `token` | <p>Token for authenticating with the custom AI server</p> | `false` | `""` |
124
+ | `endpoint` | <p>API endpoint for the custom server</p> | `false` | `/api/v1/scan` |
125
+
126
+ ### Supported AI Providers
127
+
128
+ - **OpenAI:** Utilizes GPT models for in-depth security analysis.
129
+ - **Gemini:** Delivers strong security insights through Gemini's advanced capabilities.
130
+ - **Custom:** Connects with self-hosted or private AI servers for fully customizable solutions.
131
+
132
+ ### Limitations
133
+
134
+ - **Large number of files:** We currently do not support a scalable way to scan a large number of files in a single run. Depending on the capacity of your AI Provider, you might run into a `rate_limit_exceeded` error. To work around this, you can create a custom solution that breaks down the number of files for each run.
135
+
136
+ ## Future Work
137
+
138
+ - **Batch Processing:** To address the limitation above, a future version will implement batch processing for large numbers of files.
139
+
140
+ - **Caching Implementation:** A caching mechanism to store results of previously scanned files, reducing the number of API calls and optimizing performance.
141
+
142
+ - **Expanded Git Provider Support:** The tool is currently integrated with GitHub for PR-based scanning, future plans include extending support to other Git providers like GitLab, Bitbucket, and Azure Repos.
143
+
144
+ - **Expanded Development tools:** This will be a plan to expand this tool to be accessible in other development environments. For example, as a VSCode extension.
145
+
146
+ ## Contributing
147
+
148
+ Contributions are welcome! Please fork the repository and submit a pull request with your improvements.
149
+
150
+ ## License
151
+
152
+ This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,25 @@
1
+ LICENSE
2
+ MANIFEST.in
3
+ README.md
4
+ pyproject.toml
5
+ requirements.txt
6
+ codescanai.egg-info/PKG-INFO
7
+ codescanai.egg-info/SOURCES.txt
8
+ codescanai.egg-info/dependency_links.txt
9
+ codescanai.egg-info/entry_points.txt
10
+ codescanai.egg-info/requires.txt
11
+ codescanai.egg-info/top_level.txt
12
+ core/__init__.py
13
+ core/runner.py
14
+ core/code_scanner/__init__.py
15
+ core/code_scanner/code_scanner.py
16
+ core/providers/__init__.py
17
+ core/providers/base_ai_provider.py
18
+ core/providers/custom_ai_provider.py
19
+ core/providers/google_gemini_ai_provider.py
20
+ core/providers/open_ai_provider.py
21
+ core/utils/__init__.py
22
+ core/utils/argument_parser.py
23
+ core/utils/code_summary_extractor.py
24
+ core/utils/file_extractor.py
25
+ core/utils/provider_creator.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ guardai = core.runner:main
@@ -0,0 +1,10 @@
1
+ openai>=1.40.6
2
+ PyGithub
3
+ requests
4
+ google-generativeai
5
+ ipython
6
+
7
+ [dev]
8
+ pylint
9
+ black
10
+ isort
File without changes
File without changes
@@ -0,0 +1,80 @@
1
+ """
2
+ This module defines a class that scans/analyse code based on the input args using AI providers.
3
+ This is the brain of this application and all core logic will be referenced here.
4
+ """
5
+
6
+ import logging
7
+ import os
8
+
9
+ from core.utils.code_summary_extractor import (
10
+ generate_code_summary,
11
+ read_files_and_extract_code_summary,
12
+ )
13
+ from core.utils.file_extractor import get_changed_files_in_pr, get_changed_files_in_repo
14
+ from core.utils.provider_creator import init_provider
15
+
16
# Configure the root logger when this module is imported: INFO and above,
# each record rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
19
+
20
+
21
class CodeScanner:
    """
    Drives a scan: selects the files to inspect from the parsed CLI arguments,
    turns them into a code summary and hands that summary to the AI provider.
    """

    def __init__(self, args) -> None:
        """
        Stores the parsed arguments and creates the AI provider they describe.

        Args:
            args: Parsed CLI namespace. This class reads ``provider``, ``model``,
                ``host``, ``port``, ``token``, ``endpoint``, ``changes_only``,
                ``directory``, ``repo``, ``pr_number`` and ``github_token`` from it.
        """
        self.args = args
        self.provider = init_provider(
            args.provider, args.model, args.host, args.port, args.token, args.endpoint
        )

    def scan(self):
        """
        Scans the code based on the provided arguments and AI client.

        Returns:
            The value returned by the provider's ``scan_code``, or a plain message
            string when there is nothing to scan or the changed files could not
            be determined.
        """
        if self.args.changes_only:
            # Only scan new changes. This is supported in Git repositories only (for now).
            return self._scan_changes()
        return self._scan_files()

    def _scan_changes(self):
        """
        Scans only the files that have been changed in the specified directory or PR.

        When both a repository and a positive PR number were supplied, the changed
        files of that pull request are used; otherwise the changed files of the
        local directory are used. A ``ValueError`` raised while collecting them is
        logged and its message is returned instead of a scan result.
        """
        try:
            if self._is_repo_valid() and self._is_pr_number_valid():
                changed_files = get_changed_files_in_pr(
                    self.args.repo, self.args.pr_number, self.args.github_token
                )
            else:
                changed_files = get_changed_files_in_repo(self.args.directory)
        except ValueError as e:
            logging.error(e)
            return str(e)

        if not changed_files:
            logging.info("No changes detected in the directory.")
            return "No changes detected in the directory."

        code_summary = generate_code_summary(self.args.directory, changed_files)

        return self.provider.scan_code(code_summary)

    def _scan_files(self):
        """
        Scans all files in the specified directory.

        Walks ``args.directory`` recursively and summarises every file found.
        """
        file_paths = [
            os.path.join(root, file)
            for root, _, files in os.walk(self.args.directory)
            for file in files
        ]

        code_summary = read_files_and_extract_code_summary(file_paths)
        return self.provider.scan_code(code_summary)

    def _is_repo_valid(self):
        """
        Returns True when a non-empty repository name was supplied.

        ``--repo`` has no default, so the attribute is ``None`` when the option is
        omitted; that case must count as "not valid" rather than raise.
        """
        return bool(self.args.repo)

    def _is_pr_number_valid(self):
        """
        Returns True when a positive pull request number was supplied.

        ``--pr_number`` has no default, so ``None`` (option omitted) is treated as
        "not valid" instead of being compared with an integer.
        """
        pr_number = self.args.pr_number
        return pr_number is not None and pr_number > 0
File without changes
@@ -0,0 +1,20 @@
1
+ """
2
+ This module defines an abstract class to represent an AI provider.
3
+ Currently supported AI providers will implement this class.
4
+ """
5
+
6
+
7
class BaseAIProvider:
    """Common interface that every concrete AI provider is expected to fulfil."""

    def __init__(self):
        """Always raises: this base class is not meant to be instantiated itself."""
        message = (
            "BaseAIProvider is an abstract class and cannot be instantiated directly."
        )
        raise NotImplementedError(message)

    def scan_code(self, code_summary):
        """
        Placeholder for the provider-specific scan of ``code_summary``.

        Always raises ``NotImplementedError``; subclasses supply the real logic.
        """
        message = "Each AI provider must implement the `scan_code` method."
        raise NotImplementedError(message)
@@ -0,0 +1,57 @@
1
+ """
2
+ This module defines a CustomAI Provider, and implement the BaseAIProvider abstract class.
3
+ With this users can connect to their locally hosted AI provider.
4
+ """
5
+
6
+ import requests
7
+
8
+ from core.providers.base_ai_provider import BaseAIProvider
9
+
10
+
11
class CustomAIProvider(BaseAIProvider):
    """Provider for interacting with a custom AI server."""

    def __init__(self, model, host, port, token=None, endpoint="/api/v1/scan"):
        """
        Initializes the custom AI provider with the given parameters.

        Args:
            model: Model name forwarded to the server in the request payload.
            host: Server host including the scheme (e.g. ``http://localhost``).
            port: Server port, or ``None`` to use the host as given.
            token: Optional bearer token used to authenticate with the server.
            endpoint: Path of the scan endpoint, appended to host and port.
        """

        self.model = model
        self.host = host
        self.port = port
        self.token = token
        self.endpoint = endpoint
        # Only add ":<port>" when a port was supplied; formatting a missing port
        # would otherwise yield an unusable "host:None/..." URL.
        authority = host if port is None else f"{host}:{port}"
        self.base_url = f"{authority}{endpoint}"

    def scan_code(self, code_summary):
        """
        Scans the code using the custom AI server.

        Posts the review prompt followed by ``code_summary`` to ``base_url`` and
        returns the ``message.content`` field of the JSON reply, or
        ``"No response content."`` when that field is absent. Request failures are
        reported as an error string rather than raised.
        """

        # Send the Authorization header only when a token exists; an empty
        # header value carries no credentials and is not a valid bearer token.
        headers = {"Authorization": f"Bearer {self.token}"} if self.token else {}
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "user",
                    "content": """You are an experienced application security specialist, entrusted with the task of
carefully reviewing the following code for potential security vulnerabilities. Your objective
is to conduct a comprehensive analysis, identifying any weak points that could be exploited
by malicious actors. Once identified, provide clear and actionable recommendations to
mitigate these risks and strengthen the overall security posture of the application.
Focus on issues that could compromise the integrity, confidentiality, or availability
of the system, and ensure that your suggestions are practical and implementable.
Here is the code you need to review:
"""
                    + code_summary,
                },
            ],
        }
        try:
            response = requests.post(
                self.base_url, json=payload, headers=headers, timeout=120
            )
            response.raise_for_status()
            return (
                response.json()
                .get("message", {})
                .get("content", "No response content.")
            )
        except requests.exceptions.RequestException as e:
            return f"Error occurred while connecting to the server: {e}"
@@ -0,0 +1,39 @@
1
+ """
2
+ This module defines the GoogleGemini AI Provider.
3
+ This is one of the supported AI Providers, and implement the BaseAIProvider abstract class.
4
+ """
5
+
6
+ import os
7
+
8
+ import google.generativeai as genai
9
+
10
+ from core.providers.base_ai_provider import BaseAIProvider
11
+
12
+
13
class GoogleGeminiAIProvider(BaseAIProvider):
    """Provider that sends code summaries to the Google Generative AI API."""

    def __init__(self, model):
        """
        Reads the API key from ``GEMINI_API_KEY``, configures the client library
        with it and creates the generative model named by ``model``.

        Raises:
            ValueError: If ``GEMINI_API_KEY`` is unset or empty.
        """

        self.api_key = os.getenv("GEMINI_API_KEY")
        if self.api_key:
            genai.configure(api_key=self.api_key)
            self.model = genai.GenerativeModel(model)
        else:
            raise ValueError("Gemini API key is not set in the environment.")

    def scan_code(self, code_summary):
        """
        Asks the model for a security review of ``code_summary``.

        Returns the text of the model's reply; any exception raised along the way
        is converted into an ``"Error occurred: ..."`` string instead of propagating.
        """
        try:
            # The prompt is assembled inside the try block so that a failure while
            # building it is reported the same way as a failed API call.
            prompt = (
                """You are a specialist in application security, known for your ability to
analyze complex codebases and uncover hidden vulnerabilities. You will be
presented with the full code of an application. Your mission is to conduct
a thorough security review, identifying potential weaknesses and offering
actionable recommendations for improvement. Prioritize the most significant
security risks that could compromise the integrity of the application.
Here is the code:"""
                + code_summary
            )
            reply = self.model.generate_content(prompt)
            return reply.text
        except Exception as e:  # pylint: disable=W0718
            return f"Error occurred: {e}"
@@ -0,0 +1,46 @@
1
+ """
2
+ This module defines the OpenAI Provider.
3
+ This is one of the supported AI Providers, and implement the BaseAIProvider abstract class.
4
+ """
5
+
6
+ import os
7
+
8
+ import openai
9
+
10
+ from core.providers.base_ai_provider import BaseAIProvider
11
+
12
+
13
class OpenAIProvider(BaseAIProvider):
    """Provider that sends code summaries to the OpenAI chat completions API."""

    def __init__(self, model):
        """
        Reads the API key from ``OPENAI_API_KEY``, creates the OpenAI client with it
        and remembers which ``model`` to request.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is unset or empty.
        """

        self.api_key = os.getenv("OPENAI_API_KEY")
        if self.api_key:
            self.client = openai.OpenAI(api_key=self.api_key)
            self.model = model
        else:
            raise ValueError("OpenAI API key is not set in the environment.")

    def scan_code(self, code_summary):
        """
        Requests a security review of ``code_summary`` from OpenAI.

        Returns the content of the first choice in the reply; any exception raised
        along the way is converted into an ``"Error occurred: ..."`` string.
        """

        try:
            system_message = {
                "role": "system",
                "content": """You are an expert in software security analysis,
adept at identifying and explaining potential vulnerabilities in code. You will be
given complete code snippets from various applications. Your task is to analyze
the provided code, pinpoint potential security risks, and offer clear suggestions
for enhancing the application's security posture. Focus on the critical issues that
could impact the overall security of the application.""",
            }
            user_message = {"role": "user", "content": code_summary}
            completion = self.client.chat.completions.create(
                model=self.model,
                messages=[system_message, user_message],
            )
            first_choice = completion.choices[0]
            return first_choice.message.content
        except Exception as e:  # pylint: disable=W0718
            return f"Error occurred: {e}"
@@ -0,0 +1,32 @@
1
+ """
2
+ This is the runner of the codescan-ai CLI tool.
3
+ """
4
+
5
+ from IPython.display import display_markdown
6
+
7
+ from core.code_scanner.code_scanner import CodeScanner
8
+ from core.utils.argument_parser import parse_arguments
9
+
10
+
11
def format_as_markdown(result):
    """
    Prefixes the scan result with the Markdown heading used for the report.

    ``result`` is concatenated directly after the heading line, so it must be a string.
    """
    heading = "## Code Security Analysis Results\n"
    return heading + result
18
+
19
+
20
def main():
    """
    CLI entry point.

    Parses the command-line arguments, builds a CodeScanner from them, runs the
    scan, wraps the result in the Markdown report heading and displays it.
    """
    scanner = CodeScanner(parse_arguments())
    report = format_as_markdown(scanner.scan())
    display_markdown(report)


if __name__ == "__main__":
    main()
File without changes
@@ -0,0 +1,62 @@
1
+ """
2
+ This module provides utility functions for parsing the command-line arguments supplied by the user.
3
+ """
4
+
5
+ import argparse
6
+
7
+
8
def parse_arguments(argv=None):
    """
    Parses command-line arguments for the AI-based code scanner.

    Parameters:
        argv (list[string] | None): The arguments to parse. When None (the
            default) argparse reads them from the process command line, which
            is what the CLI entry point relies on.

    Returns:
        argparse.Namespace: The parsed values for provider, directory, model,
        changes_only, repo, pr_number, github_token, host, port, token and
        endpoint.
    """
    parser = argparse.ArgumentParser(
        description="A CLI tool powered by GenAI to assess the vulnerability of codebases and provide suggestions."
    )

    parser.add_argument(
        "--provider",
        type=str,
        required=True,
        choices=["openai", "gemini", "custom"],
        help="Select the AI provider",
    )
    parser.add_argument(
        "--directory",
        type=str,
        default=".",
        help="Directory to scan (defaults to the current directory)",
    )
    parser.add_argument(
        "--model",
        type=str,
        help="AI model to use (optional, defaults vary by provider. See [gemini: gemini-pro, openai: gpt-4o-mini])",
    )
    parser.add_argument(
        "--changes_only",
        action="store_true",
        help="Scan only changed files in a git repository",
    )

    # Additional arguments for PR scanning
    parser.add_argument(
        "--repo", type=str, help="GitHub repository in the format 'owner/repo'"
    )
    parser.add_argument("--pr_number", type=int, help="Pull request number")
    parser.add_argument("--github_token", help="GitHub API token")

    # Additional arguments for custom provider
    parser.add_argument(
        "--host", type=str, help="Custom AI server host (e.g., http://localhost)"
    )
    parser.add_argument("--port", type=int, help="Custom AI server port (e.g., 5000)")
    parser.add_argument(
        "--token", type=str, help="Token for authenticating with the custom AI server"
    )
    parser.add_argument(
        "--endpoint",
        type=str,
        default="/api/v1/scan",
        help="API endpoint for the custom server",
    )

    return parser.parse_args(argv)
@@ -0,0 +1,51 @@
1
+ """
2
+ This module provides util methods for extracting code summaries from a list of files.
3
+ """
4
+
5
+ import logging
6
+ import os
7
+
8
# Configure logging when this module is imported: INFO level, with each
# record rendered as "<time> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
11
+
12
+
13
def read_files_and_extract_code_summary(file_paths):
    """
    Reads the content of the given files and generates a code summary.

    Each readable file contributes a "File: <basename>" header followed by
    its full text. Paths that are not regular files, and files that cannot be
    opened or decoded as UTF-8, are skipped with a warning and contribute
    nothing to the summary.

    Parameters:
        file_paths (list[string]): The list of file paths to extract code from.

    Returns:
        string: summary of code extracted from the input files (empty when no
        file could be read).
    """
    sections = []
    for file_path in file_paths:
        if not os.path.isfile(file_path):
            logging.warning("Skipped %s: Not a valid file.", file_path)
            continue

        try:
            with open(file_path, "r", encoding="utf-8") as file:
                logging.info("Reading: %s", file_path)
                # Read before emitting the header so that a file which fails
                # to decode does not leave a dangling "File:" header behind.
                content = file.read()
        except (UnicodeDecodeError, IOError) as e:
            logging.warning("Skipping file %s: %s", file_path, e)
            continue

        sections.append(f"\n\nFile: {os.path.basename(file_path)}\n{content}")

    return "".join(sections)
37
+
38
+
39
def generate_code_summary(directory, changed_files):
    """
    Builds the code summary for a set of changed files inside a directory.

    Parameters:
        directory (string): The path to the directory the filenames belong to.
        changed_files (list[string]): The filenames to extract code from; each
            one is joined onto the directory before being read.

    Returns:
        string: summary of code extracted from the input files.
    """
    full_paths = []
    for changed_name in changed_files:
        full_paths.append(os.path.join(directory, changed_name))
    return read_files_and_extract_code_summary(full_paths)
@@ -0,0 +1,93 @@
1
+ """
2
+ This module contains utilities for checking
3
+ if a directory is a Git repository, retrieving changed files from local repositories
4
+ or GitHub pull requests.
5
+ """
6
+
7
+ import logging
8
+ import os
9
+ import subprocess
10
+
11
+ from github import Github
12
+
13
# Configure logging when this module is imported: INFO level, with each
# record rendered as "<time> - <level> - <message>".
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
16
+
17
+
18
def is_git_repo(directory):
    """
    Checks if the directory is a valid Git repository.

    Runs ``git -C <directory> rev-parse --is-inside-work-tree`` and treats a
    successful exit as confirmation. Every failure is logged and reported as
    False, including the case where git itself cannot be started.

    Parameters:
        directory (string): The path to the directory.

    Returns:
        bool: Representing if the directory is a Git repository.
    """
    command = ["git", "-C", directory, "rev-parse", "--is-inside-work-tree"]
    try:
        subprocess.check_output(command, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError:
        logging.error("Directory is not a valid Git repository: %s", directory)
        return False
    except OSError as e:
        # Raised when the git executable is missing or cannot be executed;
        # answer the yes/no question instead of crashing the caller.
        logging.error("Unable to run git to inspect %s: %s", directory, e)
        return False
    return True
37
+
38
+
39
def get_changed_files_in_pr(repo_name, pr_number, github_token):
    """
    Lists the files changed in a GitHub pull request.

    Parameters:
        repo_name (string): The name of the repository.
        pr_number (int): The number representing the specified pull request.
        github_token (string): The GitHub token used to create the API client.

    Returns:
        list[string]: The filename of every file reported for the pull request.

    Raises:
        ValueError: If no GitHub token is supplied.
    """
    if not github_token:
        missing_token = "GitHub token is required for scanning PR changes."
        logging.error(missing_token)
        raise ValueError(missing_token)

    repository = Github(github_token).get_repo(repo_name)
    pull_request = repository.get_pull(pr_number)

    changed_files = []
    for pr_file in pull_request.get_files():
        changed_files.append(pr_file.filename)

    logging.info(
        "Fetched %d changed files from PR #%d in %s repository.",
        len(changed_files),
        pr_number,
        repo_name,
    )
    return changed_files
66
+
67
+
68
def get_changed_files_in_repo(directory):
    """
    Returns a list of files that have been changed locally.

    The list is whatever ``git diff --name-only`` prints when run against the
    given directory. If that command fails, the error is logged and an empty
    list is returned.

    Parameters:
        directory (string): The path to the directory.

    Returns:
        list[string]: A list of all changed filenames in the directory.

    Raises:
        ValueError: If the directory is not a valid Git repository.
    """
    if not is_git_repo(directory):
        # is_git_repo has already logged the reason for the failure.
        raise ValueError("Directory is not a valid Git repository.")

    changed_files = []
    try:
        # Point git at the directory with -C instead of os.chdir(): changing
        # the working directory of the whole process would silently alter how
        # every later relative path (including `directory` itself) resolves.
        result = subprocess.check_output(
            ["git", "-C", directory, "diff", "--name-only"], text=True
        )
    except subprocess.CalledProcessError as e:
        logging.error("Error getting changed files: %s", e)
        return changed_files

    if result.strip():
        changed_files = result.strip().split("\n")
    logging.info("Found %d changed files in local repository", len(changed_files))
    return changed_files
@@ -0,0 +1,39 @@
1
+ """
2
+ This module provides util methods used for initializing an AIProvider based on the user args.
3
+ """
4
+
5
+ from core.providers.custom_ai_provider import CustomAIProvider
6
+ from core.providers.google_gemini_ai_provider import GoogleGeminiAIProvider
7
+ from core.providers.open_ai_provider import OpenAIProvider
8
+
9
# Maps each supported provider name to the class that implements it.
PROVIDERS = {
    "openai": OpenAIProvider,
    "gemini": GoogleGeminiAIProvider,
    "custom": CustomAIProvider,
}

# Model used when no model is supplied. There is no entry for "custom":
# init_provider passes the model through unchanged for that provider.
DEFAULT_MODELS = {
    "openai": "gpt-4o-mini",
    "gemini": "gemini-pro",
}
+
20
+
21
def init_provider(provider, model, host=None, port=None, token=None, endpoint=None):
    """
    Creates the AI client for the requested provider.

    For "custom" the model and all connection settings (host, port, token,
    endpoint) are forwarded as given. Every other provider receives only a
    model, falling back to that provider's entry in DEFAULT_MODELS when the
    model argument is empty.
    """
    if provider != "custom":
        chosen_model = model or DEFAULT_MODELS[provider]
        return PROVIDERS[provider](model=chosen_model)

    return PROVIDERS[provider](
        model=model,
        host=host,
        port=port,
        token=token,
        endpoint=endpoint,
    )
@@ -0,0 +1,37 @@
1
[build-system]
requires = ["setuptools>=61.0.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "codescanai"
version = "0.1.0"
description = "A CLI tool that scans your codebases for security vulnerabilities powered by powerful AI models."
readme = "README.md"
requires-python = ">=3.10"
license = { text = "MIT" }
authors = [
    { name = "Caleb Abhulimhen", email = "calebabhulimhen@gmail.com" },
]
keywords = [
    "code scanning",
    "cli",
    "github action",
    "security",
    "vulnerabilities check",
]
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]
dependencies = [
    "openai>=1.40.6",
    "PyGithub",
    "requests",
    "google-generativeai",
    "ipython",
]

[project.optional-dependencies]
dev = ["pylint", "black", "isort"]

[project.urls]
Homepage = "https://github.com/codescan-ai/codescan"

# The installed command is named `guardai`; it runs `main` from `core.runner`.
[project.scripts]
guardai = "core.runner:main"

# Only the top-level `core` package and its subpackages are packaged.
[tool.setuptools.packages.find]
include = ["core", "core.*"]
@@ -0,0 +1,5 @@
1
# Runtime dependencies, each pinned to an exact version.
openai==1.42.0
PyGithub==2.4.0
requests==2.32.3
google-generativeai==0.7.2
ipython==8.26.0
@@ -0,0 +1,4 @@
1
# egg_info options: tag_build is left empty and tag_date is set to 0.
[egg_info]
tag_build =
tag_date = 0
4
+