scanoss 1.41.1__tar.gz → 1.43.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scanoss-1.41.1/src/scanoss.egg-info → scanoss-1.43.0}/PKG-INFO +1 -1
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/__init__.py +1 -1
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/cli.py +17 -5
- scanoss-1.43.0/src/scanoss/data/build_date.txt +1 -0
- scanoss-1.43.0/src/scanoss/header_filter.py +563 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/scanner.py +14 -137
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/scanossapi.py +1 -1
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/scanossbase.py +1 -1
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/winnowing.py +71 -19
- {scanoss-1.41.1 → scanoss-1.43.0/src/scanoss.egg-info}/PKG-INFO +1 -1
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss.egg-info/SOURCES.txt +2 -0
- scanoss-1.43.0/tests/test_headers_filter.py +370 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/tests/test_winnowing.py +144 -2
- scanoss-1.41.1/src/scanoss/data/build_date.txt +0 -1
- {scanoss-1.41.1 → scanoss-1.43.0}/LICENSE +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/PACKAGE.md +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/README.md +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/pyproject.toml +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/setup.cfg +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/protoc_gen_swagger/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/protoc_gen_swagger/options/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/protoc_gen_swagger/options/annotations_pb2.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/protoc_gen_swagger/options/annotations_pb2.pyi +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/protoc_gen_swagger/options/annotations_pb2_grpc.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/protoc_gen_swagger/options/openapiv2_pb2.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/protoc_gen_swagger/options/openapiv2_pb2.pyi +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/protoc_gen_swagger/options/openapiv2_pb2_grpc.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/common/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/common/v2/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/common/v2/scanoss_common_pb2.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/common/v2/scanoss_common_pb2_grpc.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/components/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/components/v2/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/components/v2/scanoss_components_pb2.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/components/v2/scanoss_components_pb2_grpc.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/dependencies/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/dependencies/v2/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/geoprovenance/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/geoprovenance/v2/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/licenses/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/licenses/v2/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/licenses/v2/scanoss_licenses_pb2.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/licenses/v2/scanoss_licenses_pb2_grpc.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/scanning/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/scanning/v2/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/scanning/v2/scanoss_scanning_pb2.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/semgrep/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/semgrep/v2/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/vulnerabilities/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/vulnerabilities/v2/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/components.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/constants.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/cryptography.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/csvoutput.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/cyclonedx.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/data/osadl-copyleft.json +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/data/scanoss-settings-schema.json +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/data/spdx-exceptions.json +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/data/spdx-licenses.json +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/delta.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/export/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/export/dependency_track.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/file_filters.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/filecount.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/gitlabqualityreport.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/policy_check/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/policy_check/dependency_track/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/policy_check/dependency_track/project_violation.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/policy_check/policy_check.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/policy_check/scanoss/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/policy_check/scanoss/copyleft.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/policy_check/scanoss/undeclared_component.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/summary/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/summary/component_summary.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/summary/license_summary.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/summary/match_summary.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/utils/file_utils.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/utils/license_utils.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/utils/markdown_utils.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/inspection/utils/scan_result_processor.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/osadl.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/results.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/scancodedeps.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/scanners/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/scanners/container_scanner.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/scanners/folder_hasher.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/scanners/scanner_config.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/scanners/scanner_hfh.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/scanoss_settings.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/scanossgrpc.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/scanpostprocessor.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/scantype.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/services/dependency_track_service.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/spdxlite.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/threadeddependencies.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/threadedscanning.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/utils/__init__.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/utils/abstract_presenter.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/utils/crc64.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/utils/file.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/utils/scanoss_scan_results_utils.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss/utils/simhash.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss.egg-info/dependency_links.txt +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss.egg-info/entry_points.txt +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss.egg-info/requires.txt +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/src/scanoss.egg-info/top_level.txt +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/tests/test_csv_output.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/tests/test_file_filters.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/tests/test_osadl.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/tests/test_policy_inspect.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/tests/test_scan_post_processor.py +0 -0
- {scanoss-1.41.1 → scanoss-1.43.0}/tests/test_spdxlite.py +0 -0
|
@@ -170,7 +170,6 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
|
|
|
170
170
|
default=DEFAULT_RETRY,
|
|
171
171
|
help='Retry limit for API communication (optional - default 5)',
|
|
172
172
|
)
|
|
173
|
-
p_scan.add_argument('--no-wfp-output', action='store_true', help='Skip WFP file generation')
|
|
174
173
|
p_scan.add_argument('--dependencies', '-D', action='store_true', help='Add Dependency scanning')
|
|
175
174
|
p_scan.add_argument('--dependencies-only', action='store_true', help='Run Dependency scanning only')
|
|
176
175
|
p_scan.add_argument(
|
|
@@ -1096,6 +1095,19 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
|
|
|
1096
1095
|
p.add_argument('--skip-md5', '-5', type=str, action='append', help='Skip files matching MD5.')
|
|
1097
1096
|
p.add_argument('--strip-hpsm', '-G', type=str, action='append', help='Strip HPSM string from WFP.')
|
|
1098
1097
|
p.add_argument('--strip-snippet', '-N', type=str, action='append', help='Strip Snippet ID string from WFP.')
|
|
1098
|
+
p.add_argument(
|
|
1099
|
+
'--skip-headers',
|
|
1100
|
+
'-skh',
|
|
1101
|
+
action='store_true',
|
|
1102
|
+
help='Skip license headers, comments and imports at the beginning of files.',
|
|
1103
|
+
)
|
|
1104
|
+
p.add_argument(
|
|
1105
|
+
'--skip-headers-limit',
|
|
1106
|
+
'-shl',
|
|
1107
|
+
type=int,
|
|
1108
|
+
default=0,
|
|
1109
|
+
help='Maximum number of lines to skip when filtering headers (default: 0 = no limit).',
|
|
1110
|
+
)
|
|
1099
1111
|
|
|
1100
1112
|
# Global Scan/GRPC options
|
|
1101
1113
|
for p in [
|
|
@@ -1388,6 +1400,8 @@ def wfp(parser, args):
|
|
|
1388
1400
|
strip_hpsm_ids=args.strip_hpsm,
|
|
1389
1401
|
strip_snippet_ids=args.strip_snippet,
|
|
1390
1402
|
scan_settings=scan_settings,
|
|
1403
|
+
skip_headers=args.skip_headers,
|
|
1404
|
+
skip_headers_limit=args.skip_headers_limit,
|
|
1391
1405
|
)
|
|
1392
1406
|
if args.stdin:
|
|
1393
1407
|
contents = sys.stdin.buffer.read()
|
|
@@ -1537,9 +1551,6 @@ def scan(parser, args): # noqa: PLR0912, PLR0915
|
|
|
1537
1551
|
if args.retry < 0:
|
|
1538
1552
|
print_stderr(f'POST retry (--retry) too small: {args.retry}. Reverting to default.')
|
|
1539
1553
|
|
|
1540
|
-
if not os.access(os.getcwd(), os.W_OK): # Make sure the current directory is writable. If not disable saving WFP
|
|
1541
|
-
print_stderr(f'Warning: Current directory is not writable: {os.getcwd()}')
|
|
1542
|
-
args.no_wfp_output = True
|
|
1543
1554
|
if args.ca_cert and not os.path.exists(args.ca_cert):
|
|
1544
1555
|
print_stderr(f'Error: Certificate file does not exist: {args.ca_cert}.')
|
|
1545
1556
|
sys.exit(1)
|
|
@@ -1558,7 +1569,6 @@ def scan(parser, args): # noqa: PLR0912, PLR0915
|
|
|
1558
1569
|
nb_threads=args.threads,
|
|
1559
1570
|
post_size=args.post_size,
|
|
1560
1571
|
timeout=args.timeout,
|
|
1561
|
-
no_wfp_file=args.no_wfp_output,
|
|
1562
1572
|
all_extensions=args.all_extensions,
|
|
1563
1573
|
all_folders=args.all_folders,
|
|
1564
1574
|
hidden_files_folders=args.all_hidden,
|
|
@@ -1583,6 +1593,8 @@ def scan(parser, args): # noqa: PLR0912, PLR0915
|
|
|
1583
1593
|
scan_settings=scan_settings,
|
|
1584
1594
|
req_headers=process_req_headers(args.header),
|
|
1585
1595
|
use_grpc=args.grpc,
|
|
1596
|
+
skip_headers=args.skip_headers,
|
|
1597
|
+
skip_headers_limit=args.skip_headers_limit,
|
|
1586
1598
|
)
|
|
1587
1599
|
if args.wfp:
|
|
1588
1600
|
if not scanner.is_file_or_snippet_scan():
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
date: 20260105093002, utime: 1767605402
|
|
@@ -0,0 +1,563 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SPDX-License-Identifier: MIT
|
|
3
|
+
|
|
4
|
+
Copyright (c) 2025, SCANOSS
|
|
5
|
+
|
|
6
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
8
|
+
in the Software without restriction, including without limitation the rights
|
|
9
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
11
|
+
furnished to do so, subject to the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be included in
|
|
14
|
+
all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22
|
+
THE SOFTWARE.
|
|
23
|
+
|
|
24
|
+
Line Filter Module - Identifies where real source code implementation begins.
|
|
25
|
+
|
|
26
|
+
This module analyzes source code files and determines which lines are:
|
|
27
|
+
- License headers
|
|
28
|
+
- Documentation comments
|
|
29
|
+
- Imports/includes
|
|
30
|
+
- Blank lines
|
|
31
|
+
|
|
32
|
+
And returns the content from where the real implementation begins.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
import re
|
|
36
|
+
from pathlib import Path
|
|
37
|
+
from typing import Optional, Tuple
|
|
38
|
+
|
|
39
|
+
from .scanossbase import ScanossBase
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class LanguagePatterns:
    """
    Static regex tables used to recognise header material in source files.

    The class carries no behaviour; it only groups three class-level tables:

    - COMMENT_PATTERNS: comment style name -> regexes for single-line
      comments and for the start/end of multi-line comments or docstrings.
    - IMPORT_PATTERNS: language name -> list of regexes matching import,
      include, package or namespace statements.
    - LICENSE_KEYWORDS: lower-case keywords commonly found in license or
      copyright notices.
    """

    # Comment syntax grouped by family rather than by individual language
    COMMENT_PATTERNS = dict(
        # C, C++, Java, JavaScript, TypeScript, Go, Rust, C#, PHP, Kotlin,
        # Scala, Dart, Objective-C
        c_style=dict(
            single_line=r'^\s*//.*$',
            multi_start=r'^\s*/\*',
            multi_end=r'\*/\s*$',
            multi_single=r'^\s*/\*.*\*/\s*$',
        ),
        # Python, shell scripts, Ruby, Perl, R, Julia, YAML
        python_style=dict(
            single_line=r'^\s*#.*$',
            doc_string_start=r'^\s*"""',
            doc_string_end=r'"""\s*$',
        ),
        # Lua, SQL, Haskell
        lua_style=dict(
            single_line=r'^\s*--.*$',
            multi_start=r'^\s*--\[\[',
            multi_end=r'\]\]\s*$',
        ),
        # HTML, XML
        html_style=dict(
            multi_start=r'^\s*<!--',
            multi_end=r'-->\s*$',
            multi_single=r'^\s*<!--.*-->\s*$',
        ),
    )

    # Import / include / package statements, keyed by language name
    IMPORT_PATTERNS = {
        'python': [r'^\s*import\s+', r'^\s*from\s+.*\s+import\s+'],
        'javascript': [
            r'^\s*import\s+.*\s+from\s+',
            r'^\s*import\s+["\']',
            r'^\s*import\s+type\s+',
            r'^\s*export\s+\*\s+from\s+',
            r'^\s*export\s+\{.*\}\s+from\s+',
            r'^\s*const\s+.*\s*=\s*require\(',
            r'^\s*var\s+.*\s*=\s*require\(',
            r'^\s*let\s+.*\s*=\s*require\(',
        ],
        'typescript': [
            r'^\s*import\s+',
            r'^\s*export\s+.*\s+from\s+',
            r'^\s*import\s+type\s+',
            r'^\s*import\s+\{.*\}\s+from\s+',
        ],
        'java': [r'^\s*import\s+', r'^\s*package\s+'],
        'kotlin': [r'^\s*import\s+', r'^\s*package\s+'],
        'scala': [r'^\s*import\s+', r'^\s*package\s+'],
        'go': [
            r'^\s*import\s+\(',
            r'^\s*import\s+"',
            r'^\s*package\s+',
            # A bare quoted path inside an import ( ... ) block
            r'^\s*"[^"]*"\s*$',
            # Aliased import: name "package"
            r'^\s*[a-zA-Z_][a-zA-Z0-9_]*\s+"[^"]*"\s*$',
            # Blank-identifier import: _ "package"
            r'^\s*_\s+"[^"]*"\s*$',
        ],
        'rust': [r'^\s*use\s+', r'^\s*extern\s+crate\s+', r'^\s*mod\s+'],
        'cpp': [
            r'^\s*#include\s+',
            r'^\s*#pragma\s+',
            # Header guards: #ifndef FOO_H / #define FOO_H
            r'^\s*#ifndef\s+.*_H.*',
            r'^\s*#define\s+.*_H.*',
            # #endif, optionally followed by a // comment
            r'^\s*#endif\s+(//.*)?\s*$',
        ],
        'csharp': [r'^\s*using\s+', r'^\s*namespace\s+'],
        'php': [
            r'^\s*use\s+',
            r'^\s*require\s+',
            r'^\s*require_once\s+',
            r'^\s*include\s+',
            r'^\s*include_once\s+',
            r'^\s*namespace\s+',
        ],
        'swift': [r'^\s*import\s+'],
        'ruby': [r'^\s*require\s+', r'^\s*require_relative\s+', r'^\s*load\s+'],
        'perl': [r'^\s*use\s+', r'^\s*require\s+'],
        'r': [r'^\s*library\(', r'^\s*require\(', r'^\s*source\('],
        'lua': [r'^\s*require\s+', r'^\s*local\s+.*\s*=\s*require\('],
        'dart': [r'^\s*import\s+', r'^\s*export\s+', r'^\s*part\s+'],
        'haskell': [r'^\s*import\s+', r'^\s*module\s+'],
        'elixir': [
            r'^\s*import\s+',
            r'^\s*alias\s+',
            r'^\s*require\s+',
            r'^\s*use\s+',
        ],
        'clojure': [
            r'^\s*\(\s*ns\s+',
            r'^\s*\(\s*require\s+',
            r'^\s*\(\s*import\s+',
        ],
    }

    # Lower-case markers that hint at license / copyright text
    LICENSE_KEYWORDS = [
        'copyright',
        'license',
        'licensed',
        'all rights reserved',
        'permission',
        'redistribution',
        'warranty',
        'liability',
        'apache',
        'mit',
        'gpl',
        'bsd',
        'mozilla',
        'author:',
        'spdx-license',
        'contributors',
        'licensee',
    ]
|
|
205
|
+
|
|
206
|
+
# A line holding exactly this many triple-quote markers opens and closes a
# docstring on that same line.
COMPLETE_DOCSTRING_QUOTE_COUNT = 2
# NOTE(review): presumably an upper bound on license header length; its use
# is not visible in this part of the module - confirm against the caller.
LICENSE_HEADER_MAX_LINES = 50
# File extension -> language name (the key space of IMPORT_PATTERNS)
EXT_MAP = {
    # Python
    '.py': 'python',
    # JavaScript / TypeScript
    '.js': 'javascript', '.mjs': 'javascript', '.cjs': 'javascript',
    '.ts': 'typescript', '.tsx': 'typescript',
    '.jsx': 'javascript',
    # JVM languages
    '.java': 'java',
    '.kt': 'kotlin', '.kts': 'kotlin',
    '.scala': 'scala', '.sc': 'scala',
    # Go / Rust
    '.go': 'go',
    '.rs': 'rust',
    # C / C++ sources and headers
    '.cpp': 'cpp', '.cc': 'cpp', '.cxx': 'cpp', '.c': 'cpp',
    '.h': 'cpp', '.hpp': 'cpp', '.hxx': 'cpp',
    # Other languages with dedicated import patterns
    '.cs': 'csharp',
    '.php': 'php',
    '.swift': 'swift',
    '.rb': 'ruby',
    '.pl': 'perl', '.pm': 'perl',
    '.r': 'r', '.R': 'r',
    '.lua': 'lua',
    '.dart': 'dart',
    '.hs': 'haskell',
    '.ex': 'elixir', '.exs': 'elixir',
    '.clj': 'clojure', '.cljs': 'clojure',
    # Objective-C / Objective-C++ reuse the C/C++ patterns
    '.m': 'cpp', '.mm': 'cpp',
    # Shell scripts share Python's # comment style but have no dedicated
    # import patterns (source/. commands won't be filtered)
    '.sh': 'python', '.bash': 'python', '.zsh': 'python', '.fish': 'python',
}
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def is_blank_line(stripped_line: str) -> bool:
    """
    Check if a line is blank.

    A line counts as blank when it is empty or consists solely of
    whitespace. Callers normally pass an already-stripped line, but the
    whitespace is stripped here as well so the result is the same for a raw
    line (for example one that still carries its newline).

    :param stripped_line: The line to evaluate.
    :return: True if the line is empty or whitespace-only, otherwise False.
    """
    # strip() leaves already-stripped input unchanged, so this only widens
    # the check to whitespace-only input.
    return not stripped_line.strip()
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def is_shebang(stripped_line: str) -> bool:
    """
    Tell whether a line is a shebang (interpreter directive).

    The test is purely textual: the line must begin with the two characters
    '#!'. Leading whitespace is not skipped.

    :param stripped_line: The line to inspect.
    :return: True when the line begins with '#!', otherwise False.
    """
    marker = '#!'
    return stripped_line[:len(marker)] == marker
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
class HeaderFilter(ScanossBase):
|
|
285
|
+
"""
|
|
286
|
+
Source code file analyser that filters headers, comments, and imports.
|
|
287
|
+
|
|
288
|
+
This class processes code files and returns only the real
|
|
289
|
+
implementation content, omitting licenses, documentation comments,
|
|
290
|
+
and imports.
|
|
291
|
+
"""
|
|
292
|
+
|
|
293
|
+
def __init__(
    self,
    debug: bool = False,
    trace: bool = False,
    quiet: bool = False,
    skip_limit: Optional[int] = None
):
    """
    Set up a HeaderFilter.

    :param debug: Passed through to the ScanossBase initialiser.
    :param trace: Passed through to the ScanossBase initialiser.
    :param quiet: Passed through to the ScanossBase initialiser.
    :param skip_limit: Upper bound on the number of leading lines that may
                       be reported as skipped. None or 0 means no limit.
    """
    super().__init__(debug, trace, quiet)
    # Kept as given; filter() only applies the cap when it is greater than 0
    self.max_lines = skip_limit
    self.patterns = LanguagePatterns()
|
|
312
|
+
|
|
313
|
+
def filter(self, file: str, decoded_contents: str) -> int:
    """
    Work out how many leading lines of a file are header material.

    The language is detected from the file name, then
    find_first_implementation_line() is asked where the implementation
    starts. The offset is that line number minus one, capped at max_lines
    when a positive limit is configured.

    :param file: File path (used to detect the language by extension)
    :param decoded_contents: File contents as decoded text
    :return: Number of lines to skip from the beginning of the file. 0 when
             the file or contents are missing, the language is unsupported,
             or no implementation line is found.
    """
    if not (decoded_contents and file):
        self.print_msg(f'No file or contents provided, skipping line filter for: {file}')
        return 0
    self.print_debug(f'HeaderFilter processing file: {file}')

    language = self.detect_language(file)
    if not language:
        # Unsupported language: nothing is filtered
        self.print_debug(f'Skipping line filter for unsupported language: {file}')
        return 0

    lines = decoded_contents.splitlines(keepends=True)
    if not lines:
        self.print_msg(f'No lines in file: {file}')
        return 0
    self.print_debug(f'Analysing {len(lines)} lines for file: {file}')

    first_impl = self.find_first_implementation_line(lines, language)
    if first_impl is None:
        # Nothing but header material was found
        self.print_debug(f'No implementation found in file: {file}')
        return 0

    # Everything before the first implementation line is skipped
    skipped = first_impl - 1
    limit = self.max_lines
    if limit is not None and 0 < limit < skipped:
        self.print_trace(
            f'Line offset {skipped} exceeds max_lines {limit}, '
            f'capping at {limit} for: {file}'
        )
        skipped = limit

    if skipped > 0:
        self.print_debug(f'Filtered out {skipped} lines from beginning of {file} (language: {language})')
    return skipped
|
|
361
|
+
|
|
362
|
+
def detect_language(self, file_path: str) -> Optional[str]:
    """
    Map a file path to a language name using its extension.

    The final suffix of the path is lower-cased and looked up in EXT_MAP.
    The outcome is reported through print_debug in every case.

    :param file_path: Path of the file whose language should be detected.
    :return: The language name mapped to the extension, or None when the
             path has no extension or the extension is not mapped.
    """
    extension = Path(file_path).suffix.lower()
    if not extension:
        self.print_debug(f'No file extension found, skipping language detection for: {file_path}')
        return None

    detected_language = EXT_MAP.get(extension)
    if detected_language:
        self.print_debug(f'Detected language "{detected_language}" for extension "{extension}"')
    else:
        self.print_debug(f'No language mapping found for extension "{extension}"')
    return detected_language
|
|
387
|
+
|
|
388
|
+
def is_license_header(self, line: str) -> bool:
    """
    Decide whether a line looks like part of a license header.

    The line is lower-cased and scanned for each entry of
    self.patterns.LICENSE_KEYWORDS. Matching is by plain substring, so a
    keyword embedded in a longer word also counts.

    :param line: The line of text to check.
    :return: True if at least one license keyword occurs in the line,
             False otherwise.
    """
    lowered = line.lower()
    for keyword in self.patterns.LICENSE_KEYWORDS:
        if keyword in lowered:
            return True
    return False
|
|
403
|
+
|
|
404
|
+
def get_comment_style(self, language: str) -> str:
    """
    Return the comment style associated with a given programming language.

    Languages are grouped into families that share comment syntax:

    - 'c_style': // and /* */ comments
    - 'python_style': # comments (plus triple-quoted docstrings)
    - 'lua_style': -- comments

    Elixir is placed in the 'python_style' family because its comments start
    with '#'; previously it fell through to the 'c_style' default, so '#'
    comment lines in Elixir files could not match the '//' patterns.

    Any other language, including a missing one, falls back to 'c_style' and
    the fallback is reported through print_debug.

    :param language: The name of the programming language for which the
                     comment style needs to be determined.
    :return: One of 'c_style', 'python_style' or 'lua_style'.
    """
    c_family = (
        'cpp', 'java', 'kotlin', 'scala', 'javascript', 'typescript',
        'go', 'rust', 'csharp', 'php', 'swift', 'dart',
    )
    hash_family = ('python', 'ruby', 'perl', 'r', 'elixir')
    dash_family = ('lua', 'haskell')

    if language in c_family:
        return 'c_style'
    if language in hash_family:
        return 'python_style'
    if language in dash_family:
        return 'lua_style'
    # No dedicated family (e.g. clojure, or no language at all): keep the
    # historical default.
    self.print_debug(f'No comment style defined for language "{language}", using default: "c_style"')
    return 'c_style'  # Default
|
|
429
|
+
|
|
430
|
+
def is_comment(self, line: str, in_multiline: bool, patterns: dict) -> Tuple[bool, bool]:  # noqa: PLR0911
    """
    Decide whether a line is (part of) a comment and track multiline state.

    :param line: Line to check
    :param in_multiline: Whether the previous line left us inside a multiline comment/docstring
    :param patterns: Comment regexes keyed by role ('single_line', 'multi_single',
                     'multi_start', 'multi_end', 'doc_string_start', 'doc_string_end');
                     any key may be absent
    :return: Tuple of (is_comment, still_in_multiline)
    """
    if not patterns:
        self.print_msg('No comment patterns defined, skipping comment check')
        return False, in_multiline

    def found_anywhere(key: str) -> bool:
        # The pattern exists and occurs somewhere in the line.
        return key in patterns and re.search(patterns[key], line) is not None

    def found_at_start(key: str) -> bool:
        # The pattern exists and matches from the beginning of the line.
        return key in patterns and re.match(patterns[key], line) is not None

    # Inside a multiline comment every line is a comment; only a terminator closes it.
    if in_multiline:
        terminated = found_anywhere('multi_end') or found_anywhere('doc_string_end')
        return True, not terminated

    # Single-line comment, or a multiline comment opened and closed on this line.
    if found_at_start('single_line') or found_at_start('multi_single'):
        return True, False

    # Opening of a multiline comment (C-style); stays open unless it also closes here.
    if found_anywhere('multi_start'):
        return True, not found_anywhere('multi_end')

    # Docstring handling (Python): driven by the number of triple-quote markers on the line.
    if 'doc_string_start' in patterns and '"""' in line:
        marker_count = line.count('"""')
        if marker_count == COMPLETE_DOCSTRING_QUOTE_COUNT:  # Complete docstring in one line
            return True, False
        if marker_count == 1:  # Start of a multiline docstring
            return True, True

    # Default response: not a comment
    return False, in_multiline
|
|
472
|
+
|
|
473
|
+
def is_import(self, line: str, patterns: dict) -> bool:
    """
    Check if a line of code is an import or include statement.

    Each entry of ``patterns`` is tried against the line with ``re.match``, so a
    pattern has to match from the beginning of the line.

    :param line: A single line of code to check.
    :param patterns: Import/include regular expressions for the language being processed.
                     May be empty or None when the language has none defined.
    :return: True if the line matches any of the patterns; False if nothing matches or
             no patterns were supplied.
    """
    if not patterns:
        self.print_debug('No import patterns defined, skipping import check')
        # Return here: without it the code below would iterate `patterns`,
        # which raises TypeError when it is None.
        return False
    return any(re.match(pattern, line) for pattern in patterns)
|
|
489
|
+
|
|
490
|
+
def find_first_implementation_line(self, lines: list[str], language: str) -> Optional[int]:  # noqa: PLR0912
    """
    Find the line number where the implementation begins.

    Scans from the top of the file and skips, in this order of checks: a shebang
    on the very first line, blank lines, comment lines (single-line and multiline),
    Go ``import ( ... )`` blocks, and lines matching the language's import patterns.
    Returns as soon as a line is found that is none of those.

    :param lines: List of code lines
    :param language: Programming language
    :return: Line number (1-indexed) where implementation starts, or None if no
             lines/language were given or every line was skipped
    """
    if not lines or not language:
        self.print_debug('No lines or language provided, skipping implementation line detection')
        return None
    in_multiline_comment = False
    in_license_section = False  # Only drives trace output; it never changes the result
    in_import_block = False  # Inside a Go `import ( ... )` block
    import_section_seen = False  # So the "import section" trace is emitted only once
    # Get comment & import patterns for the language
    comment_patterns = self.patterns.COMMENT_PATTERNS[self.get_comment_style(language)]
    try:
        import_patterns = self.patterns.IMPORT_PATTERNS[language]
    except KeyError:
        # A language without import patterns should not abort the scan:
        # comments and blank lines can still be skipped.
        self.print_debug(f'No import patterns defined for language "{language}", skipping import detection')
        import_patterns = []
    # Iterate through lines trying to find the first implementation line
    for i, line in enumerate(lines):
        line_number = i + 1
        stripped = line.strip()
        # Shebang (only first line) or blank line
        if (i == 0 and is_shebang(stripped)) or is_blank_line(stripped):
            continue
        # Check if it's a comment
        is_a_comment, in_multiline_comment = self.is_comment(line, in_multiline_comment, comment_patterns)
        if is_a_comment:
            # Check if it's part of the license header
            if self.is_license_header(line):
                if not in_license_section:
                    self.print_trace(f'Line {line_number}: Detected license header section')
                in_license_section = True
            elif in_license_section and line_number < LICENSE_HEADER_MAX_LINES:
                pass  # Still in the license section. Keep looking.
            else:
                if in_license_section:
                    self.print_trace(f'Line {line_number}: End of license header section')
                in_license_section = False
            continue
        # Any non-blank, non-comment line ends the license section
        in_license_section = False
        # Handle import blocks in Go
        if language == 'go':
            if stripped.startswith('import ('):
                self.print_trace(f'Line {line_number}: Detected Go import block start')
                in_import_block = True
                continue
            if in_import_block:
                if stripped == ')':
                    self.print_trace(f'Line {line_number}: Detected Go import block end')
                    in_import_block = False
                    continue
                # Block entries: "pkg", _ "pkg", alias "pkg" and dot-imports (. "pkg")
                if (stripped.startswith(('"', '_')) or
                        re.match(r'^(?:[a-zA-Z_][a-zA-Z0-9_]*\s+|\.\s*)"', stripped)):
                    continue
        # Check if it's an import
        if self.is_import(line, import_patterns):
            if not import_section_seen:
                self.print_trace(f'Line {line_number}: Detected import section')
                import_section_seen = True
            continue
        # If we get here, it's implementation code - return immediately!
        self.print_trace(f'Line {line_number}: First implementation line detected')
        return line_number
    # Every line was a shebang, blank, comment or import
    return None
|
|
561
|
+
#
|
|
562
|
+
# End of HeaderFilter Class
|
|
563
|
+
#
|