scanoss 1.24.0__tar.gz → 1.25.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. {scanoss-1.24.0/src/scanoss.egg-info → scanoss-1.25.0}/PKG-INFO +1 -1
  2. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/__init__.py +1 -1
  3. scanoss-1.25.0/src/scanoss/data/build_date.txt +1 -0
  4. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/inspection/policy_check.py +77 -47
  5. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/winnowing.py +64 -7
  6. {scanoss-1.24.0 → scanoss-1.25.0/src/scanoss.egg-info}/PKG-INFO +1 -1
  7. scanoss-1.25.0/tests/test_winnowing.py +393 -0
  8. scanoss-1.24.0/src/scanoss/data/build_date.txt +0 -1
  9. scanoss-1.24.0/tests/test_winnowing.py +0 -82
  10. {scanoss-1.24.0 → scanoss-1.25.0}/LICENSE +0 -0
  11. {scanoss-1.24.0 → scanoss-1.25.0}/PACKAGE.md +0 -0
  12. {scanoss-1.24.0 → scanoss-1.25.0}/README.md +0 -0
  13. {scanoss-1.24.0 → scanoss-1.25.0}/pyproject.toml +0 -0
  14. {scanoss-1.24.0 → scanoss-1.25.0}/setup.cfg +0 -0
  15. {scanoss-1.24.0 → scanoss-1.25.0}/src/protoc_gen_swagger/__init__.py +0 -0
  16. {scanoss-1.24.0 → scanoss-1.25.0}/src/protoc_gen_swagger/options/__init__.py +0 -0
  17. {scanoss-1.24.0 → scanoss-1.25.0}/src/protoc_gen_swagger/options/annotations_pb2.py +0 -0
  18. {scanoss-1.24.0 → scanoss-1.25.0}/src/protoc_gen_swagger/options/annotations_pb2_grpc.py +0 -0
  19. {scanoss-1.24.0 → scanoss-1.25.0}/src/protoc_gen_swagger/options/openapiv2_pb2.py +0 -0
  20. {scanoss-1.24.0 → scanoss-1.25.0}/src/protoc_gen_swagger/options/openapiv2_pb2_grpc.py +0 -0
  21. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/__init__.py +0 -0
  22. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/common/__init__.py +0 -0
  23. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/common/v2/__init__.py +0 -0
  24. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/common/v2/scanoss_common_pb2.py +0 -0
  25. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/common/v2/scanoss_common_pb2_grpc.py +0 -0
  26. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/components/__init__.py +0 -0
  27. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/components/v2/__init__.py +0 -0
  28. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/components/v2/scanoss_components_pb2.py +0 -0
  29. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/components/v2/scanoss_components_pb2_grpc.py +0 -0
  30. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2.py +0 -0
  31. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/cryptography/v2/scanoss_cryptography_pb2_grpc.py +0 -0
  32. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/dependencies/__init__.py +0 -0
  33. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/dependencies/v2/__init__.py +0 -0
  34. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2.py +0 -0
  35. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/dependencies/v2/scanoss_dependencies_pb2_grpc.py +0 -0
  36. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/geoprovenance/__init__.py +0 -0
  37. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/geoprovenance/v2/__init__.py +0 -0
  38. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2.py +0 -0
  39. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/geoprovenance/v2/scanoss_geoprovenance_pb2_grpc.py +0 -0
  40. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/scanning/__init__.py +0 -0
  41. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/scanning/v2/__init__.py +0 -0
  42. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/scanning/v2/scanoss_scanning_pb2.py +0 -0
  43. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/scanning/v2/scanoss_scanning_pb2_grpc.py +0 -0
  44. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/semgrep/__init__.py +0 -0
  45. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/semgrep/v2/__init__.py +0 -0
  46. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/semgrep/v2/scanoss_semgrep_pb2.py +0 -0
  47. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/semgrep/v2/scanoss_semgrep_pb2_grpc.py +0 -0
  48. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/vulnerabilities/__init__.py +0 -0
  49. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/vulnerabilities/v2/__init__.py +0 -0
  50. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2.py +0 -0
  51. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/api/vulnerabilities/v2/scanoss_vulnerabilities_pb2_grpc.py +0 -0
  52. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/cli.py +0 -0
  53. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/components.py +0 -0
  54. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/constants.py +0 -0
  55. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/cryptography.py +0 -0
  56. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/csvoutput.py +0 -0
  57. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/cyclonedx.py +0 -0
  58. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/data/scanoss-settings-schema.json +0 -0
  59. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/data/spdx-exceptions.json +0 -0
  60. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/data/spdx-licenses.json +0 -0
  61. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/file_filters.py +0 -0
  62. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/filecount.py +0 -0
  63. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/inspection/__init__.py +0 -0
  64. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/inspection/copyleft.py +0 -0
  65. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/inspection/undeclared_component.py +0 -0
  66. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/inspection/utils/license_utils.py +0 -0
  67. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/results.py +0 -0
  68. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/scancodedeps.py +0 -0
  69. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/scanner.py +0 -0
  70. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/scanners/__init__.py +0 -0
  71. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/scanners/container_scanner.py +0 -0
  72. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/scanners/folder_hasher.py +0 -0
  73. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/scanners/scanner_config.py +0 -0
  74. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/scanners/scanner_hfh.py +0 -0
  75. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/scanoss_settings.py +0 -0
  76. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/scanossapi.py +0 -0
  77. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/scanossbase.py +0 -0
  78. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/scanossgrpc.py +0 -0
  79. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/scanpostprocessor.py +0 -0
  80. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/scantype.py +0 -0
  81. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/spdxlite.py +0 -0
  82. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/threadeddependencies.py +0 -0
  83. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/threadedscanning.py +0 -0
  84. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/utils/__init__.py +0 -0
  85. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/utils/abstract_presenter.py +0 -0
  86. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/utils/crc64.py +0 -0
  87. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/utils/file.py +0 -0
  88. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss/utils/simhash.py +0 -0
  89. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss.egg-info/SOURCES.txt +0 -0
  90. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss.egg-info/dependency_links.txt +0 -0
  91. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss.egg-info/entry_points.txt +0 -0
  92. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss.egg-info/requires.txt +0 -0
  93. {scanoss-1.24.0 → scanoss-1.25.0}/src/scanoss.egg-info/top_level.txt +0 -0
  94. {scanoss-1.24.0 → scanoss-1.25.0}/tests/test_csv_output.py +0 -0
  95. {scanoss-1.24.0 → scanoss-1.25.0}/tests/test_file_filters.py +0 -0
  96. {scanoss-1.24.0 → scanoss-1.25.0}/tests/test_policy_inspect.py +0 -0
  97. {scanoss-1.24.0 → scanoss-1.25.0}/tests/test_scan_post_processor.py +0 -0
  98. {scanoss-1.24.0 → scanoss-1.25.0}/tests/test_spdxlite.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scanoss
3
- Version: 1.24.0
3
+ Version: 1.25.0
4
4
  Summary: Simple Python library to leverage the SCANOSS APIs
5
5
  Home-page: https://scanoss.com
6
6
  Author: SCANOSS
@@ -22,4 +22,4 @@ SPDX-License-Identifier: MIT
22
22
  THE SOFTWARE.
23
23
  """
24
24
 
25
- __version__ = '1.24.0'
25
+ __version__ = '1.25.0'
@@ -0,0 +1 @@
1
+ date: 20250610161304, utime: 1749571984
@@ -26,9 +26,10 @@ import json
26
26
  import os.path
27
27
  from abc import abstractmethod
28
28
  from enum import Enum
29
- from typing import Callable, List, Dict, Any
30
- from .utils.license_utils import LicenseUtil
29
+ from typing import Any, Callable, Dict, List
30
+
31
31
  from ..scanossbase import ScanossBase
32
+ from .utils.license_utils import LicenseUtil
32
33
 
33
34
 
34
35
  class PolicyStatus(Enum):
@@ -87,7 +88,7 @@ class PolicyCheck(ScanossBase):
87
88
 
88
89
  VALID_FORMATS = {'md', 'json', 'jira_md'}
89
90
 
90
- def __init__(
91
+ def __init__( # noqa: PLR0913
91
92
  self,
92
93
  debug: bool = False,
93
94
  trace: bool = True,
@@ -181,10 +182,9 @@ class PolicyCheck(ScanossBase):
181
182
  :param status: The new component status
182
183
  :return: The updated components dictionary
183
184
  """
184
-
185
185
  # Determine the component key and purl based on component type
186
186
  if id in [ComponentID.FILE.value, ComponentID.SNIPPET.value]:
187
- purl = new_component['purl'][0] # Take first purl for these component types
187
+ purl = new_component['purl'][0] # Take the first purl for these component types
188
188
  else:
189
189
  purl = new_component['purl']
190
190
 
@@ -195,14 +195,13 @@ class PolicyCheck(ScanossBase):
195
195
  'licenses': {},
196
196
  'status': status,
197
197
  }
198
-
199
198
  if not new_component.get('licenses'):
200
- self.print_stderr(f'WARNING: Results missing licenses. Skipping.')
199
+ self.print_debug(f'WARNING: Results missing licenses. Skipping: {new_component}')
201
200
  return components
202
201
  # Process licenses for this component
203
- for l in new_component['licenses']:
204
- if l.get('name'):
205
- spdxid = l['name']
202
+ for license_item in new_component['licenses']:
203
+ if license_item.get('name'):
204
+ spdxid = license_item['name']
206
205
  components[component_key]['licenses'][spdxid] = {
207
206
  'spdxid': spdxid,
208
207
  'copyleft': self.license_util.is_copyleft(spdxid),
@@ -210,71 +209,103 @@ class PolicyCheck(ScanossBase):
210
209
  }
211
210
  return components
212
211
 
213
- def _get_components_from_results(self, results: Dict[str, Any]) -> list or None:
212
+ def _get_components_data(self, results: Dict[str, Any], components: Dict[str, Any]) -> Dict[str, Any]:
214
213
  """
215
- Process the results dictionary to extract and format component information.
216
-
217
- This function iterates through the results dictionary, identifying components from
218
- different sources (files, snippets, and dependencies). It consolidates this information
219
- into a list of unique components, each with its associated licenses and other details.
214
+ Extract and process file and snippet components from results.
220
215
 
221
216
  :param results: A dictionary containing the raw results of a component scan
222
- :return: A list of dictionaries, each representing a unique component with its details
217
+ :param components: Existing components dictionary to update
218
+ :return: Updated components dictionary with file and snippet data
223
219
  """
224
- if results is None:
225
- self.print_stderr(f'ERROR: Results cannot be empty')
226
- return None
227
- components = {}
228
220
  for component in results.values():
229
221
  for c in component:
230
222
  component_id = c.get('id')
231
223
  if not component_id:
232
- self.print_stderr(f'WARNING: Result missing id. Skipping.')
224
+ self.print_debug(f'WARNING: Result missing id. Skipping: {c}')
233
225
  continue
234
226
  status = c.get('status')
235
- if not component_id:
236
- self.print_stderr(f'WARNING: Result missing status. Skipping.')
227
+ if not status:
228
+ self.print_debug(f'WARNING: Result missing status. Skipping: {c}')
237
229
  continue
238
230
  if component_id in [ComponentID.FILE.value, ComponentID.SNIPPET.value]:
239
231
  if not c.get('purl'):
240
- self.print_stderr(f'WARNING: Result missing purl. Skipping.')
232
+ self.print_debug(f'WARNING: Result missing purl. Skipping: {c}')
241
233
  continue
242
234
  if len(c.get('purl')) <= 0:
243
- self.print_stderr(f'WARNING: Result missing purls. Skipping.')
235
+ self.print_debug(f'WARNING: Result missing purls. Skipping: {c}')
244
236
  continue
245
237
  if not c.get('version'):
246
- self.print_stderr(f'WARNING: Result missing version. Skipping.')
238
+ self.print_msg(f'WARNING: Result missing version. Skipping: {c}')
247
239
  continue
248
240
  component_key = f'{c["purl"][0]}@{c["version"]}'
249
- # Initialize or update the component entry
250
241
  if component_key not in components:
251
242
  components = self._append_component(components, c, component_id, status)
243
+ # End component loop
244
+ # End components loop
245
+ return components
252
246
 
253
- if c['id'] == ComponentID.DEPENDENCY.value:
247
+ def _get_dependencies_data(self, results: Dict[str, Any], components: Dict[str, Any]) -> Dict[str, Any]:
248
+ """
249
+ Extract and process dependency components from results.
250
+
251
+ :param results: A dictionary containing the raw results of a component scan
252
+ :param components: Existing components dictionary to update
253
+ :return: Updated components dictionary with dependency data
254
+ """
255
+ for component in results.values():
256
+ for c in component:
257
+ component_id = c.get('id')
258
+ if not component_id:
259
+ self.print_debug(f'WARNING: Result missing id. Skipping: {c}')
260
+ continue
261
+ status = c.get('status')
262
+ if not status:
263
+ self.print_debug(f'WARNING: Result missing status. Skipping: {c}')
264
+ continue
265
+ if component_id == ComponentID.DEPENDENCY.value:
254
266
  if c.get('dependencies') is None:
255
267
  continue
256
- for d in c['dependencies']:
257
- if not d.get('purl'):
258
- self.print_stderr(f'WARNING: Result missing purl. Skipping.')
259
- continue
260
- if len(d.get('purl')) <= 0:
261
- self.print_stderr(f'WARNING: Result missing purls. Skipping.')
268
+ for dependency in c['dependencies']:
269
+ if not dependency.get('purl'):
270
+ self.print_debug(f'WARNING: Dependency result missing purl. Skipping: {dependency}')
262
271
  continue
263
- if not d.get('version'):
264
- self.print_stderr(f'WARNING: Result missing version. Skipping.')
272
+ if not dependency.get('version'):
273
+ self.print_msg(f'WARNING: Dependency result missing version. Skipping: {dependency}')
265
274
  continue
266
- component_key = f'{d["purl"]}@{d["version"]}'
275
+ component_key = f'{dependency["purl"]}@{dependency["version"]}'
267
276
  if component_key not in components:
268
- components = self._append_component(components, d, component_id, status)
269
- # End of dependencies loop
270
- # End if
271
- # End of component loop
272
- # End of results loop
273
- results = list(components.values())
274
- for component in results:
277
+ components = self._append_component(components, dependency, component_id, status)
278
+ # End dependency loop
279
+ # End component loop
280
+ # End of result loop
281
+ return components
282
+
283
+ def _get_components_from_results(self, results: Dict[str, Any]) -> list or None:
284
+ """
285
+ Process the results dictionary to extract and format component information.
286
+
287
+ This function iterates through the results dictionary, identifying components from
288
+ different sources (files, snippets, and dependencies). It consolidates this information
289
+ into a list of unique components, each with its associated licenses and other details.
290
+
291
+ :param results: A dictionary containing the raw results of a component scan
292
+ :return: A list of dictionaries, each representing a unique component with its details
293
+ """
294
+ if results is None:
295
+ self.print_stderr('ERROR: Results cannot be empty')
296
+ return None
297
+
298
+ components = {}
299
+ # Extract file and snippet components
300
+ components = self._get_components_data(results, components)
301
+ # Extract dependency components
302
+ components = self._get_dependencies_data(results, components)
303
+ # Convert to list and process licenses
304
+ results_list = list(components.values())
305
+ for component in results_list:
275
306
  component['licenses'] = list(component['licenses'].values())
276
307
 
277
- return results
308
+ return results_list
278
309
 
279
310
  def generate_table(self, headers, rows, centered_columns=None):
280
311
  """
@@ -403,7 +434,6 @@ class PolicyCheck(ScanossBase):
403
434
  components = self._get_components_from_results(self.results)
404
435
  return components
405
436
 
406
-
407
437
  #
408
438
  # End of PolicyCheck Class
409
439
  #
@@ -32,9 +32,10 @@ import hashlib
32
32
  import pathlib
33
33
  import platform
34
34
  import re
35
+ from typing import Tuple
35
36
 
36
- from crc32c import crc32c
37
37
  from binaryornot.check import is_binary
38
+ from crc32c import crc32c
38
39
 
39
40
  from .scanossbase import ScanossBase
40
41
 
@@ -157,7 +158,7 @@ class Winnowing(ScanossBase):
157
158
  a list of WFP fingerprints with their corresponding line numbers.
158
159
  """
159
160
 
160
- def __init__(
161
+ def __init__( # noqa: PLR0913
161
162
  self,
162
163
  size_limit: bool = False,
163
164
  debug: bool = False,
@@ -197,6 +198,7 @@ class Winnowing(ScanossBase):
197
198
  self.strip_hpsm_ids = strip_hpsm_ids
198
199
  self.strip_snippet_ids = strip_snippet_ids
199
200
  self.hpsm = hpsm
201
+ self.is_windows = platform.system() == 'Windows'
200
202
  if hpsm:
201
203
  self.crc8_maxim_dow_table = []
202
204
  self.crc8_generate_table()
@@ -218,11 +220,11 @@ class Winnowing(ScanossBase):
218
220
  return byte
219
221
  if byte >= ASCII_a:
220
222
  return byte
221
- if (byte >= 65) and (byte <= 90):
223
+ if (byte >= ASCII_A) and (byte <= ASCII_Z):
222
224
  return byte + 32
223
225
  return 0
224
226
 
225
- def __skip_snippets(self, file: str, src: str) -> bool:
227
+ def __skip_snippets(self, file: str, src: str) -> bool: # noqa: PLR0911
226
228
  """
227
229
  Determine files that are not of interest based on their content or file extension
228
230
  Parameters
@@ -351,7 +353,55 @@ class Winnowing(ScanossBase):
351
353
  self.print_debug(f'Stripped snippet ids from {file}')
352
354
  return wfp
353
355
 
354
- def wfp_for_contents(self, file: str, bin_file: bool, contents: bytes) -> str:
356
+ def __detect_line_endings(self, contents: bytes) -> Tuple[bool, bool, bool]:
357
+ """Detect the types of line endings present in file contents.
358
+
359
+ Args:
360
+ contents: File contents as bytes.
361
+
362
+ Returns:
363
+ Tuple of (has_crlf, has_standalone_lf, has_standalone_cr) indicating which line ending types are present.
364
+ """
365
+ has_crlf = b'\r\n' in contents
366
+ # For LF detection, we need to find LF that's not part of CRLF
367
+ content_without_crlf = contents.replace(b'\r\n', b'')
368
+ has_standalone_lf = b'\n' in content_without_crlf
369
+ # For CR detection, we need to find CR that's not part of CRLF
370
+ has_standalone_cr = b'\r' in content_without_crlf
371
+
372
+ return has_crlf, has_standalone_lf, has_standalone_cr
373
+
374
+ def __calculate_opposite_line_ending_hash(self, contents: bytes):
375
+ """Calculate hash for contents with opposite line endings.
376
+
377
+ If the file is primarily Unix (LF), calculates Windows (CRLF) hash.
378
+ If the file is primarily Windows (CRLF), calculates Unix (LF) hash.
379
+
380
+ Args:
381
+ contents: File contents as bytes.
382
+
383
+ Returns:
384
+ Hash with opposite line endings as hex string, or None if no line endings detected.
385
+ """
386
+ has_crlf, has_standalone_lf, has_standalone_cr = self.__detect_line_endings(contents)
387
+
388
+ if not has_crlf and not has_standalone_lf and not has_standalone_cr:
389
+ return None
390
+
391
+ # Normalize all line endings to LF first
392
+ normalized = contents.replace(b'\r\n', b'\n').replace(b'\r', b'\n')
393
+
394
+ # Determine the dominant line ending type
395
+ if has_crlf and not has_standalone_lf and not has_standalone_cr:
396
+ # File is Windows (CRLF) - produce Unix (LF) hash
397
+ opposite_contents = normalized
398
+ else:
399
+ # File is Unix (LF/CR) or mixed - produce Windows (CRLF) hash
400
+ opposite_contents = normalized.replace(b'\n', b'\r\n')
401
+
402
+ return hashlib.md5(opposite_contents).hexdigest()
403
+
404
+ def wfp_for_contents(self, file: str, bin_file: bool, contents: bytes) -> str: # noqa: PLR0912, PLR0915
355
405
  """
356
406
  Generate a Winnowing fingerprint (WFP) for the given file contents
357
407
  Parameters
@@ -371,7 +421,7 @@ class Winnowing(ScanossBase):
371
421
  content_length = len(contents)
372
422
  original_filename = file
373
423
 
374
- if platform.system() == 'Windows':
424
+ if self.is_windows:
375
425
  original_filename = file.replace('\\', '/')
376
426
  wfp_filename = repr(original_filename).strip("'") # return a utf-8 compatible version of the filename
377
427
  if self.obfuscate: # hide the real size of the file and its name, but keep the suffix
@@ -380,6 +430,13 @@ class Winnowing(ScanossBase):
380
430
  self.file_map[wfp_filename] = original_filename # Save the file name map for later (reverse lookup)
381
431
 
382
432
  wfp = 'file={0},{1},{2}\n'.format(file_md5, content_length, wfp_filename)
433
+
434
+ # Add opposite line ending hash based on line ending analysis
435
+ if not bin_file:
436
+ opposite_hash = self.__calculate_opposite_line_ending_hash(contents)
437
+ if opposite_hash is not None:
438
+ wfp += f'fh2={opposite_hash}\n'
439
+
383
440
  # We don't process snippets for binaries, or other uninteresting files, or if we're requested to skip
384
441
  if bin_file or self.skip_snippets or self.__skip_snippets(file, contents.decode('utf-8', 'ignore')):
385
442
  return wfp
@@ -467,7 +524,7 @@ class Winnowing(ScanossBase):
467
524
  for i, byte in enumerate(content):
468
525
  c = byte
469
526
  if c == ASCII_LF: # When there is a new line
470
- if len(list_normalized):
527
+ if list_normalized:
471
528
  crc_lines.append(self.crc8_buffer(list_normalized))
472
529
  list_normalized = []
473
530
  elif last_line + 1 == i:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scanoss
3
- Version: 1.24.0
3
+ Version: 1.25.0
4
4
  Summary: Simple Python library to leverage the SCANOSS APIs
5
5
  Home-page: https://scanoss.com
6
6
  Author: SCANOSS
@@ -0,0 +1,393 @@
1
+ """
2
+ SPDX-License-Identifier: MIT
3
+
4
+ Copyright (c) 2021, SCANOSS
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
23
+ """
24
+
25
+ import platform
26
+ import unittest
27
+ from unittest.mock import patch
28
+
29
+ from scanoss.winnowing import Winnowing
30
+
31
+
32
+ class MyTestCase(unittest.TestCase):
33
+ """
34
+ Exercise the Winnowing class
35
+ """
36
+
37
+ def test_winnowing(self):
38
+ winnowing = Winnowing(debug=True)
39
+ filename = 'test-file.c'
40
+ contents = 'c code contents'
41
+ content_types = bytes(contents, encoding='raw_unicode_escape')
42
+ wfp = winnowing.wfp_for_contents(filename, False, content_types)
43
+ print(f'WFP for {filename}: {wfp}')
44
+ self.assertIsNotNone(wfp)
45
+ filename = __file__
46
+ wfp = winnowing.wfp_for_file(filename, filename)
47
+ print(f'WFP for {filename}: {wfp}')
48
+ self.assertIsNotNone(wfp)
49
+
50
+ def test_snippet_skip(self):
51
+ winnowing = Winnowing(debug=True)
52
+ filename = 'test-file.jar'
53
+ contents = 'jar file contents'
54
+ content_types = bytes(contents, encoding='raw_unicode_escape')
55
+ wfp = winnowing.wfp_for_contents(filename, False, content_types)
56
+ print(f'WFP for {filename}: {wfp}')
57
+ self.assertIsNotNone(wfp)
58
+
59
+ def test_snippet_strip(self):
60
+ winnowing = Winnowing(
61
+ debug=True, hpsm=True, strip_snippet_ids=['d5e54c33,b03faabe'], strip_hpsm_ids=['0d2fffaffc62d18']
62
+ )
63
+ filename = 'test-file.py'
64
+ with open(__file__, 'rb') as f:
65
+ contents = f.read()
66
+ print('--- Test snippet and HPSM strip ---')
67
+ wfp = winnowing.wfp_for_contents(filename, False, contents)
68
+ found = 0
69
+ print(f'WFP for {filename}: {wfp}')
70
+ try:
71
+ found = wfp.index('d5e54c33,b03faabe')
72
+ except ValueError:
73
+ found = -1
74
+ self.assertEqual(found, -1)
75
+
76
+ try:
77
+ found = wfp.index('0d2fffaffc62d18')
78
+ except ValueError:
79
+ found = -1
80
+ self.assertEqual(found, -1)
81
+
82
+ def test_windows_hash_calculation(self):
83
+ """Test Windows-specific hash calculation with CRLF line endings."""
84
+ import hashlib
85
+
86
+ # Test content with LF line endings
87
+ content_lf = b'line1\nline2\nline3\n'
88
+ # Expected content with CRLF line endings for Windows hash
89
+ content_crlf = b'line1\r\nline2\r\nline3\r\n'
90
+
91
+ # Calculate the expected Windows hash manually
92
+ expected_windows_hash = hashlib.md5(content_crlf).hexdigest()
93
+ lf_hash = hashlib.md5(content_lf).hexdigest()
94
+
95
+ print(f'LF content hash: {lf_hash}')
96
+ print(f'CRLF content hash (Windows): {expected_windows_hash}')
97
+
98
+ # They should be different
99
+ self.assertNotEqual(lf_hash, expected_windows_hash)
100
+
101
+ @patch('platform.system')
102
+ def test_windows_wfp_includes_fh2(self, mock_platform):
103
+ """Test that WFP includes fh2 hash when running on Windows."""
104
+ # Mock Windows environment
105
+ mock_platform.return_value = 'Windows'
106
+ winnowing = Winnowing(debug=True)
107
+
108
+ filename = 'test-file.c'
109
+ content = b'int main() {\n return 0;\n}\n'
110
+
111
+ wfp = winnowing.wfp_for_contents(filename, False, content)
112
+
113
+ print(f'Windows WFP output:\n{wfp}')
114
+
115
+ # Check that WFP contains fh2 line
116
+ self.assertIn('fh2=', wfp)
117
+
118
+ # Extract the fh2 hash from WFP
119
+ lines = wfp.split('\n')
120
+ fh2_line = [line for line in lines if line.startswith('fh2=')]
121
+ self.assertEqual(len(fh2_line), 1)
122
+
123
+ fh2_hash = fh2_line[0].split('=')[1]
124
+
125
+ # Verify it matches expected CRLF conversion
126
+ import hashlib
127
+ content_crlf = content.replace(b'\n', b'\r\n')
128
+ expected_hash = hashlib.md5(content_crlf).hexdigest()
129
+ self.assertEqual(fh2_hash, expected_hash)
130
+
131
+ def test_line_ending_detection(self):
132
+ """Test line ending detection logic."""
133
+ winnowing = Winnowing(debug=True)
134
+
135
+ # Test LF only
136
+ content_lf = b'line1\nline2\nline3\n'
137
+ has_crlf, has_lf, has_cr = winnowing._Winnowing__detect_line_endings(content_lf)
138
+ self.assertFalse(has_crlf)
139
+ self.assertTrue(has_lf)
140
+ self.assertFalse(has_cr)
141
+
142
+ # Test CRLF only
143
+ content_crlf = b'line1\r\nline2\r\nline3\r\n'
144
+ has_crlf, has_lf, has_cr = winnowing._Winnowing__detect_line_endings(content_crlf)
145
+ self.assertTrue(has_crlf)
146
+ self.assertFalse(has_lf)
147
+ self.assertFalse(has_cr)
148
+
149
+ # Test CR only (old Mac style)
150
+ content_cr = b'line1\rline2\rline3\r'
151
+ has_crlf, has_lf, has_cr = winnowing._Winnowing__detect_line_endings(content_cr)
152
+ self.assertFalse(has_crlf)
153
+ self.assertFalse(has_lf)
154
+ self.assertTrue(has_cr)
155
+
156
+ # Test mixed CRLF and LF
157
+ content_mixed = b'line1\r\nline2\nline3\r\n'
158
+ has_crlf, has_lf, has_cr = winnowing._Winnowing__detect_line_endings(content_mixed)
159
+ self.assertTrue(has_crlf)
160
+ self.assertTrue(has_lf)
161
+ self.assertFalse(has_cr)
162
+
163
+ def test_opposite_hash_logic(self):
164
+ """Test the logic of opposite hash calculation."""
165
+ winnowing = Winnowing(debug=True)
166
+
167
+ # Test different line ending scenarios
168
+ content_lf = b'line1\nline2\nline3\n'
169
+ content_crlf = b'line1\r\nline2\r\nline3\r\n'
170
+ content_cr = b'line1\rline2\rline3\r'
171
+ content_mixed = b'line1\r\nline2\nline3\r'
172
+
173
+ hash_lf = winnowing._Winnowing__calculate_opposite_line_ending_hash(content_lf)
174
+ hash_crlf = winnowing._Winnowing__calculate_opposite_line_ending_hash(content_crlf)
175
+ hash_cr = winnowing._Winnowing__calculate_opposite_line_ending_hash(content_cr)
176
+ hash_mixed = winnowing._Winnowing__calculate_opposite_line_ending_hash(content_mixed)
177
+
178
+ print(f'LF opposite hash: {hash_lf}')
179
+ print(f'CRLF opposite hash: {hash_crlf}')
180
+ print(f'CR opposite hash: {hash_cr}')
181
+ print(f'Mixed opposite hash: {hash_mixed}')
182
+
183
+ # LF, CR, and mixed content should all produce CRLF hash (same result)
184
+ self.assertEqual(hash_lf, hash_cr)
185
+ self.assertEqual(hash_lf, hash_mixed)
186
+
187
+ # CRLF content should produce LF hash (different from the others)
188
+ self.assertNotEqual(hash_crlf, hash_lf)
189
+
190
+ @unittest.skipUnless(platform.system() == 'Windows', 'Windows-specific test')
191
+ def test_actual_windows_behavior(self):
192
+ """Test actual Windows behavior when running on Windows."""
193
+ winnowing = Winnowing(debug=True)
194
+ filename = 'test-file.c'
195
+ content = b'int main() {\n return 0;\n}\n'
196
+
197
+ wfp = winnowing.wfp_for_contents(filename, False, content)
198
+
199
+ print(f'Actual Windows WFP:\n{wfp}')
200
+
201
+ # On actual Windows with LF content, should include fh2
202
+ # Should always generate fh2 when line endings are present
203
+ self.assertIn('fh2=', wfp)
204
+
205
+ def test_empty_file_fh2(self):
206
+ """Test fh2 behavior with empty files."""
207
+ winnowing = Winnowing(debug=True)
208
+ content = b''
209
+ wfp = winnowing.wfp_for_contents('empty.txt', False, content)
210
+
211
+ print(f'Empty file WFP:\n{wfp}')
212
+
213
+ # Empty files should not generate fh2
214
+ self.assertNotIn('fh2=', wfp)
215
+
216
+ def test_no_line_endings_fh2(self):
217
+ """Test files without any line endings."""
218
+ winnowing = Winnowing(debug=True)
219
+ content = b'no line endings here'
220
+ wfp = winnowing.wfp_for_contents('noline.txt', False, content)
221
+
222
+ print(f'No line endings WFP:\n{wfp}')
223
+
224
+ # Files without line endings should not generate fh2
225
+ self.assertNotIn('fh2=', wfp)
226
+
227
def test_all_platforms_generate_fh2(self):
    """LF-terminated text must yield an fh2 entry on whichever platform runs the test."""
    fingerprint = Winnowing(debug=True).wfp_for_contents('test.txt', False, b'line1\nline2\n')
    print(f'Platform-independent WFP:\n{fingerprint}')

    # No platform guard here: fh2 is expected whenever line endings are present.
    self.assertIn('fh2=', fingerprint)
237
+
238
def test_verify_opposite_hash_calculation(self):
    """Check that fh2 is the MD5 of the content with its line endings converted.

    An LF file is expected to report the MD5 of its CRLF form, and a CRLF
    file the MD5 of its LF form.
    """
    import hashlib

    def fh2_of(fingerprint):
        # Value that follows the first 'fh2=' marker, up to the end of that line.
        return fingerprint.split('fh2=')[1].split('\n')[0]

    scanner = Winnowing(debug=True)

    # LF input: fh2 should describe the CRLF form.
    content_lf = b'line1\nline2\nline3\n'
    wfp_lf = scanner.wfp_for_contents('test_lf.txt', False, content_lf)

    # CRLF input: fh2 should describe the LF form.
    content_crlf = b'line1\r\nline2\r\nline3\r\n'
    wfp_crlf = scanner.wfp_for_contents('test_crlf.txt', False, content_crlf)

    print(f'LF content WFP:\n{wfp_lf}')
    print(f'CRLF content WFP:\n{wfp_crlf}')

    # Both fingerprints must carry an fh2 entry before it can be extracted.
    for fingerprint in (wfp_lf, wfp_crlf):
        self.assertIn('fh2=', fingerprint)

    # Each file reports the digest of the other line-ending flavour.
    crlf_digest = hashlib.md5(content_lf.replace(b'\n', b'\r\n')).hexdigest()
    lf_digest = hashlib.md5(content_crlf.replace(b'\r\n', b'\n')).hexdigest()

    self.assertEqual(fh2_of(wfp_lf), crlf_digest)
    self.assertEqual(fh2_of(wfp_crlf), lf_digest)
268
+
269
def test_binary_file_with_line_endings(self):
    """Content submitted with the second argument set to True must not yield fh2.

    The payload deliberately embeds both an LF and a CRLF between raw bytes.
    """
    payload = b'\x00\x01\n\x02\x03\r\n\x04'
    fingerprint = Winnowing(debug=True).wfp_for_contents('binary.bin', True, payload)
    print(f'Binary file WFP:\n{fingerprint}')

    # Embedded newlines in a binary file must not trigger an fh2 entry.
    self.assertNotIn('fh2=', fingerprint)
280
+
281
def test_cr_only_line_endings(self):
    """CR-only (classic Mac) content must yield fh2 equal to the MD5 of its CRLF form."""
    import hashlib

    content = b'line1\rline2\rline3\r'
    fingerprint = Winnowing(debug=True).wfp_for_contents('mac.txt', False, content)
    print(f'CR-only WFP:\n{fingerprint}')

    # fh2 is expected regardless of the platform running the test.
    self.assertIn('fh2=', fingerprint)

    # Every bare CR is turned into CRLF before the expected digest is computed.
    crlf_digest = hashlib.md5(content.replace(b'\r', b'\r\n')).hexdigest()
    self.assertIn(f'fh2={crlf_digest}', fingerprint)
297
+
298
def test_whitespace_only_file(self):
    """Content made only of spaces, tabs and newlines must still yield an fh2 entry."""
    blanks = b' \n\t\n \n'
    fingerprint = Winnowing(debug=True).wfp_for_contents('whitespace.txt', False, blanks)
    print(f'Whitespace-only WFP:\n{fingerprint}')

    # The newlines alone are enough for fh2 to be expected.
    self.assertIn('fh2=', fingerprint)
308
+
309
def test_mixed_complex_line_endings(self):
    """Content mixing CRLF, LF and CR must yield fh2 equal to the MD5 of its all-CRLF form."""
    import hashlib

    # One file containing all three line-ending flavours.
    content = b'line1\r\nline2\nline3\rline4\r\nline5\n'
    fingerprint = Winnowing(debug=True).wfp_for_contents('mixed.txt', False, content)
    print(f'Mixed line endings WFP:\n{fingerprint}')

    self.assertIn('fh2=', fingerprint)

    # Expected digest: collapse every ending to LF first, then expand each LF to CRLF.
    lf_only = content.replace(b'\r\n', b'\n')
    lf_only = lf_only.replace(b'\r', b'\n')
    crlf_only = lf_only.replace(b'\n', b'\r\n')
    crlf_digest = hashlib.md5(crlf_only).hexdigest()
    self.assertIn(f'fh2={crlf_digest}', fingerprint)
327
+
328
def test_fh2_with_skip_snippets(self):
    """With skip_snippets=True the fingerprint keeps fh2 but has no line-numbered entries."""
    scanner = Winnowing(debug=True, skip_snippets=True)
    fingerprint = scanner.wfp_for_contents('test.txt', False, b'line1\nline2\nline3\n')
    print(f'Skip snippets WFP:\n{fingerprint}')

    # Skipping snippets must not suppress the fh2 entry.
    self.assertIn('fh2=', fingerprint)

    # A snippet row is recognised here as one that contains '=' and starts with a digit.
    numbered_rows = [row for row in fingerprint.split('\n') if '=' in row and row[0].isdigit()]
    self.assertEqual(len(numbered_rows), 0)
342
+
343
def test_fh2_with_obfuscation(self):
    """With obfuscate=True the fingerprint keeps fh2 and shows '1.txt' instead of 'test.txt'."""
    scanner = Winnowing(debug=True, obfuscate=True)
    fingerprint = scanner.wfp_for_contents('test.txt', False, b'line1\nline2\nline3\n')
    print(f'Obfuscated WFP:\n{fingerprint}')

    # Obfuscation must not suppress the fh2 entry.
    self.assertIn('fh2=', fingerprint)

    # The real file name is replaced by the obfuscated one.
    self.assertIn('1.txt', fingerprint)
    self.assertNotIn('test.txt', fingerprint)
356
+
357
def test_large_file_with_line_endings(self):
    """Content above the configured post size must still yield an fh2 entry."""
    # size_limit with post_size=1 is intended as a 1KB limit.
    scanner = Winnowing(debug=True, size_limit=True, post_size=1)

    # 1000 five-byte lines, i.e. 5000 bytes of input.
    bulk = b'line\n' * 1000
    fingerprint = scanner.wfp_for_contents('large.txt', False, bulk)
    print(f'Large file WFP length: {len(fingerprint)}')

    # The size limit must not suppress the fh2 entry.
    self.assertIn('fh2=', fingerprint)
368
+
369
def test_single_line_no_newline(self):
    """A single line with no trailing newline must not yield an fh2 entry."""
    text = b'single line without newline'
    fingerprint = Winnowing(debug=True).wfp_for_contents('single.txt', False, text)
    print(f'Single line no newline WFP:\n{fingerprint}')

    # There is no line ending in the input, so fh2 is expected to be absent.
    self.assertNotIn('fh2=', fingerprint)
379
+
380
def test_file_with_null_bytes_and_newlines(self):
    """Text content that mixes NUL bytes with LF line endings must still yield fh2."""
    payload = b'line1\x00\nline2\x00\x00\nline3\n'
    fingerprint = Winnowing(debug=True).wfp_for_contents('nullbytes.txt', False, payload)
    print(f'Null bytes with newlines WFP:\n{fingerprint}')

    # The line endings are what matter; the NUL bytes must not suppress fh2.
    self.assertIn('fh2=', fingerprint)
390
+
391
+
392
+ if __name__ == '__main__':
393
+ unittest.main()
@@ -1 +0,0 @@
1
- date: 20250528081438, utime: 1748420078
@@ -1,82 +0,0 @@
1
- """
2
- SPDX-License-Identifier: MIT
3
-
4
- Copyright (c) 2021, SCANOSS
5
-
6
- Permission is hereby granted, free of charge, to any person obtaining a copy
7
- of this software and associated documentation files (the "Software"), to deal
8
- in the Software without restriction, including without limitation the rights
9
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
- copies of the Software, and to permit persons to whom the Software is
11
- furnished to do so, subject to the following conditions:
12
-
13
- The above copyright notice and this permission notice shall be included in
14
- all copies or substantial portions of the Software.
15
-
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
- THE SOFTWARE.
23
- """
24
-
25
- import unittest
26
-
27
- from scanoss.winnowing import Winnowing
28
-
29
-
30
class MyTestCase(unittest.TestCase):
    """
    Exercise the Winnowing class
    """

    def test_winnowing(self):
        """Fingerprint an in-memory buffer and then this test file; both calls must return a value."""
        winnowing = Winnowing(debug=True)
        filename = 'test-file.c'
        contents = 'c code contents'
        content_types = bytes(contents, encoding='raw_unicode_escape')
        wfp = winnowing.wfp_for_contents(filename, False, content_types)
        print(f'WFP for {filename}: {wfp}')
        self.assertIsNotNone(wfp)
        # Second pass: fingerprint a real file on disk (this module).
        filename = __file__
        wfp = winnowing.wfp_for_file(filename, filename)
        print(f'WFP for {filename}: {wfp}')
        self.assertIsNotNone(wfp)

    def test_snippet_skip(self):
        """Fingerprint content named as a .jar file and check that a value is returned."""
        winnowing = Winnowing(debug=True)
        filename = 'test-file.jar'
        contents = 'jar file contents'
        content_types = bytes(contents, encoding='raw_unicode_escape')
        wfp = winnowing.wfp_for_contents(filename, False, content_types)
        print(f'WFP for {filename}: {wfp}')
        self.assertIsNotNone(wfp)

    def test_snippet_strip(self):
        """Check that the configured snippet and HPSM ids do not appear in the fingerprint."""
        winnowing = Winnowing(
            debug=True, hpsm=True, strip_snippet_ids=['d5e54c33,b03faabe'], strip_hpsm_ids=['0d2fffaffc62d18']
        )
        filename = 'test-file.py'
        with open(__file__, 'rb') as f:
            contents = f.read()
        print('--- Test snippet and HPSM strip ---')
        wfp = winnowing.wfp_for_contents(filename, False, contents)
        print(f'WFP for {filename}: {wfp}')
        # assertNotIn states the intent directly and reports the offending text on
        # failure, replacing the former str.index/ValueError/-1 sentinel.
        self.assertNotIn('d5e54c33,b03faabe', wfp)
        self.assertNotIn('0d2fffaffc62d18', wfp)
79
-
80
-
81
- if __name__ == '__main__':
82
- unittest.main()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes