dcicutils 7.12.0__py3-none-any.whl → 7.12.0.1b4__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of dcicutils might be problematic. Click here for more details.

@@ -1,5 +1,7 @@
1
1
  import contextlib
2
+ import csv
2
3
  import datetime
4
+ import glob
3
5
  import io
4
6
  import json
5
7
  # import logging
@@ -23,13 +25,18 @@ except ImportError: # pragma: no cover - not worth unit testing this case
23
25
  # import piplicenses
24
26
 
25
27
  from collections import defaultdict
26
- from typing import Any, Dict, DefaultDict, List, Optional, Type, Union
28
+ from typing import Any, Dict, DefaultDict, List, Optional, Type, TypeVar, Union
27
29
 
28
30
  # For obscure reasons related to how this file is used for early prototyping, these must use absolute references
29
31
  # to modules, not relative references. Later when things are better installed, we can make refs relative again.
32
+ from dcicutils.exceptions import InvalidParameterError
30
33
  from dcicutils.lang_utils import there_are
31
- from dcicutils.misc_utils import PRINT, get_error_message, local_attrs
34
+ from dcicutils.misc_utils import (
35
+ PRINT, get_error_message, ignorable, ignored, json_file_contents, local_attrs, environ_bool,
36
+ remove_suffix,
37
+ )
32
38
 
39
+ T = TypeVar("T")
33
40
 
34
41
  # logging.basicConfig()
35
42
  # logger = logging.getLogger(__name__)
@@ -43,6 +50,14 @@ _NAME = 'name'
43
50
  _STATUS = 'status'
44
51
 
45
52
 
53
def pattern(x):
    """
    Compile ``x`` into a regular expression object that ignores letter case.

    :param x: the regular expression source text.
    :return: the compiled pattern, built with ``re.IGNORECASE``.
    """
    flags = re.IGNORECASE
    compiled = re.compile(x, flags)
    return compiled
55
+
56
+
57
def augment(d: dict, by: dict) -> dict:
    """
    Return a new dictionary holding the entries of ``d`` overlaid with the entries of ``by``.

    Neither argument is modified. Where both dictionaries have the same key, the value from ``by`` wins.

    :param d: the base dictionary.
    :param by: the entries to add or override.
    :return: a freshly created merged dictionary.
    """
    # Unpacking inside a dict display (rather than dict(d, **by)) also works when 'by' has non-string keys,
    # which keyword-argument expansion would reject with a TypeError.
    return {**d, **by}
59
+
60
+
46
61
  class LicenseStatus:
47
62
  ALLOWED = "ALLOWED"
48
63
  SPECIALLY_ALLOWED = "SPECIALLY_ALLOWED"
@@ -51,6 +66,23 @@ class LicenseStatus:
51
66
  UNEXPECTED_MISSING = "UNEXPECTED_MISSING"
52
67
 
53
68
 
69
class LicenseOptions:
    """
    Switches shared by the license-checking tools.

    Each value is computed once, when this class body is evaluated, from the process environment.
    """

    # General verbosity, such as progress information.
    # Taken from LICENSE_UTILS_VERBOSE via environ_bool, with a default of True.
    VERBOSE = environ_bool("LICENSE_UTILS_VERBOSE", default=True)

    # Specific additional debugging output.
    # Taken from LICENSE_UTILS_DEBUG via environ_bool, with a default of False.
    DEBUG = environ_bool("LICENSE_UTILS_DEBUG", default=False)

    # Prefix under which conda metadata is looked up: CONDA_LICENSE_CHECKER_PREFIX if set,
    # otherwise CONDA_PREFIX, otherwise the empty string.
    CONDA_PREFIX = os.environ.get("CONDA_LICENSE_CHECKER_PREFIX", os.environ.get("CONDA_PREFIX", ""))

    @classmethod
    @contextlib.contextmanager
    def selected_options(cls, verbose=VERBOSE, debug=DEBUG, conda_prefix=CONDA_PREFIX):
        """
        Context manager that lets a caller (a script, for example) override these options dynamically.

        All three attributes are assigned for the duration of the 'with' body, including the ones the
        caller did not pass. An omitted argument takes the value the corresponding attribute had when
        this class was defined, because that is when the parameter defaults are evaluated; it does not
        take whatever value is current at call time.

        NOTE(review): this relies on local_attrs putting the previous values back on exit -- confirm
        against dcicutils.misc_utils.
        """
        overrides = {"VERBOSE": verbose, "DEBUG": debug, "CONDA_PREFIX": conda_prefix}
        with local_attrs(cls, **overrides):
            yield
84
+
85
+
54
86
  class LicenseFramework:
55
87
 
56
88
  NAME = None
@@ -87,13 +119,13 @@ class LicenseFrameworkRegistry:
87
119
  yield
88
120
 
89
121
  @classmethod
90
- def register(cls, *, name):
122
+ def register_framework(cls, *, name):
91
123
  """
92
124
  Declares a python license framework class.
93
125
  Mostly these names will be language names like 'python' or 'javascript',
94
126
  but they might be names of other, non-linguistic frameworks (like 'cgap-pipeline', for example).
95
127
  """
96
- def _decorator(framework_class):
128
+ def _decorator(framework_class: T) -> T:
97
129
  if not issubclass(framework_class, LicenseFramework):
98
130
  raise ValueError(f"The class {framework_class.__name__} does not inherit from LicenseFramework.")
99
131
  framework_class.NAME = name
@@ -117,25 +149,108 @@ class LicenseFrameworkRegistry:
117
149
  return sorted(cls.LICENSE_FRAMEWORKS.values(), key=lambda x: x.NAME)
118
150
 
119
151
 
120
- @LicenseFrameworkRegistry.register(name='javascript')
152
# The parenthetical pattern is intended to match ' (= 3)', ' (>= 3)', ' (version 3)', ' (version 3 or greater)'
# It will incidentally and harmlessly also take ' (>version 3)' or ' (>= 3 or greater)'.
# It will also correctly handle the unlikely case of ' (= 3 or greater)'

_OR_LATER_PATTERN = '(?:[- ]or[ -](?:greater|later))'
_PARENTHETICAL_VERSION_CONSTRAINT = re.compile(f'( [(]([>]?)(?:[=]|version) ([0-9.]+)({_OR_LATER_PATTERN}?)[)])')
_POSTFIX_OR_LATER_PATTERN = re.compile(f"({_OR_LATER_PATTERN})")
# NOTE(review): each version group below is a single character, so 'GPL-2 | GPL-3' matches but
# 'GPL-2.0 OR GPL-3.0' does not. Left as-is because widening it would change the license names produced.
_GPL_VERSION_CHOICE = re.compile('^GPL-v?([0-9.+]) (?:OR|[|]) GPL-v?([0-9.+])$')

# It'd be surprising if there were even ten rewrites to do for one spec, so this is only a runaway guard.
_MAX_LICENSE_REWRITES = 100


def _rewrite_matches(licenses_spec, regexp, make_replacement):
    """
    Repeatedly rewrites the first match of regexp (every occurrence of its group 1 text) in licenses_spec.

    :param licenses_spec: the text to rewrite.
    :param regexp: a compiled pattern whose group 1 is the text to be replaced.
    :param make_replacement: a function from the match object to the replacement text.
    :return: the rewritten text once nothing matches, or None if the rewrite limit ran out first.
    """
    for _ in range(_MAX_LICENSE_REWRITES + 1):
        m = regexp.search(licenses_spec)
        if not m:
            return licenses_spec
        licenses_spec = licenses_spec.replace(m.group(1), make_replacement(m))
    return None


def simplify_license_versions(licenses_spec: str, *, for_package_name) -> str:
    """
    Normalizes the way versions are written in a license spec.

    * A whole spec of the form 'GPL-2 | GPL-3' (or with 'OR') becomes 'GPL-2-or-3'.
    * A parenthesized constraint becomes a suffix, e.g. 'GPL (>= 2)' becomes 'GPL-2+'
      and 'LGPL (version 3)' becomes 'LGPL-3'.
    * A remaining ' or later' / '-or-greater' style postfix becomes '+'.

    :param licenses_spec: the license text as reported for a package.
    :param for_package_name: the package the spec belongs to (used only in the runaway warning).
    :return: the normalized spec. If the rewriting does not settle within the rewrite limit,
        a warning is issued and the spec is returned exactly as it was passed in.
    """
    m = _GPL_VERSION_CHOICE.match(licenses_spec)
    if m:
        version_a, version_b = m.groups()
        return f"GPL-{version_a}-or-{version_b}"

    original_licenses_spec = licenses_spec

    def version_suffix(match):
        # Groups: (whole text, leading '>' if any, version number, trailing or-later phrase if any)
        _, greater, version_spec, greater2 = match.groups()
        is_greater = bool(greater or greater2)
        return f"-{version_spec}{'+' if is_greater else ''}"

    def plus_sign(match):
        return '+'

    # Parenthesized constraints must be handled first, since they may contain an or-later phrase themselves.
    for regexp, make_replacement in ((_PARENTHETICAL_VERSION_CONSTRAINT, version_suffix),
                                     (_POSTFIX_OR_LATER_PATTERN, plus_sign)):
        rewritten = _rewrite_matches(licenses_spec, regexp, make_replacement)
        if rewritten is None:
            warnings.warn(f"Transforming {for_package_name} {original_licenses_spec!r} seemed to be looping."
                          f" Please report this as a bug.")
            return original_licenses_spec  # give back the unmodified text, not a half-rewritten one
        licenses_spec = rewritten

    if licenses_spec != original_licenses_spec and LicenseOptions.DEBUG:
        PRINT(f"Rewriting {original_licenses_spec!r} as {licenses_spec!r}.")
    return licenses_spec
204
+
205
+
206
def extract_boolean_terms(boolean_expression: str, for_package_name: str) -> List[str]:
    """
    Lists the license names mentioned in a boolean license expression.

    Only the names matter here, not the algebra combining them: (FOO AND BAR) and (FOO OR BAR) both
    come out as the terms FOO and BAR. A consequence is that something like (MIT OR GPL-3.0) only
    passes later checking if both MIT and GPL-3.0 are allowed. (There is no handling of NOT.)

    :param boolean_expression: the license expression text.
    :param for_package_name: the package name, spliced into any 'file ...' term to make it
        'Custom: <package> file ...'.
    :return: the non-empty terms, stripped of surrounding whitespace and sorted.
    """
    # Applied in this order. Parentheses vanish, every connective becomes a comma.
    substitutions = [
        ('(', ''),
        (')', ''),
        (' AND ', ','),
        (' and ', ','),
        (' & ', ','),
        (' OR ', ','),
        (' or ', ','),
        ('|', ','),
        (';', ','),
        (' + ', ','),
        ('file ', f'Custom: {for_package_name} file '),
    ]
    revised_boolean_expression = boolean_expression
    for old_text, new_text in substitutions:
        revised_boolean_expression = revised_boolean_expression.replace(old_text, new_text)
    stripped_pieces = (piece.strip() for piece in revised_boolean_expression.split(','))
    terms = sorted(piece for piece in stripped_pieces if piece)
    if LicenseOptions.DEBUG and revised_boolean_expression != boolean_expression:
        PRINT(f"Rewriting {boolean_expression!r} as {terms!r}.")
    return terms
230
+
231
+
232
+ @LicenseFrameworkRegistry.register_framework(name='javascript')
121
233
  class JavascriptLicenseFramework(LicenseFramework):
122
234
 
123
235
  @classmethod
124
- def implicated_licenses(cls, *, licenses_spec: str):
125
- # We only care which licenses were mentioned, not what algebra is used on them.
126
- # (Thankfully there are no NOTs, and that's probably not by accident, since that would be too big a set.)
127
- # So for us, either (FOO AND BAR) or (FOO OR BAR) is the same because we want to treat it as "FOO,BAR".
128
- # If all of those licenses match, all is good. That _does_ mean some things like (MIT OR GPL-3.0) will
129
- # have trouble passing unless both MIT and GPL-3.0 are allowed.
130
- licenses = sorted(map(lambda x: x.strip(),
131
- (licenses_spec
132
- .replace('(', '')
133
- .replace(')', '')
134
- .replace(' AND ', ',')
135
- .replace(' OR ', ',')
136
- ).split(',')))
236
+ def implicated_licenses(cls, *, package_name, licenses_spec: str) -> List[str]:
237
+ ignored(package_name)
238
+ licenses_spec = simplify_license_versions(licenses_spec, for_package_name=package_name)
239
+ licenses = extract_boolean_terms(licenses_spec, for_package_name=package_name)
137
240
  return licenses
138
241
 
242
# Group 1 is a trailing '@' + version (starting with a digit or dot, and containing no further '@'),
# or the empty string when the name has no such suffix.
VERSION_PATTERN = re.compile('^.+?([@][0-9.][^@]*|)$')

@classmethod
def strip_version(cls, raw_name):
    """
    Returns raw_name without a trailing '@<version>' part, e.g., for a name like @foo/bar@3.7.

    A name with no version suffix is returned as given.

    NOTE(review): assumes remove_suffix(suffix, text) returns text with that suffix taken off the end --
    confirm against dcicutils.misc_utils.
    """
    m = cls.VERSION_PATTERN.match(raw_name)
    version_suffix = m.group(1) if m else None
    if not version_suffix:
        return raw_name
    return remove_suffix(version_suffix, raw_name)
253
+
139
254
  @classmethod
140
255
  def get_dependencies(cls):
141
256
  output = subprocess.check_output(['npx', 'license-checker', '--summary', '--json'],
@@ -147,24 +262,20 @@ class JavascriptLicenseFramework(LicenseFramework):
147
262
  # e.g., this happens if there's no javascript in the repo
148
263
  raise Exception("No javascript license data was found.")
149
264
  result = []
150
- for name, record in records.items():
151
- licenses_spec = record.get(_LICENSES)
152
- if '(' in licenses_spec:
153
- licenses = cls.implicated_licenses(licenses_spec=licenses_spec)
154
- PRINT(f"Rewriting {licenses_spec!r} as {licenses!r}")
155
- elif licenses_spec:
156
- licenses = [licenses_spec]
157
- else:
158
- licenses = []
265
+ for raw_name, record in records.items():
266
+ name = cls.strip_version(raw_name)
267
+ raw_licenses_spec = record.get(_LICENSES)
268
+ licenses = cls.implicated_licenses(licenses_spec=raw_licenses_spec, package_name=name)
159
269
  entry = {
160
- _NAME: name.lstrip('@').split('@')[0], # e.g., @foo/bar@3.7
161
- _LICENSES: licenses # TODO: could parse this better.
270
+ _NAME: name,
271
+ _LICENSES: licenses,
272
+ _FRAMEWORK: 'javascript'
162
273
  }
163
274
  result.append(entry)
164
275
  return result
165
276
 
166
277
 
167
- @LicenseFrameworkRegistry.register(name='python')
278
+ @LicenseFrameworkRegistry.register_framework(name='python')
168
279
  class PythonLicenseFramework(LicenseFramework):
169
280
 
170
281
  @classmethod
@@ -184,15 +295,107 @@ class PythonLicenseFramework(LicenseFramework):
184
295
  entry = {
185
296
  _NAME: license_name,
186
297
  _LICENSES: licenses,
187
- _LANGUAGE: 'python',
298
+ _FRAMEWORK: 'python',
188
299
  }
189
300
  result.append(entry)
190
301
  return sorted(result, key=lambda x: x.get(_NAME).lower())
191
302
 
192
303
 
193
- class LicenseFileParser:
304
@LicenseFrameworkRegistry.register_framework(name='conda')
class CondaLicenseFramework(LicenseFramework):
    """
    License framework that reads package metadata from a conda environment.

    The environment is located by LicenseOptions.CONDA_PREFIX, and each conda-meta/*.json file
    found under it is treated as one package record.
    """

    @classmethod
    def get_dependencies(cls):
        """
        Returns one entry per conda-meta/*.json file, sorted by package name.

        Each entry is a dictionary with the package name, the list of license terms extracted from the
        record's 'license' field, and the framework name 'conda'.
        A record must have a 'name' key; a missing one raises KeyError.
        """
        prefix = LicenseOptions.CONDA_PREFIX
        result = []
        filespec = os.path.join(prefix, "conda-meta/*.json")
        for file in glob.glob(filespec):
            data = json_file_contents(file)
            package_name = data['name']
            # A record with no license (or an empty one) is reported under the placeholder name "MISSING",
            # so there is always a license spec to parse from here on.
            package_license = data.get('license') or "MISSING"
            simplified_package_license_spec = simplify_license_versions(package_license,
                                                                        for_package_name=package_name)
            package_licenses = extract_boolean_terms(simplified_package_license_spec,
                                                     for_package_name=package_name)
            entry = {
                _NAME: package_name,
                _LICENSES: package_licenses,
                _FRAMEWORK: 'conda',
            }
            result.append(entry)
        result.sort(key=lambda x: x[_NAME])
        return result
337
+
338
+
339
@LicenseFrameworkRegistry.register_framework(name='r')
class RLicenseFramework(LicenseFramework):
    """
    License framework that asks an R installation which packages are installed and how they are licensed.
    """

    # A license spec of this form marks a package that ships as part of R itself.
    R_PART_SPEC = re.compile("^Part of R [0-9.]+$")
    R_LANGUAGE_LICENSE_NAME = 'R-language-license'

    @classmethod
    def implicated_licenses(cls, *, package_name, licenses_spec: str) -> List[str]:
        """
        Returns the license names mentioned by licenses_spec.

        A spec like 'Part of R 4.2.1' yields just the R language license name. Anything else has its
        version notation simplified and is then split into its individual terms.
        """
        if cls.R_PART_SPEC.match(licenses_spec):
            return [cls.R_LANGUAGE_LICENSE_NAME]
        licenses_spec = simplify_license_versions(licenses_spec, for_package_name=package_name)
        return extract_boolean_terms(licenses_spec, for_package_name=package_name)

    @classmethod
    def get_dependencies(cls):
        """
        Returns one entry per installed R package, sorted case-insensitively by package name.

        Rows that cannot be processed are skipped (and printed if LicenseOptions.VERBOSE);
        if there were any, a single warning summarizing the number of problems is issued at the end.
        Raises subprocess.CalledProcessError if the R command exits with a non-zero status.
        """
        # NOTE: Although the R Language itself is released under the GPL, our belief is that it is
        # still possible to write programs in R that are not GPL, even programs that use commercial licenses.
        # So we do ordinary license checking here, same as in other frameworks.
        # For notes on this, see the R FAQ.
        # Ref: https://cran.r-project.org/doc/FAQ/R-FAQ.html#Can-I-use-R-for-commercial-purposes_003f

        _PACKAGE = "Package"
        _LICENSE = "License"

        # The same column names are used to build the R expression and to recognize the CSV header below.
        r_expression = f'write.csv(installed.packages()[,c("{_PACKAGE}", "{_LICENSE}")])'
        # Anything R writes to stderr is discarded.
        # NOTE(review): the earlier comment here said '{}' is still put on stdout on error, which looks
        # copied from the javascript framework -- confirm what R actually writes in that case.
        output_bytes = subprocess.check_output(['r', '--no-echo', '-q', '-e', r_expression],
                                               stderr=subprocess.DEVNULL)
        output = output_bytes.decode('utf-8')

        result = []
        found_problems = 0
        for row_number, row in enumerate(csv.reader(io.StringIO(output))):  # [ignore, package, license]
            if row_number == 0 and row == ["", _PACKAGE, _LICENSE]:  # we expect headers
                continue
            try:
                package_name = row[1]
                licenses_spec = row[2]
                licenses = cls.implicated_licenses(package_name=package_name, licenses_spec=licenses_spec)
                result.append({
                    _NAME: package_name,
                    _LICENSES: licenses,
                    _FRAMEWORK: 'r',
                })
            except Exception as e:
                # Deliberately best-effort: one bad row should not stop the rest from being checked.
                found_problems += 1
                if LicenseOptions.VERBOSE:
                    PRINT(get_error_message(e))
        if found_problems > 0:
            warnings.warn(there_are(found_problems, kind="problem", show=False, punctuate=True, tense='past'))
        return sorted(result, key=lambda x: x.get(_NAME).lower())
194
396
 
195
- VERBOSE = False
397
+
398
+ class LicenseFileParser:
196
399
 
197
400
  SEPARATORS = '-.,'
198
401
  SEPARATORS_AND_WHITESPACE = SEPARATORS + ' \t'
@@ -230,8 +433,6 @@ class LicenseFileParser:
230
433
  lines = []
231
434
  for i, line in enumerate(fp):
232
435
  line = line.strip(' \t\n\r')
233
- if cls.VERBOSE: # pragma: no cover - this is just for debugging
234
- PRINT(str(i).rjust(3), line)
235
436
  m = cls.COPYRIGHT_LINE.match(line) if line[:1].isupper() else None
236
437
  if not m:
237
438
  lines.append(line)
@@ -316,14 +517,12 @@ class LicenseChecker:
316
517
  Note that if you don't like these license names, which are admittedly non-standard and do not seem to use
317
518
  SPDX naming conventions, you can customize the get_dependencies method to return a different
318
519
  list, one of the form
319
- [{"name": "libname", "license_classifier": ["license1", "license2", ...], "language": "python"}]
520
+ [{"name": "libname", "license_classifier": ["license1", "license2", ...], "framework": "python"}]
320
521
  by whatever means you like and using whatever names you like.
321
522
  """
322
523
 
323
524
  # Set this to True in subclasses if you want your organization's policy to be that you see
324
525
  # some visible proof of which licenses were checked.
325
- VERBOSE = True
326
-
327
526
  LICENSE_TITLE = None
328
527
  COPYRIGHT_OWNER = None
329
528
  LICENSE_FRAMEWORKS = None
@@ -378,6 +577,22 @@ class LicenseChecker:
378
577
  check_license_title=license_title or cls.LICENSE_TITLE,
379
578
  analysis=analysis)
380
579
 
580
+ CHOICE_REGEXPS = {}
581
+
582
@classmethod
def _make_regexp_for_choices(cls, choices):
    """
    Builds one case-insensitive regular expression that matches exactly the given choices.

    :param choices: a sequence whose elements are either strings (matched literally) or compiled
        regular expressions (only their pattern source is used, so their own flags are not carried over).
    :return: a compiled pattern with a single outer capturing group. With no choices at all,
        the pattern matches only the empty string.
    """
    # Each alternative is wrapped in a non-capturing group before being anchored, so that a regexp choice
    # containing a top-level '|' is anchored as a whole rather than only at its first and last branch.
    alternatives = [
        f"^(?:{re.escape(choice) if isinstance(choice, str) else choice.pattern})$"
        for choice in choices
    ]
    inner_pattern = '|'.join(alternatives) or "^$"
    return re.compile(f"({inner_pattern})", re.IGNORECASE)
587
+
588
@classmethod
def _find_regexp_for_choices(cls, choices):
    """
    Returns the combined regular expression for choices, building it only the first time it is needed.

    Results are remembered in cls.CHOICE_REGEXPS.

    :param choices: a sequence of strings and/or compiled regular expressions.
    :return: the compiled pattern made by _make_regexp_for_choices for these choices.
    """
    # The key is built from the literal strings and the pattern sources themselves rather than from
    # str(choices): the repr of a compiled pattern may be abbreviated for long patterns, so two different
    # lists of choices could otherwise end up sharing a key. Regexp sources are tagged so that a literal
    # string and a regexp with the same text stay distinct.
    key = tuple(choice if isinstance(choice, str) else ('regexp', choice.pattern) for choice in choices)
    regexp = cls.CHOICE_REGEXPS.get(key)
    if regexp is None:
        cls.CHOICE_REGEXPS[key] = regexp = cls._make_regexp_for_choices(choices)
    return regexp
595
+
381
596
  @classmethod
382
597
  def analyze_license_dependencies_for_framework(cls, *,
383
598
  analysis: LicenseAnalysis,
@@ -385,7 +600,7 @@ class LicenseChecker:
385
600
  acceptable: Optional[List[str]] = None,
386
601
  exceptions: Optional[Dict[str, str]] = None,
387
602
  ) -> None:
388
- acceptable = (acceptable or []) + (cls.ALLOWED or [])
603
+ acceptability_regexp = cls._find_regexp_for_choices((acceptable or []) + (cls.ALLOWED or []))
389
604
  exceptions = dict(cls.EXCEPTIONS or {}, **(exceptions or {}))
390
605
 
391
606
  try:
@@ -415,7 +630,7 @@ class LicenseChecker:
415
630
  by_special_exception = False
416
631
  for license_name in license_names:
417
632
  special_exceptions = exceptions.get(license_name, [])
418
- if license_name in acceptable:
633
+ if acceptability_regexp.match(license_name): # license_name in acceptable:
419
634
  pass
420
635
  elif name in special_exceptions:
421
636
  by_special_exception = True
@@ -430,7 +645,7 @@ class LicenseChecker:
430
645
  _LICENSES: license_names,
431
646
  _STATUS: status
432
647
  })
433
- if cls.VERBOSE: # pragma: no cover - this is just for debugging
648
+ if LicenseOptions.VERBOSE: # pragma: no cover - this is just for debugging
434
649
  PRINT(f"Checked {framework.NAME} {name}:"
435
650
  f" {'; '.join(license_names) if license_names else '---'} ({status})")
436
651
 
@@ -459,7 +674,7 @@ class LicenseChecker:
459
674
  def show_unacceptable_licenses(cls, *, analysis: LicenseAnalysis) -> LicenseAnalysis:
460
675
  if analysis.unacceptable:
461
676
  PRINT(there_are(analysis.unacceptable, kind="unacceptable license", show=False, punctuation_mark=':'))
462
- for license, names in analysis.unacceptable.items():
677
+ for license, names in sorted(analysis.unacceptable.items()):
463
678
  PRINT(f" {license}: {', '.join(names)}")
464
679
  return analysis
465
680
 
@@ -499,6 +714,30 @@ class LicenseChecker:
499
714
  raise LicenseAcceptabilityCheckFailure(unacceptable_licenses=analysis.unacceptable)
500
715
 
501
716
 
717
class LicenseCheckerRegistry:
    """
    A table of license checker classes, indexed by the name each was registered under.
    """

    REGISTRY: Dict[str, Type[LicenseChecker]] = {}

    @classmethod
    def register_checker(cls, name: str):
        """
        Returns a class decorator that files the decorated class in the registry under the given name.

        The decorated class is handed back unchanged. Registering a second class under a name
        already in use replaces the earlier entry.
        """
        def _remember(license_checker_class: Type[LicenseChecker]):
            cls.REGISTRY[name] = license_checker_class
            return license_checker_class
        return _remember

    @classmethod
    def lookup_checker(cls, name: str) -> Type[LicenseChecker]:
        """
        Returns the checker class registered under name.

        Raises InvalidParameterError, listing the registered names as options, if there is none.
        """
        found: Optional[Type[LicenseChecker]] = cls.REGISTRY.get(name)
        if found is not None:
            return found
        raise InvalidParameterError(parameter='checker_name', value=name,
                                    options=cls.all_checker_names())

    @classmethod
    def all_checker_names(cls):
        """
        Returns a new list of the registered names.
        """
        return list(cls.REGISTRY)
739
+
740
+
502
741
  class LicenseCheckFailure(Exception):
503
742
 
504
743
  DEFAULT_MESSAGE = "License check failure."
@@ -523,16 +762,13 @@ class LicenseAcceptabilityCheckFailure(LicenseCheckFailure):
523
762
  super().__init__(message=message)
524
763
 
525
764
 
526
- class C4InfrastructureLicenseChecker(LicenseChecker):
765
+ @LicenseCheckerRegistry.register_checker('park-lab-common')
766
+ class ParkLabCommonLicenseChecker(LicenseChecker):
527
767
  """
528
- This set of values is useful to us in Park Lab where these tools were developed.
529
- If you're at some other organization, we recommend you make a class that has values
530
- suitable to your own organizational needs.
768
+ Minimal checker common to all tech from Park Lab.
531
769
  """
532
770
 
533
771
  COPYRIGHT_OWNER = "President and Fellows of Harvard College"
534
- LICENSE_TITLE = "(The )?MIT License"
535
- LICENSE_FRAMEWORKS = ['python', 'javascript']
536
772
 
537
773
  ALLOWED = [
538
774
 
@@ -548,16 +784,39 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
548
784
  'AFL-2.1',
549
785
 
550
786
  # Linking = Permissive, Private Use = Yes
787
+ # Apache licenses before version 2.0 are controversial, but we here construe an unmarked naming to imply
788
+ # any version, and hence v2.
551
789
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
552
790
  'Apache Software License',
553
791
  'Apache-Style',
554
- 'Apache-2.0',
792
+ pattern("Apache([- ]2([.]0)?)?([- ]Licen[cs]e)?([- ]with[- ]LLVM[- ]exception)?"),
793
+ # 'Apache-2.0',
794
+
795
+ # Artistic License 1.0 was confusing to people, so its status as permissive is in general uncertain,
796
+ # however the issue seems to revolve around point 8 (relating to whether or not perl is deliberately
797
+ # exposed). That isn't in play for our uses, so we don't flag it here.
798
+ # Artistic license 2.0 is a permissive license.
799
+ # Ref: https://en.wikipedia.org/wiki/Artistic_License
800
+ 'Artistic-1.0-Perl',
801
+ pattern('Artistic[- ]2([.]0)?'),
802
+
803
+ # According to Wikipedia, the Boost is considered permissive and BSD-like.
804
+ # Refs:
805
+ # *
806
+ # * https://en.wikipedia.org/wiki/Boost_(C%2B%2B_libraries)#License
807
+ pattern('(BSL|Boost(([- ]Software)?[- ]License)?)([- ]1([.]0)?)?'),
555
808
 
556
809
  # Linking = Permissive, Private Use = Yes
557
810
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
558
- 'BSD License',
559
- 'BSD-2-Clause',
560
- 'BSD-3-Clause',
811
+ pattern('((modified[- ])?[234][- ]Clause[- ])?BSD([- ][234][- ]Clause)?( Licen[cs]e)?'),
812
+ # 'BSD License',
813
+ # 'BSD-2-Clause',
814
+ # 'BSD-3-Clause',
815
+ # 'BSD 3-Clause',
816
+
817
+ # BZIP2 is a permissive license
818
+ # Ref: https://github.com/asimonov-im/bzip2/blob/master/LICENSE
819
+ pattern('bzip2(-1[.0-9]*)'),
561
820
 
562
821
  # Linking = Public Domain, Private Use = Public Domain
563
822
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
@@ -570,6 +829,10 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
570
829
  'CC-BY-3.0',
571
830
  'CC-BY-4.0',
572
831
 
832
+ # The curl license is a permissive license.
833
+ # Ref: https://curl.se/docs/copyright.html
834
+ 'curl',
835
+
573
836
  # Linking = Permissive, Private Use = ?
574
837
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
575
838
  'CDDL',
@@ -583,9 +846,32 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
583
846
  'Eclipse Public License',
584
847
  'EPL-2.0',
585
848
 
849
+ # The FSF Unlimited License (FSFUL) seems to be a completely permissive license.
850
+ # Refs:
851
+ # * https://spdx.org/licenses/FSFUL.html
852
+ # * https://fedoraproject.org/wiki/Licensing/FSF_Unlimited_License
853
+ 'FSF Unlimited License',
854
+ 'FSFUL',
855
+
856
+ # The FreeType license is a permissive license.
857
+ # Ref: LicenseRef-FreeType
858
+ pattern('(Licen[cs]eRef-)?(FTL|FreeType( Licen[cs]e)?)'),
859
+
586
860
  # Linking = Yes, Cat = Permissive Software Licenses
587
861
  # Ref: https://en.wikipedia.org/wiki/Historical_Permission_Notice_and_Disclaimer
588
862
  'Historical Permission Notice and Disclaimer (HPND)',
863
+ 'HPND',
864
+ pattern('(Licen[cs]eRef-)?PIL'),
865
+ # The Pillow or Python Image Library is an HPND license, which is a simple permissive license:
866
+ # Refs:
867
+ # * https://github.com/python-pillow/Pillow/blob/main/LICENSE
868
+ # * https://www.fsf.org/blogs/licensing/historical-permission-notice-and-disclaimer-added-to-license-list
869
+
870
+ # The IJG license, used by Independent JPEG Group (IJG) is a custom permissive license.
871
+ # Refs:
872
+ # * https://en.wikipedia.org/wiki/Libjpeg
873
+ # * https://github.com/libjpeg-turbo/libjpeg-turbo/blob/main/LICENSE.md
874
+ 'IJG',
589
875
 
590
876
  # Linking = Permissive, Private Use = Permissive
591
877
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
@@ -610,10 +896,11 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
610
896
  'OFL-1.1',
611
897
 
612
898
  # Ref: https://en.wikipedia.org/wiki/Public_domain
613
- 'Public Domain',
899
+ pattern('(Licen[cs]eRef-)?Public[- ]Domain([- ]dedic[t]?ation)?'), # "dedictation" is a typo in docutils
614
900
 
615
901
  # Linking = Permissive, Private Use = Permissive
616
902
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
903
+ pattern('(Licen[cs]eRef-)?PSF-2([.][.0-9]*)'),
617
904
  'Python Software Foundation License',
618
905
  'Python-2.0',
619
906
 
@@ -621,11 +908,32 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
621
908
  # Ref: https://en.wikipedia.org/wiki/Pylons_project
622
909
  'Repoze Public License',
623
910
 
911
+ # The TCL or Tcl/Tk licenses are permissive licenses.
912
+ # Ref: https://www.tcl.tk/software/tcltk/license.html
913
+ # The one used by the tktable library has a 'bourbon' clause that doesn't add compliance requirements
914
+ # Ref: https://github.com/wjoye/tktable/blob/master/license.txt
915
+ pattern('Tcl([/]tk)?'),
916
+
917
+ # The Ubuntu Font Licence is mostly permissive. It contains some restrictions if you are going to modify the
918
+ # fonts that require you to change the name to avoid confusion. But for our purposes, we're assuming that's
919
+ # not done, and so we're not flagging it.
920
+ pattern('Ubuntu Font Licen[cs]e Version( 1([.]0)?)?'),
921
+
624
922
  # Linking = Permissive/Public domain, Private Use = Permissive/Public domain
625
923
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
626
924
  'The Unlicense (Unlicense)',
627
925
  'Unlicense',
628
926
 
927
+ # Various licenses seem to call themselves or be summed up as unlimited.
928
+ # So far we know of none that are not highly permissive.
929
+ # * boot and KernSmooth are reported by R as being 'Unlimited'
930
+ # Refs:
931
+ # * https://cran.r-project.org/web/packages/KernSmooth/index.html
932
+ # (https://github.com/cran/KernSmooth/blob/master/LICENCE.note)
933
+ # * https://cran.r-project.org/package=boot
934
+ # (https://github.com/cran/boot/blob/master/DESCRIPTION)
935
+ 'Unlimited',
936
+
629
937
  # Linking = Permissive, Private Use = ?
630
938
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
631
939
  'W3C License',
@@ -646,6 +954,109 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
646
954
  'Zope Public License',
647
955
  ]
648
956
 
957
+ EXCEPTIONS = {
958
+
959
+ # The Bioconductor zlibbioc license is a permissive license.
960
+ # Ref: https://github.com/Bioconductor/zlibbioc/blob/devel/LICENSE
961
+ 'Custom: bioconductor-zlibbioc file LICENSE': [
962
+ 'bioconductor-zlibbioc'
963
+ ],
964
+
965
+ # The Bioconductor rsamtools license is an MIT license
966
+ # Ref: https://bioconductor.org/packages/release/bioc/licenses/Rsamtools/LICENSE
967
+ 'Custom: bioconductor-rsamtools file LICENSE': [
968
+ 'bioconductor-rsamtools'
969
+ ],
970
+
971
+ # DFSG = Debian Free Software Guidelines
972
+ # Ref: https://en.wikipedia.org/wiki/Debian_Free_Software_Guidelines
973
+ # Used as an apparent modifier to other licenses, to say they are approved per Debian.
974
+ # For example in this case, pytest-timeout has license: DFSG approved, MIT License,
975
+ # but is really just an MIT License that someone has checked is DFSG approved.
976
+ 'DFSG approved': [
977
+ 'pytest-timeout', # MIT Licensed
978
+ ],
979
+
980
+ 'FOSS': [
981
+ # The r-stringi library is a conda library that implements a stringi (pronounced "stringy") library for R.
982
+ # The Conda source feed is: https://github.com/conda-forge/r-stringi-feedstock
983
+ # This page explains that the home source is https://stringi.gagolewski.com/ but that's a doc page.
984
+ # The doc page says:
985
+ # > stringi’s source code is hosted on GitHub.
986
+ # > It is distributed under the open source BSD-3-clause license.
987
+ # The source code has a license that begins with a BSD-3-clause license and includes numerous others,
988
+ # but they all appear to be permissive.
989
+ # Ref: https://github.com/gagolews/stringi/blob/master/LICENSE
990
+ 'stringi', 'r-stringi',
991
+ ],
992
+
993
+ # Linking = With Restrictions, Private Use = Yes
994
+ # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
995
+ 'GNU Lesser General Public License v2 or later (LGPLv2+)': [
996
+ 'chardet' # used at runtime during server operation (ingestion), but not modified or distributed
997
+ ],
998
+
999
+ # Linking = With Restrictions, Private Use = Yes
1000
+ # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
1001
+ 'GNU Lesser General Public License v3 or later (LGPLv3+)': [
1002
+ # used only privately in testing, not used in server code, not modified, not distributed
1003
+ 'pytest-redis',
1004
+ # required by pytest-redis (used only where it's used)
1005
+ 'mirakuru',
1006
+ ],
1007
+
1008
+ 'GNU General Public License (GPL)': [
1009
+ 'docutils', # Used only privately as a separate documentation-generation task for ReadTheDocs
1010
+ ],
1011
+
1012
+ 'MIT/X11 Derivative': [
1013
+ # The license used by libxkbcommon is complicated and involves numerous included licenses,
1014
+ # but all are permissive.
1015
+ # Ref: https://github.com/xkbcommon/libxkbcommon/blob/master/LICENSE
1016
+ 'libxkbcommon',
1017
+ ],
1018
+
1019
+ 'None': [
1020
+ # It's not obvious why Conda shows this license as 'None'.
1021
+ # In fact, though, BSD 3-Clause "New" or "Revised" License
1022
+ # Ref: https://github.com/AnacondaRecipes/_libgcc_mutex-feedstock/blob/master/LICENSE.txt
1023
+ '_libgcc_mutex',
1024
+ ],
1025
+
1026
+ 'PostgreSQL': [
1027
+ # The libpq library is actually licensed with a permissive BSD 3-Clause "New" or "Revised" License
1028
+ # Ref: https://github.com/lpsmith/postgresql-libpq/blob/master/LICENSE
1029
+ 'libpq',
1030
+ ],
1031
+
1032
+ 'UCSD': [
1033
+ # It isn't obvious why these show up with a UCSD license in Conda.
1034
+ # The actual sources say it should be a 2-clause BSD license:
1035
+ # Refs:
1036
+ # * https://github.com/AlexandrovLab/SigProfilerMatrixGenerator/blob/master/LICENSE
1037
+ # * https://github.com/AlexandrovLab/SigProfilerPlotting/blob/master/LICENSE
1038
+ 'sigprofilermatrixgenerator',
1039
+ 'sigprofilerplotting',
1040
+ ],
1041
+
1042
+ 'X11': [
1043
+ # The ncurses library has a VERY complicated history, BUT seems consistently permissive
1044
+ # and the most recent version seems to be essentially the MIT license.
1045
+ # Refs:
1046
+ # * https://en.wikipedia.org/wiki/Ncurses#License
1047
+ # * https://invisible-island.net/ncurses/ncurses-license.html
1048
+ 'ncurses'
1049
+ ],
1050
+
1051
+ 'zlib-acknowledgement': [
1052
+ # It isn't clear why libpng shows up with this license name, but the license for libpng
1053
+ # is a permissive license.
1054
+ # Ref: https://github.com/glennrp/libpng/blob/libpng16/LICENSE
1055
+ 'libpng',
1056
+ ],
1057
+
1058
+ }
1059
+
649
1060
  EXPECTED_MISSING_LICENSES = [
650
1061
 
651
1062
  # This is a name we use for our C4 portals. And it isn't published.
@@ -726,7 +1137,7 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
726
1137
  'responses',
727
1138
 
728
1139
  # This seems to get flagged sometimes, but is not the pypi snovault library, it's what our dcicsnovault
729
- # calls itself internally.. In any case, it's under MIT license and OK.
1140
+ # calls itself internally. In any case, it's under MIT license and OK.
730
1141
  # Ref: https://github.com/4dn-dcic/snovault/blob/master/LICENSE.txt
731
1142
  'snovault',
732
1143
 
@@ -757,141 +1168,242 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
757
1168
 
758
1169
  ]
759
1170
 
760
- EXCEPTIONS = {
761
1171
 
762
- 'BSD*': [
763
- # Although modified to insert the author name into the license text itself,
764
- # the license for these libraries are essentially BSD-3-Clause.
765
- 'formatio',
766
- 'samsam',
767
-
768
- # There are some slightly different versions of what appear to be BSD licenses here,
769
- # but clearly the license is permissive.
770
- # Ref: https://www.npmjs.com/package/mutation-observer?activeTab=readme
771
- 'mutation-observer',
772
- ],
1172
+ @LicenseCheckerRegistry.register_checker('park-lab-pipeline')
1173
+ class ParkLabPipelineLicenseChecker(ParkLabCommonLicenseChecker):
1174
+ """
1175
+ Minimal checker common to pipelines from Park Lab.
1176
+ """
773
1177
 
774
- 'Custom: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global': [
775
- # The use of this URL appears to be a syntax error in the definition of entries-ponyfill
776
- # In fact this seems to be covered by a CC0-1.0 license.
777
- # Ref: https://unpkg.com/browse/object.entries-ponyfill@1.0.1/LICENSE
778
- 'object.entries-ponyfill',
779
- ],
1178
+ LICENSE_FRAMEWORKS = ['python', 'conda', 'r']
780
1179
 
781
- 'Custom: https://github.com/saikocat/colorbrewer.': [
782
- # The use of this URL appears to be a syntax error in the definition of cartocolor
783
- # In fact, this seems to be covered by a CC-BY-3.0 license.
784
- # Ref: https://www.npmjs.com/package/cartocolor?activeTab=readme
785
- 'cartocolor',
786
- ],
787
1180
 
788
- 'Custom: https://travis-ci.org/component/emitter.png': [
789
- # The use of this png appears to be a syntax error in the definition of emitter-component.
790
- # In fact, emitter-component uses an MIT License
791
- # Ref: https://www.npmjs.com/package/emitter-component
792
- # Ref: https://github.com/component/emitter/blob/master/LICENSE
793
- 'emitter-component',
794
- ],
1181
+ @LicenseCheckerRegistry.register_checker('park-lab-gpl-pipeline')
1182
+ class ParkLabGplPipelineLicenseChecker(ParkLabCommonLicenseChecker):
1183
+ """
1184
+ Minimal checker common to GPL pipelines from Park Lab.
1185
+ """
795
1186
 
796
- # The 'turfs-jsts' repository (https://github.com/DenisCarriere/turf-jsts/blob/master/README.md)
797
- # seems to lack a license, but appears to be forked from the jsts library that uses
798
- # the Eclipse Public License 1.0 and Eclipse Distribution License 1.0, so probably a permissive
799
- # license is intended.
800
- 'Custom: https://travis-ci.org/DenisCarriere/turf-jsts.svg': [
801
- 'turf-jsts'
802
- ],
803
-
804
- # DFSG = Debian Free Software Guidelines
805
- # Ref: https://en.wikipedia.org/wiki/Debian_Free_Software_Guidelines
806
- # Used as an apparent modifier to other licenses, to say they are approved per Debian.
807
- # For example in this case, pytest-timeout has license: DFSG approved, MIT License,
808
- # but is really just an MIT License that someone has checked is DFSG approved.
809
- 'DFSG approved': [
810
- 'pytest-timeout', # MIT Licensed
811
- ],
1187
+ ALLOWED = ParkLabPipelineLicenseChecker.ALLOWED + [
812
1188
 
813
1189
  # Linking = With Restrictions, Private Use = Yes
814
1190
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
815
- 'GNU Lesser General Public License v2 or later (LGPLv2+)': [
816
- 'chardet' # used at runtime during server operation (ingestion), but not modified or distributed
817
- ],
818
-
819
- # Linking = With Restrictions, Private Use = Yes
1191
+ # The "exceptions", if present, indicate waivers to source delivery requirements.
1192
+ # Ref: https://spdx.org/licenses/LGPL-3.0-linking-exception.html
1193
+ pattern('GNU Lesser General Public License v2( or later)?( [(]LGPL[v]?[23][+]?[)])?'),
1194
+ # 'GNU Lesser General Public License v2 or later (LGPLv2+)',
1195
+ # 'GNU Lesser General Public License v3 or later (LGPLv3+)',
1196
+ # 'LGPLv2', 'LGPL-v2', 'LGPL-v2.0', 'LGPL-2', 'LGPL-2.0',
1197
+ # 'LGPLv2+', 'LGPL-v2+', 'LGPL-v2.0+', 'LGPL-2+', 'LGPL-2.0+',
1198
+ # 'LGPLv3', 'LGPL-v3', 'LGPL-v3.0', 'LGPL-3', 'LGPL-3.0',
1199
+ # 'LGPLv3+', 'LGPL-v3+', 'LGPL-v3.0+', 'LGPL-3+', 'LGPL-3.0+',
1200
+ pattern('LGPL[v-]?[.0-9]*([+]|-only)?([- ]with[- ]exceptions)?'),
1201
+
1202
+ # Uncertain whether this is LGPL 2 or 3, but in any case we think weak copyleft should be OK
1203
+ # for pipeline or server use as long as we're not distributing sources.
1204
+ 'LGPL',
1205
+ 'GNU Library or Lesser General Public License (LGPL)',
1206
+
1207
+ # GPL
1208
+ # * library exception operates like LGPL
1209
+ # * classpath exception is a linking exception related to Oracle
1210
+ # Refs:
1211
+ # * https://www.gnu.org/licenses/old-licenses/gpl-1.0.en.html
1212
+ # * https://spdx.org/licenses/GPL-2.0-with-GCC-exception.html
1213
+ # * https://spdx.org/licenses/GPL-3.0-with-GCC-exception.html
1214
+ pattern('(GNU General Public License|GPL)[ ]?[v-]?[123]([.]0)?([+]|[- ]only)?'
1215
+ '([- ]with[- ]GCC(([- ]runtime)?[- ]library)?[- ]exception([- ][.0-9]*)?)?'
1216
+ '([- ]with[- ]Classpath[- ]exception([- ][.0-9]+)?)?'),
1217
+
1218
+ # Linking = "GPLv3 compatible only", Private Use = Yes
820
1219
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
821
- 'GNU Lesser General Public License v3 or later (LGPLv3+)': [
822
- 'pytest-redis', # used only privately in testing, not used in server code, not modified, not distributed
823
- 'mirakuru', # required by pytest-redis (used only where it's used)
824
- ],
1220
+ 'GPL-2-or-3', # we sometimes generate this token
1221
+ # 'GPLv2+', 'GPL-v2+', 'GPL-v2.0+', 'GPL-2+', 'GPL-2.0+',
1222
+ # 'GPLv3', 'GPL-v3', 'GPL-v3.0', 'GPL-3', 'GPL-3.0',
1223
+ # 'GPLv3+', 'GPL-v3+', 'GPL-v3.0+', 'GPL-3+', 'GPL-3.0+',
1224
+ # 'GPLv3-only', 'GPL-3-only', 'GPL-v3-only', 'GPL-3.0-only', 'GPL-v3.0-only',
825
1225
 
826
- 'GNU General Public License (GPL)': [
827
- 'docutils', # Used only privately as a separate documentation-generation task for ReadTheDocs
828
- ],
1226
+ # Uncertain whether this is GPL 2 or 3, but we'll assume that means we can use either.
1227
+ # And version 3 is our preferred interpretation.
1228
+ 'GNU General Public License',
1229
+ 'GPL',
829
1230
 
830
- # Linking = With Restrictions, Private Use = Yes
831
- # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
832
- # 'GNU Lesser General Public License v3 or later (LGPLv3+)',
1231
+ RLicenseFramework.R_LANGUAGE_LICENSE_NAME
833
1232
 
834
- # Linking = With Restrictions, Private Use = Yes
835
- # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
836
- 'GNU Library or Lesser General Public License (LGPL)': [
837
- 'psycopg2', # Used at runtime during server operation, but not modified or distributed
838
- 'psycopg2-binary', # Used at runtime during server operation, but not modified or distributed
839
- 'chardet', # Potentially used downstream in loadxl to detect charset for text files
840
- 'pyzmq', # Used in post-deploy-perf-tests, not distributed, and not modified or distributed
841
- ],
1233
+ ]
842
1234
 
843
- 'GPL-2.0': [
844
- # The license file for the node-forge javascript library says:
845
- #
846
- # "You may use the Forge project under the terms of either the BSD License or the
847
- # GNU General Public License (GPL) Version 2."
848
- #
849
- # (We choose to use it under the BSD license.)
850
- # Ref: https://www.npmjs.com/package/node-forge?activeTab=code
851
- 'node-forge',
852
- ],
853
1235
 
854
- 'MIT*': [
1236
+ @LicenseCheckerRegistry.register_checker('park-lab-common-server')
1237
+ class ParkLabCommonServerLicenseChecker(ParkLabCommonLicenseChecker):
1238
+ """
1239
+ Checker for servers from Park Lab.
855
1240
 
856
- # This library uses a mix of licenses, but they (MIT, CC0) generally seem permissive.
857
- # (It also mentions that some tools for building/testing use other libraries.)
858
- # Ref: https://github.com/requirejs/domReady/blob/master/LICENSE
859
- 'domready',
1241
+ If you're at some other organization, we recommend you make a class that has values
1242
+ suitable to your own organizational needs.
1243
+ """
860
1244
 
861
- # This library is under 'COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1'
862
- # Ref: https://github.com/javaee/jsonp/blob/master/LICENSE.txt
863
- # About CDDL ...
864
- # Linking = Permissive, Private Use = ?
865
- # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
866
- 'jsonp',
1245
+ LICENSE_FRAMEWORKS = ['python', 'javascript']
867
1246
 
868
- # This library says pretty clearly it intends MIT license.
869
- # Ref: https://www.npmjs.com/package/component-indexof
870
- # Linking = Permissive, Private Use = Yes
1247
+ EXCEPTIONS = augment(
1248
+ ParkLabCommonLicenseChecker.EXCEPTIONS,
1249
+ by={
1250
+ 'BSD*': [
1251
+ # Although modified to insert the author name into the license text itself,
1252
+ # the licenses for these libraries are essentially BSD-3-Clause.
1253
+ 'formatio',
1254
+ 'samsam',
1255
+
1256
+ # There are some slightly different versions of what appear to be BSD licenses here,
1257
+ # but clearly the license is permissive.
1258
+ # Ref: https://www.npmjs.com/package/mutation-observer?activeTab=readme
1259
+ 'mutation-observer',
1260
+ ],
1261
+
1262
+ 'Custom: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global': [
1263
+ # The use of this URL appears to be a syntax error in the definition of entries-ponyfill
1264
+ # In fact this seems to be covered by a CC0-1.0 license.
1265
+ # Ref: https://unpkg.com/browse/object.entries-ponyfill@1.0.1/LICENSE
1266
+ 'object.entries-ponyfill',
1267
+ ],
1268
+
1269
+ 'Custom: https://github.com/saikocat/colorbrewer.': [
1270
+ # The use of this URL appears to be a syntax error in the definition of cartocolor
1271
+ # In fact, this seems to be covered by a CC-BY-3.0 license.
1272
+ # Ref: https://www.npmjs.com/package/cartocolor?activeTab=readme
1273
+ 'cartocolor',
1274
+ ],
1275
+
1276
+ 'Custom: https://travis-ci.org/component/emitter.png': [
1277
+ # The use of this png appears to be a syntax error in the definition of emitter-component.
1278
+ # In fact, emitter-component uses an MIT License
1279
+ # Ref: https://www.npmjs.com/package/emitter-component
1280
+ # Ref: https://github.com/component/emitter/blob/master/LICENSE
1281
+ 'emitter-component',
1282
+ ],
1283
+
1284
+ # The 'turf-jsts' repository (https://github.com/DenisCarriere/turf-jsts/blob/master/README.md)
1285
+ # seems to lack a license, but appears to be forked from the jsts library that uses
1286
+ # the Eclipse Public License 1.0 and Eclipse Distribution License 1.0, so probably a permissive
1287
+ # license is intended.
1288
+ 'Custom: https://travis-ci.org/DenisCarriere/turf-jsts.svg': [
1289
+ 'turf-jsts'
1290
+ ],
1291
+
1292
+ 'GNU General Public License (GPL)': [
1293
+ 'docutils', # Used only privately as a separate documentation-generation task for ReadTheDocs
1294
+ ],
1295
+
1296
+ # Linking = With Restrictions, Private Use = Yes
871
1297
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
872
- 'component-indexof',
1298
+ # 'GNU Lesser General Public License v3 or later (LGPLv3+)',
873
1299
 
874
- # These look like a pretty straight MIT license.
875
- # Linking = Permissive, Private Use = Yes
1300
+ # Linking = With Restrictions, Private Use = Yes
876
1301
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
877
- 'mixin', # LICENSE file at https://www.npmjs.com/package/mixin?activeTab=code
878
- 'stack-trace', # https://github.com/stacktracejs/stacktrace.js/blob/master/LICENSE
879
- 'typed-function', # LICENSE at https://www.npmjs.com/package/typed-function?activeTab=code
880
-
881
- ],
882
-
883
- 'UNLICENSED': [
884
- # The udn-browser library is our own and has been observed to sometimes show up in some contexts
885
- # as UNLICENSED, when really it's MIT.
886
- # Ref: https://github.com/dbmi-bgm/udn-browser/blob/main/LICENSE
887
- 'udn-browser',
888
- ],
1302
+ 'GNU Library or Lesser General Public License (LGPL)': [
1303
+ 'psycopg2', # Used at runtime during server operation, but not modified or distributed
1304
+ 'psycopg2-binary', # Used at runtime during server operation, but not modified or distributed
1305
+ 'chardet', # Potentially used downstream in loadxl to detect charset for text files
1306
+ 'pyzmq', # Used in post-deploy-perf-tests, not distributed, and not modified or distributed
1307
+ ],
1308
+
1309
+ 'GPL-2.0': [
1310
+ # The license file for the node-forge javascript library says:
1311
+ #
1312
+ # "You may use the Forge project under the terms of either the BSD License or the
1313
+ # GNU General Public License (GPL) Version 2."
1314
+ #
1315
+ # (We choose to use it under the BSD license.)
1316
+ # Ref: https://www.npmjs.com/package/node-forge?activeTab=code
1317
+ 'node-forge',
1318
+ ],
1319
+
1320
+ 'MIT*': [
1321
+
1322
+ # This library uses a mix of licenses, but they (MIT, CC0) generally seem permissive.
1323
+ # (It also mentions that some tools for building/testing use other libraries.)
1324
+ # Ref: https://github.com/requirejs/domReady/blob/master/LICENSE
1325
+ 'domready',
1326
+
1327
+ # This library is under 'COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1'
1328
+ # Ref: https://github.com/javaee/jsonp/blob/master/LICENSE.txt
1329
+ # About CDDL ...
1330
+ # Linking = Permissive, Private Use = ?
1331
+ # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
1332
+ 'jsonp',
1333
+
1334
+ # This library says pretty clearly it intends MIT license.
1335
+ # Ref: https://www.npmjs.com/package/component-indexof
1336
+ # Linking = Permissive, Private Use = Yes
1337
+ # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
1338
+ 'component-indexof',
1339
+
1340
+ # These look like a pretty straight MIT license.
1341
+ # Linking = Permissive, Private Use = Yes
1342
+ # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
1343
+ 'mixin', # LICENSE file at https://www.npmjs.com/package/mixin?activeTab=code
1344
+ 'stack-trace', # https://github.com/stacktracejs/stacktrace.js/blob/master/LICENSE
1345
+ 'typed-function', # LICENSE at https://www.npmjs.com/package/typed-function?activeTab=code
1346
+
1347
+ ],
1348
+
1349
+ 'UNLICENSED': [
1350
+ # The udn-browser library is our own and has been observed to sometimes show up in some contexts
1351
+ # as UNLICENSED, when really it's MIT.
1352
+ # Ref: https://github.com/dbmi-bgm/udn-browser/blob/main/LICENSE
1353
+ 'udn-browser',
1354
+ ],
1355
+ })
1356
+
1357
+
1358
+ @LicenseCheckerRegistry.register_checker('c4-infrastructure')
1359
+ class C4InfrastructureLicenseChecker(ParkLabCommonServerLicenseChecker):
1360
+ """
1361
+ Checker for C4 infrastructure (Fourfront, CGAP, SMaHT) from Park Lab.
1362
+ """
889
1363
 
890
- }
1364
+ LICENSE_TITLE = "(The )?MIT License"
891
1365
 
892
1366
 
1367
+ @LicenseCheckerRegistry.register_checker('c4-python-infrastructure')
893
1368
  class C4PythonInfrastructureLicenseChecker(C4InfrastructureLicenseChecker):
894
1369
  """
895
- For situations like dcicutils and dcicsnovault where there's no Javascript, this will test just Python.
1370
+ Checker for C4 python library infrastructure (Fourfront, CGAP, SMaHT) from Park Lab.
896
1371
  """
897
1372
  LICENSE_FRAMEWORKS = ['python']
1373
+
1374
+
1375
+ @LicenseCheckerRegistry.register_checker('scan2-pipeline')
1376
+ class Scan2PipelineLicenseChecker(ParkLabGplPipelineLicenseChecker):
1377
+ """
1378
+ Checker for SCAN2 library from Park Lab.
1379
+ """
1380
+
1381
+ EXCEPTIONS = augment(
1382
+ ParkLabGplPipelineLicenseChecker.EXCEPTIONS,
1383
+ by={
1384
+ 'Custom: Matrix file LICENCE': [
1385
+ # The custom information in https://cran.r-project.org/web/packages/Matrix/LICENCE
1386
+ # says there are potential extra restrictions beyond a simple GPL license
1387
+ # if SuiteSparse is used, but it is not requested explicitly by Scan2, and we're
1388
+ # trusting that any other libraries used by Scan2 would have investigated this.
1389
+ # So, effectively, we think the Matrix library for this situation operates the
1390
+ # same as if it were just GPL-3 licensed, and we are fine with that.
1391
+ 'Matrix'
1392
+ ],
1393
+
1394
+ "MISSING": [
1395
+ # mysql-common and mysql-libs are GPL, but since they are delivered by conda
1396
+ # and not distributed as part of the Scan2 distribution, they should be OK.
1397
+ # Ref: https://redresscompliance.com/mysql-license-a-complete-guide-to-licensing/#:~:text=commercial%20use # noQA
1398
+ 'mysql-common',
1399
+ 'mysql-libs',
1400
+
1401
+ # This is our own library
1402
+ 'r-scan2', 'scan2',
1403
+ ]
1404
+ }
1405
+ )
1406
+
1407
+ EXPECTED_MISSING_LICENSES = ParkLabGplPipelineLicenseChecker.EXPECTED_MISSING_LICENSES + [
1408
+
1409
+ ]
dcicutils/misc_utils.py CHANGED
@@ -7,10 +7,11 @@ import datetime
7
7
  import functools
8
8
  import hashlib
9
9
  import inspect
10
- import math
11
10
  import io
12
- import os
11
+ import json
13
12
  import logging
13
+ import math
14
+ import os
14
15
  import pytz
15
16
  import re
16
17
  import rfc3986.validators
@@ -20,8 +21,8 @@ import warnings
20
21
  import webtest # importing the library makes it easier to mock testing
21
22
 
22
23
  from collections import defaultdict
23
- from dateutil.parser import parse as dateutil_parse
24
24
  from datetime import datetime as datetime_type
25
+ from dateutil.parser import parse as dateutil_parse
25
26
  from typing import Optional
26
27
 
27
28
 
@@ -1310,6 +1311,11 @@ def file_contents(filename, binary=False):
1310
1311
  return fp.read()
1311
1312
 
1312
1313
 
1314
def json_file_contents(filename):
    """
    Returns the parsed JSON content of the named file.

    The file is decoded explicitly as UTF-8 (the standard encoding for JSON interchange)
    rather than with the platform's default encoding, so files containing non-ASCII text
    parse the same way regardless of locale settings.

    :param filename: the name (path) of a file containing JSON text
    :return: the Python data (dict, list, str, etc.) that results from parsing the file
    :raises OSError: if the file cannot be opened
    :raises json.JSONDecodeError: if the file content is not valid JSON
    """
    with io.open(filename, 'r', encoding='utf-8') as fp:
        return json.load(fp)
1317
+
1318
+
1313
1319
  def camel_case_to_snake_case(s, separator='_'):
1314
1320
  """
1315
1321
  Converts CamelCase to snake_case.
@@ -0,0 +1,77 @@
1
+ import argparse
2
+
3
+ from dcicutils.command_utils import script_catch_errors, ScriptFailure
4
+ from dcicutils.lang_utils import there_are, conjoined_list
5
+ from dcicutils.license_utils import LicenseOptions, LicenseCheckerRegistry, LicenseChecker, LicenseCheckFailure
6
+ from dcicutils.misc_utils import PRINT, get_error_message
7
+ from typing import Optional, Type
8
+
9
+
10
+ EPILOG = __doc__
11
+
12
+
13
+ ALL_CHECKER_NAMES = LicenseCheckerRegistry.all_checker_names()
14
+ NEWLINE = '\n'
15
+
16
+
17
def main():
    """
    Command-line entry point: parses arguments and runs the requested license checker.

    Command-line arguments:

    * ``name`` (optional positional): the name of a checker to run. If omitted, None is passed
      along to run_license_checker, which then shows help about choosing a checker.
    * ``--brief`` / ``-b``: requests brief output (passed along as verbose=False).
    * ``--debug`` / ``-d`` / ``-q``: requests additional debugging output. The ``-q`` spelling is
      the original short form, retained for compatibility; ``-d`` is the more conventional one.
    * ``--conda-prefix`` / ``--conda_prefix`` / ``--cp``: overrides LicenseOptions.CONDA_PREFIX.

    The checker itself is run inside script_catch_errors().
    """

    def literal_help(text):
        # argparse applies %-formatting to help strings, so any '%' coming from dynamic text
        # (a checker name, a filesystem path) must be doubled to be shown literally.
        return text.replace('%', '%%')

    name_help = ("The name of a checker to run. "
                 + there_are(ALL_CHECKER_NAMES, kind='available checker',
                             show=True, joiner=conjoined_list, punctuate=True))
    conda_prefix_help = f"Overrides the CONDA_PREFIX (default {LicenseOptions.CONDA_PREFIX!r})."

    parser = argparse.ArgumentParser(
        description="Runs a license checker",
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument("name", type=str, default=None, nargs='?',
                        help=literal_help(name_help))
    parser.add_argument("--brief", '-b', default=False, action="store_true",
                        help="Requests brief output.")
    parser.add_argument("--debug", '-d', '-q', default=False, action="store_true",
                        help="Requests additional debugging output.")
    parser.add_argument("--conda-prefix", "--conda_prefix", "--cp", default=LicenseOptions.CONDA_PREFIX,
                        help=literal_help(conda_prefix_help))

    args = parser.parse_args()

    with script_catch_errors():
        run_license_checker(name=args.name, verbose=not args.brief, debug=args.debug, conda_prefix=args.conda_prefix)
39
+
40
+
41
def show_help_for_choosing_license_checker():
    """
    Prints a summary of the available license checkers, followed by notes and disclaimers.

    Each name in ALL_CHECKER_NAMES is shown on its own line, padded to a common width and
    followed by the first line of the doc string of the checker class registered under
    that name (or '<missing doc>' if the class has no doc string).
    """
    PRINT("")
    PRINT(there_are(ALL_CHECKER_NAMES, kind='available checker', show=False, punctuation_mark=':'))
    PRINT("")
    # default=0 keeps this from raising ValueError if no checkers happen to be registered.
    wid = max((len(x) for x in ALL_CHECKER_NAMES), default=0) + 1  # the + 1 leaves room for the colon
    for checker_name in ALL_CHECKER_NAMES:
        checker_class = LicenseCheckerRegistry.lookup_checker(checker_name)
        checker_doc = (checker_class.__doc__ or '<missing doc>').strip(' \t\n\r')
        # Only the first line of the doc string is shown, as a one-line summary.
        PRINT(f"{(checker_name + ':').ljust(wid)} {checker_doc.split(NEWLINE)[0]}")
    PRINT("")
    PRINT("=" * 42, "NOTES & DISCLAIMERS", "=" * 42)
    PRINT("Park Lab is a research laboratory in the Department of Biomedical Informatics at Harvard Medical School.")
    PRINT("Park Lab checkers are intended for internal use and may not be suitable for other purposes.")
    PRINT("External organizations must make their own independent choices about license acceptability.")
    PRINT("Such choices can be integrated with this tool as follows:")
    PRINT(" * Import LicenseChecker and LicenseCheckerRegistry from dcicutils.license_utils.")
    PRINT(" * Make your own subclass of LicenseChecker, specifying a doc string and appropriate constraints.")
    PRINT(" * Decorate your subclass with an appropriate call to LicenseCheckerRegistry.register_checker.")
    PRINT("")
60
+
61
+
62
def run_license_checker(name: Optional[str],
                        verbose=LicenseOptions.VERBOSE,
                        debug=LicenseOptions.DEBUG,
                        conda_prefix=LicenseOptions.CONDA_PREFIX):
    """
    Runs the license checker registered under the given name.

    If name is None, no checker is run; instead, help about the available checkers is printed.

    :param name: the name of a registered checker, or None to just show help
    :param verbose: passed as the 'verbose' option to LicenseOptions.selected_options
    :param debug: passed as the 'debug' option to LicenseOptions.selected_options
    :param conda_prefix: passed as the 'conda_prefix' option to LicenseOptions.selected_options
    :raises ScriptFailure: if looking up the checker fails for any reason, or if the checker's
        validate() method raises LicenseCheckFailure. The original exception is attached
        as the cause so it remains visible in tracebacks.
    """
    if name is None:
        show_help_for_choosing_license_checker()
        return
    try:
        checker_class: Type[LicenseChecker] = LicenseCheckerRegistry.lookup_checker(name)
    except Exception as e:
        # Any lookup problem is reported as a script failure rather than as a raw traceback.
        raise ScriptFailure(str(e)) from e
    try:
        # The requested options apply only for the duration of this validation.
        with LicenseOptions.selected_options(verbose=verbose, debug=debug, conda_prefix=conda_prefix):
            checker_class.validate()
    except LicenseCheckFailure as e:
        raise ScriptFailure(get_error_message(e)) from e
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 7.12.0
3
+ Version: 7.12.0.1b4
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -30,9 +30,9 @@ dcicutils/jh_utils.py,sha256=Gpsxb9XEzggF_-Eq3ukjKvTnuyb9V1SCSUXkXsES4Kg,11502
30
30
  dcicutils/kibana/dashboards.json,sha256=wHMB_mpJ8OaYhRRgvpZuihaB2lmSF64ADt_8hkBWgQg,16225
31
31
  dcicutils/kibana/readme.md,sha256=3KmHF9FH6A6xwYsNxRFLw27q0XzHYnjZOlYUnn3VkQQ,2164
32
32
  dcicutils/lang_utils.py,sha256=cVLRUGyYeSPJAq3z_RJjA6miajHrXoi6baxF8HzHmLc,27797
33
- dcicutils/license_utils.py,sha256=OhOfTXFivvb6Y3tiJAb1b9Is-OTpBfZjC18M-RvqBqk,40456
33
+ dcicutils/license_utils.py,sha256=AJ7AwUb7YsXwrrncuS5bLwz3B0YYOHAqKwgf1JPLj6w,63798
34
34
  dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
35
- dcicutils/misc_utils.py,sha256=sXJ7ChrMyXZooaCnUtLxWHOmFIqxrxJKGJ6Ayd5i2Gk,91032
35
+ dcicutils/misc_utils.py,sha256=d30xwLFW41FwZVDAEYulWwyZUcLEzmD-pxsMlKH3mF4,91148
36
36
  dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
37
37
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
38
38
  dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
@@ -42,14 +42,15 @@ dcicutils/redis_tools.py,sha256=rqGtnVUjNjTlCdL1EMKuEhEMAgRJMiXZJkrKuX255QA,6509
42
42
  dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
43
43
  dcicutils/s3_utils.py,sha256=a9eU3Flh8Asc8xPWLGP16A6UQ_FVwhoFQNqm4ZYgSQ4,28852
44
44
  dcicutils/scripts/publish_to_pypi.py,sha256=qmWyjrg5bNQNfpNKFTZdyMXpRmrECnRV9VmNQddUPQA,13576
45
+ dcicutils/scripts/run_license_checker.py,sha256=psv3c1Of7h4V4yvh93iyI2F3JFPzdzQakKdq97JThRw,3653
45
46
  dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
46
47
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
47
48
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
48
49
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
49
50
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
50
51
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
51
- dcicutils-7.12.0.dist-info/LICENSE.txt,sha256=t0_-jIjqxNnymZoNJe-OltRIuuF8qfhN0ATlHyrUJPk,1102
52
- dcicutils-7.12.0.dist-info/METADATA,sha256=isoR9wb6CJyIef4ZYG4opKLj5s5c9LG6rihPw7jJP1Q,2999
53
- dcicutils-7.12.0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
54
- dcicutils-7.12.0.dist-info/entry_points.txt,sha256=Z3vezbXsTpTIY4N2F33c5e-WDVQxgz_Vsk1oV_JBN7A,146
55
- dcicutils-7.12.0.dist-info/RECORD,,
52
+ dcicutils-7.12.0.1b4.dist-info/LICENSE.txt,sha256=t0_-jIjqxNnymZoNJe-OltRIuuF8qfhN0ATlHyrUJPk,1102
53
+ dcicutils-7.12.0.1b4.dist-info/METADATA,sha256=WBf2fEjWMlOtieSs4nq5zbiThbHAYzQliH7gmJ_0L04,3003
54
+ dcicutils-7.12.0.1b4.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
55
+ dcicutils-7.12.0.1b4.dist-info/entry_points.txt,sha256=8wbw5csMIgBXhkwfgsgJeuFcoUc0WsucUxmOyml2aoA,209
56
+ dcicutils-7.12.0.1b4.dist-info/RECORD,,
@@ -1,4 +1,5 @@
1
1
  [console_scripts]
2
2
  publish-to-pypi=dcicutils.scripts.publish_to_pypi:main
3
+ run-license-checker=dcicutils.scripts.run_license_checker:main
3
4
  show-contributors=dcicutils.contribution_scripts:show_contributors_main
4
5