dcicutils 7.12.0__py3-none-any.whl → 7.12.0.1b4__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,7 @@
1
1
  import contextlib
2
+ import csv
2
3
  import datetime
4
+ import glob
3
5
  import io
4
6
  import json
5
7
  # import logging
@@ -23,13 +25,18 @@ except ImportError: # pragma: no cover - not worth unit testing this case
23
25
  # import piplicenses
24
26
 
25
27
  from collections import defaultdict
26
- from typing import Any, Dict, DefaultDict, List, Optional, Type, Union
28
+ from typing import Any, Dict, DefaultDict, List, Optional, Type, TypeVar, Union
27
29
 
28
30
  # For obscure reasons related to how this file is used for early prototyping, these must use absolute references
29
31
  # to modules, not relative references. Later when things are better installed, we can make refs relative again.
32
+ from dcicutils.exceptions import InvalidParameterError
30
33
  from dcicutils.lang_utils import there_are
31
- from dcicutils.misc_utils import PRINT, get_error_message, local_attrs
34
+ from dcicutils.misc_utils import (
35
+ PRINT, get_error_message, ignorable, ignored, json_file_contents, local_attrs, environ_bool,
36
+ remove_suffix,
37
+ )
32
38
 
39
+ T = TypeVar("T")
33
40
 
34
41
  # logging.basicConfig()
35
42
  # logger = logging.getLogger(__name__)
@@ -43,6 +50,14 @@ _NAME = 'name'
43
50
  _STATUS = 'status'
44
51
 
45
52
 
53
def pattern(x):
    """
    Compiles the regular expression source x so that it matches without regard to case.

    :param x: the regular expression source text
    :return: the compiled, case-insensitive pattern object
    """
    return re.compile(x, flags=re.IGNORECASE)
55
+
56
+
57
def augment(d: dict, by: dict):
    """
    Returns a new dictionary holding the entries of d overlaid with the entries of by.

    Neither argument is modified. Where a key occurs in both, the value from by wins.
    Keys do not have to be strings: the overlay is applied with dict.update rather than
    by keyword expansion, which would reject non-string keys.

    :param d: the base dictionary
    :param by: the entries to add or override
    :return: a newly allocated dictionary
    """
    result = dict(d)
    result.update(by)
    return result
59
+
60
+
46
61
  class LicenseStatus:
47
62
  ALLOWED = "ALLOWED"
48
63
  SPECIALLY_ALLOWED = "SPECIALLY_ALLOWED"
@@ -51,6 +66,23 @@ class LicenseStatus:
51
66
  UNEXPECTED_MISSING = "UNEXPECTED_MISSING"
52
67
 
53
68
 
69
class LicenseOptions:
    """
    Holder for the switches that the license-checking code consults at runtime.

    Each switch is a class attribute whose initial value is taken from the environment
    when this class body is executed.
    """

    # Whether to show general progress information.
    # Initialized from LICENSE_UTILS_VERBOSE (as interpreted by environ_bool), defaulting to True.
    VERBOSE = environ_bool("LICENSE_UTILS_VERBOSE", default=True)

    # Whether to show additional, more specific debugging output.
    # Initialized from LICENSE_UTILS_DEBUG (as interpreted by environ_bool), defaulting to False.
    DEBUG = environ_bool("LICENSE_UTILS_DEBUG", default=False)

    # Directory prefix under which conda metadata is looked up.
    # CONDA_LICENSE_CHECKER_PREFIX takes precedence over CONDA_PREFIX; if neither is set, this is "".
    CONDA_PREFIX = os.environ.get(
        "CONDA_LICENSE_CHECKER_PREFIX",
        os.environ.get("CONDA_PREFIX", ""),
    )

    @classmethod
    @contextlib.contextmanager
    def selected_options(cls, verbose=VERBOSE, debug=DEBUG, conda_prefix=CONDA_PREFIX):
        """
        Context manager that lets a caller (a script, for example) override these options dynamically.

        The argument defaults are the attribute values computed when the class body ran,
        so an omitted argument selects that initial value.
        The overrides are applied through local_attrs for the duration of the with-body
        (presumably being undone on exit -- see local_attrs for the exact contract).
        """
        overrides = {"VERBOSE": verbose, "DEBUG": debug, "CONDA_PREFIX": conda_prefix}
        with local_attrs(cls, **overrides):
            yield
84
+
85
+
54
86
  class LicenseFramework:
55
87
 
56
88
  NAME = None
@@ -87,13 +119,13 @@ class LicenseFrameworkRegistry:
87
119
  yield
88
120
 
89
121
  @classmethod
90
- def register(cls, *, name):
122
+ def register_framework(cls, *, name):
91
123
  """
92
124
  Declares a python license framework class.
93
125
  Mostly these names will be language names like 'python' or 'javascript',
94
126
  but they might be names of other, non-linguistic frameworks (like 'cgap-pipeline', for example).
95
127
  """
96
- def _decorator(framework_class):
128
+ def _decorator(framework_class: T) -> T:
97
129
  if not issubclass(framework_class, LicenseFramework):
98
130
  raise ValueError(f"The class {framework_class.__name__} does not inherit from LicenseFramework.")
99
131
  framework_class.NAME = name
@@ -117,25 +149,108 @@ class LicenseFrameworkRegistry:
117
149
  return sorted(cls.LICENSE_FRAMEWORKS.values(), key=lambda x: x.NAME)
118
150
 
119
151
 
120
- @LicenseFrameworkRegistry.register(name='javascript')
152
# Fragment (not compiled on its own) matching a trailing '-or-later', ' or later', '-or-greater', etc.
_OR_LATER_PATTERN = r'(?:[- ]or[ -](?:greater|later))'

# Matches a parenthesized version constraint that follows a license name, such as
# ' (= 3)', ' (>= 3)', ' (version 3)' or ' (version 3 or greater)'.
# It incidentally and harmlessly also accepts ' (>version 3)' and ' (>= 3 or greater)',
# and it handles the unlikely ' (= 3 or greater)' as well.
# Groups: 1 = the whole parenthetical including its leading space, 2 = '>' or '',
#         3 = the version number, 4 = the or-later text or ''.
_PARENTHETICAL_VERSION_CONSTRAINT = re.compile(
    r'( [(]([>]?)(?:[=]|version) ([0-9.]+)(' + _OR_LATER_PATTERN + r'?)[)])'
)

# Matches an or-later suffix anywhere in a spec; group 1 is the matched suffix text.
_POSTFIX_OR_LATER_PATTERN = re.compile('(' + _OR_LATER_PATTERN + ')')

# Matches a spec that is exactly a choice between two GPL versions, e.g. 'GPL-2 | GPL-3'.
# NOTE(review): each version group matches exactly ONE character from [0-9.+], so
#               'GPL-2.0 OR GPL-3.0' does not match -- confirm whether that is intended.
_GPL_VERSION_CHOICE = re.compile(r'^GPL-v?([0-9.+]) (?:OR|[|]) GPL-v?([0-9.+])$')
160
+
161
+
162
def _rewrite_license_spec_until_stable(licenses_spec, regexp, make_replacement, max_rewrites=101):
    """
    Repeatedly replaces the text of group 1 of the first match of regexp until nothing matches.

    :param licenses_spec: the text to rewrite
    :param regexp: a compiled pattern whose group 1 is the text to be replaced
    :param make_replacement: a function from a match object to the replacement text
    :param max_rewrites: the number of rewrites after which the process is judged to be looping
    :return: a pair (text, converged), where converged is False if max_rewrites was used up
    """
    for _ in range(max_rewrites):
        m = regexp.search(licenses_spec)
        if not m:
            return licenses_spec, True
        # str.replace rewrites every occurrence of the matched text, not just the first one.
        licenses_spec = licenses_spec.replace(m.group(1), make_replacement(m))
    return licenses_spec, False


def simplify_license_versions(licenses_spec: str, *, for_package_name) -> str:
    """
    Rewrites version qualifiers in a license spec into a compact suffix form.

    Examples:
        'GPL-2 | GPL-3'     => 'GPL-2-or-3'
        'GPL (>= 3)'        => 'GPL-3+'
        'LGPL-2.1-or-later' => 'LGPL-2.1+'

    If a rewrite appears to be looping, a warning is issued and the ORIGINAL spec is returned,
    so that callers never receive a half-rewritten value.

    :param licenses_spec: the license spec text as reported for a package
    :param for_package_name: the package the spec belongs to (used only in the warning message)
    :return: the simplified spec
    """
    m = _GPL_VERSION_CHOICE.match(licenses_spec)
    if m:
        version_a, version_b = m.groups()
        return f"GPL-{version_a}-or-{version_b}"

    original_licenses_spec = licenses_spec
    ignorable(original_licenses_spec)  # sometimes useful for debugging

    def parenthetical_replacement(match):
        # ' (>= 3)' or ' (version 3 or later)' => '-3+', while ' (= 3)' => '-3'
        _matched, greater, version_spec, greater2 = match.groups()
        return f"-{version_spec}{'+' if (greater or greater2) else ''}"

    def or_later_replacement(match):
        ignored(match)
        return '+'

    rewrites = [
        (_PARENTHETICAL_VERSION_CONSTRAINT, parenthetical_replacement),
        (_POSTFIX_OR_LATER_PATTERN, or_later_replacement),
    ]
    for regexp, make_replacement in rewrites:
        licenses_spec, converged = _rewrite_license_spec_until_stable(licenses_spec, regexp, make_replacement)
        if not converged:  # It'd be surprising if there were even ten of these to convert.
            warnings.warn(f"Transforming {for_package_name} {licenses_spec!r} seemed to be looping."
                          f" Please report this as a bug.")
            return original_licenses_spec  # return the unmodified

    if LicenseOptions.DEBUG and licenses_spec != original_licenses_spec:
        PRINT(f"Rewriting {original_licenses_spec!r} as {licenses_spec!r}.")
    return licenses_spec
204
+
205
+
206
def extract_boolean_terms(boolean_expression: str, for_package_name: str) -> List[str]:
    """
    Returns the sorted, non-empty license terms mentioned in a boolean license expression.

    Only the set of licenses mentioned matters here, not the algebra that combines them.
    (There are no NOTs to worry about.) So (FOO AND BAR) and (FOO OR BAR) both come back
    as ['BAR', 'FOO']. A consequence is that something like (MIT OR GPL-3.0) passes later
    checking only if both MIT and GPL-3.0 are allowed.

    A term mentioning a license file ('file LICENSE') is made package-specific by prefixing it
    with 'Custom: <package name> '.

    :param boolean_expression: the license expression text
    :param for_package_name: the package the expression belongs to
    :return: the list of terms, sorted, with blanks dropped
    """
    # These are applied strictly in order. The package-specific rewrite is last so that
    # the inserted package name is not itself subject to the separator rewrites.
    substitutions = (
        ('(', ''),
        (')', ''),
        (' AND ', ','),
        (' and ', ','),
        (' & ', ','),
        (' OR ', ','),
        (' or ', ','),
        ('|', ','),
        (';', ','),
        (' + ', ','),
        ('file ', f'Custom: {for_package_name} file '),
    )
    revised_boolean_expression = boolean_expression
    for old_text, new_text in substitutions:
        revised_boolean_expression = revised_boolean_expression.replace(old_text, new_text)

    stripped_terms = (term.strip() for term in revised_boolean_expression.split(','))
    terms = sorted(term for term in stripped_terms if term)

    if LicenseOptions.DEBUG and revised_boolean_expression != boolean_expression:
        PRINT(f"Rewriting {boolean_expression!r} as {terms!r}.")
    return terms
230
+
231
+
232
+ @LicenseFrameworkRegistry.register_framework(name='javascript')
121
233
  class JavascriptLicenseFramework(LicenseFramework):
122
234
 
123
235
  @classmethod
124
- def implicated_licenses(cls, *, licenses_spec: str):
125
- # We only care which licenses were mentioned, not what algebra is used on them.
126
- # (Thankfully there are no NOTs, and that's probably not by accident, since that would be too big a set.)
127
- # So for us, either (FOO AND BAR) or (FOO OR BAR) is the same because we want to treat it as "FOO,BAR".
128
- # If all of those licenses match, all is good. That _does_ mean some things like (MIT OR GPL-3.0) will
129
- # have trouble passing unless both MIT and GPL-3.0 are allowed.
130
- licenses = sorted(map(lambda x: x.strip(),
131
- (licenses_spec
132
- .replace('(', '')
133
- .replace(')', '')
134
- .replace(' AND ', ',')
135
- .replace(' OR ', ',')
136
- ).split(',')))
236
+ def implicated_licenses(cls, *, package_name, licenses_spec: str) -> List[str]:
237
+ ignored(package_name)
238
+ licenses_spec = simplify_license_versions(licenses_spec, for_package_name=package_name)
239
+ licenses = extract_boolean_terms(licenses_spec, for_package_name=package_name)
137
240
  return licenses
138
241
 
242
# Group 1 captures a trailing '@' followed by a digit or dot and then any run of non-'@' characters,
# or the empty string when the name has no such version suffix.
VERSION_PATTERN = re.compile(r'^.+?([@][0-9.][^@]*|)$')

@classmethod
def strip_version(cls, raw_name):
    """
    Returns raw_name without a trailing '@<version>' part, e.g., '@foo/bar@3.7' => '@foo/bar'.

    A name with no recognizable version suffix is returned unchanged.
    """
    found = cls.VERSION_PATTERN.match(raw_name)
    version_suffix = found.group(1) if found else ''
    if not version_suffix:
        return raw_name
    # remove_suffix takes the suffix first and the text to trim second.
    return remove_suffix(version_suffix, raw_name)
253
+
139
254
  @classmethod
140
255
  def get_dependencies(cls):
141
256
  output = subprocess.check_output(['npx', 'license-checker', '--summary', '--json'],
@@ -147,24 +262,20 @@ class JavascriptLicenseFramework(LicenseFramework):
147
262
  # e.g., this happens if there's no javascript in the repo
148
263
  raise Exception("No javascript license data was found.")
149
264
  result = []
150
- for name, record in records.items():
151
- licenses_spec = record.get(_LICENSES)
152
- if '(' in licenses_spec:
153
- licenses = cls.implicated_licenses(licenses_spec=licenses_spec)
154
- PRINT(f"Rewriting {licenses_spec!r} as {licenses!r}")
155
- elif licenses_spec:
156
- licenses = [licenses_spec]
157
- else:
158
- licenses = []
265
+ for raw_name, record in records.items():
266
+ name = cls.strip_version(raw_name)
267
+ raw_licenses_spec = record.get(_LICENSES)
268
+ licenses = cls.implicated_licenses(licenses_spec=raw_licenses_spec, package_name=name)
159
269
  entry = {
160
- _NAME: name.lstrip('@').split('@')[0], # e.g., @foo/bar@3.7
161
- _LICENSES: licenses # TODO: could parse this better.
270
+ _NAME: name,
271
+ _LICENSES: licenses,
272
+ _FRAMEWORK: 'javascript'
162
273
  }
163
274
  result.append(entry)
164
275
  return result
165
276
 
166
277
 
167
- @LicenseFrameworkRegistry.register(name='python')
278
+ @LicenseFrameworkRegistry.register_framework(name='python')
168
279
  class PythonLicenseFramework(LicenseFramework):
169
280
 
170
281
  @classmethod
@@ -184,15 +295,107 @@ class PythonLicenseFramework(LicenseFramework):
184
295
  entry = {
185
296
  _NAME: license_name,
186
297
  _LICENSES: licenses,
187
- _LANGUAGE: 'python',
298
+ _FRAMEWORK: 'python',
188
299
  }
189
300
  result.append(entry)
190
301
  return sorted(result, key=lambda x: x.get(_NAME).lower())
191
302
 
192
303
 
193
- class LicenseFileParser:
304
@LicenseFrameworkRegistry.register_framework(name='conda')
class CondaLicenseFramework(LicenseFramework):
    """
    License framework that reads package metadata from the conda-meta directory
    under LicenseOptions.CONDA_PREFIX.
    """

    @classmethod
    def get_dependencies(cls):
        """
        Returns one entry per conda-meta/*.json file found under the configured prefix.

        Each entry is a dictionary with the package name, its list of license terms,
        and the framework name 'conda'. A package whose metadata has no (or an empty)
        'license' value is reported with the single license term "MISSING".
        The result is sorted by package name.
        """
        prefix = LicenseOptions.CONDA_PREFIX
        result = []
        for file in glob.glob(os.path.join(prefix, "conda-meta/*.json")):
            data = json_file_contents(file)
            package_name = data['name']
            # The fallback guarantees a non-empty spec, so no separate "no license" case is needed.
            package_license = data.get('license') or "MISSING"
            simplified_package_license_spec = simplify_license_versions(package_license,
                                                                        for_package_name=package_name)
            package_licenses = extract_boolean_terms(simplified_package_license_spec,
                                                     for_package_name=package_name)
            result.append({
                _NAME: package_name,
                _LICENSES: package_licenses,
                _FRAMEWORK: 'conda',
            })
        # glob makes no promise about ordering, so sort for reproducible output.
        result.sort(key=lambda x: x[_NAME])
        return result
337
+
338
+
339
@LicenseFrameworkRegistry.register_framework(name='r')
class RLicenseFramework(LicenseFramework):
    """
    License framework that asks an R interpreter for its installed packages and their licenses.
    """

    # A license spec of this form is reported as R_LANGUAGE_LICENSE_NAME rather than being parsed.
    R_PART_SPEC = re.compile("^Part of R [0-9.]+$")
    R_LANGUAGE_LICENSE_NAME = 'R-language-license'

    @classmethod
    def implicated_licenses(cls, *, package_name, licenses_spec: str) -> List[str]:
        """
        Returns the list of license terms implied by the license spec of an R package.

        :param package_name: the package the spec belongs to
        :param licenses_spec: the license text reported for that package
        :return: [R_LANGUAGE_LICENSE_NAME] for a 'Part of R <version>' spec, and otherwise
            the terms extracted from the spec after its version qualifiers are simplified
        """
        if cls.R_PART_SPEC.match(licenses_spec):
            return [cls.R_LANGUAGE_LICENSE_NAME]
        licenses_spec = simplify_license_versions(licenses_spec, for_package_name=package_name)
        return extract_boolean_terms(licenses_spec, for_package_name=package_name)

    @classmethod
    def get_dependencies(cls):
        """
        Returns one entry per installed R package, sorted case-insensitively by package name.

        Each entry is a dictionary with the package name, its list of license terms,
        and the framework name 'r'. A row that cannot be processed is not fatal:
        it is counted (and, if LicenseOptions.VERBOSE, its error is printed),
        and a warning giving the count is issued at the end.
        """
        # NOTE: Although the R Language itself is released under the GPL, our belief is that it is
        #       still possible to write programs in R that are not GPL, even programs that use commercial licenses.
        #       So we do ordinary license checking here, same as in other frameworks.
        #       For notes on this, see the R FAQ.
        #       Ref: https://cran.r-project.org/doc/FAQ/R-FAQ.html#Can-I-use-R-for-commercial-purposes_003f

        _PACKAGE = "Package"
        _LICENSE = "License"

        found_problems = 0

        output_bytes = subprocess.check_output(
            ['r', '--no-echo', '-q', '-e',
             f'write.csv(installed.packages()[,c("{_PACKAGE}", "{_LICENSE}")])'],
            # Anything written to stderr is discarded. Only stdout is parsed.
            stderr=subprocess.DEVNULL)
        output = output_bytes.decode('utf-8')
        result = []
        for row_number, row in enumerate(csv.reader(io.StringIO(output))):  # [ignore, package, license]
            if row_number == 0 and row == ["", _PACKAGE, _LICENSE]:  # we expect headers
                continue
            try:
                package_name = row[1]
                licenses_spec = row[2]
                licenses = cls.implicated_licenses(package_name=package_name, licenses_spec=licenses_spec)
                result.append({
                    _NAME: package_name,
                    _LICENSES: licenses,
                    _FRAMEWORK: 'r',
                })
            except Exception as e:  # deliberately broad: one bad row should not abort the whole scan
                found_problems += 1
                if LicenseOptions.VERBOSE:
                    PRINT(get_error_message(e))
        if found_problems > 0:
            warnings.warn(there_are(found_problems, kind="problem", show=False, punctuate=True, tense='past'))
        return sorted(result, key=lambda x: x.get(_NAME).lower())
194
396
 
195
- VERBOSE = False
397
+
398
+ class LicenseFileParser:
196
399
 
197
400
  SEPARATORS = '-.,'
198
401
  SEPARATORS_AND_WHITESPACE = SEPARATORS + ' \t'
@@ -230,8 +433,6 @@ class LicenseFileParser:
230
433
  lines = []
231
434
  for i, line in enumerate(fp):
232
435
  line = line.strip(' \t\n\r')
233
- if cls.VERBOSE: # pragma: no cover - this is just for debugging
234
- PRINT(str(i).rjust(3), line)
235
436
  m = cls.COPYRIGHT_LINE.match(line) if line[:1].isupper() else None
236
437
  if not m:
237
438
  lines.append(line)
@@ -316,14 +517,12 @@ class LicenseChecker:
316
517
  Note that if you don't like these license names, which are admittedly non-standard and do not seem to use
317
518
  SPDX naming conventions, you can customize the get_dependencies method to return a different
318
519
  list, one of the form
319
- [{"name": "libname", "license_classifier": ["license1", "license2", ...], "language": "python"}]
520
+ [{"name": "libname", "license_classifier": ["license1", "license2", ...], "framework": "python"}]
320
521
  by whatever means you like and using whatever names you like.
321
522
  """
322
523
 
323
524
  # Set this to True in subclasses if you want your organization's policy to be that you see
324
525
  # some visible proof of which licenses were checked.
325
- VERBOSE = True
326
-
327
526
  LICENSE_TITLE = None
328
527
  COPYRIGHT_OWNER = None
329
528
  LICENSE_FRAMEWORKS = None
@@ -378,6 +577,22 @@ class LicenseChecker:
378
577
  check_license_title=license_title or cls.LICENSE_TITLE,
379
578
  analysis=analysis)
380
579
 
580
# Cache of compiled regular expressions, keyed by str() of the list of choices they were built from.
CHOICE_REGEXPS = {}

@classmethod
def _make_regexp_for_choices(cls, choices):
    """
    Builds one case-insensitive regular expression that accepts exactly the given choices.

    :param choices: a sequence whose elements are either strings, which are matched literally,
        or compiled patterns, whose source text is used (the flags of such a pattern are not carried over)
    :return: a compiled pattern matching any single choice in its entirety.
        If there are no choices, only the empty string matches.
    """
    # Each choice is wrapped in a non-capturing group so that a choice whose own pattern has
    # top-level alternation (e.g., 'GPL-2|GPL-3') stays anchored at BOTH ends for EVERY alternative.
    alternatives = [
        '^(?:' + (re.escape(choice) if isinstance(choice, str) else choice.pattern) + ')$'
        for choice in choices
    ]
    inner_pattern = '|'.join(alternatives) or "^$"
    return re.compile(f"({inner_pattern})", re.IGNORECASE)
587
+
588
@classmethod
def _find_regexp_for_choices(cls, choices):
    """
    Returns the regular expression for the given choices, compiling it only on first use.

    Results are remembered in CHOICE_REGEXPS under the string form of choices,
    so a later call with choices that print the same gets the same object back.
    """
    cache = cls.CHOICE_REGEXPS
    cache_key = str(choices)
    cached = cache.get(cache_key)
    if cached:
        return cached
    compiled = cls._make_regexp_for_choices(choices)
    cache[cache_key] = compiled
    return compiled
595
+
381
596
  @classmethod
382
597
  def analyze_license_dependencies_for_framework(cls, *,
383
598
  analysis: LicenseAnalysis,
@@ -385,7 +600,7 @@ class LicenseChecker:
385
600
  acceptable: Optional[List[str]] = None,
386
601
  exceptions: Optional[Dict[str, str]] = None,
387
602
  ) -> None:
388
- acceptable = (acceptable or []) + (cls.ALLOWED or [])
603
+ acceptability_regexp = cls._find_regexp_for_choices((acceptable or []) + (cls.ALLOWED or []))
389
604
  exceptions = dict(cls.EXCEPTIONS or {}, **(exceptions or {}))
390
605
 
391
606
  try:
@@ -415,7 +630,7 @@ class LicenseChecker:
415
630
  by_special_exception = False
416
631
  for license_name in license_names:
417
632
  special_exceptions = exceptions.get(license_name, [])
418
- if license_name in acceptable:
633
+ if acceptability_regexp.match(license_name): # license_name in acceptable:
419
634
  pass
420
635
  elif name in special_exceptions:
421
636
  by_special_exception = True
@@ -430,7 +645,7 @@ class LicenseChecker:
430
645
  _LICENSES: license_names,
431
646
  _STATUS: status
432
647
  })
433
- if cls.VERBOSE: # pragma: no cover - this is just for debugging
648
+ if LicenseOptions.VERBOSE: # pragma: no cover - this is just for debugging
434
649
  PRINT(f"Checked {framework.NAME} {name}:"
435
650
  f" {'; '.join(license_names) if license_names else '---'} ({status})")
436
651
 
@@ -459,7 +674,7 @@ class LicenseChecker:
459
674
  def show_unacceptable_licenses(cls, *, analysis: LicenseAnalysis) -> LicenseAnalysis:
460
675
  if analysis.unacceptable:
461
676
  PRINT(there_are(analysis.unacceptable, kind="unacceptable license", show=False, punctuation_mark=':'))
462
- for license, names in analysis.unacceptable.items():
677
+ for license, names in sorted(analysis.unacceptable.items()):
463
678
  PRINT(f" {license}: {', '.join(names)}")
464
679
  return analysis
465
680
 
@@ -499,6 +714,30 @@ class LicenseChecker:
499
714
  raise LicenseAcceptabilityCheckFailure(unacceptable_licenses=analysis.unacceptable)
500
715
 
501
716
 
717
class LicenseCheckerRegistry:
    """
    A lookup table from checker names (such as 'park-lab-common') to LicenseChecker classes.
    """

    # Maps each registered checker name to its class.
    REGISTRY: Dict[str, Type[LicenseChecker]] = {}

    @classmethod
    def register_checker(cls, name: str):
        """
        Returns a class decorator that records the decorated class under the given name.

        The decorated class itself is returned unchanged by the decorator.
        Registering under a name that is already in use replaces the earlier entry.
        """
        def _register(license_checker_class: Type[LicenseChecker]):
            cls.REGISTRY[name] = license_checker_class
            return license_checker_class
        return _register

    @classmethod
    def lookup_checker(cls, name: str) -> Type[LicenseChecker]:
        """
        Returns the checker class registered under the given name.

        :raises InvalidParameterError: if nothing is registered under that name.
            The error is given the list of all registered names as its options.
        """
        result: Optional[Type[LicenseChecker]] = cls.REGISTRY.get(name)
        if result is not None:
            return result
        raise InvalidParameterError(parameter='checker_name', value=name,
                                    options=cls.all_checker_names())

    @classmethod
    def all_checker_names(cls):
        """
        Returns a list of the names of all registered checkers, in registration order.
        """
        return list(cls.REGISTRY)
739
+
740
+
502
741
  class LicenseCheckFailure(Exception):
503
742
 
504
743
  DEFAULT_MESSAGE = "License check failure."
@@ -523,16 +762,13 @@ class LicenseAcceptabilityCheckFailure(LicenseCheckFailure):
523
762
  super().__init__(message=message)
524
763
 
525
764
 
526
- class C4InfrastructureLicenseChecker(LicenseChecker):
765
+ @LicenseCheckerRegistry.register_checker('park-lab-common')
766
+ class ParkLabCommonLicenseChecker(LicenseChecker):
527
767
  """
528
- This set of values is useful to us in Park Lab where these tools were developed.
529
- If you're at some other organization, we recommend you make a class that has values
530
- suitable to your own organizational needs.
768
+ Minimal checker common to all tech from Park Lab.
531
769
  """
532
770
 
533
771
  COPYRIGHT_OWNER = "President and Fellows of Harvard College"
534
- LICENSE_TITLE = "(The )?MIT License"
535
- LICENSE_FRAMEWORKS = ['python', 'javascript']
536
772
 
537
773
  ALLOWED = [
538
774
 
@@ -548,16 +784,39 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
548
784
  'AFL-2.1',
549
785
 
550
786
  # Linking = Permissive, Private Use = Yes
787
+ # Apache licenses before version 2.0 are controversial, but we here construe an unmarked naming to imply
788
+ # any version, and hence v2.
551
789
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
552
790
  'Apache Software License',
553
791
  'Apache-Style',
554
- 'Apache-2.0',
792
+ pattern("Apache([- ]2([.]0)?)?([- ]Licen[cs]e)?([- ]with[- ]LLVM[- ]exception)?"),
793
+ # 'Apache-2.0',
794
+
795
+ # Artistic License 1.0 was confusing to people, so its status as permissive is in general uncertain,
796
+ # however the issue seems to revolve around point 8 (relating to whether or not perl is deliberately
797
+ # exposed). That isn't in play for our uses, so we don't flag it here.
798
+ # Artistic license 2.0 is a permissive license.
799
+ # Ref: https://en.wikipedia.org/wiki/Artistic_License
800
+ 'Artistic-1.0-Perl',
801
+ pattern('Artistic[- ]2([.]0)?'),
802
+
803
+ # According to Wikipedia, the Boost is considered permissive and BSD-like.
804
+ # Refs:
805
+ # *
806
+ # * https://en.wikipedia.org/wiki/Boost_(C%2B%2B_libraries)#License
807
+ pattern('(BSL|Boost(([- ]Software)?[- ]License)?)([- ]1([.]0)?)?'),
555
808
 
556
809
  # Linking = Permissive, Private Use = Yes
557
810
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
558
- 'BSD License',
559
- 'BSD-2-Clause',
560
- 'BSD-3-Clause',
811
+ pattern('((modified[- ])?[234][- ]Clause[- ])?BSD([- ][234][- ]Clause)?( Licen[cs]e)?'),
812
+ # 'BSD License',
813
+ # 'BSD-2-Clause',
814
+ # 'BSD-3-Clause',
815
+ # 'BSD 3-Clause',
816
+
817
+ # BZIP2 is a permissive license
818
+ # Ref: https://github.com/asimonov-im/bzip2/blob/master/LICENSE
819
+ pattern('bzip2(-1[.0-9]*)'),
561
820
 
562
821
  # Linking = Public Domain, Private Use = Public Domain
563
822
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
@@ -570,6 +829,10 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
570
829
  'CC-BY-3.0',
571
830
  'CC-BY-4.0',
572
831
 
832
+ # The curl license is a permissive license.
833
+ # Ref: https://curl.se/docs/copyright.html
834
+ 'curl',
835
+
573
836
  # Linking = Permissive, Private Use = ?
574
837
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
575
838
  'CDDL',
@@ -583,9 +846,32 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
583
846
  'Eclipse Public License',
584
847
  'EPL-2.0',
585
848
 
849
+ # The FSF Unlimited License (FSFUL) seems to be a completely permissive license.
850
+ # Refs:
851
+ # * https://spdx.org/licenses/FSFUL.html
852
+ # * https://fedoraproject.org/wiki/Licensing/FSF_Unlimited_License
853
+ 'FSF Unlimited License',
854
+ 'FSFUL',
855
+
856
+ # The FreeType license is a permissive license.
857
+ # Ref: LicenseRef-FreeType
858
+ pattern('(Licen[cs]eRef-)?(FTL|FreeType( Licen[cs]e)?)'),
859
+
586
860
  # Linking = Yes, Cat = Permissive Software Licenses
587
861
  # Ref: https://en.wikipedia.org/wiki/Historical_Permission_Notice_and_Disclaimer
588
862
  'Historical Permission Notice and Disclaimer (HPND)',
863
+ 'HPND',
864
+ pattern('(Licen[cs]eRef-)?PIL'),
865
+ # The Pillow or Python Image Library is an HPND license, which is a simple permissive license:
866
+ # Refs:
867
+ # * https://github.com/python-pillow/Pillow/blob/main/LICENSE
868
+ # * https://www.fsf.org/blogs/licensing/historical-permission-notice-and-disclaimer-added-to-license-list
869
+
870
+ # The IJG license, used by Independent JPEG Group (IJG) is a custom permissive license.
871
+ # Refs:
872
+ # * https://en.wikipedia.org/wiki/Libjpeg
873
+ # * https://github.com/libjpeg-turbo/libjpeg-turbo/blob/main/LICENSE.md
874
+ 'IJG',
589
875
 
590
876
  # Linking = Permissive, Private Use = Permissive
591
877
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
@@ -610,10 +896,11 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
610
896
  'OFL-1.1',
611
897
 
612
898
  # Ref: https://en.wikipedia.org/wiki/Public_domain
613
- 'Public Domain',
899
+ pattern('(Licen[cs]eRef-)?Public[- ]Domain([- ]dedic[t]?ation)?'), # "dedictation" is a typo in docutils
614
900
 
615
901
  # Linking = Permissive, Private Use = Permissive
616
902
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
903
+ pattern('(Licen[cs]eRef-)?PSF-2([.][.0-9]*)'),
617
904
  'Python Software Foundation License',
618
905
  'Python-2.0',
619
906
 
@@ -621,11 +908,32 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
621
908
  # Ref: https://en.wikipedia.org/wiki/Pylons_project
622
909
  'Repoze Public License',
623
910
 
911
+ # The TCL or Tcl/Tk licenses are permissive licenses.
912
+ # Ref: https://www.tcl.tk/software/tcltk/license.html
913
+ # The one used by the tktable library has a 'bourbon' clause that doesn't add compliance requirements
914
+ # Ref: https://github.com/wjoye/tktable/blob/master/license.txt
915
+ pattern('Tcl([/]tk)?'),
916
+
917
+ # The Ubuntu Font Licence is mostly permissive. It contains some restrictions if you are going to modify the
918
+ # fonts that require you to change the name to avoid confusion. But for our purposes, we're assuming that's
919
+ # not done, and so we're not flagging it.
920
+ pattern('Ubuntu Font Licen[cs]e Version( 1([.]0)?)?'),
921
+
624
922
  # Linking = Permissive/Public domain, Private Use = Permissive/Public domain
625
923
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
626
924
  'The Unlicense (Unlicense)',
627
925
  'Unlicense',
628
926
 
927
+ # Various licenses seem to call themselves or be summed up as unlimited.
928
+ # So far we know of none that are not highly permissive.
929
+ # * boot and KernSmooth are reported by R as being 'Unlimited'
930
+ # Refs:
931
+ # * https://cran.r-project.org/web/packages/KernSmooth/index.html
932
+ # (https://github.com/cran/KernSmooth/blob/master/LICENCE.note)
933
+ # * https://cran.r-project.org/package=boot
934
+ # (https://github.com/cran/boot/blob/master/DESCRIPTION)
935
+ 'Unlimited',
936
+
629
937
  # Linking = Permissive, Private Use = ?
630
938
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
631
939
  'W3C License',
@@ -646,6 +954,109 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
646
954
  'Zope Public License',
647
955
  ]
648
956
 
957
+ EXCEPTIONS = {
958
+
959
+ # The Bioconductor zlibbioc license is a permissive license.
960
+ # Ref: https://github.com/Bioconductor/zlibbioc/blob/devel/LICENSE
961
+ 'Custom: bioconductor-zlibbioc file LICENSE': [
962
+ 'bioconductor-zlibbioc'
963
+ ],
964
+
965
+ # The Bioconductor rsamtools license is an MIT license
966
+ # Ref: https://bioconductor.org/packages/release/bioc/licenses/Rsamtools/LICENSE
967
+ 'Custom: bioconductor-rsamtools file LICENSE': [
968
+ 'bioconductor-rsamtools'
969
+ ],
970
+
971
+ # DFSG = Debian Free Software Guidelines
972
+ # Ref: https://en.wikipedia.org/wiki/Debian_Free_Software_Guidelines
973
+ # Used as an apparent modifier to other licenses, to say they are approved per Debian.
974
+ # For example in this case, pytest-timeout has license: DFSG approved, MIT License,
975
+ # but is really just an MIT License that someone has checked is DFSG approved.
976
+ 'DFSG approved': [
977
+ 'pytest-timeout', # MIT Licensed
978
+ ],
979
+
980
+ 'FOSS': [
981
+ # The r-stringi library is a conda library that implements a stringi (pronounced "stringy") library for R.
982
+ # The Conda source feed is: https://github.com/conda-forge/r-stringi-feedstock
983
+ # This page explains that the home source is https://stringi.gagolewski.com/ but that's a doc page.
984
+ # The doc page says:
985
+ # > stringi’s source code is hosted on GitHub.
986
+ # > It is distributed under the open source BSD-3-clause license.
987
+ # The source code has a license that begins with a BSD-3-clause license and includes numerous others,
988
+ # but they all appear to be permissive.
989
+ # Ref: https://github.com/gagolews/stringi/blob/master/LICENSE
990
+ 'stringi', 'r-stringi',
991
+ ],
992
+
993
+ # Linking = With Restrictions, Private Use = Yes
994
+ # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
995
+ 'GNU Lesser General Public License v2 or later (LGPLv2+)': [
996
+ 'chardet' # used at runtime during server operation (ingestion), but not modified or distributed
997
+ ],
998
+
999
+ # Linking = With Restrictions, Private Use = Yes
1000
+ # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
1001
+ 'GNU Lesser General Public License v3 or later (LGPLv3+)': [
1002
+ # used only privately in testing, not used in server code, not modified, not distributed
1003
+ 'pytest-redis',
1004
+ # required by pytest-redis (used only where it's used)
1005
+ 'mirakuru',
1006
+ ],
1007
+
1008
+ 'GNU General Public License (GPL)': [
1009
+ 'docutils', # Used only privately as a separate documentation-generation task for ReadTheDocs
1010
+ ],
1011
+
1012
+ 'MIT/X11 Derivative': [
1013
+ # The license used by libxkbcommon is complicated and involves numerous included licenses,
1014
+ # but all are permissive.
1015
+ # Ref: https://github.com/xkbcommon/libxkbcommon/blob/master/LICENSE
1016
+ 'libxkbcommon',
1017
+ ],
1018
+
1019
+ 'None': [
1020
+ # It's not obvious why Conda shows this license as 'None'.
1021
+ # In fact, though, BSD 3-Clause "New" or "Revised" License
1022
+ # Ref: https://github.com/AnacondaRecipes/_libgcc_mutex-feedstock/blob/master/LICENSE.txt
1023
+ '_libgcc_mutex',
1024
+ ],
1025
+
1026
+ 'PostgreSQL': [
1027
+ # The libpq library is actually licensed with a permissive BSD 3-Clause "New" or "Revised" License
1028
+ # Ref: https://github.com/lpsmith/postgresql-libpq/blob/master/LICENSE
1029
+ 'libpq',
1030
+ ],
1031
+
1032
+ 'UCSD': [
1033
+ # It isn't obvious why these show up with a UCSD license in Conda.
1034
+ # The actual sources say it should be a 2-clause BSD license:
1035
+ # Refs:
1036
+ # * https://github.com/AlexandrovLab/SigProfilerMatrixGenerator/blob/master/LICENSE
1037
+ # * https://github.com/AlexandrovLab/SigProfilerPlotting/blob/master/LICENSE
1038
+ 'sigprofilermatrixgenerator',
1039
+ 'sigprofilerplotting',
1040
+ ],
1041
+
1042
+ 'X11': [
1043
+ # The ncurses library has a VERY complicated history, BUT seems consistently permissive
1044
+ # and the most recent version seems to be essentially the MIT license.
1045
+ # Refs:
1046
+ # * https://en.wikipedia.org/wiki/Ncurses#License
1047
+ # * https://invisible-island.net/ncurses/ncurses-license.html
1048
+ 'ncurses'
1049
+ ],
1050
+
1051
+ 'zlib-acknowledgement': [
1052
+ # It isn't clear why libpng shows up with this license name, but the license for libpng
1053
+ # is a permissive license.
1054
+ # Ref: https://github.com/glennrp/libpng/blob/libpng16/LICENSE
1055
+ 'libpng',
1056
+ ],
1057
+
1058
+ }
1059
+
649
1060
  EXPECTED_MISSING_LICENSES = [
650
1061
 
651
1062
  # This is a name we use for our C4 portals. And it isn't published.
@@ -726,7 +1137,7 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
726
1137
  'responses',
727
1138
 
728
1139
  # This seems to get flagged sometimes, but is not the pypi snovault library, it's what our dcicsnovault
729
- # calls itself internally.. In any case, it's under MIT license and OK.
1140
+ # calls itself internally. In any case, it's under MIT license and OK.
730
1141
  # Ref: https://github.com/4dn-dcic/snovault/blob/master/LICENSE.txt
731
1142
  'snovault',
732
1143
 
@@ -757,141 +1168,242 @@ class C4InfrastructureLicenseChecker(LicenseChecker):
757
1168
 
758
1169
  ]
759
1170
 
760
- EXCEPTIONS = {
761
1171
 
762
- 'BSD*': [
763
- # Although modified to insert the author name into the license text itself,
764
- # the license for these libraries are essentially BSD-3-Clause.
765
- 'formatio',
766
- 'samsam',
767
-
768
- # There are some slightly different versions of what appear to be BSD licenses here,
769
- # but clearly the license is permissive.
770
- # Ref: https://www.npmjs.com/package/mutation-observer?activeTab=readme
771
- 'mutation-observer',
772
- ],
1172
+ @LicenseCheckerRegistry.register_checker('park-lab-pipeline')
1173
+ class ParkLabPipelineLicenseChecker(ParkLabCommonLicenseChecker):
1174
+ """
1175
+ Minimal checker common to pipelines from Park Lab.
1176
+ """
773
1177
 
774
- 'Custom: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global': [
775
- # The use of this URL appears to be a syntax error in the definition of entries-ponyfill
776
- # In fact this seems to be covered by a CC0-1.0 license.
777
- # Ref: https://unpkg.com/browse/object.entries-ponyfill@1.0.1/LICENSE
778
- 'object.entries-ponyfill',
779
- ],
1178
+ LICENSE_FRAMEWORKS = ['python', 'conda', 'r']
780
1179
 
781
- 'Custom: https://github.com/saikocat/colorbrewer.': [
782
- # The use of this URL appears to be a syntax error in the definition of cartocolor
783
- # In fact, this seems to be covered by a CC-BY-3.0 license.
784
- # Ref: https://www.npmjs.com/package/cartocolor?activeTab=readme
785
- 'cartocolor',
786
- ],
787
1180
 
788
- 'Custom: https://travis-ci.org/component/emitter.png': [
789
- # The use of this png appears to be a syntax error in the definition of emitter-component.
790
- # In fact, emitter-component uses an MIT License
791
- # Ref: https://www.npmjs.com/package/emitter-component
792
- # Ref: https://github.com/component/emitter/blob/master/LICENSE
793
- 'emitter-component',
794
- ],
1181
+ @LicenseCheckerRegistry.register_checker('park-lab-gpl-pipeline')
1182
+ class ParkLabGplPipelineLicenseChecker(ParkLabCommonLicenseChecker):
1183
+ """
1184
+ Minimal checker common to GPL pipelines from Park Lab.
1185
+ """
795
1186
 
796
- # The 'turfs-jsts' repository (https://github.com/DenisCarriere/turf-jsts/blob/master/README.md)
797
- # seems to lack a license, but appears to be forked from the jsts library that uses
798
- # the Eclipse Public License 1.0 and Eclipse Distribution License 1.0, so probably a permissive
799
- # license is intended.
800
- 'Custom: https://travis-ci.org/DenisCarriere/turf-jsts.svg': [
801
- 'turf-jsts'
802
- ],
803
-
804
- # DFSG = Debian Free Software Guidelines
805
- # Ref: https://en.wikipedia.org/wiki/Debian_Free_Software_Guidelines
806
- # Used as an apparent modifier to other licenses, to say they are approved per Debian.
807
- # For example in this case, pytest-timeout has license: DFSG approved, MIT License,
808
- # but is really just an MIT License that someone has checked is DFSG approved.
809
- 'DFSG approved': [
810
- 'pytest-timeout', # MIT Licensed
811
- ],
1187
+ ALLOWED = ParkLabPipelineLicenseChecker.ALLOWED + [
812
1188
 
813
1189
  # Linking = With Restrictions, Private Use = Yes
814
1190
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
815
- 'GNU Lesser General Public License v2 or later (LGPLv2+)': [
816
- 'chardet' # used at runtime during server operation (ingestion), but not modified or distributed
817
- ],
818
-
819
- # Linking = With Restrictions, Private Use = Yes
1191
+ # The "exceptions", if present, indicate waivers to source delivery requirements.
1192
+ # Ref: https://spdx.org/licenses/LGPL-3.0-linking-exception.html
1193
+ pattern('GNU Lesser General Public License v2( or later)?( [(]LGPL[v]?[23][+]?[)])?'),
1194
+ # 'GNU Lesser General Public License v2 or later (LGPLv2+)',
1195
+ # 'GNU Lesser General Public License v3 or later (LGPLv3+)',
1196
+ # 'LGPLv2', 'LGPL-v2', 'LGPL-v2.0', 'LGPL-2', 'LGPL-2.0',
1197
+ # 'LGPLv2+', 'LGPL-v2+', 'LGPL-v2.0+', 'LGPL-2+', 'LGPL-2.0+',
1198
+ # 'LGPLv3', 'LGPL-v3', 'LGPL-v3.0', 'LGPL-3', 'LGPL-3.0',
1199
+ # 'LGPLv3+', 'LGPL-v3+', 'LGPL-v3.0+', 'LGPL-3+', 'LGPL-3.0+',
1200
+ pattern('LGPL[v-]?[.0-9]*([+]|-only)?([- ]with[- ]exceptions)?'),
1201
+
1202
+ # Uncertain whether this is LGPL 2 or 3, but in any case we think weak copyleft should be OK
1203
+ # for pipeline or server use as long as we're not distributing sources.
1204
+ 'LGPL',
1205
+ 'GNU Library or Lesser General Public License (LGPL)',
1206
+
1207
+ # GPL
1208
+ # * library exception operates like LGPL
1209
+ # * classpath exception is a linking exception related to Oracle
1210
+ # Refs:
1211
+ # * https://www.gnu.org/licenses/old-licenses/gpl-1.0.en.html
1212
+ # * https://spdx.org/licenses/GPL-2.0-with-GCC-exception.html
1213
+ # * https://spdx.org/licenses/GPL-3.0-with-GCC-exception.html
1214
+ pattern('(GNU General Public License|GPL)[ ]?[v-]?[123]([.]0)?([+]|[- ]only)?'
1215
+ '([- ]with[- ]GCC(([- ]runtime)?[- ]library)?[- ]exception([- ][.0-9]*)?)?'
1216
+ '([- ]with[- ]Classpath[- ]exception([- ][.0-9]+)?)?'),
1217
+
1218
+ # Linking = "GPLv3 compatible only", Private Use = Yes
820
1219
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
821
- 'GNU Lesser General Public License v3 or later (LGPLv3+)': [
822
- 'pytest-redis', # used only privately in testing, not used in server code, not modified, not distributed
823
- 'mirakuru', # required by pytest-redis (used only where it's used)
824
- ],
1220
+ 'GPL-2-or-3', # we sometimes generate this token
1221
+ # 'GPLv2+', 'GPL-v2+', 'GPL-v2.0+', 'GPL-2+', 'GPL-2.0+',
1222
+ # 'GPLv3', 'GPL-v3', 'GPL-v3.0', 'GPL-3', 'GPL-3.0',
1223
+ # 'GPLv3+', 'GPL-v3+', 'GPL-v3.0+', 'GPL-3+', 'GPL-3.0+',
1224
+ # 'GPLv3-only', 'GPL-3-only', 'GPL-v3-only', 'GPL-3.0-only', 'GPL-v3.0-only',
825
1225
 
826
- 'GNU General Public License (GPL)': [
827
- 'docutils', # Used only privately as a separate documentation-generation task for ReadTheDocs
828
- ],
1226
+ # Uncertain whether this is GPL 2 or 3, but we'll assume that means we can use either.
1227
+ # And version 3 is our preferred interpretation.
1228
+ 'GNU General Public License',
1229
+ 'GPL',
829
1230
 
830
- # Linking = With Restrictions, Private Use = Yes
831
- # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
832
- # 'GNU Lesser General Public License v3 or later (LGPLv3+)',
1231
+ RLicenseFramework.R_LANGUAGE_LICENSE_NAME
833
1232
 
834
- # Linking = With Restrictions, Private Use = Yes
835
- # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
836
- 'GNU Library or Lesser General Public License (LGPL)': [
837
- 'psycopg2', # Used at runtime during server operation, but not modified or distributed
838
- 'psycopg2-binary', # Used at runtime during server operation, but not modified or distributed
839
- 'chardet', # Potentially used downstream in loadxl to detect charset for text files
840
- 'pyzmq', # Used in post-deploy-perf-tests, not distributed, and not modified or distributed
841
- ],
1233
+ ]
842
1234
 
843
- 'GPL-2.0': [
844
- # The license file for the node-forge javascript library says:
845
- #
846
- # "You may use the Forge project under the terms of either the BSD License or the
847
- # GNU General Public License (GPL) Version 2."
848
- #
849
- # (We choose to use it under the BSD license.)
850
- # Ref: https://www.npmjs.com/package/node-forge?activeTab=code
851
- 'node-forge',
852
- ],
853
1235
 
854
- 'MIT*': [
1236
+ @LicenseCheckerRegistry.register_checker('park-lab-common-server')
1237
+ class ParkLabCommonServerLicenseChecker(ParkLabCommonLicenseChecker):
1238
+ """
1239
+ Checker for servers from Park Lab.
855
1240
 
856
- # This library uses a mix of licenses, but they (MIT, CC0) generally seem permissive.
857
- # (It also mentions that some tools for building/testing use other libraries.)
858
- # Ref: https://github.com/requirejs/domReady/blob/master/LICENSE
859
- 'domready',
1241
+ If you're at some other organization, we recommend you make a class that has values
1242
+ suitable to your own organizational needs.
1243
+ """
860
1244
 
861
- # This library is under 'COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1'
862
- # Ref: https://github.com/javaee/jsonp/blob/master/LICENSE.txt
863
- # About CDDL ...
864
- # Linking = Permissive, Private Use = ?
865
- # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
866
- 'jsonp',
1245
+ LICENSE_FRAMEWORKS = ['python', 'javascript']
867
1246
 
868
- # This library says pretty clearly it intends MIT license.
869
- # Ref: https://www.npmjs.com/package/component-indexof
870
- # Linking = Permissive, Private Use = Yes
1247
+ EXCEPTIONS = augment(
1248
+ ParkLabCommonLicenseChecker.EXCEPTIONS,
1249
+ by={
1250
+ 'BSD*': [
1251
+ # Although modified to insert the author name into the license text itself,
1252
+ # the licenses for these libraries are essentially BSD-3-Clause.
1253
+ 'formatio',
1254
+ 'samsam',
1255
+
1256
+ # There are some slightly different versions of what appear to be BSD licenses here,
1257
+ # but clearly the license is permissive.
1258
+ # Ref: https://www.npmjs.com/package/mutation-observer?activeTab=readme
1259
+ 'mutation-observer',
1260
+ ],
1261
+
1262
+ 'Custom: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global': [
1263
+ # The use of this URL appears to be a syntax error in the definition of entries-ponyfill
1264
+ # In fact this seems to be covered by a CC0-1.0 license.
1265
+ # Ref: https://unpkg.com/browse/object.entries-ponyfill@1.0.1/LICENSE
1266
+ 'object.entries-ponyfill',
1267
+ ],
1268
+
1269
+ 'Custom: https://github.com/saikocat/colorbrewer.': [
1270
+ # The use of this URL appears to be a syntax error in the definition of cartocolor
1271
+ # In fact, this seems to be covered by a CC-BY-3.0 license.
1272
+ # Ref: https://www.npmjs.com/package/cartocolor?activeTab=readme
1273
+ 'cartocolor',
1274
+ ],
1275
+
1276
+ 'Custom: https://travis-ci.org/component/emitter.png': [
1277
+ # The use of this png appears to be a syntax error in the definition of emitter-component.
1278
+ # In fact, emitter-component uses an MIT License
1279
+ # Ref: https://www.npmjs.com/package/emitter-component
1280
+ # Ref: https://github.com/component/emitter/blob/master/LICENSE
1281
+ 'emitter-component',
1282
+ ],
1283
+
1284
+ # The 'turf-jsts' repository (https://github.com/DenisCarriere/turf-jsts/blob/master/README.md)
1285
+ # seems to lack a license, but appears to be forked from the jsts library that uses
1286
+ # the Eclipse Public License 1.0 and Eclipse Distribution License 1.0, so probably a permissive
1287
+ # license is intended.
1288
+ 'Custom: https://travis-ci.org/DenisCarriere/turf-jsts.svg': [
1289
+ 'turf-jsts'
1290
+ ],
1291
+
1292
+ 'GNU General Public License (GPL)': [
1293
+ 'docutils', # Used only privately as a separate documentation-generation task for ReadTheDocs
1294
+ ],
1295
+
1296
+ # Linking = With Restrictions, Private Use = Yes
871
1297
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
872
- 'component-indexof',
1298
+ # 'GNU Lesser General Public License v3 or later (LGPLv3+)',
873
1299
 
874
- # These look like a pretty straight MIT license.
875
- # Linking = Permissive, Private Use = Yes
1300
+ # Linking = With Restrictions, Private Use = Yes
876
1301
  # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
877
- 'mixin', # LICENSE file at https://www.npmjs.com/package/mixin?activeTab=code
878
- 'stack-trace', # https://github.com/stacktracejs/stacktrace.js/blob/master/LICENSE
879
- 'typed-function', # LICENSE at https://www.npmjs.com/package/typed-function?activeTab=code
880
-
881
- ],
882
-
883
- 'UNLICENSED': [
884
- # The udn-browser library is our own and has been observed to sometimes show up in some contexts
885
- # as UNLICENSED, when really it's MIT.
886
- # Ref: https://github.com/dbmi-bgm/udn-browser/blob/main/LICENSE
887
- 'udn-browser',
888
- ],
1302
+ 'GNU Library or Lesser General Public License (LGPL)': [
1303
+ 'psycopg2', # Used at runtime during server operation, but not modified or distributed
1304
+ 'psycopg2-binary', # Used at runtime during server operation, but not modified or distributed
1305
+ 'chardet', # Potentially used downstream in loadxl to detect charset for text files
1306
+ 'pyzmq', # Used in post-deploy-perf-tests, not distributed, and not modified or distributed
1307
+ ],
1308
+
1309
+ 'GPL-2.0': [
1310
+ # The license file for the node-forge javascript library says:
1311
+ #
1312
+ # "You may use the Forge project under the terms of either the BSD License or the
1313
+ # GNU General Public License (GPL) Version 2."
1314
+ #
1315
+ # (We choose to use it under the BSD license.)
1316
+ # Ref: https://www.npmjs.com/package/node-forge?activeTab=code
1317
+ 'node-forge',
1318
+ ],
1319
+
1320
+ 'MIT*': [
1321
+
1322
+ # This library uses a mix of licenses, but they (MIT, CC0) generally seem permissive.
1323
+ # (It also mentions that some tools for building/testing use other libraries.)
1324
+ # Ref: https://github.com/requirejs/domReady/blob/master/LICENSE
1325
+ 'domready',
1326
+
1327
+ # This library is under 'COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.1'
1328
+ # Ref: https://github.com/javaee/jsonp/blob/master/LICENSE.txt
1329
+ # About CDDL ...
1330
+ # Linking = Permissive, Private Use = ?
1331
+ # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
1332
+ 'jsonp',
1333
+
1334
+ # This library says pretty clearly it intends MIT license.
1335
+ # Ref: https://www.npmjs.com/package/component-indexof
1336
+ # Linking = Permissive, Private Use = Yes
1337
+ # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
1338
+ 'component-indexof',
1339
+
1340
+ # These look like a pretty straight MIT license.
1341
+ # Linking = Permissive, Private Use = Yes
1342
+ # Ref: https://en.wikipedia.org/wiki/Comparison_of_free_and_open-source_software_licenses
1343
+ 'mixin', # LICENSE file at https://www.npmjs.com/package/mixin?activeTab=code
1344
+ 'stack-trace', # https://github.com/stacktracejs/stacktrace.js/blob/master/LICENSE
1345
+ 'typed-function', # LICENSE at https://www.npmjs.com/package/typed-function?activeTab=code
1346
+
1347
+ ],
1348
+
1349
+ 'UNLICENSED': [
1350
+ # The udn-browser library is our own and has been observed to sometimes show up in some contexts
1351
+ # as UNLICENSED, when really it's MIT.
1352
+ # Ref: https://github.com/dbmi-bgm/udn-browser/blob/main/LICENSE
1353
+ 'udn-browser',
1354
+ ],
1355
+ })
1356
+
1357
+
1358
+ @LicenseCheckerRegistry.register_checker('c4-infrastructure')
1359
+ class C4InfrastructureLicenseChecker(ParkLabCommonServerLicenseChecker):
1360
+ """
1361
+ Checker for C4 infrastructure (Fourfront, CGAP, SMaHT) from Park Lab.
1362
+ """
889
1363
 
890
- }
1364
+ LICENSE_TITLE = "(The )?MIT License"
891
1365
 
892
1366
 
1367
+ @LicenseCheckerRegistry.register_checker('c4-python-infrastructure')
893
1368
  class C4PythonInfrastructureLicenseChecker(C4InfrastructureLicenseChecker):
894
1369
  """
895
- For situations like dcicutils and dcicsnovault where there's no Javascript, this will test just Python.
1370
+ Checker for C4 python library infrastructure (Fourfront, CGAP, SMaHT) from Park Lab.
896
1371
  """
897
1372
  LICENSE_FRAMEWORKS = ['python']
1373
+
1374
+
1375
+ @LicenseCheckerRegistry.register_checker('scan2-pipeline')
1376
+ class Scan2PipelineLicenseChecker(ParkLabGplPipelineLicenseChecker):
1377
+ """
1378
+ Checker for SCAN2 library from Park Lab.
1379
+ """
1380
+
1381
+ EXCEPTIONS = augment(
1382
+ ParkLabGplPipelineLicenseChecker.EXCEPTIONS,
1383
+ by={
1384
+ 'Custom: Matrix file LICENCE': [
1385
+ # The custom information in https://cran.r-project.org/web/packages/Matrix/LICENCE
1386
+ # says there are potential extra restrictions beyond a simple GPL license
1387
+ # if SuiteSparse is used, but it is not requested explicitly by Scan2, and we're
1388
+ # trusting that any other libraries used by Scan2 would have investigated this.
1389
+ # So, effectively, we think the Matrix library for this situation operates the
1390
+ # same as if it were just GPL-3 licensed, and we are fine with that.
1391
+ 'Matrix'
1392
+ ],
1393
+
1394
+ "MISSING": [
1395
+ # mysql-common and mysql-libs are GPL, but since they are delivered by conda
1396
+ # and not distributed as part of the Scan2 distribution, they should be OK.
1397
+ # Ref: https://redresscompliance.com/mysql-license-a-complete-guide-to-licensing/#:~:text=commercial%20use # noQA
1398
+ 'mysql-common',
1399
+ 'mysql-libs',
1400
+
1401
+ # This is our own library
1402
+ 'r-scan2', 'scan2',
1403
+ ]
1404
+ }
1405
+ )
1406
+
1407
+ EXPECTED_MISSING_LICENSES = ParkLabGplPipelineLicenseChecker.EXPECTED_MISSING_LICENSES + [
1408
+
1409
+ ]
dcicutils/misc_utils.py CHANGED
@@ -7,10 +7,11 @@ import datetime
7
7
  import functools
8
8
  import hashlib
9
9
  import inspect
10
- import math
11
10
  import io
12
- import os
11
+ import json
13
12
  import logging
13
+ import math
14
+ import os
14
15
  import pytz
15
16
  import re
16
17
  import rfc3986.validators
@@ -20,8 +21,8 @@ import warnings
20
21
  import webtest # importing the library makes it easier to mock testing
21
22
 
22
23
  from collections import defaultdict
23
- from dateutil.parser import parse as dateutil_parse
24
24
  from datetime import datetime as datetime_type
25
+ from dateutil.parser import parse as dateutil_parse
25
26
  from typing import Optional
26
27
 
27
28
 
@@ -1310,6 +1311,11 @@ def file_contents(filename, binary=False):
1310
1311
  return fp.read()
1311
1312
 
1312
1313
 
1314
+ def json_file_contents(filename):
1315
+ with io.open(filename, 'r') as fp:
1316
+ return json.load(fp)
1317
+
1318
+
1313
1319
  def camel_case_to_snake_case(s, separator='_'):
1314
1320
  """
1315
1321
  Converts CamelCase to snake_case.
@@ -0,0 +1,77 @@
1
+ import argparse
2
+
3
+ from dcicutils.command_utils import script_catch_errors, ScriptFailure
4
+ from dcicutils.lang_utils import there_are, conjoined_list
5
+ from dcicutils.license_utils import LicenseOptions, LicenseCheckerRegistry, LicenseChecker, LicenseCheckFailure
6
+ from dcicutils.misc_utils import PRINT, get_error_message
7
+ from typing import Optional, Type
8
+
9
+
10
+ EPILOG = __doc__
11
+
12
+
13
+ ALL_CHECKER_NAMES = LicenseCheckerRegistry.all_checker_names()
14
+ NEWLINE = '\n'
15
+
16
+
17
+ def main():
18
+
19
+ parser = argparse.ArgumentParser(
20
+ description="Runs a license checker",
21
+ epilog=EPILOG,
22
+ formatter_class=argparse.RawDescriptionHelpFormatter
23
+ )
24
+ parser.add_argument("name", type=str, default=None, nargs='?',
25
+ help=f"The name of a checker to run. "
26
+ + there_are(ALL_CHECKER_NAMES, kind='available checker',
27
+ show=True, joiner=conjoined_list, punctuate=True))
28
+ parser.add_argument("--brief", '-b', default=False, action="store_true",
29
+ help="Requests brief output.")
30
+ parser.add_argument("--debug", '-q', default=False, action="store_true",
31
+ help="Requests additional debugging output.")
32
+ parser.add_argument("--conda-prefix", "--conda_prefix", "--cp", default=LicenseOptions.CONDA_PREFIX,
33
+ help=(f"Overrides the CONDA_PREFIX (default {LicenseOptions.CONDA_PREFIX!r})."))
34
+
35
+ args = parser.parse_args()
36
+
37
+ with script_catch_errors():
38
+ run_license_checker(name=args.name, verbose=not args.brief, debug=args.debug, conda_prefix=args.conda_prefix)
39
+
40
+
41
+ def show_help_for_choosing_license_checker():
42
+ PRINT("")
43
+ PRINT(there_are(ALL_CHECKER_NAMES, kind='available checker', show=False, punctuation_mark=':'))
44
+ PRINT("")
45
+ wid = max(len(x) for x in ALL_CHECKER_NAMES) + 1
46
+ for checker_name in ALL_CHECKER_NAMES:
47
+ checker_class = LicenseCheckerRegistry.lookup_checker(checker_name)
48
+ checker_doc = (checker_class.__doc__ or '<missing doc>').strip(' \t\n\r')
49
+ PRINT(f"{(checker_name + ':').ljust(wid)} {checker_doc.split(NEWLINE)[0]}")
50
+ PRINT("")
51
+ PRINT("=" * 42, "NOTES & DISCLAIMERS", "=" * 42)
52
+ PRINT("Park Lab is a research laboratory in the Department of Biomedical Informatics at Harvard Medical School.")
53
+ PRINT("Park Lab checkers are intended for internal use and may not be suitable for other purposes.")
54
+ PRINT("External organizations must make their own independent choices about license acceptability.")
55
+ PRINT("Such choices can be integrated with this tool as follows:")
56
+ PRINT(" * Import LicenseChecker and LicenseCheckerRegistry from dcicutils.license_utils.")
57
+ PRINT(" * Make your own subclass of LicenseChecker, specifying a doc string and appropriate constraints.")
58
+ PRINT(" * Decorate your subclass with an appropriate call to LicenseCheckerRegistry.register_checker.")
59
+ PRINT("")
60
+
61
+
62
+ def run_license_checker(name: Optional[str],
63
+ verbose=LicenseOptions.VERBOSE,
64
+ debug=LicenseOptions.DEBUG,
65
+ conda_prefix=LicenseOptions.CONDA_PREFIX):
66
+ if name is None:
67
+ show_help_for_choosing_license_checker()
68
+ else:
69
+ try:
70
+ checker_class: Type[LicenseChecker] = LicenseCheckerRegistry.lookup_checker(name)
71
+ except Exception as e:
72
+ raise ScriptFailure(str(e))
73
+ try:
74
+ with LicenseOptions.selected_options(verbose=verbose, debug=debug, conda_prefix=conda_prefix):
75
+ checker_class.validate()
76
+ except LicenseCheckFailure as e:
77
+ raise ScriptFailure(get_error_message(e))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 7.12.0
3
+ Version: 7.12.0.1b4
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -30,9 +30,9 @@ dcicutils/jh_utils.py,sha256=Gpsxb9XEzggF_-Eq3ukjKvTnuyb9V1SCSUXkXsES4Kg,11502
30
30
  dcicutils/kibana/dashboards.json,sha256=wHMB_mpJ8OaYhRRgvpZuihaB2lmSF64ADt_8hkBWgQg,16225
31
31
  dcicutils/kibana/readme.md,sha256=3KmHF9FH6A6xwYsNxRFLw27q0XzHYnjZOlYUnn3VkQQ,2164
32
32
  dcicutils/lang_utils.py,sha256=cVLRUGyYeSPJAq3z_RJjA6miajHrXoi6baxF8HzHmLc,27797
33
- dcicutils/license_utils.py,sha256=OhOfTXFivvb6Y3tiJAb1b9Is-OTpBfZjC18M-RvqBqk,40456
33
+ dcicutils/license_utils.py,sha256=AJ7AwUb7YsXwrrncuS5bLwz3B0YYOHAqKwgf1JPLj6w,63798
34
34
  dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
35
- dcicutils/misc_utils.py,sha256=sXJ7ChrMyXZooaCnUtLxWHOmFIqxrxJKGJ6Ayd5i2Gk,91032
35
+ dcicutils/misc_utils.py,sha256=d30xwLFW41FwZVDAEYulWwyZUcLEzmD-pxsMlKH3mF4,91148
36
36
  dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
37
37
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
38
38
  dcicutils/project_utils.py,sha256=qPdCaFmWUVBJw4rw342iUytwdQC0P-XKpK4mhyIulMM,31250
@@ -42,14 +42,15 @@ dcicutils/redis_tools.py,sha256=rqGtnVUjNjTlCdL1EMKuEhEMAgRJMiXZJkrKuX255QA,6509
42
42
  dcicutils/redis_utils.py,sha256=VJ-7g8pOZqR1ZCtdcjKz3-6as2DMUcs1b1zG6wSprH4,6462
43
43
  dcicutils/s3_utils.py,sha256=a9eU3Flh8Asc8xPWLGP16A6UQ_FVwhoFQNqm4ZYgSQ4,28852
44
44
  dcicutils/scripts/publish_to_pypi.py,sha256=qmWyjrg5bNQNfpNKFTZdyMXpRmrECnRV9VmNQddUPQA,13576
45
+ dcicutils/scripts/run_license_checker.py,sha256=psv3c1Of7h4V4yvh93iyI2F3JFPzdzQakKdq97JThRw,3653
45
46
  dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19745
46
47
  dcicutils/snapshot_utils.py,sha256=ymP7PXH6-yEiXAt75w0ldQFciGNqWBClNxC5gfX2FnY,22961
47
48
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
48
49
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
49
50
  dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
50
51
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
51
- dcicutils-7.12.0.dist-info/LICENSE.txt,sha256=t0_-jIjqxNnymZoNJe-OltRIuuF8qfhN0ATlHyrUJPk,1102
52
- dcicutils-7.12.0.dist-info/METADATA,sha256=isoR9wb6CJyIef4ZYG4opKLj5s5c9LG6rihPw7jJP1Q,2999
53
- dcicutils-7.12.0.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
54
- dcicutils-7.12.0.dist-info/entry_points.txt,sha256=Z3vezbXsTpTIY4N2F33c5e-WDVQxgz_Vsk1oV_JBN7A,146
55
- dcicutils-7.12.0.dist-info/RECORD,,
52
+ dcicutils-7.12.0.1b4.dist-info/LICENSE.txt,sha256=t0_-jIjqxNnymZoNJe-OltRIuuF8qfhN0ATlHyrUJPk,1102
53
+ dcicutils-7.12.0.1b4.dist-info/METADATA,sha256=WBf2fEjWMlOtieSs4nq5zbiThbHAYzQliH7gmJ_0L04,3003
54
+ dcicutils-7.12.0.1b4.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
55
+ dcicutils-7.12.0.1b4.dist-info/entry_points.txt,sha256=8wbw5csMIgBXhkwfgsgJeuFcoUc0WsucUxmOyml2aoA,209
56
+ dcicutils-7.12.0.1b4.dist-info/RECORD,,
@@ -1,4 +1,5 @@
1
1
  [console_scripts]
2
2
  publish-to-pypi=dcicutils.scripts.publish_to_pypi:main
3
+ run-license-checker=dcicutils.scripts.run_license_checker:main
3
4
  show-contributors=dcicutils.contribution_scripts:show_contributors_main
4
5