RubyGems - licensee - Versions diffs - 9.18.0 → 10.0.0 - Mend

licensee 9.18.0 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (139) hide show

checksums.yaml +4 -4
data/LICENSE.md +1 -1
data/bin/licensee +2 -0
data/lib/licensee/commands/detect.rb +9 -89
data/lib/licensee/commands/detect_helpers.rb +125 -0
data/lib/licensee/commands/diff.rb +64 -35
data/lib/licensee/commands/license_path.rb +1 -0
data/lib/licensee/commands/version.rb +1 -0
data/lib/licensee/content_helper/constants.rb +111 -0
data/lib/licensee/content_helper/normalization_methods.rb +149 -0
data/lib/licensee/content_helper/similarity_methods.rb +63 -0
data/lib/licensee/content_helper.rb +42 -277
data/lib/licensee/hash_helper.rb +9 -7
data/lib/licensee/license/class_methods.rb +67 -0
data/lib/licensee/license/content_methods.rb +52 -0
data/lib/licensee/license/identity_methods.rb +117 -0
data/lib/licensee/license.rb +31 -208
data/lib/licensee/license_field.rb +9 -6
data/lib/licensee/license_meta.rb +4 -1
data/lib/licensee/license_rules.rb +5 -1
data/lib/licensee/matchers/cabal.rb +6 -2
data/lib/licensee/matchers/cargo.rb +1 -0
data/lib/licensee/matchers/copyright.rb +3 -1
data/lib/licensee/matchers/cran.rb +2 -1
data/lib/licensee/matchers/dice.rb +13 -2
data/lib/licensee/matchers/dist_zilla.rb +1 -0
data/lib/licensee/matchers/exact.rb +2 -0
data/lib/licensee/matchers/gemspec.rb +1 -8
data/lib/licensee/matchers/matcher.rb +5 -3
data/lib/licensee/matchers/npm_bower.rb +1 -0
data/lib/licensee/matchers/nuget.rb +1 -0
data/lib/licensee/matchers/package.rb +21 -5
data/lib/licensee/matchers/spdx.rb +1 -0
data/lib/licensee/matchers.rb +1 -0
data/lib/licensee/project_files/license_file.rb +28 -3
data/lib/licensee/project_files/package_manager_file.rb +1 -0
data/lib/licensee/project_files/project_file.rb +8 -5
data/lib/licensee/project_files/readme_file.rb +1 -0
data/lib/licensee/project_files.rb +1 -0
data/lib/licensee/projects/fs_project.rb +2 -0
data/lib/licensee/projects/git_project.rb +30 -4
data/lib/licensee/projects/github_project.rb +25 -5
data/lib/licensee/projects/project.rb +31 -34
data/lib/licensee/projects.rb +1 -0
data/lib/licensee/rule.rb +2 -0
data/lib/licensee/version.rb +1 -1
data/lib/licensee.rb +23 -2
data/spec/bin_spec.rb +8 -8
data/spec/fixture_spec.rb +18 -19
data/spec/fixtures/bsd-3-linebreak-owner/LICENSE +30 -0
data/spec/fixtures/bsd-3-multilinecopyright/LICENSE +27 -0
data/spec/fixtures/detect.json +3 -3
data/spec/fixtures/fixtures.yml +35 -11
data/spec/fixtures/license-hashes.json +4 -4
data/spec/fixtures/licenses-dir/LICENSES/MIT.txt +21 -0
data/spec/fixtures/licenses-dir-with-license-ref/LICENSES/LicenseRef-MIT.txt +21 -0
data/spec/fixtures/licenses-dir-with-multiple-license-files/LICENSES/MIT.txt +21 -0
data/spec/fixtures/licenses-dir-with-multiple-license-files/LICENSES/MPL-2.0.txt +362 -0
data/spec/fixtures/licenses-dir-with-top-level-license/LICENSE.md +195 -0
data/spec/fixtures/licenses-dir-with-top-level-license/LICENSES/MIT.txt +21 -0
data/spec/integration_spec.rb +247 -274
data/spec/licensee/commands/detect_spec.rb +94 -21
data/spec/licensee/commands/license_path_spec.rb +13 -9
data/spec/licensee/commands/version_spec.rb +12 -8
data/spec/licensee/content_helper_spec.rb +159 -111
data/spec/licensee/hash_helper_spec.rb +9 -10
data/spec/licensee/license_field_spec.rb +17 -22
data/spec/licensee/license_meta_spec.rb +29 -37
data/spec/licensee/license_rules_spec.rb +19 -19
data/spec/licensee/license_spec.rb +219 -264
data/spec/licensee/licensee_filesystem_spec.rb +40 -0
data/spec/licensee/matchers/cabal_matcher_spec.rb +67 -31
data/spec/licensee/matchers/cargo_matcher_spec.rb +7 -7
data/spec/licensee/matchers/copyright_matcher_spec.rb +21 -10
data/spec/licensee/matchers/cran_matcher_spec.rb +6 -6
data/spec/licensee/matchers/dice_matcher_spec.rb +47 -33
data/spec/licensee/matchers/dist_zilla_matcher_spec.rb +7 -7
data/spec/licensee/matchers/exact_matcher_spec.rb +4 -4
data/spec/licensee/matchers/gemspec_matcher_spec.rb +10 -10
data/spec/licensee/matchers/matcher_spec.rb +14 -4
data/spec/licensee/matchers/npm_bower_matcher_spec.rb +20 -12
data/spec/licensee/matchers/nu_get_matcher_spec.rb +12 -12
data/spec/licensee/matchers/package_matcher_spec.rb +40 -12
data/spec/licensee/matchers/reference_matcher_spec.rb +17 -13
data/spec/licensee/matchers/spdx_matcher_spec.rb +9 -9
data/spec/licensee/project_files/license_file_spec.rb +136 -72
data/spec/licensee/project_files/package_manager_file_spec.rb +3 -3
data/spec/licensee/project_files/project_file_spec.rb +29 -23
data/spec/licensee/project_files/readme_file_spec.rb +13 -13
data/spec/licensee/project_spec.rb +168 -123
data/spec/licensee/projects/git_hub_project_spec.rb +268 -26
data/spec/licensee/projects/git_project_spec.rb +23 -1
data/spec/licensee/projects/project_spec.rb +15 -0
data/spec/licensee/rule_spec.rb +19 -22
data/spec/licensee_spec.rb +23 -11
data/spec/spec_helper.rb +3 -1
data/spec/vendored_license_spec.rb +37 -60
data/vendor/choosealicense.com/_licenses/blueoak-1.0.0.txt +1 -1
data/vendor/choosealicense.com/_licenses/bsd-4-clause.txt +1 -1
data/vendor/choosealicense.com/_licenses/cern-ohl-p-2.0.txt +1 -1
data/vendor/choosealicense.com/_licenses/cern-ohl-s-2.0.txt +1 -1
data/vendor/choosealicense.com/_licenses/cern-ohl-w-2.0.txt +2 -2
data/vendor/choosealicense.com/_licenses/gpl-2.0.txt +1 -1
data/vendor/choosealicense.com/_licenses/gpl-3.0.txt +1 -1
data/vendor/choosealicense.com/_licenses/mit-0.txt +1 -1
data/vendor/choosealicense.com/_licenses/osl-3.0.txt +1 -1
data/vendor/choosealicense.com/_licenses/zlib.txt +1 -1
data/vendor/license-list-XML/src/0BSD.xml +1 -1
data/vendor/license-list-XML/src/AFL-3.0.xml +1 -1
data/vendor/license-list-XML/src/AGPL-3.0.xml +1 -1
data/vendor/license-list-XML/src/Apache-2.0.xml +1 -1
data/vendor/license-list-XML/src/Artistic-2.0.xml +1 -1
data/vendor/license-list-XML/src/BSD-2-Clause-Patent.xml +1 -1
data/vendor/license-list-XML/src/BSD-2-Clause.xml +1 -1
data/vendor/license-list-XML/src/BSD-3-Clause.xml +3 -3
data/vendor/license-list-XML/src/BSD-4-Clause.xml +3 -2
data/vendor/license-list-XML/src/BSL-1.0.xml +1 -1
data/vendor/license-list-XML/src/ECL-2.0.xml +1 -1
data/vendor/license-list-XML/src/EPL-1.0.xml +1 -1
data/vendor/license-list-XML/src/EPL-2.0.xml +3 -1
data/vendor/license-list-XML/src/EUPL-1.1.xml +1 -1
data/vendor/license-list-XML/src/EUPL-1.2.xml +1 -1
data/vendor/license-list-XML/src/GPL-2.0.xml +11 -6
data/vendor/license-list-XML/src/GPL-3.0.xml +1 -1
data/vendor/license-list-XML/src/ISC.xml +1 -1
data/vendor/license-list-XML/src/LGPL-2.1.xml +6 -3
data/vendor/license-list-XML/src/LGPL-3.0.xml +1 -1
data/vendor/license-list-XML/src/LPPL-1.3c.xml +2 -2
data/vendor/license-list-XML/src/MIT.xml +32 -14
data/vendor/license-list-XML/src/MPL-2.0.xml +3 -3
data/vendor/license-list-XML/src/MS-PL.xml +1 -1
data/vendor/license-list-XML/src/MS-RL.xml +1 -1
data/vendor/license-list-XML/src/NCSA.xml +1 -1
data/vendor/license-list-XML/src/OFL-1.1.xml +1 -1
data/vendor/license-list-XML/src/OSL-3.0.xml +1 -1
data/vendor/license-list-XML/src/PostgreSQL.xml +1 -1
data/vendor/license-list-XML/src/UPL-1.0.xml +1 -1
data/vendor/license-list-XML/src/Zlib.xml +1 -1
metadata +48 -30

data/lib/licensee/content_helper/similarity_methods.rb ADDED Viewed

@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+module Licensee
+  module ContentHelper
+    # Mixin providing wordset- and bigram-based similarity scoring.
+    #
+    # The including object must supply `wordset`, `bigrams`,
+    # `content_normalized` and `length_delta`.
+    module SimilarityMethods
+      # Given another license or project file, calculates the similarity
+      # as a percentage of words in common, minus a tiny penalty that
+      # increases with size difference between licenses so that false
+      # positives for long licenses are ruled out by this score alone.
+      #
+      # other - an object exposing `wordset` (and usable with `length_delta`).
+      #
+      # Returns a Float. Returns 0.0 when the denominator is zero, matching
+      # the guard in `bigram_similarity`.
+      def similarity(other)
+        overlap = (wordset_fieldless & other.wordset).size
+        denominator = similarity_denominator(other)
+        # A Float divided by zero yields NaN/Infinity instead of raising,
+        # so guard explicitly rather than return a meaningless score.
+        return 0.0 if denominator.zero?
+        (overlap * 200.0) / denominator
+      end
+      # Given another license or project file, calculates the Dice coefficient
+      # over bigrams (consecutive word pairs).  Unlike wordset similarity this
+      # is sensitive to word order, making it resistant to adversarial scrambling
+      # where all the correct words appear but in the wrong sequence.
+      #
+      # other - an object exposing `bigrams`.
+      #
+      # Returns a Float percentage; 0.0 when neither side has any bigrams.
+      def bigram_similarity(other)
+        my_bigrams = bigrams
+        other_bigrams = other.bigrams
+        total = my_bigrams.size + other_bigrams.size
+        return 0.0 if total.zero?
+        overlap = (my_bigrams & other_bigrams).size
+        (overlap * 200.0) / total
+      end
+      private
+      # This object's wordset with the substitutable field names removed.
+      def wordset_fieldless
+        @wordset_fieldless ||= wordset - fields_normalized_set
+      end
+      # Combined size of both wordsets (less the field count) plus a quarter
+      # of the variation-adjusted length delta, which is the size penalty.
+      def similarity_denominator(other)
+        total = wordset_fieldless.size + other.wordset.size - fields_normalized_set.size
+        total + (variation_adjusted_length_delta(other) / 4)
+      end
+      # Returns an array of strings of substitutable fields in normalized content
+      def fields_normalized
+        @fields_normalized ||= content_normalized.scan(LicenseField::FIELD_REGEX).flatten
+      end
+      # The same fields as a Set, for set arithmetic against the wordset.
+      def fields_normalized_set
+        @fields_normalized_set ||= fields_normalized.to_set
+      end
+      # Length delta against `other`, reduced to allow for expected variation.
+      # Never negative when the adjustment applies.
+      def variation_adjusted_length_delta(other)
+        delta = length_delta(other)
+        # The content helper mixin is used in different objects.
+        # Licenses have a more advanced SPDX alt. segment-based delta.
+        # Use that if it's present, otherwise, just return the simple delta.
+        return delta unless respond_to?(:spdx_alt_segments, true)
+        adjusted_delta = delta - ([fields_normalized.size, spdx_alt_segments].max * 5)
+        adjusted_delta.positive? ? adjusted_delta : 0
+      end
+    end
+  end
+end

data/lib/licensee/content_helper.rb CHANGED Viewed

@@ -1,112 +1,31 @@
 # frozen_string_literal: true
-require 'set'
 require 'digest'
+require_relative 'content_helper/constants'
+require_relative 'content_helper/normalization_methods'
+require_relative 'content_helper/similarity_methods'
 module Licensee
+  # Text normalization, hashing, wrapping, and similarity helpers for license content.
   module ContentHelper
-    DIGEST = Digest::SHA1
-    START_REGEX = /\A\s*/
-    END_OF_TERMS_REGEX = /^[\s#*_]*end of (the )?terms and conditions[\s#*_]*$/i
-    REGEXES = {
-      bom:                 /#{START_REGEX}\xEF\xBB\xBF/,
-      hrs:                 /^\s*[=\-*]{3,}\s*$/,
-      all_rights_reserved: /#{START_REGEX}all rights reserved\.?$/i,
-      whitespace:          /\s+/,
-      markdown_headings:   /^\s*#+/,
-      version:             /#{START_REGEX}version.*$/i,
-      span_markup:         /[_*~]+(.*?)[_*~]+/,
-      link_markup:         /\[(.+?)\]\(.+?\)/,
-      block_markup:        /^\s*>/,
-      border_markup:       /^[*-](.*?)[*-]$/,
-      comment_markup:      %r{^\s*?[/*]{1,2}},
-      url:                 %r{#{START_REGEX}https?://[^ ]+\n},
-      bullet:              /\n\n\s*(?:[*-]|\(?[\da-z]{1,2}[).])\s+/i,
-      developed_by:        /#{START_REGEX}developed by:.*?\n\n/im,
-      cc_dedication:       /The\s+text\s+of\s+the\s+Creative\s+Commons.*?Public\s+Domain\s+Dedication./im,
-      cc_wiki:             /wiki.creativecommons.org/i,
-      cc_legal_code:       /^\s*Creative Commons Legal Code\s*$/i,
-      cc0_info:            /For more information, please see\s*\S+zero\S+/im,
-      cc0_disclaimer:      /CREATIVE COMMONS CORPORATION.*?\n\n/im,
-      unlicense_info:      /For more information, please.*\S+unlicense\S+/im,
-      mit_optional:        /\(including the next paragraph\)/i
-    }.freeze
-    NORMALIZATIONS = {
-      lists:      { from: /^\s*(?:\d\.|[*-])(?: [*_]{0,2}\(?[\da-z]\)[*_]{0,2})?\s+([^\n])/, to: '- \1' },
-      https:      { from: /http:/, to: 'https:' },
-      ampersands: { from: '&', to: 'and' },
-      dashes:     { from: /(?<!^)([—–-]+)(?!$)/, to: '-' },
-      quote:      { from: /[`'"‘“’”]/, to: "'" },
-      hyphenated: { from: /(\w+)-\s*\n\s*(\w+)/, to: '\1-\2' }
-    }.freeze
-    # Legally equivalent words that schould be ignored for comparison
-    # See https://spdx.org/spdx-license-list/matching-guidelines
-    VARIETAL_WORDS = {
-      'acknowledgment'  => 'acknowledgement',
-      'analogue'        => 'analog',
-      'analyse'         => 'analyze',
-      'artefact'        => 'artifact',
-      'authorisation'   => 'authorization',
-      'authorised'      => 'authorized',
-      'calibre'         => 'caliber',
-      'cancelled'       => 'canceled',
-      'capitalisations' => 'capitalizations',
-      'catalogue'       => 'catalog',
-      'categorise'      => 'categorize',
-      'centre'          => 'center',
-      'emphasised'      => 'emphasized',
-      'favour'          => 'favor',
-      'favourite'       => 'favorite',
-      'fulfil'          => 'fulfill',
-      'fulfilment'      => 'fulfillment',
-      'initialise'      => 'initialize',
-      'judgment'        => 'judgement',
-      'labelling'       => 'labeling',
-      'labour'          => 'labor',
-      'licence'         => 'license',
-      'maximise'        => 'maximize',
-      'modelled'        => 'modeled',
-      'modelling'       => 'modeling',
-      'offence'         => 'offense',
-      'optimise'        => 'optimize',
-      'organisation'    => 'organization',
-      'organise'        => 'organize',
-      'practise'        => 'practice',
-      'programme'       => 'program',
-      'realise'         => 'realize',
-      'recognise'       => 'recognize',
-      'signalling'      => 'signaling',
-      'sub-license'     => 'sublicense',
-      'sub license'     => 'sublicense',
-      'utilisation'     => 'utilization',
-      'whilst'          => 'while',
-      'wilful'          => 'wilfull',
-      'non-commercial'  => 'noncommercial',
-      'per cent'        => 'percent',
-      'copyright owner' => 'copyright holder'
-    }.freeze
-    STRIP_METHODS = %i[
-      bom
-      cc_optional
-      cc0_optional
-      unlicense_optional
-      borders
-      title
-      version
-      url
-      copyright
-      title
-      block_markup
-      developed_by
-      end_of_terms
-      whitespace
-      mit_optional
-    ].freeze
+    include Constants
+    include NormalizationMethods
+    include SimilarityMethods
     # A set of each word in the license, without duplicates
     def wordset
-      @wordset ||= content_normalized&.scan(%r{(?:[\w/-](?:'s|(?<=s)')?)+})&.to_set
+      @wordset ||= words&.to_set
+    end
+    # Unique adjacent word pairs ("bigrams") drawn from the normalized content.
+    # Each pair preserves word order, so scores built on bigrams are not fooled
+    # by text that has the right words in a scrambled sequence (see GitHub
+    # issue #602). The set is empty when there are fewer than two words.
+    def bigrams
+      @bigrams ||= (words || []).each_cons(2).to_set { |first, second| "#{first} #{second}" }
+    end
     # Number of characters in the normalized content
@@ -121,52 +40,11 @@ module Licensee
       (length - other.length).abs
     end
-    # Given another license or project file, calculates the similarity
-    # as a percentage of words in common, minus a tiny penalty that
-    # increases with size difference between licenses so that false
-    # positives for long licnses are ruled out by this score alone.
-    def similarity(other)
-      overlap = (wordset_fieldless & other.wordset).size
-      total = wordset_fieldless.size + other.wordset.size -
-              fields_normalized_set.size
-      (overlap * 200.0) / (total + (variation_adjusted_length_delta(other) / 4))
-    end
     # SHA1 of the normalized content
     def content_hash
       @content_hash ||= DIGEST.hexdigest content_normalized
     end
-    # Content with the title and version removed
-    # The first time should normally be the attribution line
-    # Used to dry up `content_normalized` but we need the case sensitive
-    # content with attribution first to detect attribuion in LicenseFile
-    def content_without_title_and_version
-      @content_without_title_and_version ||= begin
-        @_content = nil
-        ops = %i[html hrs comments markdown_headings link_markup title version]
-        ops.each { |op| strip(op) }
-        _content
-      end
-    end
-    def content_normalized(wrap: nil)
-      @content_normalized ||= begin
-        @_content = content_without_title_and_version.downcase
-        (NORMALIZATIONS.keys + %i[spelling span_markup bullets]).each { |op| normalize(op) }
-        STRIP_METHODS.each { |op| strip(op) }
-        _content
-      end
-      if wrap.nil?
-        @content_normalized
-      else
-        Licensee::ContentHelper.wrap(@content_normalized, wrap)
-      end
-    end
     # Backwards-compatible constant lookup to avoid a breaking change
     def self.const_missing(const)
       key = const.to_s.downcase.gsub('_regex', '').to_sym
@@ -177,19 +55,26 @@ module Licensee
     def self.wrap(text, line_width = 80)
       return if text.nil?
+      text = normalize_for_wrapping(text)
+      wrapped = wrap_lines(text, line_width)
+      wrapped.strip
+    end
+    def self.normalize_for_wrapping(text)
       text = text.clone
       text.gsub!(REGEXES[:bullet]) { |m| "\n#{m}\n" }
-      text.gsub!(/([^\n])\n([^\n])/, '\1 \2')
+      text.gsub!(/([^\n])\n([^\n])/, '\\1 \\2')
+      text
+    end
-      text = text.split("\n").collect do |line|
-        if line =~ REGEXES[:hrs] || line.length <= line_width
-          line
-        else
-          line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
-        end
-      end * "\n"
+    def self.wrap_lines(text, line_width)
+      text.split("\n").map { |line| wrap_line(line, line_width) }.join("\n")
+    end
+    def self.wrap_line(line, line_width)
+      return line if line =~ REGEXES[:hrs] || line.length <= line_width
-      text.strip
+      line.gsub(/(.{1,#{line_width}})(\s+|$)/, "\\1\n").strip
     end
     def self.format_percent(float)
@@ -198,7 +83,7 @@ module Licensee
     def self.title_regex
       @title_regex ||= begin
-        licenses = Licensee::License.all(hidden: true, psuedo: false)
+        licenses = Licensee::License.all(hidden: true, pseudo: false)
         titles = licenses.map(&:title_regex)
         # Title regex must include the version to support matching within
@@ -216,134 +101,14 @@ module Licensee
     private
-    def _content
-      @_content ||= content.to_s.dup.strip
+    # Words of the normalized content, in document order (nil when there is
+    # no normalized content). Cached so wordset and bigrams reuse one scan.
+    def words
+      return @words if @words
+      @words = content_normalized&.scan(%r{(?:[\w/-](?:'s|(?<=s)')?)+})
+    end
-    def strip(regex_or_sym)
-      return unless _content
-      if regex_or_sym.is_a?(Symbol)
-        meth = "strip_#{regex_or_sym}"
-        return send(meth) if respond_to?(meth, true)
-        raise ArgumentError, "#{regex_or_sym} is an invalid regex reference" unless REGEXES[regex_or_sym]
-        regex_or_sym = REGEXES[regex_or_sym]
-      end
-      @_content = _content.gsub(regex_or_sym, ' ').squeeze(' ').strip
-    end
-    def strip_title
-      strip(ContentHelper.title_regex) while _content =~ ContentHelper.title_regex
-    end
-    def strip_borders
-      normalize(REGEXES[:border_markup], '\1')
-    end
-    def strip_comments
-      lines = _content.split("\n")
-      return if lines.count == 1
-      return unless lines.all? { |line| line =~ REGEXES[:comment_markup] }
-      strip(:comment_markup)
-    end
-    def strip_copyright
-      regex = Regexp.union(Matchers::Copyright::REGEX, REGEXES[:all_rights_reserved])
-      strip(regex) while _content =~ regex
-    end
-    def strip_cc0_optional
-      return unless _content.include? 'associating cc0'
-      strip(REGEXES[:cc_legal_code])
-      strip(REGEXES[:cc0_info])
-      strip(REGEXES[:cc0_disclaimer])
-    end
-    def strip_cc_optional
-      return unless _content.include? 'creative commons'
-      strip(REGEXES[:cc_dedication])
-      strip(REGEXES[:cc_wiki])
-    end
-    def strip_unlicense_optional
-      return unless _content.include? 'unlicense'
-      strip(REGEXES[:unlicense_info])
-    end
-    def strip_end_of_terms
-      body, _partition, _instructions = _content.partition(END_OF_TERMS_REGEX)
-      @_content = body
-    end
-    def normalize_span_markup
-      normalize(REGEXES[:span_markup], '\1')
-    end
-    def strip_link_markup
-      normalize(REGEXES[:link_markup], '\1')
-    end
-    def strip_html
-      return unless respond_to?(:filename) && filename
-      return unless /\.html?/i.match?(File.extname(filename))
-      require 'reverse_markdown'
-      @_content = ReverseMarkdown.convert(_content, unknown_tags: :bypass)
-    end
-    def normalize(from_or_key, to = nil)
-      operation = { from: from_or_key, to: to } if to
-      operation ||= NORMALIZATIONS[from_or_key]
-      if operation
-        @_content = _content.gsub operation[:from], operation[:to]
-      elsif respond_to?(:"normalize_#{from_or_key}", true)
-        send(:"normalize_#{from_or_key}")
-      else
-        raise ArgumentError, "#{from_or_key} is an invalid normalization"
-      end
-    end
-    def normalize_spelling
-      normalize(/\b#{Regexp.union(VARIETAL_WORDS.keys)}\b/, VARIETAL_WORDS)
-    end
-    def normalize_bullets
-      normalize(REGEXES[:bullet], "\n\n- ")
-      normalize(/\)\s+\(/, ')(')
-    end
-    def wordset_fieldless
-      @wordset_fieldless ||= wordset - fields_normalized_set
-    end
-    # Returns an array of strings of substitutable fields in normalized content
-    def fields_normalized
-      @fields_normalized ||=
-        content_normalized.scan(LicenseField::FIELD_REGEX).flatten
-    end
-    def fields_normalized_set
-      @fields_normalized_set ||= fields_normalized.to_set
-    end
-    def variation_adjusted_length_delta(other)
-      delta = length_delta(other)
-      # The content helper mixin is used in different objects
-      # Licenses have a more advanced SPDX alt. segement-based delta.
-      # Use that if it's present, otherwise, just return the simple delta.
-      return delta unless respond_to?(:spdx_alt_segments, true)
-      adjusted_delta = delta - ([fields_normalized.size, spdx_alt_segments].max * 5)
-      adjusted_delta.positive? ? adjusted_delta : 0
+    def _content
+      @_content ||= content.to_s.dup.strip
     end
   end
 end

data/lib/licensee/hash_helper.rb CHANGED Viewed

@@ -1,22 +1,24 @@
 # frozen_string_literal: true
 module Licensee
+  # Mixin that provides a `to_h` based on a class's `HASH_METHODS`.
   module HashHelper
+    # Builds a Hash with one entry per method named in the including class's
+    # HASH_METHODS. Each key is the method name with any `?` removed; each
+    # value is the method's return value passed through serialize_hash_value.
     def to_h
       hash = {}
       self.class::HASH_METHODS.each do |method|
         key = method.to_s.delete('?').to_sym
         value = public_send(method)
-        hash[key] = if value.is_a?(Array)
-                      value.map { |v| v.respond_to?(:to_h) ? v.to_h : v }
-                    elsif value.respond_to?(:to_h) && !value.nil?
-                      value.to_h
-                    else
-                      value
-                    end
+        hash[key] = serialize_hash_value(value)
       end
       hash
     end
+    # Converts one value for inclusion in `to_h`:
+    # - Arrays are mapped element-wise, calling to_h on elements that respond to it.
+    # - Any other non-nil value that responds to to_h is converted with to_h.
+    # - Everything else is returned unchanged.
+    # nil is excluded explicitly because nil itself responds to to_h.
+    # NOTE(review): nil elements inside an Array are not excluded and would
+    # become {} - confirm this is intended.
     def serialize_hash_value(value)
       return value.map { |v| v.respond_to?(:to_h) ? v.to_h : v } if value.is_a?(Array)
       return value.to_h if value.respond_to?(:to_h) && !value.nil?
       value
     end
   end
 end

data/lib/licensee/license/class_methods.rb ADDED Viewed

@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+module Licensee
+  class License
+    # Class-level license lookup, with results cached per options hash.
+    module ClassMethods
+      # Every license known to Licensee (sourced from choosealicense.com),
+      # filtered according to the given options.
+      #
+      # Options:
+      # - :hidden - boolean, return hidden licenses (default: false)
+      # - :featured - boolean, return only (non)featured licenses (default: all)
+      #
+      # Returns an Array of License objects. The candidates are sorted by key
+      # before the featured filter runs, and the result is cached per options.
+      def all(options = {})
+        cached = @all[options]
+        return cached if cached
+        filters = LicenseAllHelper.normalize_all_options(options, DEFAULT_OPTIONS)
+        candidates = licenses.dup
+        LicenseAllHelper.apply_all_filters!(candidates, filters)
+        candidates.sort_by!(&:key)
+        @all[options] = LicenseAllHelper.filter_featured(candidates, filters[:featured])
+      end
+      # Keys of all known licenses: the downcased basename of each vendored
+      # `.txt` file, followed by the entries of PSEUDO_LICENSES.
+      def keys
+        @keys ||= license_files.map { |file| ::File.basename(file, '.txt').downcase } + PSEUDO_LICENSES
+      end
+      # Looks up a license by key; the key is downcased before the lookup.
+      # Hidden licenses are included unless the options override :hidden.
+      def find(key, options = {})
+        lookup = keys_licenses({ hidden: true }.merge(options))
+        lookup[key.downcase]
+      end
+      alias [] find
+      alias find_by_key find
+      # Fuzzy-matches a license from its title or nickname, tolerating a
+      # leading "the " and a trailing " license" (case-insensitive).
+      def find_by_title(title)
+        License.all(hidden: true, pseudo: false).find do |license|
+          pattern = /\A(the )?#{license.title_regex}( license)?\z/i
+          title =~ pattern
+        end
+      end
+      # Absolute path of the vendored choosealicense.com license directory.
+      def license_dir
+        ::File.expand_path('../../../vendor/choosealicense.com/_licenses', __dir__)
+      end
+      # Paths of every `.txt` file in the license directory, cached.
+      def license_files
+        @license_files ||= Dir.glob("#{license_dir}/*.txt")
+      end
+      # Absolute path of the vendored SPDX license-list-XML source directory.
+      def spdx_dir
+        ::File.expand_path('../../../vendor/license-list-XML/src', __dir__)
+      end
+      private
+      # One License instance per known key, built once and cached.
+      def licenses
+        @licenses ||= keys.map { |license_key| new(license_key) }
+      end
+      # Hash of license key => License for the given options, cached per options.
+      def keys_licenses(options = {})
+        @keys_licenses[options] ||= all(options).to_h { |license| [license.key, license] }
+      end
+    end
+  end
+end

data/lib/licensee/license/content_methods.rb ADDED Viewed

@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+module Licensee
+  class License
+    # Instance methods for loading and working with license content.
+    module ContentMethods
+      # Path to vendored license file on disk
+      def path
+        @path ||= File.expand_path "#{@key}.txt", Licensee::License.license_dir
+      end
+      # The license body, i.e. the file contents with the leading `---`
+      # front matter block removed. nil when there is no raw content
+      # (pseudo-licenses have none).
+      def content
+        @content ||= parts[2] if parts && parts[2]
+      end
+      alias to_s content
+      alias text content
+      alias body content
+      # Substitutable fields found in the license body, as returned by
+      # LicenseField.from_content.
+      def fields
+        @fields ||= LicenseField.from_content(content)
+      end
+      # Returns a string with `[fields]` replaced by `{{{fields}}}`
+      # Does not mangle non-supported fields in the form of `[field]`
+      # Returns nil when the license has no content (e.g. pseudo-licenses)
+      # rather than raising NoMethodError on nil.
+      def content_for_mustache
+        @content_for_mustache ||= content&.gsub(LicenseField::FIELD_REGEX, '{{{\1}}}')
+      end
+      private
+      # Raw content of license file, including YAML front matter.
+      # Returns nil for pseudo-licenses. Raises Licensee::InvalidLicense when
+      # the file does not exist. The existence check and read happen once;
+      # later calls return the cached text.
+      def raw_content
+        return if pseudo_license?
+        @raw_content ||= begin
+          raise Licensee::InvalidLicense, "'#{key}' is not a valid license key" unless File.exist?(path)
+          File.read(path, encoding: 'utf-8')
+        end
+      end
+      # Match data as an array: [whole match, front matter block, body].
+      # nil when there is no raw content.
+      def parts
+        return unless raw_content
+        @parts ||= raw_content.match(/\A(---\n.*\n---\n+)?(.*)/m).to_a
+      end
+      # The front matter block, including its `---` delimiters. nil when the
+      # file has none or there is no raw content.
+      def yaml
+        @yaml ||= parts[1] if parts
+      end
+    end
+  end
+end
+end