spandx 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9bc9c449db0cbce37e841c358d608953e9d9cde996aec5b8c4304457bce934a4
4
- data.tar.gz: 28fe5ca88aa73cd47e1de6aa70bb49868a642896da10ddde3ecbcf6037fc612e
3
+ metadata.gz: a9c82d16a4a78e075ecebcd7308580b5e3ca44bcc274d3070ade1d546b9a38fa
4
+ data.tar.gz: 540a6a7f1d12f14c5d44feb15826b2f61ba8ee422d72e662328457b8df7260fc
5
5
  SHA512:
6
- metadata.gz: 33d9b54d4efa74cb7a76032c13ece5be12e11ef52c2dc8d088bb2f78837a1b92c1f85c2e7b01df17d3dae0566dad4db24758d29dae438751e466d95fb06952cb
7
- data.tar.gz: b9f35181f330a8782e314b1118b731f32f98844f51f01560b3fbf975daed4d63e13923082483fb8713fb38826e89abe0f3889e5f39d9695780a95536d132c4b9
6
+ metadata.gz: 1bb0a40e4723a2b3fc2b50a92d3df34df978753f5e14a76534758d9c946349475dfa18a123c1e0b3f1d89ac01fbbe9eecb520ca6842c122ba5af122d9466c639
7
+ data.tar.gz: faaedb6aaaf9ae486219ef396092c108ff359723a267291b5ef37b2bcf834195b5ace2578067598f97c71b68f78f4efb8ade261c3953184fd4a7be34eed9d7a9
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- Version 0.1.7
1
+ Version 0.2.0
2
2
 
3
3
  # Changelog
4
4
 
@@ -9,7 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
9
9
 
10
10
  ## [Unreleased]
11
11
  ### Added
12
- - nil
12
+ - Nil
13
+
14
+ ## [0.2.0] - 2020-01-28
15
+ ### Added
16
+ - Parse .NET `sln` files
17
+ - Add ability to choose Levenshtein algorithm
13
18
 
14
19
  ## [0.1.7] - 2020-01-28
15
20
  ### Added
@@ -47,7 +52,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
47
52
  ### Added
48
53
  - Provide ruby API to the latest SPDX catalogue.
49
54
 
50
- [Unreleased]: https://github.com/mokhan/spandx/compare/v0.1.7...HEAD
55
+ [Unreleased]: https://github.com/mokhan/spandx/compare/v0.2.0...HEAD
56
+ [0.2.0]: https://github.com/mokhan/spandx/compare/v0.1.7...v0.2.0
51
57
  [0.1.7]: https://github.com/mokhan/spandx/compare/v0.1.6...v0.1.7
52
58
  [0.1.6]: https://github.com/mokhan/spandx/compare/v0.1.5...v0.1.6
53
59
  [0.1.5]: https://github.com/mokhan/spandx/compare/v0.1.4...v0.1.5
data/lib/spandx/content.rb CHANGED
@@ -2,22 +2,33 @@
2
2
 
3
3
  module Spandx
4
4
  class Content
5
- attr_reader :tokens, :threshold
5
+ attr_reader :raw, :threshold
6
6
 
7
- def initialize(content, threshold: 89.0)
7
+ def initialize(raw, threshold: 89.0)
8
8
  @threshold = threshold
9
- @tokens = tokenize(canonicalize(content)).to_set
9
+ @raw = raw
10
10
  end
11
11
 
12
- def similar?(other)
13
- similarity_score(other) > threshold
12
+ def tokens
13
+ @tokens ||= tokenize(canonicalize(raw)).to_set
14
14
  end
15
15
 
16
- # https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Dice%27s_coefficient#Ruby
17
- def similarity_score(other)
18
- overlap = (tokens & other.tokens).size
19
- total = tokens.size + other.tokens.size
20
- 100.0 * (overlap * 2.0 / total)
16
+ def similar?(other, algorithm: :dice_coefficient)
17
+ case algorithm
18
+ when :dice_coefficient
19
+ similarity_score(other) > threshold
20
+ when :levenshtein
21
+ similarity_score(other) < threshold
22
+ end
23
+ end
24
+
25
+ def similarity_score(other, algorithm: :dice_coefficient)
26
+ case algorithm
27
+ when :dice_coefficient
28
+ dice_coefficient(other)
29
+ when :levenshtein
30
+ Text::Levenshtein.distance(raw, other.raw, 100)
31
+ end
21
32
  end
22
33
 
23
34
  private
@@ -33,5 +44,12 @@ module Spandx
33
44
  def blank?(content)
34
45
  content.nil? || content.chomp.strip.empty?
35
46
  end
47
+
48
+ # https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Dice%27s_coefficient#Ruby
49
+ def dice_coefficient(other)
50
+ overlap = (tokens & other.tokens).size
51
+ total = tokens.size + other.tokens.size
52
+ 100.0 * (overlap * 2.0 / total)
53
+ end
36
54
  end
37
55
  end
@@ -14,7 +14,8 @@ module Spandx
14
14
  end
15
15
 
16
16
  def read(file)
17
- IO.read(File.join(path, file))
17
+ full_path = File.join(path, file)
18
+ IO.read(full_path) if File.exist?(full_path)
18
19
  end
19
20
 
20
21
  private
@@ -9,41 +9,49 @@ module Spandx
9
9
  def initialize(http: Spandx.http, catalogue:)
10
10
  @http = http
11
11
  @catalogue = catalogue
12
+ @guess = Guess.new(catalogue)
12
13
  end
13
14
 
14
15
  def licenses_for(name, version)
15
16
  document = nuspec_for(name, version)
16
17
 
17
- exact_licenses_from(document) ||
18
+ extract_licenses_from(document) ||
18
19
  guess_licenses_from(document)
19
20
  end
20
21
 
21
22
  private
22
23
 
23
- attr_reader :http, :catalogue
24
+ attr_reader :http, :catalogue, :guess
24
25
 
25
26
  def nuspec_url_for(name, version)
26
27
  "https://api.nuget.org/v3-flatcontainer/#{name}/#{version}/#{name}.nuspec"
27
28
  end
28
29
 
29
30
  def nuspec_for(name, version)
30
- from_xml(http.get(nuspec_url_for(name, version)).body)
31
+ response = http.get(nuspec_url_for(name, version))
32
+ from_xml(response.body) if http.ok?(response)
31
33
  end
32
34
 
33
35
  def from_xml(xml)
34
36
  Nokogiri::XML(xml).tap(&:remove_namespaces!)
35
37
  end
36
38
 
37
- def exact_licenses_from(document)
39
+ def extract_licenses_from(document)
38
40
  licenses = document.search('//package/metadata/license')
39
41
  licenses.map(&:text) if licenses.any?
40
42
  end
41
43
 
42
44
  def guess_licenses_from(document)
43
- guess = Guess.new(catalogue)
44
45
  document
45
46
  .search('//package/metadata/licenseUrl')
46
- .map { |node| guess.license_for(http.get(node.text).body) }
47
+ .map { |node| guess_license_for(node.text) }
48
+ .compact
49
+ end
50
+
51
+ def guess_license_for(url)
52
+ response = http.get(url)
53
+
54
+ guess.license_for(response.body) if http.ok?(response)
47
55
  end
48
56
  end
49
57
  end
data/lib/spandx/guess.rb CHANGED
@@ -27,19 +27,35 @@ module Spandx
27
27
  @catalogue = catalogue
28
28
  end
29
29
 
30
- def license_for(raw_content)
30
+ def license_for(raw_content, algorithm: :dice_coefficient)
31
31
  content = Content.new(raw_content)
32
-
33
- max_score = nil
32
+ score = nil
34
33
  catalogue.each do |license|
35
34
  next if license.deprecated_license_id?
36
35
 
37
- percentage = content.similarity_score(license.content)
38
- if max_score.nil? || percentage > max_score.score
39
- max_score = Score.new(percentage, license)
40
- end
36
+ score = algorithm == :levenshtein ? levenshtein(content, license, score) : dice(content, license, score)
37
+ end
38
+ score&.item&.id
39
+ end
40
+
41
+ private
42
+
43
+ def levenshtein(target, other, score)
44
+ percentage = target.similarity_score(other.content, algorithm: :levenshtein)
45
+ if score.nil? || percentage < score.score
46
+ return Score.new(percentage, other)
41
47
  end
42
- max_score.item.id
48
+
49
+ score
50
+ end
51
+
52
+ def dice(target, other, score)
53
+ percentage = target.similarity_score(other.content, algorithm: :dice_coefficient)
54
+ if (percentage > 89.0) && (score.nil? || percentage > score.score)
55
+ return Score.new(percentage, other)
56
+ end
57
+
58
+ score
43
59
  end
44
60
  end
45
61
  end
@@ -61,7 +61,11 @@ module Spandx
61
61
  end
62
62
 
63
63
  def content
64
- @content ||= Content.new(Spandx.db.read("text/#{id}.txt"))
64
+ @content ||= Content.new(raw_content)
65
+ end
66
+
67
+ def raw_content
68
+ @raw_content ||= (Spandx.db.read("text/#{id}.txt") || '')
65
69
  end
66
70
 
67
71
  def <=>(other)
data/lib/spandx/parsers/sln.rb ADDED
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Spandx
4
+ module Parsers
5
+ class Sln < Base
6
+ def self.matches?(filename)
7
+ filename.match?(/.*\.sln/)
8
+ end
9
+
10
+ def parse(file_path)
11
+ project_paths_from(file_path).map do |path|
12
+ Parsers
13
+ .for(path, catalogue: catalogue)
14
+ .parse(path)
15
+ end.flatten
16
+ end
17
+
18
+ private
19
+
20
+ def project_paths_from(file_path)
21
+ IO.readlines(file_path).map do |line|
22
+ next unless project_line?(line)
23
+
24
+ path = project_path_from(line)
25
+ next unless path
26
+
27
+ path = File.join(File.dirname(file_path), path)
28
+ Pathname.new(path).cleanpath.to_path
29
+ end.compact
30
+ end
31
+
32
+ def project_line?(line)
33
+ line.match?(/^\s*Project\(/)
34
+ end
35
+
36
+ def project_path_from(line)
37
+ path = line.split('"')[5]
38
+ return unless path
39
+
40
+ path = path.tr('\\', '/')
41
+ path.match?(/\.[a-z]{2}proj$/) ? path : nil
42
+ end
43
+ end
44
+ end
45
+ end
@@ -5,6 +5,7 @@ require 'spandx/parsers/csproj'
5
5
  require 'spandx/parsers/gemfile_lock'
6
6
  require 'spandx/parsers/packages_config'
7
7
  require 'spandx/parsers/pipfile_lock'
8
+ require 'spandx/parsers/sln'
8
9
 
9
10
  module Spandx
10
11
  module Parsers
data/lib/spandx/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Spandx
4
- VERSION = '0.1.7'
4
+ VERSION = '0.2.0'
5
5
  end
data/lib/spandx.rb CHANGED
@@ -6,6 +6,7 @@ require 'json'
6
6
  require 'net/hippie'
7
7
  require 'nokogiri'
8
8
  require 'pathname'
9
+ require 'text'
9
10
 
10
11
  require 'spandx/catalogue'
11
12
  require 'spandx/content'
data/spandx.gemspec CHANGED
@@ -33,10 +33,12 @@ Gem::Specification.new do |spec|
33
33
  spec.add_dependency 'bundler', '>= 1.16', '< 3.0.0'
34
34
  spec.add_dependency 'net-hippie', '~> 0.3'
35
35
  spec.add_dependency 'nokogiri', '~> 1.10'
36
+ spec.add_dependency 'text', '~> 1.3'
36
37
  spec.add_dependency 'thor', '~> 0.1'
37
38
  spec.add_development_dependency 'bundler-audit', '~> 0.6'
38
39
  spec.add_development_dependency 'rake', '~> 13.0'
39
40
  spec.add_development_dependency 'rspec', '~> 3.0'
41
+ spec.add_development_dependency 'rspec-benchmark', '~> 0.5'
40
42
  spec.add_development_dependency 'rubocop', '~> 0.52'
41
43
  spec.add_development_dependency 'rubocop-rspec', '~> 1.22'
42
44
  spec.add_development_dependency 'vcr', '~> 5.0'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spandx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - mo khan
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-01-28 00:00:00.000000000 Z
11
+ date: 2020-01-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -58,6 +58,20 @@ dependencies:
58
58
  - - "~>"
59
59
  - !ruby/object:Gem::Version
60
60
  version: '1.10'
61
+ - !ruby/object:Gem::Dependency
62
+ name: text
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '1.3'
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '1.3'
61
75
  - !ruby/object:Gem::Dependency
62
76
  name: thor
63
77
  requirement: !ruby/object:Gem::Requirement
@@ -114,6 +128,20 @@ dependencies:
114
128
  - - "~>"
115
129
  - !ruby/object:Gem::Version
116
130
  version: '3.0'
131
+ - !ruby/object:Gem::Dependency
132
+ name: rspec-benchmark
133
+ requirement: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - "~>"
136
+ - !ruby/object:Gem::Version
137
+ version: '0.5'
138
+ type: :development
139
+ prerelease: false
140
+ version_requirements: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - "~>"
143
+ - !ruby/object:Gem::Version
144
+ version: '0.5'
117
145
  - !ruby/object:Gem::Dependency
118
146
  name: rubocop
119
147
  requirement: !ruby/object:Gem::Requirement
@@ -205,6 +233,7 @@ files:
205
233
  - lib/spandx/parsers/gemfile_lock.rb
206
234
  - lib/spandx/parsers/packages_config.rb
207
235
  - lib/spandx/parsers/pipfile_lock.rb
236
+ - lib/spandx/parsers/sln.rb
208
237
  - lib/spandx/report.rb
209
238
  - lib/spandx/version.rb
210
239
  - spandx.gemspec