spandx 0.1.7 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9bc9c449db0cbce37e841c358d608953e9d9cde996aec5b8c4304457bce934a4
4
- data.tar.gz: 28fe5ca88aa73cd47e1de6aa70bb49868a642896da10ddde3ecbcf6037fc612e
3
+ metadata.gz: a9c82d16a4a78e075ecebcd7308580b5e3ca44bcc274d3070ade1d546b9a38fa
4
+ data.tar.gz: 540a6a7f1d12f14c5d44feb15826b2f61ba8ee422d72e662328457b8df7260fc
5
5
  SHA512:
6
- metadata.gz: 33d9b54d4efa74cb7a76032c13ece5be12e11ef52c2dc8d088bb2f78837a1b92c1f85c2e7b01df17d3dae0566dad4db24758d29dae438751e466d95fb06952cb
7
- data.tar.gz: b9f35181f330a8782e314b1118b731f32f98844f51f01560b3fbf975daed4d63e13923082483fb8713fb38826e89abe0f3889e5f39d9695780a95536d132c4b9
6
+ metadata.gz: 1bb0a40e4723a2b3fc2b50a92d3df34df978753f5e14a76534758d9c946349475dfa18a123c1e0b3f1d89ac01fbbe9eecb520ca6842c122ba5af122d9466c639
7
+ data.tar.gz: faaedb6aaaf9ae486219ef396092c108ff359723a267291b5ef37b2bcf834195b5ace2578067598f97c71b68f78f4efb8ade261c3953184fd4a7be34eed9d7a9
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- Version 0.1.7
1
+ Version 0.2.0
2
2
 
3
3
  # Changelog
4
4
 
@@ -9,7 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
9
9
 
10
10
  ## [Unreleased]
11
11
  ### Added
12
- - nil
12
+ - Nil
13
+
14
+ ## [0.2.0] - 2020-01-28
15
+ ### Added
16
+ - Parse .NET `sln` files
17
+ - Add ability to choose Levenshtein algorithm
13
18
 
14
19
  ## [0.1.7] - 2020-01-28
15
20
  ### Added
@@ -47,7 +52,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
47
52
  ### Added
48
53
  - Provide ruby API to the latest SPDX catalogue.
49
54
 
50
- [Unreleased]: https://github.com/mokhan/spandx/compare/v0.1.7...HEAD
55
+ [Unreleased]: https://github.com/mokhan/spandx/compare/v0.2.0...HEAD
56
+ [0.2.0]: https://github.com/mokhan/spandx/compare/v0.1.7...v0.2.0
51
57
  [0.1.7]: https://github.com/mokhan/spandx/compare/v0.1.6...v0.1.7
52
58
  [0.1.6]: https://github.com/mokhan/spandx/compare/v0.1.5...v0.1.6
53
59
  [0.1.5]: https://github.com/mokhan/spandx/compare/v0.1.4...v0.1.5
@@ -2,22 +2,33 @@
2
2
 
3
3
  module Spandx
4
4
  class Content
5
- attr_reader :tokens, :threshold
5
+ attr_reader :raw, :threshold
6
6
 
7
- def initialize(content, threshold: 89.0)
7
+ def initialize(raw, threshold: 89.0)
8
8
  @threshold = threshold
9
- @tokens = tokenize(canonicalize(content)).to_set
9
+ @raw = raw
10
10
  end
11
11
 
12
- def similar?(other)
13
- similarity_score(other) > threshold
12
+ def tokens
13
+ @tokens ||= tokenize(canonicalize(raw)).to_set
14
14
  end
15
15
 
16
- # https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Dice%27s_coefficient#Ruby
17
- def similarity_score(other)
18
- overlap = (tokens & other.tokens).size
19
- total = tokens.size + other.tokens.size
20
- 100.0 * (overlap * 2.0 / total)
16
# Decides whether +other+ is close enough to this content to count as a match.
#
# The comparison direction depends on the algorithm: the Dice score is a
# percentage (higher is closer) while the Levenshtein score is an edit
# distance (lower is closer).
#
# @param other [Content] the content to compare against
# @param algorithm [Symbol] :dice_coefficient (default) or :levenshtein
# @return [Boolean, nil] nil when +algorithm+ is not one of the two above
def similar?(other, algorithm: :dice_coefficient)
  case algorithm
  when :dice_coefficient
    similarity_score(other, algorithm: algorithm) > threshold
  when :levenshtein
    # The algorithm must be forwarded; otherwise a Dice percentage would be
    # compared as if it were an edit distance.
    similarity_score(other, algorithm: algorithm) < threshold
  end
end
24
+
25
+ def similarity_score(other, algorithm: :dice_coefficient)
26
+ case algorithm
27
+ when :dice_coefficient
28
+ dice_coefficient(other)
29
+ when :levenshtein
30
+ Text::Levenshtein.distance(raw, other.raw, 100)
31
+ end
21
32
  end
22
33
 
23
34
  private
@@ -33,5 +44,12 @@ module Spandx
33
44
  def blank?(content)
34
45
  content.nil? || content.chomp.strip.empty?
35
46
  end
47
+
48
# Scores the token overlap with +other+ using Dice's coefficient.
# https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Dice%27s_coefficient#Ruby
#
# @param other [#tokens] object exposing a token set, as this one does
# @return [Float] 0.0 when nothing is shared, up to 100.0 for identical sets
def dice_coefficient(other)
  total = tokens.size + other.tokens.size
  # Two empty token sets would otherwise evaluate 0.0 / 0, which is NaN.
  return 0.0 if total.zero?

  overlap = (tokens & other.tokens).size
  100.0 * (overlap * 2.0 / total)
end
36
54
  end
37
55
  end
@@ -14,7 +14,8 @@ module Spandx
14
14
  end
15
15
 
16
16
  def read(file)
17
- IO.read(File.join(path, file))
17
+ full_path = File.join(path, file)
18
+ IO.read(full_path) if File.exist?(full_path)
18
19
  end
19
20
 
20
21
  private
@@ -9,41 +9,49 @@ module Spandx
9
9
  def initialize(http: Spandx.http, catalogue:)
10
10
  @http = http
11
11
  @catalogue = catalogue
12
+ @guess = Guess.new(catalogue)
12
13
  end
13
14
 
14
15
  def licenses_for(name, version)
15
16
  document = nuspec_for(name, version)
16
17
 
17
- exact_licenses_from(document) ||
18
+ extract_licenses_from(document) ||
18
19
  guess_licenses_from(document)
19
20
  end
20
21
 
21
22
  private
22
23
 
23
- attr_reader :http, :catalogue
24
+ attr_reader :http, :catalogue, :guess
24
25
 
25
26
  def nuspec_url_for(name, version)
26
27
  "https://api.nuget.org/v3-flatcontainer/#{name}/#{version}/#{name}.nuspec"
27
28
  end
28
29
 
29
30
  def nuspec_for(name, version)
30
- from_xml(http.get(nuspec_url_for(name, version)).body)
31
+ response = http.get(nuspec_url_for(name, version))
32
+ from_xml(response.body) if http.ok?(response)
31
33
  end
32
34
 
33
35
  def from_xml(xml)
34
36
  Nokogiri::XML(xml).tap(&:remove_namespaces!)
35
37
  end
36
38
 
37
- def exact_licenses_from(document)
39
+ def extract_licenses_from(document)
38
40
  licenses = document.search('//package/metadata/license')
39
41
  licenses.map(&:text) if licenses.any?
40
42
  end
41
43
 
42
44
  def guess_licenses_from(document)
43
- guess = Guess.new(catalogue)
44
45
  document
45
46
  .search('//package/metadata/licenseUrl')
46
- .map { |node| guess.license_for(http.get(node.text).body) }
47
+ .map { |node| guess_license_for(node.text) }
48
+ .compact
49
+ end
50
+
51
+ def guess_license_for(url)
52
+ response = http.get(url)
53
+
54
+ guess.license_for(response.body) if http.ok?(response)
47
55
  end
48
56
  end
49
57
  end
data/lib/spandx/guess.rb CHANGED
@@ -27,19 +27,35 @@ module Spandx
27
27
  @catalogue = catalogue
28
28
  end
29
29
 
30
- def license_for(raw_content)
30
+ def license_for(raw_content, algorithm: :dice_coefficient)
31
31
  content = Content.new(raw_content)
32
-
33
- max_score = nil
32
+ score = nil
34
33
  catalogue.each do |license|
35
34
  next if license.deprecated_license_id?
36
35
 
37
- percentage = content.similarity_score(license.content)
38
- if max_score.nil? || percentage > max_score.score
39
- max_score = Score.new(percentage, license)
40
- end
36
+ score = algorithm == :levenshtein ? levenshtein(content, license, score) : dice(content, license, score)
37
+ end
38
+ score&.item&.id
39
+ end
40
+
41
+ private
42
+
43
+ def levenshtein(target, other, score)
44
+ percentage = target.similarity_score(other.content, algorithm: :levenshtein)
45
+ if score.nil? || percentage < score.score
46
+ return Score.new(percentage, other)
41
47
  end
42
- max_score.item.id
48
+
49
+ score
50
+ end
51
+
52
# Keeps whichever candidate has the highest Dice score above the target's
# threshold.
#
# @param target [Content] the content being identified
# @param other [#content] catalogue entry whose content is compared
# @param score [Score, nil] best candidate found so far
# @return [Score, nil] a new Score when +other+ beats +score+, else +score+
def dice(target, other, score)
  percentage = target.similarity_score(other.content, algorithm: :dice_coefficient)
  # Use the target's own threshold rather than repeating its default here.
  return score unless percentage > target.threshold
  return score unless score.nil? || percentage > score.score

  Score.new(percentage, other)
end
44
60
  end
45
61
  end
@@ -61,7 +61,11 @@ module Spandx
61
61
  end
62
62
 
63
63
  def content
64
- @content ||= Content.new(Spandx.db.read("text/#{id}.txt"))
64
+ @content ||= Content.new(raw_content)
65
+ end
66
+
67
+ def raw_content
68
+ @raw_content ||= (Spandx.db.read("text/#{id}.txt") || '')
65
69
  end
66
70
 
67
71
  def <=>(other)
@@ -0,0 +1,45 @@
1
# frozen_string_literal: true

module Spandx
  module Parsers
    # Parses a Visual Studio solution (`.sln`) file by handing each project
    # file it references to whichever parser `Parsers.for` returns for it.
    class Sln < Base
      # @param filename [String] name or path of the candidate file
      # @return [Boolean] true when the name ends in `.sln`
      def self.matches?(filename)
        # Anchored so that paths which merely contain ".sln" are not claimed.
        filename.match?(/\.sln\z/)
      end

      # @param file_path [String] path to the solution file
      # @return [Array] flattened results of parsing every referenced project
      def parse(file_path)
        project_paths_from(file_path).map do |path|
          Parsers
            .for(path, catalogue: catalogue)
            .parse(path)
        end.flatten
      end

      private

      # Collects the project files referenced by the solution, resolved
      # against the directory that holds the solution file.
      def project_paths_from(file_path)
        solution_dir = File.dirname(file_path)

        # File.readlines never treats a leading "|" in the path as a command,
        # which IO.readlines does.
        File.readlines(file_path).map do |line|
          next unless project_line?(line)

          path = project_path_from(line)
          next unless path

          Pathname.new(File.join(solution_dir, path)).cleanpath.to_path
        end.compact
      end

      # True for lines that open a `Project(` entry.
      def project_line?(line)
        line.match?(/^\s*Project\(/)
      end

      # Pulls the project file path out of a project line.
      #
      # A project line is expected to look like
      #   Project("{type}") = "Name", "dir\Name.csproj", "{guid}"
      # so splitting on double quotes leaves the path at index 5.
      #
      # @return [String, nil] the path with forward slashes, or nil when the
      #   entry is not a `*.??proj` file
      def project_path_from(line)
        path = line.split('"')[5]
        return unless path

        path = path.tr('\\', '/')
        path if path.match?(/\.[a-z]{2}proj\z/)
      end
    end
  end
end
@@ -5,6 +5,7 @@ require 'spandx/parsers/csproj'
5
5
  require 'spandx/parsers/gemfile_lock'
6
6
  require 'spandx/parsers/packages_config'
7
7
  require 'spandx/parsers/pipfile_lock'
8
+ require 'spandx/parsers/sln'
8
9
 
9
10
  module Spandx
10
11
  module Parsers
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Spandx
4
- VERSION = '0.1.7'
4
+ VERSION = '0.2.0'
5
5
  end
data/lib/spandx.rb CHANGED
@@ -6,6 +6,7 @@ require 'json'
6
6
  require 'net/hippie'
7
7
  require 'nokogiri'
8
8
  require 'pathname'
9
+ require 'text'
9
10
 
10
11
  require 'spandx/catalogue'
11
12
  require 'spandx/content'
data/spandx.gemspec CHANGED
@@ -33,10 +33,12 @@ Gem::Specification.new do |spec|
33
33
  spec.add_dependency 'bundler', '>= 1.16', '< 3.0.0'
34
34
  spec.add_dependency 'net-hippie', '~> 0.3'
35
35
  spec.add_dependency 'nokogiri', '~> 1.10'
36
+ spec.add_dependency 'text', '~> 1.3'
36
37
  spec.add_dependency 'thor', '~> 0.1'
37
38
  spec.add_development_dependency 'bundler-audit', '~> 0.6'
38
39
  spec.add_development_dependency 'rake', '~> 13.0'
39
40
  spec.add_development_dependency 'rspec', '~> 3.0'
41
+ spec.add_development_dependency 'rspec-benchmark', '~> 0.5'
40
42
  spec.add_development_dependency 'rubocop', '~> 0.52'
41
43
  spec.add_development_dependency 'rubocop-rspec', '~> 1.22'
42
44
  spec.add_development_dependency 'vcr', '~> 5.0'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: spandx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - mo khan
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-01-28 00:00:00.000000000 Z
11
+ date: 2020-01-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -58,6 +58,20 @@ dependencies:
58
58
  - - "~>"
59
59
  - !ruby/object:Gem::Version
60
60
  version: '1.10'
61
+ - !ruby/object:Gem::Dependency
62
+ name: text
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '1.3'
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '1.3'
61
75
  - !ruby/object:Gem::Dependency
62
76
  name: thor
63
77
  requirement: !ruby/object:Gem::Requirement
@@ -114,6 +128,20 @@ dependencies:
114
128
  - - "~>"
115
129
  - !ruby/object:Gem::Version
116
130
  version: '3.0'
131
+ - !ruby/object:Gem::Dependency
132
+ name: rspec-benchmark
133
+ requirement: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - "~>"
136
+ - !ruby/object:Gem::Version
137
+ version: '0.5'
138
+ type: :development
139
+ prerelease: false
140
+ version_requirements: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - "~>"
143
+ - !ruby/object:Gem::Version
144
+ version: '0.5'
117
145
  - !ruby/object:Gem::Dependency
118
146
  name: rubocop
119
147
  requirement: !ruby/object:Gem::Requirement
@@ -205,6 +233,7 @@ files:
205
233
  - lib/spandx/parsers/gemfile_lock.rb
206
234
  - lib/spandx/parsers/packages_config.rb
207
235
  - lib/spandx/parsers/pipfile_lock.rb
236
+ - lib/spandx/parsers/sln.rb
208
237
  - lib/spandx/report.rb
209
238
  - lib/spandx/version.rb
210
239
  - spandx.gemspec