synt 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +35 -0
- data/bin/synt +14 -0
- data/lib/synt.rb +19 -0
- data/lib/synt/cli.rb +34 -0
- data/lib/synt/parser.rb +12 -0
- data/lib/synt/similar.rb +88 -0
- data/lib/synt/similar/jaccard.rb +16 -0
- data/lib/synt/similar/tanimoto.rb +25 -0
- data/lib/synt/version.rb +3 -0
- data/readme.md +15 -0
- data/spec/synt.spec +0 -0
- data/synt.gemspec +22 -0
- metadata +73 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 80be7f4e38ed147f4dcc65f317e1b77c385ee952
|
4
|
+
data.tar.gz: 84158235542049a3ad23b9eff0a7f963dc06a6a7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7218180ffe4ee3800c5baa92197a72ad4651242116680076af07476bf10aa520991d03886a32bb0c08362d30c6e483e047fb04394b2304234afe9f0a2fd134ea
|
7
|
+
data.tar.gz: 087908cb71c757e8f43176049f6e055affd56f79863b3ac8171deffae1dc566b7f4c163bd819ae49cba8c6656dbd602c2953034c4f6514558ab9684a080e1eda
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
ast (2.0.0)
|
5
|
+
docile (1.1.5)
|
6
|
+
manowar (0.0.1)
|
7
|
+
multi_json (1.10.1)
|
8
|
+
parser (2.2.0.pre.4)
|
9
|
+
ast (>= 1.1, < 3.0)
|
10
|
+
slop (~> 3.4, >= 3.4.5)
|
11
|
+
powerpack (0.0.9)
|
12
|
+
rainbow (2.0.0)
|
13
|
+
rubocop (0.25.0)
|
14
|
+
parser (>= 2.2.0.pre.4, < 3.0)
|
15
|
+
powerpack (~> 0.0.6)
|
16
|
+
rainbow (>= 1.99.1, < 3.0)
|
17
|
+
ruby-progressbar (~> 1.4)
|
18
|
+
ruby-progressbar (1.6.0)
|
19
|
+
simplecov (0.9.0)
|
20
|
+
docile (~> 1.1.0)
|
21
|
+
multi_json
|
22
|
+
simplecov-html (~> 0.8.0)
|
23
|
+
simplecov-html (0.8.0)
|
24
|
+
simplecov-lcov (0.3.0)
|
25
|
+
slop (3.6.0)
|
26
|
+
|
27
|
+
PLATFORMS
|
28
|
+
ruby
|
29
|
+
|
30
|
+
DEPENDENCIES
|
31
|
+
manowar (= 0.0.1)
|
32
|
+
rubocop (= 0.25.0)
|
33
|
+
simplecov (= 0.9.0)
|
34
|
+
simplecov-lcov
|
35
|
+
slop (= 3.6.0)
|
data/bin/synt
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Coverage instrumentation is opt-in: it is only loaded when the TEST_COV
# environment variable is present, and it must start before the library
# is required below.
if ENV.key?('TEST_COV')
  require 'simplecov'
  require 'simplecov-lcov'

  SimpleCov.formatter = SimpleCov::Formatter::LcovFormatter
  SimpleCov.start { add_filter "/fixtures/" }
end

require_relative '../lib/synt'

# Hand control to the library entry point.
Synt.new
|
data/lib/synt.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require_relative 'synt/cli'
|
2
|
+
require_relative 'synt/similar'
|
3
|
+
|
4
|
+
module Synt
  extend self

  # Runs one comparison: parses the command-line options, prints the
  # similarity reported by Similar.compare and, when a threshold option was
  # given and the similarity is below it, prints a notice and exits with
  # status 1.
  def new
    # TODO: move to CLI? this is lib.
    options = CLI.parse
    similarity = Similar.compare options

    puts "Inputs are %#{similarity} similar."

    return unless options.threshold?
    return unless similarity < options[:threshold].to_f

    puts "Similarity threshold of #{options[:threshold]} hit."
    exit 1
  end
end
|
data/lib/synt/cli.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'slop'
|
2
|
+
require 'manowar'
|
3
|
+
require_relative 'version'
|
4
|
+
|
5
|
+
define 'Synt'
|
6
|
+
|
7
|
+
module Synt
  # Command-line option definitions for the synt executable, written in the
  # Slop option DSL.
  module CLI
    extend self

    # Parses the process arguments with Slop using the definitions returned
    # by #options.
    #
    # @return [Slop] the parsed options object
    def parse
      Slop.parse help: true, &options
    end

    private

    # Builds the block that declares every supported option.
    #
    # Each short flag is declared exactly once. `tokenize` is long-only
    # (`--tokenize`) because its former short flag `-t` collided with the
    # short flag of `to`, which made `-t` ambiguous.
    #
    # @return [Proc] block evaluated by Slop to register the options
    def options
      proc do
        banner 'Usage: synt.rb [options]'

        on 'tokenize=', 'file or string to tokenize'
        on 'c=', 'compare=', 'File or String to compare to something.'
        on 't=', 'to=', 'File or String to compare against.'
        on 'a=', 'algorithm=', 'Similarity algorithm [default=jaccard,tanimoto].'
        on 'n=', 'ngram=', 'Specify what ngrams are generated and used for ' \
                           'comparing token sequences. [default=1,2,4..5,10,...,all]'
        on 'd=', 'threshold=', 'Similarity threshold and exit with error.'

        on '-v', 'version', 'Print the version.' do
          puts Synt::VERSION
          exit 0
        end
      end
    end
  end
end
|
data/lib/synt/parser.rb
ADDED
data/lib/synt/similar.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'manowar'
|
2
|
+
require_relative 'parser'
|
3
|
+
require_relative 'similar/jaccard'
|
4
|
+
require_relative 'similar/tanimoto'
|
5
|
+
|
6
|
+
define 'Synt'
|
7
|
+
|
8
|
+
module Synt
  # Compares two inputs by tokenizing them, expanding the tokens into
  # n-grams and scoring the two n-gram lists with a similarity algorithm.
  module Similar
    extend self

    # Computes the similarity between opts[:compare] and opts[:to].
    #
    # Prints a message and exits with status 1 when either input is missing
    # or the requested algorithm is unknown.
    #
    # @param opts [#[]] options; reads :compare, :to, :algorithm
    #   (defaults to 'jaccard') and :ngram
    # @return [Float] the algorithm's score rounded to two decimals
    def compare opts
      error 'no compare property provided' unless opts[:compare]
      error 'no to property provided' unless opts[:to]

      name = opts[:algorithm] || 'jaccard'
      algorithm = algorithms[name]
      # Without this guard an unknown name ends in NoMethodError on nil.
      error "unknown algorithm: #{name}" unless algorithm

      n_start, n_end = ngram_range opts[:ngram]

      src_t = normalize_ripper_tokens Synt::Parser.parse(opts[:compare])
      cmp_t = normalize_ripper_tokens Synt::Parser.parse(opts[:to])

      a = generate_ngrams src_t, n_start, n_end
      b = generate_ngrams cmp_t, n_start, n_end

      algorithm.compare(a, b).to_f.round 2
    end

    private

    # Maps the user-facing algorithm names to their implementations.
    def algorithms
      { 'jaccard' => Jaccard, 'tanimoto' => Tanimoto }
    end

    # Expands a token list into joined n-grams for every size in start..nend.
    #
    # @param arr [Array] tokens
    # @param start [Integer, nil] smallest n-gram size (nil means 1)
    # @param nend [Integer, nil] largest n-gram size (nil means arr.length)
    # @return [Array] arr itself for the plain unigram case, otherwise the
    #   n-grams ordered by size and then by position, each joined to a string
    def generate_ngrams arr, start, nend
      nend = arr.length if nend.nil?
      start = 1 if start.nil?

      if nend > arr.length
        puts 'ngram end value exceeds length- setting start/end to: 1.'
        # Actually apply the fallback that the message announces.
        start = nend = 1
      end

      return arr if start == 1 && nend == 1 # short circuit

      # A size below 1 is meaningless (and each_cons rejects 0).
      start = 1 if start < 1

      # each_cons(n) yields exactly the windows of n consecutive tokens.
      (start..nend).flat_map do |size|
        arr.each_cons(size).map { |gram| gram.join('') }
      end
    end

    # Translates the --ngram option into a [start, end] pair.
    #
    # @param ngram [String, nil] nil, 'all', a single size such as '3', or a
    #   range such as '2..4'
    # @return [Array] [1, 1] for nil, [nil, nil] for 'all', otherwise the
    #   integer bounds
    def ngram_range ngram
      return [1, 1] unless ngram
      return [nil, nil] if ngram == 'all'

      if ngram.include?('..')
        first, last = ngram.split '..'
        [first.to_i, last.to_i]
      else
        size = ngram.to_i
        [size, size]
      end
    end

    # Drops nil and whitespace-only tokens.
    #
    # The pattern is anchored with \A and \z so a multi-line token that
    # merely contains a blank line is kept; ^ and $ would match that line.
    def normalize_ripper_tokens tokens
      tokens.select { |t| t && t !~ /\A\s*\z/ }
    end

    # Prints msg and terminates the process with exit status 1.
    def error msg
      puts msg
      exit 1
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# See: http://en.wikipedia.org/wiki/Jaccard_index
|
2
|
+
require 'manowar'
|
3
|
+
|
4
|
+
define 'Synt::Similar'
|
5
|
+
|
6
|
+
module Synt
  module Similar
    # Jaccard index: size of the intersection divided by the size of the
    # union, expressed as a percentage.
    module Jaccard
      extend self

      # Scores two lists as sets (duplicates are ignored).
      #
      # @param src [Array] first list of comparable items
      # @param cmp [Array] second list of comparable items
      # @return [Float] similarity from 0.0 to 100.0; two empty lists are
      #   treated as identical (100.0) instead of producing 0.0/0.0 = NaN
      def compare src, cmp
        # Array#& and Array#| already return duplicate-free results.
        shared = src & cmp
        union = src | cmp

        return 100.0 if union.empty?

        shared.length.to_f / union.length.to_f * 100
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# This is an (apparently) generalized jaccard algorithm, with its unique uses.
|
2
|
+
|
3
|
+
# See: http://en.wikipedia.org/wiki/Talk%3AJaccard_index#Tanimoto_coefficient
|
4
|
+
|
5
|
+
# I understand that since data is ultimately
|
6
|
+
# reduced to unique lists of comparable, scalar data, the equation can
|
7
|
+
# be further generalized as a function of cardinal lengths.
|
8
|
+
# This may or may not be correct (in theory)!
|
9
|
+
# I am still unsure if this works for multisets.
|
10
|
+
# The output seems to be as expected, though.
|
11
|
+
require 'manowar'
|
12
|
+
|
13
|
+
define 'Synt::Similar'
|
14
|
+
|
15
|
+
module Synt
  module Similar
    # Tanimoto coefficient computed from set cardinalities:
    # |A n B| / (|A| + |B| - |A n B|), expressed as a percentage.
    module Tanimoto
      extend self

      # Scores two lists after removing duplicates from each.
      #
      # @param src [Array] first list of comparable items
      # @param cmp [Array] second list of comparable items
      # @return [Float] similarity from 0.0 to 100.0; two empty lists are
      #   treated as identical (100.0) instead of producing 0.0/0.0 = NaN
      def compare src, cmp
        a = src.uniq
        b = cmp.uniq
        shared = (a & b).length.to_f

        # For duplicate-free lists this denominator is the size of the union.
        denominator = a.length.to_f + b.length.to_f - shared
        return 100.0 if denominator.zero?

        shared / denominator * 100
      end
    end
  end
end
|
data/lib/synt/version.rb
ADDED
data/readme.md
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
## Synt.rb
|
2
|
+
|
3
|
+
This is the Ruby implementation of Synt.
|
4
|
+
|
5
|
+
### Supported Languages
|
6
|
+
|
7
|
+
* Ruby
|
8
|
+
|
9
|
+
For more languages, see the top level [Synt](http://github.com/brentlintner/synt) project.
|
10
|
+
|
11
|
+
### Requirements
|
12
|
+
|
13
|
+
* Ruby
|
14
|
+
* RubyGems
|
15
|
+
* Bundler
|
data/spec/synt.spec
ADDED
File without changes
|
data/synt.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# coding: utf-8
# Make lib/ loadable so the version constant can be read below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'synt/version'

Gem::Specification.new do |spec|
  spec.name          = "synt"
  spec.version       = Synt::VERSION
  spec.authors       = ["Brent Lintner"]
  spec.email         = ["brent.lintner@gmail.com"]
  spec.summary       = "Similar code analysis."
  spec.description   = "Calculate the percentage of difference between code."
  spec.homepage      = "https://github.com/brentlintner/synt"
  spec.license       = "ISC"

  # Package every file tracked by git; spec/test files are also listed
  # separately as test_files.
  spec.files         = `git ls-files`.split("\n")
  spec.executables   = ["synt"]
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # The library requires these at load time, so they must be runtime
  # dependencies; otherwise an installed gem fails with LoadError.
  # The version ranges follow the versions pinned in Gemfile.lock.
  spec.add_runtime_dependency "slop", "~> 3.6"
  spec.add_runtime_dependency "manowar", "~> 0.0.1"

  spec.add_development_dependency "bundler", "~> 1.7"
end
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: synt
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Brent Lintner
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-09-28 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
description: Calculate the percentage of difference between code.
|
28
|
+
email:
|
29
|
+
- brent.lintner@gmail.com
|
30
|
+
executables:
|
31
|
+
- synt
|
32
|
+
extensions: []
|
33
|
+
extra_rdoc_files: []
|
34
|
+
files:
|
35
|
+
- Gemfile
|
36
|
+
- Gemfile.lock
|
37
|
+
- bin/synt
|
38
|
+
- lib/synt.rb
|
39
|
+
- lib/synt/cli.rb
|
40
|
+
- lib/synt/parser.rb
|
41
|
+
- lib/synt/similar.rb
|
42
|
+
- lib/synt/similar/jaccard.rb
|
43
|
+
- lib/synt/similar/tanimoto.rb
|
44
|
+
- lib/synt/version.rb
|
45
|
+
- readme.md
|
46
|
+
- spec/synt.spec
|
47
|
+
- synt.gemspec
|
48
|
+
homepage: https://github.com/brentlintner/synt
|
49
|
+
licenses:
|
50
|
+
- ISC
|
51
|
+
metadata: {}
|
52
|
+
post_install_message:
|
53
|
+
rdoc_options: []
|
54
|
+
require_paths:
|
55
|
+
- lib
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
requirements: []
|
67
|
+
rubyforge_project:
|
68
|
+
rubygems_version: 2.2.2
|
69
|
+
signing_key:
|
70
|
+
specification_version: 4
|
71
|
+
summary: Similar code analysis.
|
72
|
+
test_files:
|
73
|
+
- spec/synt.spec
|