synt 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +35 -0
- data/bin/synt +14 -0
- data/lib/synt.rb +19 -0
- data/lib/synt/cli.rb +34 -0
- data/lib/synt/parser.rb +12 -0
- data/lib/synt/similar.rb +88 -0
- data/lib/synt/similar/jaccard.rb +16 -0
- data/lib/synt/similar/tanimoto.rb +25 -0
- data/lib/synt/version.rb +3 -0
- data/readme.md +15 -0
- data/spec/synt.spec +0 -0
- data/synt.gemspec +22 -0
- metadata +73 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 80be7f4e38ed147f4dcc65f317e1b77c385ee952
|
4
|
+
data.tar.gz: 84158235542049a3ad23b9eff0a7f963dc06a6a7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7218180ffe4ee3800c5baa92197a72ad4651242116680076af07476bf10aa520991d03886a32bb0c08362d30c6e483e047fb04394b2304234afe9f0a2fd134ea
|
7
|
+
data.tar.gz: 087908cb71c757e8f43176049f6e055affd56f79863b3ac8171deffae1dc566b7f4c163bd819ae49cba8c6656dbd602c2953034c4f6514558ab9684a080e1eda
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
ast (2.0.0)
|
5
|
+
docile (1.1.5)
|
6
|
+
manowar (0.0.1)
|
7
|
+
multi_json (1.10.1)
|
8
|
+
parser (2.2.0.pre.4)
|
9
|
+
ast (>= 1.1, < 3.0)
|
10
|
+
slop (~> 3.4, >= 3.4.5)
|
11
|
+
powerpack (0.0.9)
|
12
|
+
rainbow (2.0.0)
|
13
|
+
rubocop (0.25.0)
|
14
|
+
parser (>= 2.2.0.pre.4, < 3.0)
|
15
|
+
powerpack (~> 0.0.6)
|
16
|
+
rainbow (>= 1.99.1, < 3.0)
|
17
|
+
ruby-progressbar (~> 1.4)
|
18
|
+
ruby-progressbar (1.6.0)
|
19
|
+
simplecov (0.9.0)
|
20
|
+
docile (~> 1.1.0)
|
21
|
+
multi_json
|
22
|
+
simplecov-html (~> 0.8.0)
|
23
|
+
simplecov-html (0.8.0)
|
24
|
+
simplecov-lcov (0.3.0)
|
25
|
+
slop (3.6.0)
|
26
|
+
|
27
|
+
PLATFORMS
|
28
|
+
ruby
|
29
|
+
|
30
|
+
DEPENDENCIES
|
31
|
+
manowar (= 0.0.1)
|
32
|
+
rubocop (= 0.25.0)
|
33
|
+
simplecov (= 0.9.0)
|
34
|
+
simplecov-lcov
|
35
|
+
slop (= 3.6.0)
|
data/bin/synt
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line entry point for synt.
#
# When the TEST_COV environment variable is set, SimpleCov is started with the
# LCOV formatter before the library is loaded, and paths containing
# "/fixtures/" are filtered out of the coverage report.
if ENV.key?('TEST_COV')
  require 'simplecov'
  require 'simplecov-lcov'

  SimpleCov.formatter = SimpleCov::Formatter::LcovFormatter
  SimpleCov.start { add_filter "/fixtures/" }
end

require_relative '../lib/synt'

# Parses the command line, runs the comparison and prints the result.
Synt.new
|
data/lib/synt.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require_relative 'synt/cli'
|
2
|
+
require_relative 'synt/similar'
|
3
|
+
|
4
|
+
# Top-level namespace and entry point used by the synt executable.
module Synt
  extend self

  # Run a comparison driven by the command-line options.
  #
  # Parses the options via CLI.parse, scores the inputs via Similar.compare
  # and prints the similarity percentage. When a threshold option was given
  # and the similarity is below it, prints a notice and exits with status 1.
  #
  # Returns nil.
  def new
    # TODO: move to CLI? this is lib.
    opts = CLI.parse
    diff = Similar.compare opts

    # The percent sign follows the number ("85.5%"), not the other way round.
    puts "Inputs are #{diff}% similar."

    return unless opts.threshold? && diff < opts[:threshold].to_f

    puts "Similarity threshold of #{opts[:threshold]} hit."
    exit 1
  end
end
|
data/lib/synt/cli.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'slop'
|
2
|
+
require 'manowar'
|
3
|
+
require_relative 'version'
|
4
|
+
|
5
|
+
define 'Synt'
|
6
|
+
|
7
|
+
module Synt
  # Command-line option definitions for the synt executable, built on Slop.
  module CLI
    extend self

    # Parse the process arguments.
    #
    # Returns whatever Slop.parse returns for the options declared in
    # #options (help output enabled).
    def parse
      Slop.parse help: true, &options
    end

    private

    # Build the option-definition block handed to Slop.parse.
    #
    # NOTE(review): kept as a plain proc (not a lambda) on the assumption that
    # Slop evaluates the block in its own context - confirm before changing.
    def options
      proc do
        banner 'Usage: synt.rb [options]'

        # Long-only on purpose: the short flag -t is owned by --to below.
        # Registering 't=' for both options made them collide, leaving one of
        # them unreachable from the command line.
        on 'tokenize=', 'file or string to tokenize'
        on 'c=', 'compare=', 'File or String to compare to something.'
        on 't=', 'to=', 'File or String to compare against.'
        on 'a=', 'algorithm=', 'Similarity algorithm [default=jaccard,tanimoto].'
        # Explicit "\n" instead of a literal line break inside the string, so
        # source indentation can never leak into the help text.
        on 'n=', 'ngram=', "Specify what ngrams are generated and used for\n" \
                           'comparing token sequences. [default=1,2,4..5,10,...,all]'
        on 'd=', 'threshold=', 'Similarity threshold and exit with error.'

        on '-v', 'version', 'Print the version.' do
          puts Synt::VERSION
          exit 0
        end
      end
    end
  end
end
|
data/lib/synt/parser.rb
ADDED
data/lib/synt/similar.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'manowar'
|
2
|
+
require_relative 'parser'
|
3
|
+
require_relative 'similar/jaccard'
|
4
|
+
require_relative 'similar/tanimoto'
|
5
|
+
|
6
|
+
define 'Synt'
|
7
|
+
|
8
|
+
module Synt
  # Token-sequence similarity: tokenizes two inputs, expands the tokens into
  # n-grams and scores the overlap with the selected algorithm.
  module Similar
    extend self

    # Compare two inputs and return their similarity as a percentage.
    #
    # opts - option container read with #[]; keys used:
    #        :compare   - first input (required)
    #        :to        - second input (required)
    #        :algorithm - 'jaccard' (default) or 'tanimoto'
    #        :ngram     - n-gram spec, see #ngram_range
    #
    # Prints a message and exits with status 1 when a required option is
    # missing or the algorithm name is unknown.
    #
    # Returns the similarity as a Float rounded to two decimals.
    def compare(opts)
      error 'no compare property provided' unless opts[:compare]
      error 'no to property provided' unless opts[:to]

      name = opts[:algorithm] || 'jaccard'
      algorithm = algorithms[name]
      # Without this guard an unknown name crashed later with NoMethodError on nil.
      error "unknown algorithm: #{name}" unless algorithm

      n_start, n_end = ngram_range opts[:ngram]

      src_t = normalize_ripper_tokens Synt::Parser.parse(opts[:compare])
      cmp_t = normalize_ripper_tokens Synt::Parser.parse(opts[:to])

      a = generate_ngrams src_t, n_start, n_end
      b = generate_ngrams cmp_t, n_start, n_end

      algorithm.compare(a, b).to_f.round 2
    end

    private

    # Map of algorithm names accepted on the command line to implementations.
    def algorithms
      { 'jaccard' => Jaccard, 'tanimoto' => Tanimoto }
    end

    # Expand a token list into the joined n-grams of every size in start..nend.
    #
    # arr   - Array of token strings
    # start - smallest n-gram size; nil or anything below 1 means 1
    # nend  - largest n-gram size; nil means arr.length
    #
    # When nend exceeds the number of tokens, a notice is printed and both
    # bounds fall back to 1, as the notice announces.
    #
    # Returns arr itself for plain unigrams, otherwise a new Array of Strings.
    def generate_ngrams(arr, start, nend)
      nend ||= arr.length
      start = 1 if start.nil? || start < 1 # each_cons rejects sizes below 1

      if nend > arr.length
        puts 'ngram end value exceeds length- setting start/end to: 1.'
        start = nend = 1
      end

      return arr if start == 1 && nend == 1 # short circuit: unigrams are the tokens

      (start..nend).flat_map do |size|
        # each_cons yields every run of exactly +size+ consecutive tokens.
        arr.each_cons(size).map { |gram| gram.join('') }
      end
    end

    # Translate the --ngram option into a [start, end] pair.
    #
    # ngram - nil, 'all', a single size such as '3', or a range such as '2..4'.
    #         A range with a missing bound ('2..' or '..4') leaves that bound
    #         nil so #generate_ngrams applies its default.
    #
    # Returns a two-element Array; nil entries mean "use the default bound".
    def ngram_range(ngram)
      return [1, 1] unless ngram
      return [nil, nil] if ngram == 'all'

      if ngram.include?('..')
        first, last = ngram.split('..', 2)
        return [range_bound(first), range_bound(last)]
      end

      size = ngram.to_i
      [size, size]
    end

    # Convert one side of a range spec to an Integer, or nil when it is absent.
    def range_bound(text)
      text.nil? || text.empty? ? nil : text.to_i
    end

    # Drop nil tokens and tokens matched by the blank-line pattern.
    def normalize_ripper_tokens(tokens)
      tokens.select { |t| t && t !~ /^\s*$/ }
    end

    # Print msg and terminate the process with exit status 1.
    def error(msg)
      puts msg
      exit 1
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# See: http://en.wikipedia.org/wiki/Jaccard_index
|
2
|
+
require 'manowar'
|
3
|
+
|
4
|
+
define 'Synt::Similar'
|
5
|
+
|
6
|
+
module Synt
  module Similar
    # Jaccard index: size of the intersection over size of the union of the
    # two inputs, each treated as a set, expressed as a percentage.
    module Jaccard
      extend self

      # Score the overlap between two token/n-gram lists.
      #
      # src - Array of comparable items (duplicates are ignored)
      # cmp - Array of comparable items (duplicates are ignored)
      #
      # Returns a Float between 0.0 and 100.0. Two empty inputs are treated
      # as identical (100.0) instead of dividing zero by zero, which
      # produced NaN.
      def compare(src, cmp)
        a = src.uniq
        b = cmp.uniq
        union = a | b
        return 100.0 if union.empty?

        (a & b).length.to_f / union.length * 100
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# This is an (apparently) generalized jaccard algorithm, with its unique uses.
|
2
|
+
|
3
|
+
# See: http://en.wikipedia.org/wiki/Talk%3AJaccard_index#Tanimoto_coefficient
|
4
|
+
|
5
|
+
# I understand that since data is ultimately
|
6
|
+
# reduced to unique lists of comparable, scalar data, the equation can
|
7
|
+
# be further generalized as a function of cardinal lengths.
|
8
|
+
# This may or may not be correct (in theory)!
|
9
|
+
# I am still unsure if this works for multisets.
|
10
|
+
# The output seems to be as expected, though.
|
11
|
+
require 'manowar'
|
12
|
+
|
13
|
+
define 'Synt::Similar'
|
14
|
+
|
15
|
+
module Synt
  module Similar
    # Tanimoto coefficient computed from set cardinalities:
    # |A & B| / (|A| + |B| - |A & B|), expressed as a percentage.
    module Tanimoto
      extend self

      # Score the overlap between two token/n-gram lists.
      #
      # src - Array of comparable items (duplicates are ignored)
      # cmp - Array of comparable items (duplicates are ignored)
      #
      # Returns a Float between 0.0 and 100.0. Two empty inputs are treated
      # as identical (100.0) instead of dividing zero by zero, which
      # produced NaN.
      def compare(src, cmp)
        a = src.uniq
        b = cmp.uniq
        shared = (a & b).length

        # Inclusion-exclusion: size of the union without building it.
        total = a.length + b.length - shared
        return 100.0 if total.zero?

        shared.to_f / total * 100
      end
    end
  end
end
|
data/lib/synt/version.rb
ADDED
data/readme.md
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
## Synt.rb
|
2
|
+
|
3
|
+
This is the Ruby implementation of Synt.
|
4
|
+
|
5
|
+
### Supported Languages
|
6
|
+
|
7
|
+
* Ruby
|
8
|
+
|
9
|
+
For more languages, see the top level [Synt](http://github.com/brentlintner/synt) project.
|
10
|
+
|
11
|
+
### Requirements
|
12
|
+
|
13
|
+
* Ruby
|
14
|
+
* RubyGems
|
15
|
+
* Bundler
|
data/spec/synt.spec
ADDED
File without changes
|
data/synt.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Put lib/ on the load path so the version constant can be read below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'synt/version'

Gem::Specification.new do |spec|
  spec.name          = "synt"
  spec.version       = Synt::VERSION
  spec.authors       = ["Brent Lintner"]
  spec.email         = ["brent.lintner@gmail.com"]
  spec.summary       = "Similar code analysis."
  spec.description   = "Calculate the percentage of difference between code."
  spec.homepage      = "https://github.com/brentlintner/synt"
  spec.license       = "ISC"

  # Package exactly what git tracks.
  spec.files         = `git ls-files`.split("\n")
  spec.executables   = ["synt"]
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # The library requires slop and manowar when it is loaded, so they must be
  # declared as runtime dependencies or the installed executable fails with
  # a LoadError. Constraints follow the versions pinned in Gemfile.lock.
  spec.add_runtime_dependency "slop", "~> 3.6"
  spec.add_runtime_dependency "manowar", "~> 0.0.1"

  spec.add_development_dependency "bundler", "~> 1.7"
end
|
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: synt
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Brent Lintner
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-09-28 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
description: Calculate the percentage of difference between code.
|
28
|
+
email:
|
29
|
+
- brent.lintner@gmail.com
|
30
|
+
executables:
|
31
|
+
- synt
|
32
|
+
extensions: []
|
33
|
+
extra_rdoc_files: []
|
34
|
+
files:
|
35
|
+
- Gemfile
|
36
|
+
- Gemfile.lock
|
37
|
+
- bin/synt
|
38
|
+
- lib/synt.rb
|
39
|
+
- lib/synt/cli.rb
|
40
|
+
- lib/synt/parser.rb
|
41
|
+
- lib/synt/similar.rb
|
42
|
+
- lib/synt/similar/jaccard.rb
|
43
|
+
- lib/synt/similar/tanimoto.rb
|
44
|
+
- lib/synt/version.rb
|
45
|
+
- readme.md
|
46
|
+
- spec/synt.spec
|
47
|
+
- synt.gemspec
|
48
|
+
homepage: https://github.com/brentlintner/synt
|
49
|
+
licenses:
|
50
|
+
- ISC
|
51
|
+
metadata: {}
|
52
|
+
post_install_message:
|
53
|
+
rdoc_options: []
|
54
|
+
require_paths:
|
55
|
+
- lib
|
56
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
requirements: []
|
67
|
+
rubyforge_project:
|
68
|
+
rubygems_version: 2.2.2
|
69
|
+
signing_key:
|
70
|
+
specification_version: 4
|
71
|
+
summary: Similar code analysis.
|
72
|
+
test_files:
|
73
|
+
- spec/synt.spec
|