synt 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 80be7f4e38ed147f4dcc65f317e1b77c385ee952
4
+ data.tar.gz: 84158235542049a3ad23b9eff0a7f963dc06a6a7
5
+ SHA512:
6
+ metadata.gz: 7218180ffe4ee3800c5baa92197a72ad4651242116680076af07476bf10aa520991d03886a32bb0c08362d30c6e483e047fb04394b2304234afe9f0a2fd134ea
7
+ data.tar.gz: 087908cb71c757e8f43176049f6e055affd56f79863b3ac8171deffae1dc566b7f4c163bd819ae49cba8c6656dbd602c2953034c4f6514558ab9684a080e1eda
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'slop', '3.6.0'
4
+ gem 'manowar', '0.0.1'
5
+ gem 'rubocop', '0.25.0', require: false
6
+ gem 'simplecov', '0.9.0', require: false
7
+ gem 'simplecov-lcov', require: false
@@ -0,0 +1,35 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ ast (2.0.0)
5
+ docile (1.1.5)
6
+ manowar (0.0.1)
7
+ multi_json (1.10.1)
8
+ parser (2.2.0.pre.4)
9
+ ast (>= 1.1, < 3.0)
10
+ slop (~> 3.4, >= 3.4.5)
11
+ powerpack (0.0.9)
12
+ rainbow (2.0.0)
13
+ rubocop (0.25.0)
14
+ parser (>= 2.2.0.pre.4, < 3.0)
15
+ powerpack (~> 0.0.6)
16
+ rainbow (>= 1.99.1, < 3.0)
17
+ ruby-progressbar (~> 1.4)
18
+ ruby-progressbar (1.6.0)
19
+ simplecov (0.9.0)
20
+ docile (~> 1.1.0)
21
+ multi_json
22
+ simplecov-html (~> 0.8.0)
23
+ simplecov-html (0.8.0)
24
+ simplecov-lcov (0.3.0)
25
+ slop (3.6.0)
26
+
27
+ PLATFORMS
28
+ ruby
29
+
30
+ DEPENDENCIES
31
+ manowar (= 0.0.1)
32
+ rubocop (= 0.25.0)
33
+ simplecov (= 0.9.0)
34
+ simplecov-lcov
35
+ slop (= 3.6.0)
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

# Coverage reporting is opt-in: it is only wired up when the TEST_COV
# environment variable is present (its value is not inspected).
if ENV.key?('TEST_COV')
  require 'simplecov'
  require 'simplecov-lcov'

  # Report through the LCOV formatter and leave fixture files out of the numbers.
  SimpleCov.formatter = SimpleCov::Formatter::LcovFormatter
  SimpleCov.start { add_filter "/fixtures/" }
end

# The library is loaded after the optional coverage setup above.
require_relative '../lib/synt'

# Entry point: parse the command line, compare, print the result.
Synt.new
@@ -0,0 +1,19 @@
1
+ require_relative 'synt/cli'
2
+ require_relative 'synt/similar'
3
+
4
module Synt
  extend self

  # Runs one comparison driven by the command line.
  #
  # Parses the options with CLI.parse, asks Similar.compare for the
  # similarity value, and prints it. When a threshold option was given and
  # the similarity is below it, a second message is printed and the process
  # exits with status 1.
  #
  # @return [nil]
  def new
    # TODO: move to CLI? this is lib.
    opts = CLI.parse
    diff = Similar.compare opts

    # The value is a percentage, so the sign follows the number.
    puts "Inputs are #{diff}% similar."

    return unless opts.threshold? && diff < opts[:threshold].to_f

    puts "Similarity threshold of #{opts[:threshold]} hit."
    exit 1
  end
end
@@ -0,0 +1,34 @@
1
+ require 'slop'
2
+ require 'manowar'
3
+ require_relative 'version'
4
+
5
+ define 'Synt'
6
+
7
module Synt
  # Command-line option definitions for the synt executable, built on Slop.
  module CLI
    extend self

    # Parses the command line using the option definitions from #options.
    #
    # @return [Object] whatever Slop.parse returns; callers in this project
    #   read values from it with opts[:name] and opts.name?
    def parse
      Slop.parse help: true, &options
    end

    private

    # Builds the block handed to Slop.parse.
    #
    # The block calls `banner` and `on` with no receiver, so it relies on
    # Slop evaluating it in the parser's own context. It is kept a plain
    # proc (not a lambda) so it tolerates being evaluated that way.
    #
    # @return [Proc]
    def options
      proc do
        banner 'Usage: synt.rb [options]'

        # Long-only: the short flag `t` belongs to `to` below. Declaring
        # `t=` for both options made the two definitions collide.
        on 'tokenize=', 'file or string to tokenize'
        on 'c=', 'compare=', 'File or String to compare to something.'
        on 't=', 'to=', 'File or String to compare against.'
        on 'a=', 'algorithm=', 'Similarity algorithm [default=jaccard,tanimoto].'
        # Adjacent literals keep the help text on one line instead of
        # embedding a raw newline and source indentation in it.
        on 'n=', 'ngram=',
           'Specify what ngrams are generated and used for ' \
           'comparing token sequences. [default=1,2,4..5,10,...,all]'
        on 'd=', 'threshold=', 'Similarity threshold and exit with error.'

        on '-v', 'version', 'Print the version.' do
          puts Synt::VERSION
          exit 0
        end
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require 'ripper'
2
+ require 'manowar'
3
+
4
+ define 'Synt'
5
+
6
# Thin wrapper around Ripper's lexer so the rest of the library does not
# call Ripper directly.
module Synt::Parser
  extend self

  # Splits Ruby source text into lexer tokens.
  #
  # @param string [String] Ruby source text
  # @return [Array<String>] the result of Ripper.tokenize for +string+
  def parse(string)
    Ripper.tokenize(string)
  end
end
@@ -0,0 +1,88 @@
1
+ require 'manowar'
2
+ require_relative 'parser'
3
+ require_relative 'similar/jaccard'
4
+ require_relative 'similar/tanimoto'
5
+
6
+ define 'Synt'
7
+
8
module Synt
  # Computes a similarity percentage between two pieces of Ruby source by
  # tokenizing both, turning the tokens into n-grams and handing the two
  # n-gram lists to a set-similarity algorithm.
  module Similar
    extend self

    # Compares opts[:compare] against opts[:to].
    #
    # Prints a message and exits with status 1 when either input is missing
    # or when opts[:algorithm] names an algorithm that is not registered.
    #
    # @param opts [#[]] options; reads :compare, :to, :algorithm, :ngram
    # @return [Float] similarity rounded to two decimals
    def compare(opts)
      error 'no compare property provided' unless opts[:compare]
      error 'no to property provided' unless opts[:to]

      name = opts[:algorithm] || 'jaccard'
      algorithm = algorithms[name]
      # Without this check an unknown name surfaced as NoMethodError on nil.
      error "unknown algorithm: #{name}" unless algorithm

      n_start, n_end = ngram_range opts[:ngram]

      src_t = normalize_ripper_tokens Synt::Parser.parse(opts[:compare])
      cmp_t = normalize_ripper_tokens Synt::Parser.parse(opts[:to])

      a = generate_ngrams src_t, n_start, n_end
      b = generate_ngrams cmp_t, n_start, n_end

      algorithm.compare(a, b).to_f.round 2
    end

    private

    # Registry of algorithm name => module responding to compare(a, b).
    def algorithms
      { 'jaccard' => Jaccard, 'tanimoto' => Tanimoto }
    end

    # Builds every n-gram of +arr+ for each size in start..nend; each n-gram
    # is the concatenation of n consecutive tokens.
    #
    # @param arr [Array<String>] tokens
    # @param start [Integer, nil] smallest size; nil (or less than 1) means 1
    # @param nend [Integer, nil] largest size; nil means arr.length
    # @return [Array<String>] +arr+ itself when only unigrams are requested
    def generate_ngrams(arr, start, nend)
      nend = arr.length if nend.nil?
      start = 1 if start.nil? || start < 1

      if nend > arr.length
        puts 'ngram end value exceeds length- setting start/end to: 1.'
        # Actually apply the fallback the message announces.
        start = nend = 1
      end

      return arr if start == 1 && nend == 1 # short circuit

      # each_cons(size) yields every window of exactly `size` consecutive
      # tokens, and nothing when the list is shorter than `size`.
      (start..nend).flat_map do |size|
        arr.each_cons(size).map(&:join)
      end
    end

    # Translates the ngram option into a [start, end] pair.
    #
    # nil => [1, 1]; "a..b" => [a, b]; "all" => [nil, nil] (meaning every
    # size, see generate_ngrams); anything else => [n, n] via to_i.
    #
    # @param ngram [String, nil]
    # @return [Array(Integer, Integer), Array(nil, nil)]
    def ngram_range(ngram)
      return 1, 1 unless ngram
      return nil, nil if ngram == 'all'

      if ngram =~ /\.\./
        first, last = ngram.split('..')
        return first.to_i, last.to_i
      end

      size = ngram.to_i
      [size, size]
    end

    # Drops nil entries and tokens that consist only of whitespace.
    #
    # Anchored with \A/\z so the whole token must be whitespace; with ^/$ a
    # multi-line token containing one blank line was discarded as well.
    def normalize_ripper_tokens(tokens)
      tokens.select { |t| t && t !~ /\A\s*\z/ }
    end

    # Prints +msg+ and terminates the process with status 1.
    def error(msg)
      puts msg
      exit 1
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # See: http://en.wikipedia.org/wiki/Jaccard_index
2
+ require 'manowar'
3
+
4
+ define 'Synt::Similar'
5
+
6
module Synt
  module Similar
    # Jaccard index: size of the intersection over size of the union,
    # expressed as a percentage.
    module Jaccard
      extend self

      # @param src [Array] items of the first input (duplicates are ignored)
      # @param cmp [Array] items of the second input (duplicates are ignored)
      # @return [Float] similarity in the range 0.0..100.0
      def compare(src, cmp)
        a = src.uniq
        b = cmp.uniq
        union = a | b

        # Two empty inputs are identical; without this guard the division
        # below is 0.0 / 0.0 and yields NaN.
        return 100.0 if union.empty?

        (a & b).length.to_f / union.length * 100
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # This is an (apparently) generalized jaccard algorithm, with its unique uses.
2
+
3
+ # See: http://en.wikipedia.org/wiki/Talk%3AJaccard_index#Tanimoto_coefficient
4
+
5
+ # I understand that since data is ultimately
6
+ # reduced to unique lists of comparable, scalar data, the equation can
7
+ # be further generalized as a function of cardinal lengths.
8
+ # This may or may not be correct (in theory)!
9
+ # I am still unsure if this works for multisets.
10
+ # The output seems to be as expected, though.
11
+ require 'manowar'
12
+
13
+ define 'Synt::Similar'
14
+
15
module Synt
  module Similar
    # Tanimoto coefficient computed from cardinalities:
    # |A & B| / (|A| + |B| - |A & B|), expressed as a percentage.
    module Tanimoto
      extend self

      # @param src [Array] items of the first input (duplicates are ignored)
      # @param cmp [Array] items of the second input (duplicates are ignored)
      # @return [Float] similarity in the range 0.0..100.0
      def compare(src, cmp)
        a = src.uniq
        b = cmp.uniq
        shared = (a & b).length
        total = a.length + b.length - shared

        # Two empty inputs are identical; without this guard the division
        # below is 0.0 / 0.0 and yields NaN.
        return 100.0 if total.zero?

        shared.to_f / total * 100
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Synt
  # Version string of the gem. Frozen so the shared constant cannot be
  # mutated in place by code that reads it.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,15 @@
1
+ ## Synt.rb
2
+
3
+ This is the Ruby implementation of Synt.
4
+
5
+ ### Supported Languages
6
+
7
+ * Ruby
8
+
9
+ For more languages, see the top level [Synt](http://github.com/brentlintner/synt) project.
10
+
11
+ ### Requirements
12
+
13
+ * Ruby
14
+ * RubyGems
15
+ * Bundler
File without changes
@@ -0,0 +1,22 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'synt/version'
5
+
6
# Packaging metadata for the synt gem. The version comes from
# Synt::VERSION and the file list from the files tracked by git.
Gem::Specification.new do |gem|
  # Identity
  gem.name          = "synt"
  gem.version       = Synt::VERSION
  gem.authors       = ["Brent Lintner"]
  gem.email         = ["brent.lintner@gmail.com"]

  # Description
  gem.summary       = "Similar code analysis."
  gem.description   = "Calculate the percentage of difference between code."
  gem.homepage      = "https://github.com/brentlintner/synt"
  gem.license       = "ISC"

  # Contents: everything git tracks; test files are the subset living
  # under test/, spec/ or features/.
  tracked = `git ls-files`.split("\n")
  gem.files         = tracked
  gem.executables   = ["synt"]
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_development_dependency "bundler", "~> 1.7"
end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: synt
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Brent Lintner
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ description: Calculate the percentage of difference between code.
28
+ email:
29
+ - brent.lintner@gmail.com
30
+ executables:
31
+ - synt
32
+ extensions: []
33
+ extra_rdoc_files: []
34
+ files:
35
+ - Gemfile
36
+ - Gemfile.lock
37
+ - bin/synt
38
+ - lib/synt.rb
39
+ - lib/synt/cli.rb
40
+ - lib/synt/parser.rb
41
+ - lib/synt/similar.rb
42
+ - lib/synt/similar/jaccard.rb
43
+ - lib/synt/similar/tanimoto.rb
44
+ - lib/synt/version.rb
45
+ - readme.md
46
+ - spec/synt.spec
47
+ - synt.gemspec
48
+ homepage: https://github.com/brentlintner/synt
49
+ licenses:
50
+ - ISC
51
+ metadata: {}
52
+ post_install_message:
53
+ rdoc_options: []
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ requirements: []
67
+ rubyforge_project:
68
+ rubygems_version: 2.2.2
69
+ signing_key:
70
+ specification_version: 4
71
+ summary: Similar code analysis.
72
+ test_files:
73
+ - spec/synt.spec