synt 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 80be7f4e38ed147f4dcc65f317e1b77c385ee952
4
+ data.tar.gz: 84158235542049a3ad23b9eff0a7f963dc06a6a7
5
+ SHA512:
6
+ metadata.gz: 7218180ffe4ee3800c5baa92197a72ad4651242116680076af07476bf10aa520991d03886a32bb0c08362d30c6e483e047fb04394b2304234afe9f0a2fd134ea
7
+ data.tar.gz: 087908cb71c757e8f43176049f6e055affd56f79863b3ac8171deffae1dc566b7f4c163bd819ae49cba8c6656dbd602c2953034c4f6514558ab9684a080e1eda
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'slop', '3.6.0'
4
+ gem 'manowar', '0.0.1'
5
+ gem 'rubocop', '0.25.0', require: false
6
+ gem 'simplecov', '0.9.0', require: false
7
+ gem 'simplecov-lcov', require: false
@@ -0,0 +1,35 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ ast (2.0.0)
5
+ docile (1.1.5)
6
+ manowar (0.0.1)
7
+ multi_json (1.10.1)
8
+ parser (2.2.0.pre.4)
9
+ ast (>= 1.1, < 3.0)
10
+ slop (~> 3.4, >= 3.4.5)
11
+ powerpack (0.0.9)
12
+ rainbow (2.0.0)
13
+ rubocop (0.25.0)
14
+ parser (>= 2.2.0.pre.4, < 3.0)
15
+ powerpack (~> 0.0.6)
16
+ rainbow (>= 1.99.1, < 3.0)
17
+ ruby-progressbar (~> 1.4)
18
+ ruby-progressbar (1.6.0)
19
+ simplecov (0.9.0)
20
+ docile (~> 1.1.0)
21
+ multi_json
22
+ simplecov-html (~> 0.8.0)
23
+ simplecov-html (0.8.0)
24
+ simplecov-lcov (0.3.0)
25
+ slop (3.6.0)
26
+
27
+ PLATFORMS
28
+ ruby
29
+
30
+ DEPENDENCIES
31
+ manowar (= 0.0.1)
32
+ rubocop (= 0.25.0)
33
+ simplecov (= 0.9.0)
34
+ simplecov-lcov
35
+ slop (= 3.6.0)
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

# Executable entry point for synt.
#
# When the TEST_COV environment variable is set, SimpleCov is loaded with the
# LCOV formatter and a "/fixtures/" filter before the library is required.
if ENV.key?('TEST_COV')
  require 'simplecov'
  require 'simplecov-lcov'
  SimpleCov.formatter = SimpleCov::Formatter::LcovFormatter
  SimpleCov.start { add_filter '/fixtures/' }
end

require_relative '../lib/synt'

# Hand control to the library, which parses options and prints the result.
Synt.new
@@ -0,0 +1,19 @@
1
+ require_relative 'synt/cli'
2
+ require_relative 'synt/similar'
3
+
4
module Synt
  extend self

  # Runs one comparison: parses the command-line options, computes the
  # similarity of the two inputs and prints it as a percentage.
  #
  # When a threshold option is present and the similarity is below it, a
  # second message is printed and the process exits with status 1.
  #
  # Returns nil when no threshold is hit.
  def new
    # TODO: move to CLI? this is lib.
    opts = CLI.parse
    diff = Similar.compare opts

    # The percent sign follows the number ("42.5%"), not the other way round.
    puts "Inputs are #{diff}% similar."

    return unless opts.threshold? && diff < opts[:threshold].to_f

    puts "Similarity threshold of #{opts[:threshold]} hit."
    exit 1
  end
end
@@ -0,0 +1,34 @@
1
+ require 'slop'
2
+ require 'manowar'
3
+ require_relative 'version'
4
+
5
+ define 'Synt'
6
+
7
module Synt
  # Command-line option definitions for the synt executable.
  module CLI
    extend self

    # Parses the command line with Slop (help enabled) using the option
    # definitions from #options and returns whatever Slop.parse returns.
    def parse
      Slop.parse help: true, &options
    end

    private

    # Builds the zero-arity block of option definitions handed to Slop.parse.
    # It must stay a plain proc (not a lambda): it calls `banner` and `on`
    # with no explicit receiver, so it is meant to be evaluated against the
    # option parser rather than called with arguments.
    def options
      proc do
        banner 'Usage: synt.rb [options]'

        # --tokenize is long-only: it used to declare the short flag "t" as
        # well, which collided with --to below.
        on 'tokenize=', 'file or string to tokenize'
        on 'c=', 'compare=', 'File or String to compare to something.'
        on 't=', 'to=', 'File or String to compare against.'
        on 'a=', 'algorithm=', 'Similarity algorithm [default=jaccard,tanimoto].'
        # Adjacent literals keep the help text on one logical line instead of
        # embedding a raw newline and source indentation in the description.
        on 'n=', 'ngram=', 'Specify what ngrams are generated and used for ' \
                           'comparing token sequences. [default=1,2,4..5,10,...,all]'
        on 'd=', 'threshold=', 'Similarity threshold and exit with error.'

        on '-v', 'version', 'Print the version.' do
          puts Synt::VERSION
          exit 0
        end
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require 'ripper'
2
+ require 'manowar'
3
+
4
+ define 'Synt'
5
+
6
module Synt
  # Thin wrapper around Ripper's tokenizer.
  module Parser
    extend self

    # Splits +string+ into Ripper's token strings and returns them as an Array.
    def parse(string)
      Ripper.tokenize(string)
    end
  end
end
@@ -0,0 +1,88 @@
1
+ require 'manowar'
2
+ require_relative 'parser'
3
+ require_relative 'similar/jaccard'
4
+ require_relative 'similar/tanimoto'
5
+
6
+ define 'Synt'
7
+
8
module Synt
  # Computes how similar two pieces of source text are, as a percentage.
  module Similar
    extend self

    # Compares opts[:compare] against opts[:to].
    #
    # opts - option container read with #[]:
    #        :compare   - first input (required)
    #        :to        - second input (required)
    #        :algorithm - 'jaccard' (default) or 'tanimoto'
    #        :ngram     - nil, a number, a "first..last" range, or 'all'
    #
    # Both inputs are passed as-is to Synt::Parser.parse; whitespace-only
    # tokens are dropped, n-grams are generated and the chosen algorithm
    # scores the two n-gram lists.
    #
    # Returns the similarity as a Float rounded to two decimals. Prints a
    # message and exits with status 1 when a required option is missing or
    # the algorithm name is unknown.
    def compare opts
      error 'no compare property provided' unless opts[:compare]
      error 'no to property provided' unless opts[:to]

      algorithm_name = opts[:algorithm] || 'jaccard'
      algorithm = algorithms[algorithm_name]
      # Without this check an unknown name surfaces as NoMethodError on nil.
      error "unknown algorithm: #{algorithm_name}" unless algorithm

      n_start, n_end = ngram_range opts[:ngram]

      src_t = normalize_ripper_tokens Synt::Parser.parse(opts[:compare])
      cmp_t = normalize_ripper_tokens Synt::Parser.parse(opts[:to])

      a = generate_ngrams src_t, n_start, n_end
      b = generate_ngrams cmp_t, n_start, n_end

      algorithm.compare(a, b).to_f.round 2
    end

    private

    # Maps the algorithm names accepted on the command line to their modules.
    def algorithms
      { 'jaccard' => Jaccard, 'tanimoto' => Tanimoto }
    end

    # Builds every contiguous n-gram of arr for each size in start..nend,
    # each joined into a single String.
    #
    # start - smallest n-gram size (nil means 1; values below 1 become 1)
    # nend  - largest n-gram size (nil means arr.length)
    #
    # When nend exceeds arr.length a notice is printed and both bounds fall
    # back to 1, as the notice says. With both bounds at 1 the token array
    # itself is returned untouched.
    def generate_ngrams arr, start, nend
      nend = arr.length if nend.nil?
      start = 1 if start.nil? || start < 1 # each_cons rejects sizes below 1

      if nend > arr.length
        puts 'ngram end value exceeds length- setting start/end to: 1.'
        start = nend = 1
      end

      return arr if start == 1 && nend == 1 # short circuit

      (start..nend).flat_map { |size| arr.each_cons(size).map(&:join) }
    end

    # Translates the --ngram option into a [first, last] pair of sizes.
    #
    #   nil     => [1, 1]
    #   'all'   => [nil, nil] (generate_ngrams then uses 1..arr.length)
    #   '2..4'  => [2, 4]
    #   '3'     => [3, 3]
    def ngram_range ngram
      return 1, 1 unless ngram
      return nil, nil if ngram == 'all'

      if ngram.include? '..'
        first, last = ngram.split '..'
        return first.to_i, last.to_i
      end

      n = ngram.to_i
      [n, n]
    end

    # Drops nil and whitespace-only tokens. \A and \z anchor the whole token,
    # so a multi-line token that merely contains a blank line is kept.
    def normalize_ripper_tokens tokens
      tokens.select { |t| t && t !~ /\A\s*\z/ }
    end

    # Prints msg and terminates the process with exit status 1.
    def error msg
      puts msg
      exit 1
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # See: http://en.wikipedia.org/wiki/Jaccard_index
2
+ require 'manowar'
3
+
4
+ define 'Synt::Similar'
5
+
6
module Synt
  module Similar
    # Jaccard index of two token lists, expressed as a percentage.
    module Jaccard
      extend self

      # src, cmp - Arrays of comparable items (duplicates are ignored).
      #
      # Returns |src ∩ cmp| / |src ∪ cmp| * 100 as a Float. Two empty inputs
      # are reported as 100.0 (identical) instead of the NaN that 0.0 / 0.0
      # would produce.
      def compare src, cmp
        a = src.uniq
        b = cmp.uniq
        union = a | b
        return 100.0 if union.empty?

        (a & b).length.to_f / union.length * 100
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # This is an (apparently) generalized jaccard algorithm, with its unique uses.
2
+
3
+ # See: http://en.wikipedia.org/wiki/Talk%3AJaccard_index#Tanimoto_coefficient
4
+
5
+ # I understand that since data is ultimately
6
+ # reduced to unique lists of comparable, scalar data, the equation can
7
+ # be further generalized as a function of cardinal lengths.
8
+ # This may or may not be correct (in theory)!
9
+ # I am still unsure if this works for multisets.
10
+ # The output seems to be as expected, though.
11
+ require 'manowar'
12
+
13
+ define 'Synt::Similar'
14
+
15
module Synt
  module Similar
    # Tanimoto-style similarity of two token lists, expressed as a percentage
    # and computed purely from the sizes of the de-duplicated lists.
    module Tanimoto
      extend self

      # src, cmp - Arrays of comparable items (duplicates are ignored).
      #
      # Returns shared / (|src| + |cmp| - shared) * 100 as a Float, where
      # shared is the number of items present in both lists. Two empty
      # inputs are reported as 100.0 instead of the NaN that 0.0 / 0.0
      # would produce.
      def compare src, cmp
        a = src.uniq
        b = cmp.uniq
        shared = (a & b).length
        total = a.length + b.length - shared
        return 100.0 if total.zero?

        shared.to_f / total * 100
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Synt
  # Gem version string, read by the gemspec and printed by the CLI's version
  # option. Frozen so the shared constant cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,15 @@
1
+ ## Synt.rb
2
+
3
+ This is the Ruby implementation of Synt.
4
+
5
+ ### Supported Languages
6
+
7
+ * Ruby
8
+
9
+ For more languages, see the top level [Synt](http://github.com/brentlintner/synt) project.
10
+
11
+ ### Requirements
12
+
13
+ * Ruby
14
+ * RubyGems
15
+ * Bundler
File without changes
@@ -0,0 +1,22 @@
1
# coding: utf-8
# Gem specification for synt. Puts lib/ on the load path so the version
# constant can be read from synt/version.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'synt/version'

Gem::Specification.new do |spec|
  spec.name          = "synt"
  spec.version       = Synt::VERSION
  spec.authors       = ["Brent Lintner"]
  spec.email         = ["brent.lintner@gmail.com"]
  spec.summary       = "Similar code analysis."
  spec.description   = "Calculate the percentage of difference between code."
  spec.homepage      = "https://github.com/brentlintner/synt"
  spec.license       = "ISC"

  # The packaged file list is whatever git tracks at build time.
  spec.files         = `git ls-files`.split("\n")
  spec.executables   = ["synt"]
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # The library requires 'slop' and 'manowar' when it is loaded, so they must
  # be installed together with the gem. Constraints follow the versions
  # pinned in the Gemfile (slop 3.6.0, manowar 0.0.1).
  spec.add_runtime_dependency "slop", "~> 3.6"
  spec.add_runtime_dependency "manowar", "~> 0.0.1"

  spec.add_development_dependency "bundler", "~> 1.7"
end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: synt
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Brent Lintner
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ description: Calculate the percentage of difference between code.
28
+ email:
29
+ - brent.lintner@gmail.com
30
+ executables:
31
+ - synt
32
+ extensions: []
33
+ extra_rdoc_files: []
34
+ files:
35
+ - Gemfile
36
+ - Gemfile.lock
37
+ - bin/synt
38
+ - lib/synt.rb
39
+ - lib/synt/cli.rb
40
+ - lib/synt/parser.rb
41
+ - lib/synt/similar.rb
42
+ - lib/synt/similar/jaccard.rb
43
+ - lib/synt/similar/tanimoto.rb
44
+ - lib/synt/version.rb
45
+ - readme.md
46
+ - spec/synt.spec
47
+ - synt.gemspec
48
+ homepage: https://github.com/brentlintner/synt
49
+ licenses:
50
+ - ISC
51
+ metadata: {}
52
+ post_install_message:
53
+ rdoc_options: []
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ requirements: []
67
+ rubyforge_project:
68
+ rubygems_version: 2.2.2
69
+ signing_key:
70
+ specification_version: 4
71
+ summary: Similar code analysis.
72
+ test_files:
73
+ - spec/synt.spec