plagiarism2 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 253b52f49b6d8943468f5f58aabe4d98eb0d19b1
4
+ data.tar.gz: 6ed790050a1d5efe1715ad596e5f67047a2d82bc
5
+ SHA512:
6
+ metadata.gz: c7a9fcf7eb0cfdb33c1521be06a8fed18880c7e24fa4661a151fdc69a1700c45af00693657bbf83759cfc0015bba8369e8df6f525e616de299fe317ca6f30d2f
7
+ data.tar.gz: 9560ccc5cb705f9c6a07601f69d14fa6863f56962a62a0261fe87ea663d7a45136ec89be1d6aefd4d5bda770c0df2c41feb9b54dec050e9c69876feee9579bc1
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.swp
11
+ *.swo
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.3.0
4
+ before_install: gem install bundler -v 1.11.2
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in plagiarism.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 MQuy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,54 @@
1
+ # Plagiarism
2
+
3
+ Check whether your content is duplicated elsewhere on the web by searching Bing, Google, Yahoo and DuckDuckGo.
4
+
5
+ ## Installation
6
+
7
+ ```ruby
8
+ gem 'plagiarism2', require: 'plagiarism'
9
+ ```
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install plagiarism2
18
+
19
+ ## Usage
20
+
21
+ Add the following to your configuration:
22
+
23
+ ```ruby
24
+ Plagiarism.configure do |config|
25
+ config.strategies = [xxx] # => [:google, :bing, :duck, :yahoo]
26
+ config.whitelists = ['www.ring.md']
27
+ end
28
+ ```
29
+
30
+ If you use the Bing engine, you also have to set its access key:
31
+
32
+ ```ruby
33
+ config.bing_key = xxx
34
+ ```
35
+
36
+ After that, you can check the uniqueness of your content:
37
+
38
+ ```ruby
39
+ text = 'Latte user story paradigm affordances experiential innovate venture capital physical computing. Ship it agile actionable insight iterate thought leader pitch deck experiential iterate. Venture capital food-truck quantitative vs. qualitative SpaceTeam convergence agile.'
40
+ Plagiarism.unique? text
41
+ ```
42
+
43
+ `Plagiarism.unique?` returns true only when every configured strategy reports the content as unique.
44
+
45
+ ## Contributing
46
+
47
+ Bug reports and pull requests are welcome on GitHub at https://github.com/MQuy/plagiarism. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
48
+
49
+ > Disclaimer: Yahoo and DuckDuckGo do not offer a search API, so their result pages are crawled instead. If you know a better solution, please help by contributing it.
50
+
51
+ ## License
52
+
53
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
54
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "plagiarism"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/lib/plagiarism.rb ADDED
@@ -0,0 +1,19 @@
1
+ require 'typhoeus'
2
+ require 'pragmatic_segmenter'
3
+ require 'nokogiri'
4
+
5
+ require 'plagiarism/version'
6
+ require 'plagiarism/config'
7
+ require 'plagiarism/strategy'
8
+
9
# Public entry point of the gem: exposes the global configuration and the
# uniqueness check.
module Plagiarism
  extend self

  # Gives access to the global configuration.
  #
  # With a block, yields Config so settings can be assigned and returns the
  # block's value. Without a block, returns Config itself instead of raising
  # LocalJumpError.
  def configure
    return Config unless block_given?

    yield Config
  end

  # Checks +content+ by delegating to Strategy.unique?.
  #
  # content - the text to check.
  # params  - Hash handed through unchanged to Strategy.unique?
  #           (default: {}).
  #
  # Returns whatever Strategy.unique? returns.
  def unique?(content, params = {})
    Strategy.unique?(content, params)
  end
end
@@ -0,0 +1,9 @@
1
module Plagiarism
  # Global settings, stored on the module itself (hence `extend self`).
  module Config
    extend self

    # List of strategy names to run, e.g. [:google, :bing, :duck, :yahoo].
    # Stays nil until it is configured.
    attr_accessor :strategies

    # Access key used by the Bing strategy.
    attr_accessor :bing_key

    # Host patterns whose search hits do not count as duplicates,
    # e.g. ['www.ring.md'].
    attr_writer :whitelists

    # Returns the configured whitelist, or an empty list when none was set, so
    # callers can iterate over it without a nil check.
    def whitelists
      @whitelists || []
    end
  end
end
@@ -0,0 +1,23 @@
1
require 'json'
require 'uri'

module Plagiarism
  module Strategies
    # Strategy that queries the Bing web search endpoint in URL.
    # NOTE(review): this endpoint may no longer be served - confirm before
    # relying on it.
    class Bing < Engine
      URL = 'https://api.datamarket.azure.com/Bing/SearchWeb/v1/Web'

      class << self
        # Sends the search request for +content+.
        #
        # content - the text to search for.
        # params  - Hash of extra query parameters; '$format' and 'Query' are
        #           merged on top of it.
        #
        # Config.bing_key is passed through Typhoeus' userpwd option as
        # ":<key>". Returns the value of Typhoeus.get.
        def fetch(content, params)
          # The query travels as a single-quoted literal. Doubling embedded
          # quotes keeps a sentence with an apostrophe from ending it early.
          query = "'#{content.gsub("'", "''")}'"
          Typhoeus.get(URL, params: params.merge('$format' => :json, 'Query' => query), userpwd: ":#{Config.bing_key}")
        end

        # Returns true when every result in the JSON +response+ body points at
        # a whitelisted host (also true when there are no results).
        def exists?(response)
          pattern = whitelists_regex # build once instead of once per result
          JSON.parse(response)['d']['results'].all? do |result|
            whitelisted_host?(result['Url'], pattern)
          end
        end

        private

        # True when the host of +url+ matches +pattern+. A missing or
        # unparsable URL counts as not whitelisted.
        def whitelisted_host?(url, pattern)
          !(URI.parse(url.to_s).host =~ pattern).nil?
        rescue URI::InvalidURIError
          false
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
require 'uri'

module Plagiarism
  module Strategies
    # Strategy that reads the DuckDuckGo HTML result page at URL.
    class Duck < Engine
      URL = 'https://duckduckgo.com/html'

      class << self
        # Requests the result page for +content+.
        #
        # content - the text to search for, sent as the `q` parameter.
        # params  - Hash of extra query parameters.
        #
        # Returns the value of Typhoeus.get.
        def fetch(content, params)
          Typhoeus.get(URL, params: params.merge(q: content))
        end

        # Returns true when every '.results_links_deep' row in the HTML
        # +response+ links to a whitelisted host (also true when there are no
        # rows). A row without a usable '.result__a' href counts as not
        # whitelisted.
        def exists?(response)
          pattern = whitelists_regex # build once instead of once per row
          Nokogiri::HTML(response).css('.results_links_deep').all? do |row|
            link = row.at_css('.result__a')
            whitelisted_host?(link ? link['href'] : nil, pattern)
          end
        end

        private

        # True when the host of +href+ matches +pattern+. A missing or
        # unparsable href counts as not whitelisted.
        def whitelisted_host?(href, pattern)
          !(URI.parse(href.to_s).host =~ pattern).nil?
        rescue URI::InvalidURIError
          false
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
module Plagiarism
  module Strategies
    # Base class for search-engine strategies.
    #
    # Subclasses provide the class methods +fetch+ and +exists?+. An instance
    # pairs a text with request params; #unique? splits the text into
    # sentences and checks each one through the subclass.
    class Engine
      # Minimum share of sentences that must pass +exists?+ for the whole
      # content to be reported as unique.
      THRESHOLD = 0.8

      attr_accessor :content, :params

      class << self
        # Performs the search request. Must be overridden by subclasses.
        #
        # Raises RuntimeError when not overridden.
        def fetch(content, params)
          raise "#{name}.fetch is not implemented"
        end

        # Counts the sentences of +ps+ for which +exists?+ is truthy.
        #
        # ps     - object whose #segment returns the list of sentences.
        # params - Hash forwarded to +fetch+.
        #
        # Each sentence is wrapped in double quotes before being fetched.
        def valid_segments(ps, params)
          ps.segment.count do |sentence|
            reply = fetch("\"#{sentence}\"", params)
            exists?(reply.response_body)
          end
        end

        # Judges one response body. Must be overridden by subclasses.
        #
        # Raises RuntimeError when not overridden.
        def exists?(response)
          raise "#{name}.exists? is not implemented"
        end

        # Builds one Regexp matching any of the configured whitelist patterns.
        # With no whitelist configured the result matches nothing.
        def whitelists_regex
          Regexp.union(Array(Config.whitelists).map { |w| Regexp.new(w) })
        end
      end

      # c - the text to check.
      # p - Hash of extra request parameters.
      def initialize(c, p)
        @content, @params = c, p
      end

      # Returns true when at least THRESHOLD of the sentences pass the
      # subclass check. Content without any sentence is reported as unique.
      def unique?
        ps = PragmaticSegmenter::Segmenter.new(text: content)
        total = ps.segment.size
        return true if total.zero? # nothing to check, and avoids 0 / 0

        # Float division: Integer#/ would truncate every ratio below 1 to 0.
        self.class.valid_segments(ps, params).to_f / total >= THRESHOLD
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
require 'json'
require 'uri'

module Plagiarism
  module Strategies
    # Strategy that queries the Google web search endpoint in URL.
    # NOTE(review): this endpoint may no longer be served - confirm before
    # relying on it.
    class Google < Engine
      URL = 'https://ajax.googleapis.com/ajax/services/search/web'
      VERSION = '1.0'

      class << self
        # Sends the search request for +content+.
        #
        # content - the text to search for, sent as the `q` parameter.
        # params  - Hash of extra query parameters; `v`, `q` and `rsz` are
        #           merged on top of it.
        #
        # Returns the value of Typhoeus.get.
        def fetch(content, params)
          Typhoeus.get URL, params: params.merge(v: VERSION, q: content, rsz: :large)
        end

        # Returns true when every result in the JSON +response+ body points at
        # a whitelisted host (also true when there are no results).
        def exists?(response)
          pattern = whitelists_regex # build once instead of once per result
          JSON.parse(response)['responseData']['results'].all? do |result|
            whitelisted_host?(result['unescapedUrl'], pattern)
          end
        end

        private

        # True when the host of +url+ matches +pattern+. A missing or
        # unparsable URL counts as not whitelisted.
        def whitelisted_host?(url, pattern)
          !(URI.parse(url.to_s).host =~ pattern).nil?
        rescue URI::InvalidURIError
          false
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
require 'uri'

module Plagiarism
  module Strategies
    # Strategy that reads the Yahoo HTML result page at URL.
    class Yahoo < Engine
      URL = 'https://search.yahoo.com/search'

      class << self
        # Requests the result page for +content+.
        #
        # content - the text to search for, sent as the `p` parameter.
        # params  - Hash of extra query parameters.
        #
        # Returns the value of Typhoeus.get.
        def fetch(content, params)
          Typhoeus.get(URL, params: params.merge(p: content))
        end

        # Returns true when every '.searchCenterMiddle li' row in the HTML
        # +response+ shows a whitelisted host (also true when there are no
        # rows). The address is read from the text of '.compTitle div'; a row
        # without that text counts as not whitelisted.
        def exists?(response)
          pattern = whitelists_regex # build once instead of once per row
          Nokogiri::HTML(response).css('.searchCenterMiddle li').all? do |row|
            title = row.at_css('.compTitle div')
            href = title ? title.content.strip : ''
            next false if href.empty?

            # The text may come without a scheme; add one so that URI can
            # extract the host.
            href = "https://#{href}" unless href =~ /\Ahttp/
            whitelisted_host?(href, pattern)
          end
        end

        private

        # True when the host of +href+ matches +pattern+. An unparsable
        # address counts as not whitelisted.
        def whitelisted_host?(href, pattern)
          !(URI.parse(href).host =~ pattern).nil?
        rescue URI::InvalidURIError
          false
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'plagiarism/strategries/engine'
2
+ require 'plagiarism/strategries/google'
3
+ require 'plagiarism/strategries/bing'
4
+ require 'plagiarism/strategries/duck'
5
+ require 'plagiarism/strategries/yahoo'
6
+
7
module Plagiarism
  # Maps configured strategy names to classes under Strategies and runs them.
  module Strategy
    extend self

    # Looks up the strategy class for +name+.
    #
    # name - Symbol or String such as :google or 'duck' (default: :google).
    #
    # The name is camel-cased with plain Ruby, so no ActiveSupport
    # (String#classify) is needed. Only constants defined directly in
    # Strategies are considered.
    #
    # Raises NameError when no such strategy class exists.
    def get(name = :google)
      class_name = name.to_s.split('_').map(&:capitalize).join
      Strategies.const_get(class_name, false)
    end

    # Returns true only when every strategy listed in Config.strategies
    # reports +content+ as unique.
    #
    # content - the text to check.
    # params  - Hash handed to each strategy instance.
    def unique?(content, params)
      Config.strategies.all? do |strategy|
        get(strategy).new(content, params).unique?
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Plagiarism
  # Version of the gem; read by the gemspec. Frozen so the shared constant
  # cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,29 @@
1
# coding: utf-8
# Gem specification for plagiarism2.

# Put lib/ on the load path so the version file can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'plagiarism/version'

Gem::Specification.new do |spec|
  spec.name          = "plagiarism2"
  spec.version       = Plagiarism::VERSION
  spec.authors       = ["MQuy"]
  spec.email         = ["sugiacupit@gmail.com"]

  spec.summary       = %q{Check the unique content on internet.}
  spec.description   = %q{Check the unique content on internet from bing, google, yahoo and duckduckgo.}
  spec.homepage      = "https://github.com/MQuy/plagiarism"
  spec.license       = "MIT"

  # `git ls-files` lists paths relative to the current directory, so run it
  # from the directory of this gemspec rather than wherever the caller is.
  # Test directories are left out of the packaged gem.
  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.11"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"

  spec.add_dependency 'typhoeus', '~> 1.0'
  spec.add_dependency 'pragmatic_segmenter', '~> 0.3'
  spec.add_dependency 'nokogiri', '~> 1.6'
end
metadata ADDED
@@ -0,0 +1,147 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: plagiarism2
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - MQuy
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-04-01 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.11'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.11'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: typhoeus
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pragmatic_segmenter
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.3'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.3'
83
+ - !ruby/object:Gem::Dependency
84
+ name: nokogiri
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.6'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1.6'
97
+ description: Check the unique content on internet from bing, google, yahoo and duckduckgo.
98
+ email:
99
+ - sugiacupit@gmail.com
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - ".gitignore"
105
+ - ".rspec"
106
+ - ".travis.yml"
107
+ - Gemfile
108
+ - LICENSE.txt
109
+ - README.md
110
+ - Rakefile
111
+ - bin/console
112
+ - bin/setup
113
+ - lib/plagiarism.rb
114
+ - lib/plagiarism/config.rb
115
+ - lib/plagiarism/strategries/bing.rb
116
+ - lib/plagiarism/strategries/duck.rb
117
+ - lib/plagiarism/strategries/engine.rb
118
+ - lib/plagiarism/strategries/google.rb
119
+ - lib/plagiarism/strategries/yahoo.rb
120
+ - lib/plagiarism/strategy.rb
121
+ - lib/plagiarism/version.rb
122
+ - plagiarism.gemspec
123
+ homepage: https://github.com/MQuy/plagiarism
124
+ licenses:
125
+ - MIT
126
+ metadata: {}
127
+ post_install_message:
128
+ rdoc_options: []
129
+ require_paths:
130
+ - lib
131
+ required_ruby_version: !ruby/object:Gem::Requirement
132
+ requirements:
133
+ - - ">="
134
+ - !ruby/object:Gem::Version
135
+ version: '0'
136
+ required_rubygems_version: !ruby/object:Gem::Requirement
137
+ requirements:
138
+ - - ">="
139
+ - !ruby/object:Gem::Version
140
+ version: '0'
141
+ requirements: []
142
+ rubyforge_project:
143
+ rubygems_version: 2.5.1
144
+ signing_key:
145
+ specification_version: 4
146
+ summary: Check the unique content on internet.
147
+ test_files: []