plagiarism2 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +9 -2
- data/lib/plagiarism/config.rb +4 -0
- data/lib/plagiarism/strategries/bing.rb +3 -2
- data/lib/plagiarism/strategries/duck.rb +1 -1
- data/lib/plagiarism/strategries/engine.rb +1 -1
- data/lib/plagiarism/strategries/google.rb +11 -4
- data/lib/plagiarism/strategries/yahoo.rb +1 -1
- data/lib/plagiarism/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: edb2847694cb3d1c3f8d827b5e67ffbe77b8b9ed
|
4
|
+
data.tar.gz: 6689d076612acf8b6c803e098b5467dd19ba41f6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f2f82df5e758d90bd19ec427cd091f82626c47db8cf45974e9efdf56b6c088a2372213b303be53dcebb4164f06d7075b15845f9467013ee34ffc1c42db0ca33c
|
7
|
+
data.tar.gz: 6f331bfc6199816bf36ecdbd8f4413e605729fa89872f35322bd8ba7993a02e42726a492186d2f3c44b720d607def80a5ba72516938d5dc34b1c84f112d129c5
|
data/README.md
CHANGED
@@ -27,12 +27,19 @@ Plagiarism.configure do |config|
|
|
27
27
|
end
|
28
28
|
```
|
29
29
|
|
30
|
-
|
30
|
+
+ To use the Bing engine, you have to set an access key (you can get one [here](https://datamarket.azure.com/dataset/bing/searchweb))
|
31
31
|
|
32
32
|
```ruby
|
33
33
|
config.bing_key = xxx
|
34
34
|
```
|
35
35
|
|
36
|
+
+ To use the Google engine, you have to set two keys (you can get them [here](https://developers.google.com/custom-search/json-api/v1/using_rest))
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
config.google_key = xxx
|
40
|
+
config.google_cx = xx
|
41
|
+
```
|
42
|
+
|
36
43
|
After that, you can check the uniqueness of the content
|
37
44
|
|
38
45
|
```ruby
|
@@ -46,7 +53,7 @@ Plagiarism.unique? text
|
|
46
53
|
|
47
54
|
Bug reports and pull requests are welcome on GitHub at https://github.com/MQuy/plagiarism. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
48
55
|
|
49
|
-
> Disclaim: Yahoo and DuckDuckGo don't support api, therefore
|
56
|
+
> Disclaimer: Yahoo and DuckDuckGo don't provide an API, so plagiarism has to crawl their result pages, and they will flag plagiarism as spam if we send too many requests. If you find a better solution, please help me.
|
50
57
|
|
51
58
|
## License
|
52
59
|
|
data/lib/plagiarism/config.rb
CHANGED
@@ -10,8 +10,9 @@ module Plagiarism
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def exists?(response)
|
13
|
-
JSON.parse(response)['d']['results']
|
14
|
-
|
13
|
+
results = JSON.parse(response)['d']['results'] rescue []
|
14
|
+
results.all? do |r|
|
15
|
+
uri = URI.parse URI::encode(r['Url'])
|
15
16
|
uri.host =~ whitelists_regex
|
16
17
|
end
|
17
18
|
end
|
@@ -13,7 +13,7 @@ module Plagiarism
|
|
13
13
|
doc = Nokogiri::HTML response
|
14
14
|
doc.css('.results_links_deep:not(.result--no-result)').all? do |row|
|
15
15
|
href = row.at_css('.result__a').attributes['href'].value rescue ''
|
16
|
-
uri = URI.parse href
|
16
|
+
uri = URI.parse URI::encode(href)
|
17
17
|
uri.host =~ whitelists_regex
|
18
18
|
end
|
19
19
|
end
|
@@ -1,18 +1,25 @@
|
|
1
1
|
module Plagiarism
|
2
2
|
module Strategies
|
3
3
|
class Google < Engine
|
4
|
-
URL = 'https://
|
4
|
+
URL = 'https://www.googleapis.com/customsearch/v1'
|
5
5
|
VERSION = '1.0'
|
6
6
|
|
7
7
|
class << self
|
8
8
|
|
9
9
|
def fetch(content, params)
|
10
|
-
Typhoeus.get URL, params: params.merge(
|
10
|
+
Typhoeus.get URL, params: params.merge(
|
11
|
+
key: Config.google_key,
|
12
|
+
cx: Config.google_cx,
|
13
|
+
q: content,
|
14
|
+
fields: 'items(link)',
|
15
|
+
prettyPrint: false
|
16
|
+
)
|
11
17
|
end
|
12
18
|
|
13
19
|
def exists?(response)
|
14
|
-
JSON.parse(response)['
|
15
|
-
|
20
|
+
results = JSON.parse(response)['items'] || []
|
21
|
+
results.all? do |r|
|
22
|
+
uri = URI.parse URI::encode(r['link'])
|
16
23
|
uri.host =~ whitelists_regex
|
17
24
|
end
|
18
25
|
end
|
@@ -13,7 +13,7 @@ module Plagiarism
|
|
13
13
|
doc = Nokogiri::HTML response
|
14
14
|
doc.css('.searchCenterMiddle li').all? do |row|
|
15
15
|
href = row.at_css('.compTitle div').content.strip rescue ''
|
16
|
-
uri = URI.parse(href =~ /^http/ ? href : 'https://' + href)
|
16
|
+
uri = URI.parse URI::encode(href =~ /^http/ ? href : 'https://' + href)
|
17
17
|
uri.host =~ whitelists_regex
|
18
18
|
end
|
19
19
|
end
|
data/lib/plagiarism/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: plagiarism2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MQuy
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-04-
|
11
|
+
date: 2016-04-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|