RubyGems - plagiarism2 - Versions diffs - 0.0.3 → 0.0.4 - Mend

plagiarism2 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

checksums.yaml +4 -4
data/README.md +9 -2
data/lib/plagiarism/config.rb +4 -0
data/lib/plagiarism/strategries/bing.rb +3 -2
data/lib/plagiarism/strategries/duck.rb +1 -1
data/lib/plagiarism/strategries/engine.rb +1 -1
data/lib/plagiarism/strategries/google.rb +11 -4
data/lib/plagiarism/strategries/yahoo.rb +1 -1
data/lib/plagiarism/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 1af144aabf96e33a5ec59209dc16a798eb46f89b
-  data.tar.gz: 32a0aa5a9eb659c7c788eb10bb6cdfb0990771af
+  metadata.gz: edb2847694cb3d1c3f8d827b5e67ffbe77b8b9ed
+  data.tar.gz: 6689d076612acf8b6c803e098b5467dd19ba41f6
 SHA512:
-  metadata.gz: 172b035e164062011de6133cf9ef773cafc6e43ab8859ee7731cd9a642a99f27e33c25beff740b05b293faf7e6281ba03493f0e15254d4de2f390aa1383938fd
-  data.tar.gz: 8a5a00d48dc1f11198215626692c9af94bae13e83bf80904f3576d56ecd6dfafcad890cd7510eb4164ddba01c61f6fb21d2796875af8fb15e660783d525e0e66
+  metadata.gz: f2f82df5e758d90bd19ec427cd091f82626c47db8cf45974e9efdf56b6c088a2372213b303be53dcebb4164f06d7075b15845f9467013ee34ffc1c42db0ca33c
+  data.tar.gz: 6f331bfc6199816bf36ecdbd8f4413e605729fa89872f35322bd8ba7993a02e42726a492186d2f3c44b720d607def80a5ba72516938d5dc34b1c84f112d129c5

data/README.md CHANGED Viewed

@@ -27,12 +27,19 @@ Plagiarism.configure do |config|
 end
 ```
-In case of using bing engine, you have to set access key
++ Using bing engine, you have to set access key (you can get it from [here](https://datamarket.azure.com/dataset/bing/searchweb))
 ```ruby
 config.bing_key = xxx
 ```
++ Using google engine, you have to set two keys (you can get it from [here](https://developers.google.com/custom-search/json-api/v1/using_rest))
+```ruby
+config.google_key = xxx
+config.google_cx = xx
+```
 After that you can check the unique of content
 ```ruby
@@ -46,7 +53,7 @@ Plagiarism.unique? text
 Bug reports and pull requests are welcome on GitHub at https://github.com/MQuy/plagiarism. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
-> Disclaim: Yahoo and DuckDuckGo don't support api, therefore I have to crawl data, if you find any solution to fix, please help me.
+> Disclaim: Yahoo and DuckDuckGo don't support api, therefore plagiarism has to crawl data and they will mark plagiarism as spam as we request too much, if you find any better solution, please help me.
 ## License

data/lib/plagiarism/config.rb CHANGED Viewed

@@ -4,6 +4,10 @@ module Plagiarism
     attr_accessor :strategies
     attr_accessor :whitelists
     attr_accessor :bing_key
+    attr_accessor :google_key
+    attr_accessor :google_cx
   end
 end

data/lib/plagiarism/strategries/bing.rb CHANGED Viewed

@@ -10,8 +10,9 @@ module Plagiarism
         end
         def exists?(response)
-          JSON.parse(response)['d']['results'].all? do |r|
-            uri = URI.parse(r['Url'])
+          results = JSON.parse(response)['d']['results'] rescue []
+          results.all? do |r|
+            uri = URI.parse URI::encode(r['Url'])
             uri.host =~ whitelists_regex
           end
         end

data/lib/plagiarism/strategries/duck.rb CHANGED Viewed

@@ -13,7 +13,7 @@ module Plagiarism
           doc = Nokogiri::HTML response
           doc.css('.results_links_deep:not(.result--no-result)').all? do |row|
             href = row.at_css('.result__a').attributes['href'].value rescue ''
-            uri = URI.parse href
+            uri = URI.parse URI::encode(href)
             uri.host =~ whitelists_regex
           end
         end

data/lib/plagiarism/strategries/engine.rb CHANGED Viewed

@@ -13,7 +13,7 @@ module Plagiarism
         def valid_segments(ps, params)
           ps.segment.count do |sentence|
             typhoeus = fetch("\"#{sentence}\"", params)
-            exists?(typhoeus.response_body)
+            typhoeus.success? && exists?(typhoeus.response_body)
           end
         end

data/lib/plagiarism/strategries/google.rb CHANGED Viewed

@@ -1,18 +1,25 @@
 module Plagiarism
   module Strategies
     class Google < Engine
-      URL = 'https://ajax.googleapis.com/ajax/services/search/web'
+      URL = 'https://www.googleapis.com/customsearch/v1'
       VERSION = '1.0'
       class << self
         def fetch(content, params)
-          Typhoeus.get URL, params: params.merge(v: VERSION, q: content, rsz: :large)
+          Typhoeus.get URL, params: params.merge(
+            key: Config.google_key,
+            cx: Config.google_cx,
+            q: content,
+            fields: 'items(link)',
+            prettyPrint: false
+          )
         end
         def exists?(response)
-          JSON.parse(response)['responseData']['results'].all? do |r|
-            uri = URI.parse(r['unescapedUrl'])
+          results = JSON.parse(response)['items'] || []
+          results.all? do |r|
+            uri = URI.parse URI::encode(r['link'])
             uri.host =~ whitelists_regex
           end
         end

data/lib/plagiarism/strategries/yahoo.rb CHANGED Viewed

@@ -13,7 +13,7 @@ module Plagiarism
           doc = Nokogiri::HTML response
           doc.css('.searchCenterMiddle li').all? do |row|
             href = row.at_css('.compTitle div').content.strip rescue ''
-            uri = URI.parse(href =~ /^http/ ? href : 'https://' + href)
+            uri = URI.parse URI::encode(href =~ /^http/ ? href : 'https://' + href)
             uri.host =~ whitelists_regex
           end
         end

data/lib/plagiarism/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Plagiarism
-  VERSION = "0.0.3"
+  VERSION = "0.0.4"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: plagiarism2
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
 platform: ruby
 authors:
 - MQuy
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-04-01 00:00:00.000000000 Z
+date: 2016-04-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler