blekko-search 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,50 @@
1
+ # gitignore tips and tricks from https://github.com/github/gitignore
2
+
3
+ # Ignore bundler config
4
+ /.bundle
5
+
6
+ # Ignore the default SQLite database.
7
+ /db/*.sqlite3
8
+
9
+ # Ignore all logfiles and tempfiles.
10
+ /log/*.log
11
+ /tmp
12
+
13
+ # https://github.com/github/gitignore/blob/master/Ruby.gitignore
14
+ *.gem
15
+ *.rbc
16
+ .bundle
17
+ .config
18
+ coverage
19
+ InstalledFiles
20
+ lib/bundler/man
21
+ pkg
22
+ rdoc
23
+ spec/reports
24
+ test/tmp
25
+ test/version_tmp
26
+ tmp
27
+
28
+ # YARD artifacts
29
+ .yardoc
30
+ _yardoc
31
+ doc/
32
+
33
+
34
+ .DS_Store
35
+ .AppleDouble
36
+ .LSOverride
37
+ Icon
38
+
39
+ # https://github.com/github/gitignore/blob/master/Global/OSX.gitignore
40
+ # Thumbnails
41
+ ._*
42
+
43
+ # Files that might appear on external disk
44
+ .Spotlight-V100
45
+ .Trashes
46
+
47
+ # https://github.com/github/gitignore/blob/master/Global/TextMate.gitignore
48
+ *.tmproj
49
+ *.tmproject
50
+ tmtags
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in blekko-search.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Sean Devine
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,141 @@
1
+ # Blekko
2
+
3
+ Search the Internet (or parts of the Internet!) with ease. This gem is powered by [blekko.com](http://www.blekko.com).
4
+
5
+ This gem is based on work done on [earmarkd.com](http://www.earmarkd.com) during [RailsRumble 2012](http://railsrumble.com).
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's ``Gemfile``:
10
+
11
+ gem 'blekko-search'
12
+
13
+ And then execute:
14
+
15
+ $ bundle install
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install blekko-search
20
+
21
+ ## Usage
22
+
23
+ Use this gem for quick searches or to manage groups of slashtags and more complicated search goals.
24
+
25
+ ### Searching
26
+
27
+ While blekko asks that you [request an API key](http://help.blekko.com/index.php/does-blekko-have-an-api/), you don't need one to start.
28
+
29
+ blekko = Blekko.new
30
+
31
+ While blekko is known for their slashtag based searching, you can search without one:
32
+
33
+ results = blekko.search("chicago")
34
+
35
+ The search method returns an array of ``SearchResult`` instances that expose all blekko attributes (and a few more).
36
+
37
+ If you want to search using [blekko slashtags](http://blekko.com/tag/show), include a ``:slashtags`` argument in your search.
38
+
39
+ results = blekko.search("chicago", slashtags: "/sports")
40
+
41
+ Blekko allows for a maximum of 100 results per search, but if you'd like more, you can set the ``:total_size`` argument.
42
+
43
+ results = blekko.search("something funny", total_size: 1000)
44
+
45
+ By default, the searches will be made 100 results at a time, but you can reduce the page size using the ``:page_size`` argument too.
46
+
47
+ Blekko asks that users of its API limit searches to one per second. This gem doesn't include that, but you may want to implement that feature (or send a pull request) especially if you are multithreading the search.
48
+
49
+ #### Results
50
+ Each result includes the attributes that blekko provides, plus a couple more:
51
+
52
+ - ``n_group`` (alias: ``sequence``) = The number of the search result in the overall results from blekko.
53
+ - ``url`` = The url of the result.
54
+ - ``display_url`` = A url formatted for display.
55
+ - ``rss`` = The rss of the result, if available.
56
+ - ``rss_title`` = The title of the rss of the result, if available.
57
+ - ``short_host_url`` = The url of the host of the result.
58
+ - ``short_host`` = The url of the host formatted for display.
59
+ - ``snippet`` (alias: ``abstract``) = A description of the result formatted for display including html
60
+ - ``toplevel`` = Is the result a top level domain?
61
+ - ``url_title`` = The title of the url formatted for display including html
62
+ - ``date`` = The date of the result's document, if available. Parsed from ``doc_date_iso``.
63
+ - ``address`` = The address of the result, if available (not too often).
64
+ - ``geocluster`` = The geocluster of the result, if available.
65
+ - ``lat`` = The lat of the result, if available.
66
+ - ``lon`` = The lon of the result, if available.
67
+ - ``phone`` = The phone number of the result, if available.
68
+ - ``zip`` = The zip of the result, if available.
69
+ - ``is_robots_banned`` = Does this result ban robots? Almost never available.
70
+
71
+ ### Slashtags
72
+ From ["What is a slashtag?" on blekko.com](http://help.blekko.com/index.php/what-is-a-slashtag/):
73
+ >A slashtag is an easy-to-create custom search engine. It is a tool used to filter search results and helps you to search only high quality sites, without spam or content farms. Slashtags contain a list of websites and when you search with a slashtag, you only search those sites. Some slashtags perform functions such as ordering the results by date.
74
+
75
+ #### View
76
+ You can view the urls for any slashtag that is public, or for any private slashtag that you have access to if you are logged in.
77
+
78
+ slashtag = blekko.slashtag("/sports")
79
+
80
+ By default, the slashtag will load the urls from blekko if the slashtag exists.
81
+
82
+ urls = blekko.slashtag("/sports").urls
83
+
84
+ Prevent the slashtag from loading its ``urls`` automatically by setting the ``:eager_load`` argument to ``false``.
85
+
86
+ slashtag = blekko.slashtag("/sports", eager_load: false)
87
+
88
+ Access the urls currently saved on blekko.com at any point:
89
+
90
+ slashtag.saved_urls
91
+
92
+ #### Create & Edit
93
+ To create or edit a slashtag you'll need to use a blekko instance that has logged in using its username and password.
94
+
95
+ blekko = Blekko.new(username: "derekrose", password: "comeback", api_key: "1")
96
+
97
+ You will be logged in automatically if you provide all three credentials when you create the Blekko instance. Otherwise, you can call the ``login`` method to authenticate.
98
+
99
+ blekko = Blekko.new
100
+ blekko.username = "derekrose"
101
+ blekko.password = "comeback"
102
+ blekko.api_key = "1"
103
+ blekko.login
104
+
105
+ Create a slashtag using the same syntax as used for the view method. You can pass in the ``urls`` when you initialize a new instance.
106
+
107
+ slashtag = blekko.slashtag("/my/sports", urls: ["http://www.espn.com", "http://sportsillustrated.cnn.com"])
108
+
109
+ You can add urls to an existing slashtag object.
110
+
111
+ slashtag = blekko.slashtag("/my/sports")
112
+ slashtag.urls << "http://www.espn.com"
113
+
114
+ To save the slashtags to blekko call ``save!``. This will either create a new slashtag or update the existing slashtag.
115
+
116
+ slashtag.save!
117
+
118
+ #### Remove URLS
119
+
120
+ You can also remove URLs from a slashtag.
121
+
122
+ slashtag = blekko.slashtag("/my/sports")
123
+ slashtag.remove_urls!("http://www.espn.com")
124
+
125
+ #### Delete
126
+
127
+ The API does not currently support deleting slashtags. You'll need to login to [blekko.com](http://www.blekko.com) to delete a slashtag from your account.
128
+
129
+ ## Acknowledgements
130
+ Thanks to [blekko.com](http://www.blekko.com) for providing API access to their search engine.
131
+
132
+ <3 [@barelyknown](http://www.twitter.com/barelyknown)
133
+
134
+ ## Contributing
135
+
136
+ 1. Fork it
137
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
138
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
139
+ 4. Push to the branch (`git push origin my-new-feature`)
140
+ 5. Create new Pull Request
141
+ 6. Thank you :)
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'blekko-search/version'
5
+
6
# Packaging metadata for the blekko-search gem.
Gem::Specification.new do |gem|
  gem.name          = "blekko-search"
  gem.version       = BlekkoSearch::VERSION
  gem.authors       = ["Sean Devine"]
  gem.email         = ["barelyknown@icloud.com"]
  gem.description   = %q(Search and manage slashtags for blekko.com)
  gem.summary       = %q(Search and manage slashtags for blekko.com)
  gem.homepage      = "https://github.com/barelyknown/blekko-search"
  # The package ships an MIT LICENSE.txt; declare it so the built gem's
  # metadata no longer reports an empty license list.
  gem.licenses      = ["MIT"]

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end
@@ -0,0 +1,9 @@
1
+ require 'cgi'
2
+ require 'open-uri'
3
+ require 'json'
4
+ require 'net/http'
5
+ require "blekko-search/version"
6
+ require "blekko-search/blekko"
7
+ require "blekko-search/search"
8
+ require "blekko-search/search_result"
9
+ require "blekko-search/slashtag"
@@ -0,0 +1,48 @@
1
# Entry point for talking to blekko.com: holds connection settings and
# credentials, and builds Search and Slashtag objects bound to this client.
class Blekko
  HOST = "www.blekko.com"
  DEFAULT_MAX_FREQUENCY_PER_SECOND = 1

  # NOTE(review): this session cookie was hard-coded in the original source and
  # sent with every authenticated request. It is kept only as a fallback for
  # clients that have not logged in, so existing behaviour is preserved.
  # A credential committed to source should be rotated and removed.
  FALLBACK_COOKIE = "A=AfGTtGoSio7Hnc1xiaSrwkJX4ggMfcFiBXufPUPQXZvB5TRe36Q6tPI4woK6SKO8%2Bh8qeD7z0qEk%2B4Ceg5N9HA95UTpznKUvuuEfb04GiwhAlKARpLnp18%2BI6EYQfes1PB0QNnhHwEAC3kLjyJqCZbsxVw8ud4Z6F%2Fbg6BvJj28L;"

  attr_accessor :protocol, :api_key, :max_frequency_per_second, :username, :password, :login_cookie

  # args - Hash of options:
  #        :api_key, :username, :password - credentials; when all three are
  #                                         given, #login is called immediately.
  #        :secure                        - truthy to use "https://" for requests
  #                                         built from #protocol.
  #        :max_frequency_per_second      - stored on the instance; this class
  #                                         does not enforce it.
  def initialize(args={})
    @api_key = args[:api_key]
    @protocol = args[:secure] ? "https://" : "http://"
    @username = args[:username]
    @password = args[:password]
    @max_frequency_per_second = args[:max_frequency_per_second] || DEFAULT_MAX_FREQUENCY_PER_SECOND
    login if @api_key && @username && @password
  end

  # Returns the host name used when building request URLs.
  def host
    HOST
  end

  # Runs a search for +query+ and returns its results.
  # +args+ is passed through to Blekko::Search.
  def search(query, args={})
    Blekko::Search.new(self, query, args).search
  end

  # Builds a Blekko::Slashtag named +name+ bound to this client.
  # +args+ is passed through to Blekko::Slashtag.
  def slashtag(name, args={})
    Blekko::Slashtag.new(self, name, args)
  end

  # Returns the URI used to log in. Every credential is URL-escaped, including
  # the API key, so reserved characters cannot corrupt the query string.
  def login_uri
    URI("https://blekko.com/login?u=#{CGI.escape(username)}&p=#{CGI.escape(password)}&auth=#{CGI.escape(api_key.to_s)}")
  end

  # Returns the HTTP headers sent with authenticated requests. The cookie
  # obtained by #login is used when present; otherwise the legacy fallback
  # cookie is sent.
  def headers
    {
      "Cookie" => session_cookie,
      "User-Agent" => "Ruby"
    }
  end

  # Logs in over HTTPS and stores the "A=" session cookie in #login_cookie.
  #
  # Returns the stored cookie, or nil when the response carried none.
  # Raises ArgumentError when username or password is missing.
  def login
    raise ArgumentError, "Username and password are required" unless username && password
    uri = login_uri
    Net::HTTP.start(uri.host, uri.port, use_ssl: true) do |http|
      response = http.request(Net::HTTP::Get.new(uri.request_uri))
      # get_fields returns nil when the header is absent; treat that as "no cookies".
      cookies = response.get_fields('Set-Cookie') || []
      self.login_cookie = cookies.find { |c| c =~ /\AA=/ }
    end
  end

  private

  # Picks the value for the Cookie request header. A Set-Cookie field may carry
  # attributes after the first ";" (path, expiry, ...); only the leading
  # name=value pair belongs in a request.
  def session_cookie
    return FALLBACK_COOKIE if login_cookie.to_s.empty?
    login_cookie.split(";").first.to_s.strip
  end
end
@@ -0,0 +1,67 @@
1
class Blekko
  # Runs one query against the blekko web-search endpoint, fetching as many
  # pages as are needed to collect the requested number of results.
  class Search

    DEFAULT_PAGE_SIZE = 100
    DEFAULT_PAGE_NUMBER = 0
    PREFIX = "/ws/?q="
    RESPONSE_FORMAT = "/json+/"

    attr_accessor :query, :slashtags
    # Only the writer is generated; the reader below lazily defaults to [].
    attr_writer :results

    # blekko - client object providing #protocol, #host and #api_key.
    # query  - search terms as a String.
    # args   - Hash of options:
    #          :slashtags  - a slashtag String or an Array of them (default none).
    #          :page_size  - results requested per page (default DEFAULT_PAGE_SIZE).
    #          :total_size - total results wanted (default: the page size).
    def initialize(blekko, query, args={})
      args = { page_size: DEFAULT_PAGE_SIZE }.merge(args)
      @blekko = blekko
      @query = query
      @slashtags = Array(args[:slashtags])
      @page_size = args[:page_size]
      @total_size = args[:total_size] || @page_size
    end

    # Results accumulated so far (an empty Array before the first search).
    def results
      @results ||= []
    end

    # Fetches pages in order and returns at most +total_size+ SearchResult
    # objects. Fetching stops early at the first page that carries no
    # 'RESULT' entry, returning what has been collected so far.
    def search
      # Start from a clean slate so calling #search twice does not duplicate results.
      self.results = []
      number_of_searches.times do |page_number|
        response = fetch_page(page_number)
        page = response.is_a?(Hash) ? response['RESULT'] : nil
        return results unless page
        self.results += page.collect { |r| Blekko::SearchResult.new(r) }
      end
      results[0, @total_size]
    end

    # Number of page requests needed to cover +total_size+ results.
    def number_of_searches
      @number_of_searches ||= (@total_size.to_f / @page_size).ceil
    end

    # The query followed by the slashtags, in the "+"-separated form that is
    # appended to PREFIX when building the request URL.
    def escaped_query
      CGI.escape(query + " ") + @slashtags.join("+") + "+"
    end

    def page_size_param
      "ps=#{@page_size}"
    end

    def page_number_param(page_number)
      "p=#{page_number}"
    end

    # Returns the auth parameter, or nil when the client has no API key.
    def auth_param
      @blekko.api_key ? "auth=#{@blekko.api_key}" : nil
    end

    # Joins the page-size, auth (when present) and page-number parameters.
    def params(page_number)
      [page_size_param, auth_param, page_number_param(page_number)].compact.join("&")
    end

    # Full request URL for the given zero-based page number.
    def url(page_number)
      @blekko.protocol + @blekko.host + PREFIX + escaped_query + RESPONSE_FORMAT + params(page_number)
    end

    private

    # Downloads and parses one page. Returns the parsed JSON, or nil for an
    # empty body. The handle is closed by the block form of open.
    def fetch_page(page_number)
      body = open_url(url(page_number)) { |io| io.read }
      return nil if body.nil? || body.strip.empty?
      # JSON.parse rather than JSON.load: the payload comes from the network
      # and should never instantiate arbitrary objects.
      JSON.parse(body)
    end

    # open-uri stopped patching Kernel#open for URLs in Ruby 3.0. Prefer
    # URI.open where it is publicly available and fall back to Kernel#open on
    # older rubies, where URI.open is only the private Kernel method.
    def open_url(target, *rest, &block)
      if URI.respond_to?(:open)
        URI.open(target, *rest, &block)
      else
        open(target, *rest, &block)
      end
    end

  end
end
@@ -0,0 +1,25 @@
1
# DateTime lives in the 'date' library, which the gem's require list does not
# load; require it here so #datetime works regardless of load order.
require 'date'

class Blekko
  # One search hit. Known blekko attributes are copied from the result Hash
  # onto plain accessors; unknown keys are ignored.
  class SearchResult
    # Attributes that may be populated from a result Hash.
    ATTRIBUTES = [
      :n_group, :display_url, :rss, :rss_title, :short_host, :short_host_url,
      :snippet, :toplevel, :url, :url_title, :doc_date_iso, :address, :geocluster,
      :lat, :lon, :phone, :zip, :is_robots_banned
    ]

    # toplevel gets a custom reader below, so only its writer is generated here.
    attr_accessor(*(ATTRIBUTES - [:toplevel]))
    attr_writer :toplevel

    # result - Hash of attribute name (String or Symbol) => value, as parsed
    #          from the search response. nil is treated as an empty Hash.
    def initialize(result)
      (result || {}).each do |key, value|
        # Only assign known attributes: keys come from the network and must
        # not be able to reach arbitrary setter methods.
        next unless ATTRIBUTES.include?(key.to_s.to_sym)
        public_send("#{key}=", value)
      end
    end

    # Returns doc_date_iso parsed as a DateTime, or nil when it is missing or
    # cannot be parsed.
    def datetime
      return nil unless doc_date_iso
      DateTime.parse(doc_date_iso.to_s)
    rescue ArgumentError
      nil
    end

    # Returns true when the toplevel flag is set ("1", 1 or true), false otherwise.
    def toplevel
      ["1", 1, true].include?(@toplevel)
    end

    alias_method :sequence, :n_group
    alias_method :abstract, :snippet
    # The README documents the parsed date under the name +date+.
    alias_method :date, :datetime

  end
end
@@ -0,0 +1,72 @@
1
class Blekko
  # A blekko slashtag: a named list of urls that can be loaded from, saved to
  # and trimmed on blekko.com through the owning client.
  class Slashtag

    attr_accessor :name, :blekko
    # Only the writer is generated; the reader below lazily defaults to [].
    attr_writer :urls

    # blekko - client object providing #protocol, #host, #api_key and #headers.
    # name   - slashtag name, e.g. "/my/sports".
    # args   - Hash of options:
    #          :urls       - a url String or an Array of them to start with.
    #          :eager_load - when true (the default) and no :urls were given,
    #                        the urls saved on blekko are fetched immediately.
    def initialize(blekko, name, args={})
      args = { eager_load: true }.merge(args)
      @blekko = blekko
      @name = name
      @urls = Array(args[:urls])
      # Test the argument, not @urls: @urls is always an Array (hence always
      # truthy), which previously meant the eager load never happened.
      if args[:eager_load] && args[:urls].nil?
        self.urls = saved_urls
      end
    end

    # The working list of urls (an empty Array when none are set).
    def urls
      @urls ||= []
    end

    # Fetches the urls currently stored on blekko for this slashtag.
    #
    # Returns an Array of Strings (empty for an empty response), or nil when
    # the first response line contains a space; a space is taken to mean the
    # response is a message rather than a url list.
    def saved_urls
      url = blekko.protocol + blekko.host + "/tag/view?name=" + CGI.escape(name) +
            "&format=text&auth=#{CGI.escape(blekko.api_key.to_s)}"
      lines = open_url(url) { |io| io.map { |line| line.strip } }
      return [] if lines.empty?
      lines unless lines.first.include?(" ")
    end

    # Creates the slashtag, falling back to an update when the create response
    # says it already exists.
    #
    # Returns true on success and false when any StandardError is raised
    # along the way (a deliberate best-effort contract).
    def save!
      update! if create!.read =~ /already exists/
      true
    rescue StandardError
      false
    end

    # Removes +target_urls+ (a String or an Array of Strings) from the
    # slashtag on blekko. Returns true.
    def remove_urls!(target_urls)
      open_url(remove_url(target_urls), blekko.headers)
      true
    end

    # Issues the "create" request and returns the opened response.
    def create!
      open_url(save_url("create"), blekko.headers)
    end

    # Issues the "update" request and returns the opened response.
    def update!
      open_url(save_url("update"), blekko.headers)
    end

    # Builds the add/update request URL.
    #
    # method      - value for the submit parameter ("create" or "update").
    # target_urls - urls to send; defaults to the current #urls. This argument
    #               was previously accepted but ignored.
    def save_url(method, target_urls=urls)
      "https://" + blekko.host + "/tag/add?name=#{CGI.escape(name)}&submit=#{method}" \
        "&urls=#{escape_urls(target_urls)}&auth=#{CGI.escape(blekko.api_key.to_s)}"
    end

    # Builds the request URL that deletes +target_urls+ from the slashtag.
    def remove_url(target_urls)
      "https://" + blekko.host + "/tag/edit?submit=1&type=del&name=#{CGI.escape(name)}" \
        "&urls=#{escape_urls(target_urls)}&auth=#{CGI.escape(blekko.api_key.to_s)}"
    end

    # Builds the request URL for deleting the whole slashtag.
    def delete_url
      "https://blekko.com/tag/delete?submit=1&name=#{CGI.escape(name)}&auth=#{CGI.escape(blekko.api_key.to_s)}"
    end

    # Always raises ArgumentError, with a message saying blekko does not
    # implement slashtag deletion yet. The original `return`ed the error
    # class and message as an Array, so the failure was silent.
    def delete!
      raise ArgumentError, "This is not implemented by blekko yet"
    end

    private

    # Accepts one url or many, escapes each so characters such as "&" or "#"
    # cannot corrupt the query string, and joins them with an encoded newline.
    def escape_urls(target_urls)
      Array(target_urls).map { |u| CGI.escape(u.to_s) }.join("%0A")
    end

    # open-uri stopped patching Kernel#open for URLs in Ruby 3.0. Prefer
    # URI.open where it is publicly available and fall back to Kernel#open on
    # older rubies, where URI.open is only the private Kernel method.
    def open_url(target, *rest, &block)
      if URI.respond_to?(:open)
        URI.open(target, *rest, &block)
      else
        open(target, *rest, &block)
      end
    end

  end
end
@@ -0,0 +1,3 @@
1
# Namespace holding the gem's version; the gemspec reads BlekkoSearch::VERSION.
module BlekkoSearch
  # Frozen so the shared constant cannot be mutated in place by callers.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: blekko-search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Sean Devine
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-19 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Search and manage slashtags for blekko.com
15
+ email:
16
+ - barelyknown@icloud.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - .gitignore
22
+ - Gemfile
23
+ - LICENSE.txt
24
+ - README.md
25
+ - Rakefile
26
+ - blekko-search.gemspec
27
+ - lib/blekko-search.rb
28
+ - lib/blekko-search/blekko.rb
29
+ - lib/blekko-search/search.rb
30
+ - lib/blekko-search/search_result.rb
31
+ - lib/blekko-search/slashtag.rb
32
+ - lib/blekko-search/version.rb
33
+ homepage: https://github.com/barelyknown/blekko-search
34
+ licenses: []
35
+ post_install_message:
36
+ rdoc_options: []
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ requirements: []
52
+ rubyforge_project:
53
+ rubygems_version: 1.8.24
54
+ signing_key:
55
+ specification_version: 3
56
+ summary: Search and manage slashtags for blekko.com
57
+ test_files: []