blekko-search 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,50 @@
1
+ # gitignore tips and tricks from https://github.com/github/gitignore
2
+
3
+ # Ignore bundler config
4
+ /.bundle
5
+
6
+ # Ignore the default SQLite database.
7
+ /db/*.sqlite3
8
+
9
+ # Ignore all logfiles and tempfiles.
10
+ /log/*.log
11
+ /tmp
12
+
13
+ # https://github.com/github/gitignore/blob/master/Ruby.gitignore
14
+ *.gem
15
+ *.rbc
16
+ .bundle
17
+ .config
18
+ coverage
19
+ InstalledFiles
20
+ lib/bundler/man
21
+ pkg
22
+ rdoc
23
+ spec/reports
24
+ test/tmp
25
+ test/version_tmp
26
+ tmp
27
+
28
+ # YARD artifacts
29
+ .yardoc
30
+ _yardoc
31
+ doc/
32
+
33
+
34
+ .DS_Store
35
+ .AppleDouble
36
+ .LSOverride
37
+ Icon
38
+
39
+ # https://github.com/github/gitignore/blob/master/Global/OSX.gitignore
40
+ # Thumbnails
41
+ ._*
42
+
43
+ # Files that might appear on external disk
44
+ .Spotlight-V100
45
+ .Trashes
46
+
47
+ # https://github.com/github/gitignore/blob/master/Global/TextMate.gitignore
48
+ *.tmproj
49
+ *.tmproject
50
+ tmtags
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in blekko-search.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Sean Devine
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,141 @@
1
+ # Blekko
2
+
3
+ Search the Internet (or parts of the Internet!) with ease. This gem is powered by [blekko.com](http://www.blekko.com).
4
+
5
+ This gem is based on work done on [earmarkd.com](http://www.earmarkd.com) during [RailsRumble 2012](http://railsrumble.com).
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's ``Gemfile``:
10
+
11
+ gem 'blekko-search'
12
+
13
+ And then execute:
14
+
15
+ $ bundle install
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install blekko-search
20
+
21
+ ## Usage
22
+
23
+ Use this gem for quick searches or to manage groups of slashtags and more complicated search goals.
24
+
25
+ ### Searching
26
+
27
+ While blekko asks that you [request an API key](http://help.blekko.com/index.php/does-blekko-have-an-api/), you don't need one to start.
28
+
29
+ blekko = Blekko.new
30
+
31
+ While blekko is known for their slashtag based searching, you can search without one:
32
+
33
+ results = blekko.search("chicago")
34
+
35
+ The search method returns an array of ``SearchResult`` instances that expose all blekko attributes (and a few more).
36
+
37
+ If you want to search using [blekko slashtags](http://blekko.com/tag/show), include a ``:slashtags`` argument in your search.
38
+
39
+ results = blekko.search("chicago", slashtags: "/sports")
40
+
41
+ Blekko allows for a maximum of 100 results per search, but if you'd like more, you can set the ``:total_size`` argument.
42
+
43
+ results = blekko.search("something funny", total_size: 1000)
44
+
45
+ By default, the searches will be made 100 results at a time, but you can reduce the page size using the ``:page_size`` argument too.
46
+
47
+ Blekko asks that users of its API limit searches to one per second. This gem doesn't include that, but you may want to implement that feature (or send a pull request) especially if you are multithreading the search.
48
+
49
+ #### Results
50
+ Each result includes the attributes that blekko provides, plus a couple more:
51
+
52
+ - ``n_group`` (alias: ``sequence``) = The number of the search result in the overall results from blekko.
53
+ - ``url`` = The url of the result.
54
+ - ``display_url`` = A url formatted for display.
55
+ - ``rss`` = The rss of the result, if available.
56
+ - ``rss_title`` = The title of the rss of the result, if available.
57
+ - ``short_host_url`` = The url of the host of the result.
58
+ - ``short_host`` = The url of the host formatted for display.
59
+ - ``snippet`` (alias: ``abstract``) = A description of the result formatted for display including html
60
+ - ``toplevel`` = Is the result a top level domain?
61
+ - ``url_title`` = The title of the url formatted for display including html
62
+ - ``date`` = The date of the result's document, if available. Parsed from ``doc_date_iso``.
63
+ - ``address`` = The address of the result, if available (not too often).
64
+ - ``geocluster`` = The geocluster of the result, if available.
65
+ - ``lat`` = The lat of the result, if available.
66
+ - ``lon`` = The lon of the result, if available.
67
+ - ``phone`` = The phone number of the result, if available.
68
+ - ``zip`` = The zip of the result, if available.
69
+ - ``is_robots_banned`` = Does this result ban robots? Almost never available.
70
+
71
+ ### Slashtags
72
+ From ["What is a slashtag?" on blekko.com](http://help.blekko.com/index.php/what-is-a-slashtag/):
73
+ >A slashtag is an easy-to-create custom search engine. It is a tool used to filter search results and helps you to search only high quality sites, without spam or content farms. Slashtags contain a list of websites and when you search with a slashtag, you only search those sites. Some slashtags perform functions such as ordering the results by date.
74
+
75
+ #### View
76
+ You can view the urls for any slashtag that is public, or for any private slashtag that you have access to if you are logged in.
77
+
78
+ slashtag = blekko.slashtag("/sports")
79
+
80
+ By default, the slashtag will load the urls from blekko if the slashtag exists.
81
+
82
+ urls = blekko.slashtag("/sports").urls
83
+
84
+ Prevent the slashtag from loading its ``urls`` automatically by setting the ``:eager_load`` argument to ``false``.
85
+
86
+ slashtag = blekko.slashtag("/sports", eager_load: false)
87
+
88
+ Access the urls currently saved on blekko.com at any point:
89
+
90
+ slashtag.saved_urls
91
+
92
+ #### Create & Edit
93
+ To create or edit a slashtag you'll need to use a blekko instance that has logged in using its username and password.
94
+
95
+ blekko = Blekko.new(username: "derekrose", password: "comeback", api_key: "1")
96
+
97
+ You will be logged in automatically if you provide all three credentials when you create the Blekko instance. Otherwise, you can call the ``login`` method to authenticate.
98
+
99
+ blekko = Blekko.new
100
+ blekko.username = "derekrose"
101
+ blekko.password = "comeback"
102
+ blekko.api_key = "1"
103
+ blekko.login
104
+
105
+ Create a slashtag using the same syntax as used for the view method. You can pass in the ``urls`` when you initialize a new instance.
106
+
107
+ slashtag = blekko.slashtag("/my/sports", urls: ["http://www.espn.com", "http://sportsillustrated.cnn.com"])
108
+
109
+ You can add urls to an existing slashtag object.
110
+
111
+ slashtag = blekko.slashtag("/my/sports")
112
+ slashtag.urls << "http://www.espn.com"
113
+
114
+ To save the slashtags to blekko call ``save!``. This will either create a new slashtag or update the existing slashtag.
115
+
116
+ slashtag.save!
117
+
118
+ #### Remove URLS
119
+
120
+ You can also remove URLs from a slashtag.
121
+
122
+ slashtag = blekko.slashtag("/my/sports")
123
+ slashtag.remove_urls!("http://www.espn.com")
124
+
125
+ #### Delete
126
+
127
+ The API does not currently support deleting slashtags. You'll need to login to [blekko.com](http://www.blekko.com) to delete a slashtag from your account.
128
+
129
+ ## Acknowledgements
130
+ Thanks to [blekko.com](http://www.blekko.com) for providing API access to their search engine.
131
+
132
+ <3 [@barelyknown](http://www.twitter.com/barelyknown)
133
+
134
+ ## Contributing
135
+
136
+ 1. Fork it
137
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
138
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
139
+ 4. Push to the branch (`git push origin my-new-feature`)
140
+ 5. Create new Pull Request
141
+ 6. Thank you :)
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,19 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for blekko-search.
#
# The version is read from lib/blekko-search/version.rb, so lib/ is put on the
# load path before that file is required.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'blekko-search/version'

Gem::Specification.new do |gem|
  gem.name          = "blekko-search"
  gem.version       = BlekkoSearch::VERSION
  gem.authors       = ["Sean Devine"]
  gem.email         = ["barelyknown@icloud.com"]
  gem.description   = %q(Search and manage slashtags for blekko.com)
  gem.summary       = %q(Search and manage slashtags for blekko.com)
  gem.homepage      = "https://github.com/barelyknown/blekko-search"
  # The package ships an MIT LICENSE.txt; declare it so the published
  # metadata carries the license instead of an empty list.
  gem.license       = "MIT"

  # Package exactly the files tracked by git, one path per line.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end
@@ -0,0 +1,9 @@
1
+ require 'cgi'
2
+ require 'open-uri'
3
+ require 'json'
4
+ require 'net/http'
5
+ require "blekko-search/version"
6
+ require "blekko-search/blekko"
7
+ require "blekko-search/search"
8
+ require "blekko-search/search_result"
9
+ require "blekko-search/slashtag"
@@ -0,0 +1,48 @@
1
# Entry point for talking to blekko.com.
#
# An instance holds the connection settings (protocol, API key, credentials)
# and hands out Blekko::Search and Blekko::Slashtag objects bound to itself.
class Blekko
  HOST = "www.blekko.com"
  DEFAULT_MAX_FREQUENCY_PER_SECOND = 1

  attr_accessor :protocol, :api_key, :max_frequency_per_second, :username, :password, :login_cookie

  # @param args [Hash] optional settings:
  #   :api_key, :username, :password - credentials; when all three are given
  #     the instance logs in immediately (this performs a network request)
  #   :secure - truthy to build "https://" URLs instead of "http://"
  #   :max_frequency_per_second - stored on the instance; this class does not
  #     enforce it
  def initialize(args={})
    @api_key  = args[:api_key]
    @protocol = args[:secure] ? "https://" : "http://"
    @username = args[:username]
    @password = args[:password]
    @max_frequency_per_second = args[:max_frequency_per_second] || DEFAULT_MAX_FREQUENCY_PER_SECOND
    login if @api_key && @username && @password
  end

  # @return [String] the host name used when building request URLs
  def host
    HOST
  end

  # Runs a search and returns its results.
  #
  # @param query [String] the search terms
  # @param args [Hash] options forwarded to Blekko::Search.new
  def search(query, args={})
    Blekko::Search.new(self, query, args).search
  end

  # Builds a slashtag object bound to this client.
  #
  # @param name [String] the slashtag name, e.g. "/sports"
  # @param args [Hash] options forwarded to Blekko::Slashtag.new
  def slashtag(name, args={})
    Blekko::Slashtag.new(self, name, args)
  end

  # @return [URI] the login endpoint with the credentials in the query string.
  #   Every value is escaped so characters such as "&" or "/" cannot corrupt
  #   the query.
  def login_uri
    user = CGI.escape(username.to_s)
    pass = CGI.escape(password.to_s)
    key  = CGI.escape(api_key.to_s)
    URI("https://blekko.com/login?u=#{user}&p=#{pass}&auth=#{key}")
  end

  # Headers sent with authenticated requests.
  #
  # Only the session cookie captured by #login is sent; without a login no
  # Cookie header is included. The stored Set-Cookie value may carry attributes
  # ("; path=/ ..."), so only its leading name=value pair is forwarded.
  #
  # @return [Hash{String => String}]
  def headers
    request_headers = { "User-Agent" => "Ruby" }
    cookie = login_cookie.to_s.split(";").first.to_s.strip
    request_headers["Cookie"] = cookie unless cookie.empty?
    request_headers
  end

  # Authenticates against blekko.com and stores the session cookie in
  # #login_cookie.
  #
  # @return [String, nil] the Set-Cookie value starting with "A=", or nil when
  #   the response carries no such cookie
  # @raise [ArgumentError] when username or password is missing
  def login
    raise ArgumentError, "Username and password are required" unless username && password
    uri = login_uri
    Net::HTTP.start(uri.host, uri.port, use_ssl: true) do |http|
      response = http.request(Net::HTTP::Get.new(uri.request_uri))
      # get_fields returns nil when the header is absent; Array() makes that safe.
      self.login_cookie = Array(response.get_fields('Set-Cookie')).find { |c| c =~ /\AA=/ }
    end
  end
end
@@ -0,0 +1,67 @@
1
class Blekko
  # One search against the blekko web-service endpoint, fetched page by page.
  class Search

    DEFAULT_PAGE_SIZE = 100
    DEFAULT_PAGE_NUMBER = 0
    PREFIX = "/ws/?q="
    RESPONSE_FORMAT = "/json+/"

    attr_accessor :query, :slashtags
    attr_writer :results

    # @param blekko [#protocol, #host, #api_key] the client supplying
    #   connection settings
    # @param query [String] the search terms
    # @param args [Hash] optional settings:
    #   :slashtags - one slashtag or an array of them
    #   :page_size - results requested per page (default DEFAULT_PAGE_SIZE)
    #   :total_size - total results wanted (defaults to the page size)
    def initialize(blekko, query, args={})
      args = { page_size: DEFAULT_PAGE_SIZE }.merge(args)
      @blekko = blekko
      @query = query
      @slashtags = Array(args[:slashtags])
      @page_size = args[:page_size]
      @total_size = args[:total_size] || @page_size
    end

    # @return [Array<Blekko::SearchResult>] results collected by the last #search
    def results
      @results ||= []
    end

    # Fetches as many pages as needed to reach the requested total.
    #
    # Stops early when a page has no 'RESULT' entry. Results are reset on
    # every call so repeated calls do not accumulate duplicates.
    #
    # @return [Array<Blekko::SearchResult>] at most total_size results
    def search
      self.results = []
      number_of_searches.times do |page_number|
        response = fetch_page(page_number)
        page = response && response['RESULT']
        return results unless page
        results.concat(page.map { |r| Blekko::SearchResult.new(r) })
      end
      results[0, @total_size]
    end

    # @return [Integer] number of page requests needed to cover total_size
    def number_of_searches
      @number_of_searches ||= (@total_size.to_f / @page_size).ceil
    end

    # @return [String] the query followed by the slashtags, joined with "+"
    #   and ending in "+" so RESPONSE_FORMAT can be appended directly
    def escaped_query
      CGI.escape("#{query} ") + @slashtags.join("+") + "+"
    end

    def page_size_param
      "ps=#{@page_size}"
    end

    def page_number_param(page_number)
      "p=#{page_number}"
    end

    # @return [String, nil] the auth parameter, or nil when no API key is set
    def auth_param
      @blekko.api_key ? "auth=#{CGI.escape(@blekko.api_key.to_s)}" : nil
    end

    def params(page_number)
      [page_size_param, auth_param, page_number_param(page_number)].compact.join("&")
    end

    # @return [String] the full request URL for the given page
    def url(page_number)
      @blekko.protocol + @blekko.host + PREFIX + escaped_query + RESPONSE_FORMAT + params(page_number)
    end

    private

    # Downloads and decodes one page of results.
    #
    # Goes through URI#read (open-uri) rather than Kernel#open, which no longer
    # opens URLs on Ruby 3 and is unsafe on arbitrary strings. JSON.parse is
    # used because the payload comes from the network.
    def fetch_page(page_number)
      JSON.parse(URI.parse(url(page_number)).read)
    end

  end
end
@@ -0,0 +1,25 @@
1
require 'date' # DateTime is used by SearchResult#datetime

class Blekko
  # One entry of a blekko search response, with the response fields exposed
  # as attributes.
  class SearchResult
    attr_accessor :n_group, :display_url, :rss, :rss_title, :short_host, :short_host_url,
                  :snippet, :url, :url_title, :doc_date_iso, :address, :geocluster,
                  :lat, :lon, :phone, :zip, :is_robots_banned
    # The reader for toplevel is defined below and returns a boolean.
    attr_writer :toplevel

    # @param result [Hash] one decoded result entry. Keys that have a matching
    #   public setter are assigned; all other keys are ignored.
    def initialize(result)
      result.each do |key, value|
        # Keys come from a network response: only plain identifiers may be
        # turned into a setter call.
        next unless key.to_s =~ /\A[A-Za-z_]\w*\z/
        setter = "#{key}="
        public_send(setter, value) if respond_to?(setter)
      end
    end

    # @return [DateTime, nil] doc_date_iso parsed, or nil when it is missing
    #   or blank
    def datetime
      return nil if doc_date_iso.nil? || doc_date_iso.to_s.strip.empty?
      DateTime.parse(doc_date_iso)
    end

    # @return [Boolean] true when the toplevel flag is set ("1", 1 or true)
    def toplevel
      @toplevel == true || @toplevel.to_s == "1"
    end

    alias_method :sequence, :n_group
    alias_method :abstract, :snippet
    # The README documents this attribute as `date`.
    alias_method :date, :datetime

  end
end
@@ -0,0 +1,72 @@
1
class Blekko
  # A named list of URLs (a "slashtag") that can be read from and written to
  # blekko.com.
  class Slashtag

    attr_accessor :name, :blekko
    attr_writer :urls

    # @param blekko [#protocol, #host, #api_key, #headers] the client used for
    #   requests
    # @param name [String] the slashtag name, e.g. "/my/sports"
    # @param args [Hash] optional settings:
    #   :urls - one URL or an array of URLs to start with
    #   :eager_load - when true (the default) and no :urls were given, the
    #     saved URLs are fetched immediately (this performs a network request)
    def initialize(blekko, name, args={})
      args = { eager_load: true }.merge(args)
      @blekko = blekko
      @name = name
      @urls = Array(args[:urls])
      # Array() never yields nil, so the decision to load must look at the
      # argument itself, not at @urls.
      self.urls = saved_urls if args[:eager_load] && args[:urls].nil?
    end

    # @return [Array<String>] the URLs currently held by this object
    def urls
      @urls ||= []
    end

    # Fetches the URLs stored for this slashtag.
    #
    # @return [Array<String>, nil] the stored URLs ([] for an empty response),
    #   or nil when the first line contains a space, which is treated as a
    #   message rather than a URL
    def saved_urls
      url = blekko.protocol + blekko.host + "/tag/view?name=" + CGI.escape(name) +
            "&format=text&auth=#{CGI.escape(blekko.api_key.to_s)}"
      lines = http_get(url).read.each_line.map(&:strip).reject(&:empty?)
      return [] if lines.empty?
      return nil if lines.first.include?(" ")
      lines
    end

    # Creates the slashtag, or updates it when it already exists.
    #
    # @return [Boolean] true on success, false when any request raised
    def save!
      update! if create!.read.include?("already exists")
      true
    rescue StandardError
      false
    end

    # Removes the given URLs from the slashtag.
    #
    # @param target_urls [String, Array<String>] one URL or several
    # @return [true]
    def remove_urls!(target_urls)
      http_get(remove_url(target_urls), blekko.headers).close
      true
    end

    # @return [IO] the response to the create request
    def create!
      http_get(save_url("create"), blekko.headers)
    end

    # @return [IO] the response to the update request
    def update!
      http_get(save_url("update"), blekko.headers)
    end

    # @param method [String] the submit action ("create" or "update")
    # @param target_urls [String, Array<String>] URLs to send; defaults to #urls
    # @return [String] the request URL with name and URLs escaped
    def save_url(method, target_urls=urls)
      "https://" + blekko.host +
        "/tag/add?name=#{escaped_name}&submit=#{method}&urls=#{escaped_list(target_urls)}&auth=#{escaped_api_key}"
    end

    # @param target_urls [String, Array<String>] URLs to delete from the slashtag
    # @return [String] the request URL with name and URLs escaped
    def remove_url(target_urls)
      "https://" + blekko.host +
        "/tag/edit?submit=1&type=del&name=#{escaped_name}&urls=#{escaped_list(target_urls)}&auth=#{escaped_api_key}"
    end

    # @return [String] the URL of the delete endpoint
    def delete_url
      "https://blekko.com/tag/delete?submit=1&name=#{escaped_name}&auth=#{escaped_api_key}"
    end

    # Deleting is not supported; this always raises and sends no request.
    #
    # @raise [NotImplementedError]
    def delete!
      raise NotImplementedError, "This is not implemented by blekko yet"
    end

    private

    # Performs a GET through open-uri and returns the response IO.
    # URI#open is used instead of Kernel#open, which no longer opens URLs on
    # Ruby 3 and is unsafe on arbitrary strings.
    def http_get(url, request_headers = {})
      URI.parse(url).open(request_headers)
    end

    def escaped_name
      CGI.escape(name.to_s)
    end

    def escaped_api_key
      CGI.escape(blekko.api_key.to_s)
    end

    # Joins URLs with an encoded newline ("%0A"), escaping each URL so that
    # "&" or "?" inside it cannot break the surrounding query string.
    def escaped_list(target_urls)
      Array(target_urls).map { |u| CGI.escape(u.to_s) }.join("%0A")
    end

  end
end
@@ -0,0 +1,3 @@
1
# Namespace for the gem's release metadata.
module BlekkoSearch
  # Version string read by the gemspec; frozen so it cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: blekko-search
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Sean Devine
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-19 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Search and manage slashtags for blekko.com
15
+ email:
16
+ - barelyknown@icloud.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - .gitignore
22
+ - Gemfile
23
+ - LICENSE.txt
24
+ - README.md
25
+ - Rakefile
26
+ - blekko-search.gemspec
27
+ - lib/blekko-search.rb
28
+ - lib/blekko-search/blekko.rb
29
+ - lib/blekko-search/search.rb
30
+ - lib/blekko-search/search_result.rb
31
+ - lib/blekko-search/slashtag.rb
32
+ - lib/blekko-search/version.rb
33
+ homepage: https://github.com/barelyknown/blekko-search
34
+ licenses: []
35
+ post_install_message:
36
+ rdoc_options: []
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ requirements: []
52
+ rubyforge_project:
53
+ rubygems_version: 1.8.24
54
+ signing_key:
55
+ specification_version: 3
56
+ summary: Search and manage slashtags for blekko.com
57
+ test_files: []