sshingler-jkl 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/License.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008,2009 Steven Shingler
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
File without changes
data/lib/calais_client.rb ADDED
@@ -0,0 +1,52 @@
1
+ require 'json'
2
+ require 'lib/rest_client'
3
+
4
+ module Jkl
5
+
6
# Value stored under the 'calais' key of config/keys.yml. The file is read
# once, when this file is loaded; the path is relative (no directory prefix).
LICENSE_ID = YAML.load_file('config/keys.yml')['calais']

# Parsed URI of the OpenCalais REST endpoint that get_from_calais posts to.
C_URI = URI.parse('http://api.opencalais.com/enlighten/rest/')
8
+
9
# Posts +content+ to the Calais endpoint (C_URI) together with the licence
# key and a paramsXML document requesting 'application/json' output.
#
# Returns whatever post_to returns for that request.
def get_from_calais(content)
  form_fields = {
    'licenseID' => LICENSE_ID,
    'content'   => content,
    'paramsXML' => paramsXML('application/json')
  }
  post_to(C_URI, form_fields)
end
14
+
15
# Parses a JSON +response+ and returns an Array holding one cleaned hash per
# top-level entry, ignoring the entry stored under the "doc" key.
#
# Each entry's value is passed through clean_unwanted_items_from_hash. When a
# block is given, every cleaned hash is also yielded to it, in order.
def get_tag_from_json(response)
  entries = JSON.parse(response).reject { |key, _value| key == "doc" } # ditching the doc
  entries.map do |_key, raw_tag|
    cleaned = clean_unwanted_items_from_hash(raw_tag)
    yield cleaned if block_given?
    cleaned
  end
end
26
+
27
# Removes the parts of a Calais entry that jkl does not use, plus blank values.
#
# An entry is dropped when its key is "relevance" or "instances", or when its
# value is "N/A", an empty array, or an empty string.
#
# h - the Hash to clean. It is modified in place.
#
# Returns the same Hash object, after cleaning.
def clean_unwanted_items_from_hash(h)
  # One pass over the hash instead of five separate delete_if sweeps.
  h.delete_if do |k, v|
    k == "relevance" || k == "instances" || v == "N/A" || v == [] || v == ""
  end
end
36
+
37
+ private
38
+
39
# Builds the paramsXML document sent along with a Calais request.
#
# format - value interpolated, as given, into the c:outputFormat attribute.
#
# Returns the XML as a String, one element/attribute per line, ending with a
# newline. The content type is fixed to "text/txt".
def paramsXML(format)
  xml_lines = [
    '<c:params xmlns:c="http://s.opencalais.com/1/pred/"',
    'xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">',
    '<c:processingDirectives',
    'c:contentType="text/txt"',
    %(c:outputFormat="#{format}">),
    '</c:processingDirectives>',
    '<c:userDirectives />',
    '<c:externalMetadata />',
    '</c:params>',
    '' # trailing empty element so the joined text ends with a newline
  ]
  xml_lines.join("\n")
end
52
+ end
data/lib/jkl_client.rb ADDED
@@ -0,0 +1,26 @@
1
+ require 'lib/rest_client.rb'
2
+ require 'lib/rss_client.rb'
3
+ require 'lib/calais_client.rb'
4
+ require 'lib/url_doc_handler.rb'
5
+
6
+ module Jkl
7
+
8
# Fetches headlines for +keyphrase+ as a parsed XML document.
#
# The URL is the 'topix' value from config/config.yml (re-read on every call)
# followed by the CGI-escaped keyphrase. Returns the result of get_from_as_xml.
def headlines(keyphrase)
  feed_prefix = YAML::load_file('config/config.yml')['topix']
  escaped_phrase = CGI::escape(keyphrase)
  get_from_as_xml("#{feed_prefix}#{escaped_phrase}")
end
11
+
12
# Concatenates the :description text of every item found in +headlines+.
#
# The "<![CDATA[" and "]]>" markers are stripped from each description and
# the pieces are appended one after another with no separator.
#
# Returns the combined String ("" when there are no items).
def pages(headlines)
  get_items_from(headlines).inject("") do |combined, item|
    combined << attribute_from(item, :description).gsub("<![CDATA[", '').gsub("]]>", '')
  end
end
20
+
21
# Sends +pages+ to Calais and returns the cleaned tag hashes extracted from
# the JSON reply (see get_from_calais and get_tag_from_json).
def tags(pages)
  get_tag_from_json(get_from_calais(pages))
end
25
+
26
+ end
data/lib/persistence_client.rb ADDED
@@ -0,0 +1,20 @@
1
+ require 'couchrest'
2
+
3
+ module Jkl
4
+
5
# CouchRest database handle, obtained via CouchRest.database! from the 'db'
# value in config/config.yml. Evaluated once, when this file is loaded.
SERVER = CouchRest.database!(YAML.load_file('config/config.yml')['db'])
6
+
7
# Calls delete! on the SERVER database handle.
#
# Best-effort: any StandardError raised by the call is swallowed and nil is
# returned instead.
def delete_db
  SERVER.delete!
rescue StandardError
  nil
end
10
+
11
# CouchRest document model with a single declared property, :name, bound to
# the SERVER database.
class Trend < CouchRest::ExtendedDocument
  use_database(SERVER)

  property(:name)
  view_by(:name)
  # NOTE(review): presumably adds created/updated timestamp fields - confirm
  # against the CouchRest ExtendedDocument documentation.
  timestamps!
end
19
+
20
+ end
data/lib/rest_client.rb ADDED
@@ -0,0 +1,36 @@
1
+ require 'net/http'
2
+ require 'hpricot'
3
+
4
+ module Jkl
5
+
6
# POSTs +post_args+ to +uri+ as a URL-encoded form.
#
# uri       - a parsed URI object (it is handed straight to Net::HTTP.post_form).
# post_args - Hash of form field names to values.
#
# Returns the response body, or nil when the request fails with an invalid
# URI, a socket error or a refused connection (a WARN line is printed to
# stdout in those cases).
def post_to(uri, post_args)
  # post_form returns a single Net::HTTPResponse. The previous
  # `resp, data = ...` destructuring depended on the response converting
  # itself to [response, body]; without that conversion `data` is always nil,
  # so read the body explicitly, as get_from does.
  Net::HTTP.post_form(uri, post_args).body
rescue URI::InvalidURIError => e
  puts("WARN: Invalid URI: #{e}")
  nil
rescue SocketError => e
  puts("WARN: Could not connect: #{e}")
  nil
rescue Errno::ECONNREFUSED => e
  puts("WARN: Connection refused: #{e}")
  nil
end
18
+
19
# Performs an HTTP GET on +uri+ (a String, parsed here with URI.parse).
#
# Returns the response body, or nil when the URI is invalid, the socket
# cannot be opened or the connection is refused (a WARN line is printed to
# stdout in those cases).
def get_from(uri)
  target = URI.parse(uri)
  Net::HTTP.get_response(target).body
rescue URI::InvalidURIError => e
  puts("WARN: Invalid URI: #{e}")
rescue SocketError => e
  puts("WARN: Could not connect: #{e}")
rescue Errno::ECONNREFUSED => e
  puts("WARN: Connection refused: #{e}")
end
31
+
32
# Fetches +uri+ with get_from and parses the returned body with Hpricot.XML.
def get_from_as_xml(uri)
  body = get_from(uri)
  Hpricot.XML(body)
end
35
+
36
+ end
data/lib/rss_client.rb ADDED
@@ -0,0 +1,15 @@
1
+ require 'hpricot'
2
+
3
+ module Jkl
4
+
5
# Collects every :item node of +rssdoc+ into a new Array.
#
# rssdoc - a document that answers `/ :item` with something enumerable
#          (an Hpricot document in this library), or nil.
#
# Returns the Array of items; empty when +rssdoc+ is nil.
def get_items_from(rssdoc)
  return [] if rssdoc == nil

  (rssdoc / :item).inject([]) { |found, node| found.push(node) }
end
10
+
11
# Looks up +name+ inside +item+ (via `item / name`) and returns the
# inner_html of the match.
def attribute_from(item, name)
  matched = item / name
  matched.inner_html
end
14
+
15
+ end
data/lib/url_doc_handler.rb ADDED
@@ -0,0 +1,27 @@
1
+ require 'hpricot'
2
+ require 'rest_client'
3
+
4
+ module Jkl
5
+
6
# Reduces marked-up text to its longer lines of plain text.
#
# Steps, in order:
#   1. +text+ is converted with to_s and anything shaped like <...> is removed.
#   2. The remainder is split on carriage returns ("\r").
#   3. Each piece is chomped, tabs are deleted and every run of two or more
#      whitespace characters is deleted.
#   4. Pieces with fewer than five spaces are dropped - usually just navigation.
#
# Returns the surviving pieces concatenated with no separator.
def sanitize(text)
  tagless = text.to_s.gsub(/<\/?[^>]*>/, "")
  tagless.split("\r").each_with_object("") do |raw_line, kept|
    line = raw_line.chomp.gsub("\t", '').gsub(/\s{2,}/, '')
    kept << line if line.count(" ") >= 5
  end
end
14
+
15
# Parses +response+ with Hpricot and returns the resulting document.
#
# If the call raises URI::InvalidURIError, SocketError or
# Errno::ECONNREFUSED, a WARN line is printed to stdout and nil is returned.
def from_doc(response)
  Hpricot(response)
rescue URI::InvalidURIError => e
  puts("WARN: Problem with getting a connection: #{e}")
rescue SocketError => e
  puts("WARN: Could not connect to feed: #{e}")
rescue Errno::ECONNREFUSED => e
  puts("WARN: Connection refused: #{e}")
end
26
+
27
+ end
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sshingler-jkl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - steven shingler
8
+ - richard vaughan
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2009-08-27 00:00:00 -07:00
14
+ default_executable:
15
+ dependencies: []
16
+
17
+ description: Jkl is a Ruby library and Sinatra app for dealing with information overload.
18
+ email: "'shingler@gmail.com'"
19
+ executables: []
20
+
21
+ extensions: []
22
+
23
+ extra_rdoc_files:
24
+ - README
25
+ - License.txt
26
+ files:
27
+ - lib/calais_client.rb
28
+ - lib/jkl_client.rb
29
+ - lib/persistence_client.rb
30
+ - lib/rest_client.rb
31
+ - lib/rss_client.rb
32
+ - lib/url_doc_handler.rb
33
+ - README
34
+ - License.txt
35
+ has_rdoc: false
36
+ homepage: http://github.com/sshingler/jkl
37
+ licenses:
38
+ post_install_message:
39
+ rdoc_options:
40
+ - --inline-source
41
+ - --charset=UTF-8
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ version:
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: "0"
55
+ version:
56
+ requirements: []
57
+
58
+ rubyforge_project:
59
+ rubygems_version: 1.3.5
60
+ signing_key:
61
+ specification_version: 2
62
+ summary: Jkl is a Ruby library and Sinatra app for dealing with information overload.
63
+ test_files: []
64
+