sshingler-jkl 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/License.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008,2009 Steven Shingler
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
File without changes
@@ -0,0 +1,52 @@
1
+ require 'json'
2
+ require 'lib/rest_client'
3
+
4
+ module Jkl
5
+
6
# Value stored under the 'calais' key of config/keys.yml; read once, when
# this module body is evaluated. Sent as the 'licenseID' form field.
LICENSE_ID = YAML.load_file('config/keys.yml')['calais']

# Parsed URI that Calais requests are posted to.
C_URI = URI.parse('http://api.opencalais.com/enlighten/rest/')
8
+
9
# Posts +content+ to C_URI together with the licence key and the
# processing directives built by paramsXML (requesting
# 'application/json' output).
#
# Returns whatever post_to returns for that request.
def get_from_calais(content)
  form_fields = {
    'licenseID' => LICENSE_ID,
    'content' => content,
    'paramsXML' => paramsXML('application/json')
  }
  post_to(C_URI, form_fields)
end
14
+
15
# Parses a JSON +response+ and returns its entries as an array of cleaned
# tag hashes.
#
# The top-level "doc" entry is discarded; every remaining value is passed
# through clean_unwanted_items_from_hash. When a block is given, each
# cleaned tag is also yielded, in the same order as it appears in the
# returned array.
#
# Raises whatever JSON.parse raises when +response+ is not valid JSON.
def get_tag_from_json(response)
  result = JSON.parse(response)
  result.delete_if { |key, _value| key == "doc" } # ditching the doc

  result.map do |_key, tag|
    cleaned = clean_unwanted_items_from_hash(tag)
    yield cleaned if block_given?
    cleaned
  end
end
26
+
27
# jkl doesn't work with these aspects of the calais response, so they are
# stripped, along with blank values.
#
# Removes, in place, every entry of +h+ whose key is "relevance" or
# "instances", or whose value is "N/A", an empty array or an empty string.
# All conditions are checked in a single pass over the hash.
#
# Returns the same (mutated) hash +h+.
def clean_unwanted_items_from_hash(h)
  h.delete_if do |key, value|
    key == "relevance" || key == "instances" ||
      value == "N/A" || value == [] || value == ""
  end
end
36
+
37
+ private
38
+
39
# Builds the XML parameter document sent in the 'paramsXML' form field.
#
# +format+ is interpolated verbatim into the c:outputFormat attribute; the
# content type is fixed to "text/txt".
#
# NOTE(review): the heredoc lines are kept flush-left exactly as they
# appear in this view; whitespace between XML attributes should not matter
# to a parser, but confirm against the service if in doubt.
def paramsXML(format)
  directives = <<-PARAMS_XML
<c:params xmlns:c="http://s.opencalais.com/1/pred/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<c:processingDirectives
c:contentType="text/txt"
c:outputFormat="#{format}">
</c:processingDirectives>
<c:userDirectives />
<c:externalMetadata />
</c:params>
  PARAMS_XML
  directives
end
52
+ end
data/lib/jkl_client.rb ADDED
@@ -0,0 +1,26 @@
1
+ require 'lib/rest_client.rb'
2
+ require 'lib/rss_client.rb'
3
+ require 'lib/calais_client.rb'
4
+ require 'lib/url_doc_handler.rb'
5
+
6
+ module Jkl
7
+
8
# Fetches the feed for +keyphrase+ as a parsed XML document.
#
# The base URL is the 'topix' entry of config/config.yml (re-read on every
# call); the CGI-escaped keyphrase is appended to it and the resulting URL
# is retrieved with get_from_as_xml.
def headlines(keyphrase)
  base_url = YAML::load_file('config/config.yml')['topix']
  get_from_as_xml "#{base_url}#{CGI::escape(keyphrase)}"
end
11
+
12
# Concatenates the description of every item found in +headlines+ into a
# single string, with the CDATA wrapper markers stripped out.
#
# Items come from get_items_from and each description from attribute_from;
# nothing is inserted between consecutive descriptions. Returns an empty
# string when there are no items.
def pages(headlines)
  get_items_from(headlines).inject("") do |descriptions, item|
    raw = attribute_from(item, :description)
    descriptions << raw.gsub("<![CDATA[",'').gsub("]]>",'')
  end
end
20
+
21
# Sends +pages+ to Calais via get_from_calais and returns the tags that
# get_tag_from_json extracts from the reply.
def tags(pages)
  get_tag_from_json(get_from_calais(pages))
end
25
+
26
+ end
@@ -0,0 +1,20 @@
1
+ require 'couchrest'
2
+
3
+ module Jkl
4
+
5
# CouchRest database handle for the location stored under the 'db' key of
# config/config.yml; obtained once, when this module body is evaluated.
SERVER = CouchRest.database!(YAML.load_file('config/config.yml')['db'])
6
+
7
# Best-effort removal of the SERVER database: any StandardError raised by
# the delete is swallowed and nil is returned instead.
def delete_db
  SERVER.delete!
rescue StandardError
  nil
end
10
+
11
# CouchRest document model with a single declared property, +name+, bound
# to the SERVER database.
class Trend < CouchRest::ExtendedDocument
  use_database(SERVER)

  property(:name)
  view_by(:name)

  # NOTE(review): presumably adds created/updated timestamp properties —
  # confirm against the CouchRest version in use.
  timestamps!
end
19
+
20
+ end
@@ -0,0 +1,36 @@
1
+ require 'net/http'
2
+ require 'hpricot'
3
+
4
+ module Jkl
5
+
6
# Sends +post_args+ as a form-encoded POST to +uri+ (a parsed URI object)
# and returns the response body.
#
# Net::HTTP.post_form returns a single response object. The earlier
# `resp, data = ...` destructuring only produced a body on Ruby 1.8, where
# the response could be splatted into [response, body]; on later Rubies it
# left the body nil. Reading #body works on both.
#
# Invalid-URI, socket and connection-refused errors are reported on stdout
# as warnings and nil is returned in those cases.
def post_to(uri, post_args)
  Net::HTTP.post_form(uri, post_args).body
rescue URI::InvalidURIError => e
  puts("WARN: Invalid URI: #{e}")
rescue SocketError => e
  puts("WARN: Could not connect: #{e}")
rescue Errno::ECONNREFUSED => e
  puts("WARN: Connection refused: #{e}")
end
18
+
19
# Performs an HTTP GET on the URL string +uri+ and returns the response
# body.
#
# Invalid-URI, socket and connection-refused errors are reported on stdout
# as warnings and nil is returned in those cases.
def get_from(uri)
  response = Net::HTTP.get_response(URI.parse(uri))
  response.body
rescue URI::InvalidURIError => e
  puts("WARN: Invalid URI: #{e}")
rescue SocketError => e
  puts("WARN: Could not connect: #{e}")
rescue Errno::ECONNREFUSED => e
  puts("WARN: Connection refused: #{e}")
end
31
+
32
# Fetches +uri+ with get_from and hands the result to Hpricot.XML,
# returning the parsed document.
def get_from_as_xml(uri)
  body = get_from(uri)
  Hpricot.XML(body)
end
35
+
36
+ end
data/lib/rss_client.rb ADDED
@@ -0,0 +1,15 @@
1
+ require 'hpricot'
2
+
3
+ module Jkl
4
+
5
# Returns the elements matched by searching +rssdoc+ for :item, as an
# array.
#
# +rssdoc+ only needs to respond to `/` (the search operator) and return a
# collection that converts with to_a. A nil document yields an empty array.
def get_items_from(rssdoc)
  return [] if rssdoc.nil?

  (rssdoc / :item).to_a
end
10
+
11
# Searches +item+ for +name+ (via the `/` operator) and returns the
# inner_html of the match.
def attribute_from(item, name)
  matches = item / name
  matches.inner_html
end
14
+
15
+ end
@@ -0,0 +1,27 @@
1
+ require 'hpricot'
2
+ require 'rest_client'
3
+
4
+ module Jkl
5
+
6
# Reduces +text+ (anything responding to to_s) to its longer text lines.
#
# Steps, in order:
# 1. strip everything that looks like a markup tag;
# 2. split the remainder on carriage returns;
# 3. per line: drop the trailing line break, remove tabs and delete every
#    run of two or more whitespace characters;
# 4. discard lines containing fewer than five spaces - usually just
#    navigation;
# 5. concatenate the surviving lines with nothing in between.
#
# NOTE(review): the split is on "\r" only, so input that uses bare "\n"
# line endings is treated as one single line - confirm this is intended.
def sanitize(text)
  without_tags = text.to_s.gsub(/<\/?[^>]*>/, "") # remove tags

  cleaned_lines = without_tags.split("\r").map do |line|
    line.chomp.delete("\t").gsub(/\s{2,}/, '') # remove tabs and larger spaces
  end

  cleaned_lines.select { |line| line.count(" ") >= 5 }.join
end
14
+
15
# Parses +response+ with Hpricot and returns the resulting document.
#
# Invalid-URI, socket and connection-refused errors are reported on stdout
# as warnings and nil is returned in those cases.
# NOTE(review): the only call in the body is Hpricot(response); whether it
# can actually raise these network-related errors should be confirmed.
def from_doc(response)
  Hpricot(response)
rescue URI::InvalidURIError => e
  puts("WARN: Problem with getting a connection: #{e}")
rescue SocketError => e
  puts("WARN: Could not connect to feed: #{e}")
rescue Errno::ECONNREFUSED => e
  puts("WARN: Connection refused: #{e}")
end
26
+
27
+ end
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sshingler-jkl
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - steven shingler
8
+ - richard vaughan
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2009-08-27 00:00:00 -07:00
14
+ default_executable:
15
+ dependencies: []
16
+
17
+ description: Jkl is a Ruby library and Sinatra app for dealing with information overload.
18
+ email: "'shingler@gmail.com'"
19
+ executables: []
20
+
21
+ extensions: []
22
+
23
+ extra_rdoc_files:
24
+ - README
25
+ - License.txt
26
+ files:
27
+ - lib/calais_client.rb
28
+ - lib/jkl_client.rb
29
+ - lib/persistence_client.rb
30
+ - lib/rest_client.rb
31
+ - lib/rss_client.rb
32
+ - lib/url_doc_handler.rb
33
+ - README
34
+ - License.txt
35
+ has_rdoc: false
36
+ homepage: http://github.com/sshingler/jkl
37
+ licenses:
38
+ post_install_message:
39
+ rdoc_options:
40
+ - --inline-source
41
+ - --charset=UTF-8
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ version:
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: "0"
55
+ version:
56
+ requirements: []
57
+
58
+ rubyforge_project:
59
+ rubygems_version: 1.3.5
60
+ signing_key:
61
+ specification_version: 2
62
+ summary: Jkl is a Ruby library and Sinatra app for dealing with information overload.
63
+ test_files: []
64
+