sshingler-jkl 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/License.txt +20 -0
- data/README +0 -0
- data/lib/calais_client.rb +52 -0
- data/lib/jkl_client.rb +26 -0
- data/lib/persistence_client.rb +20 -0
- data/lib/rest_client.rb +36 -0
- data/lib/rss_client.rb +15 -0
- data/lib/url_doc_handler.rb +27 -0
- metadata +64 -0
data/License.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2008,2009 Steven Shingler
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
File without changes
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'lib/rest_client'
|
3
|
+
|
4
|
+
module Jkl
|
5
|
+
|
6
|
+
# Value of the 'calais' entry in config/keys.yml, read (relative to the
# current working directory) when this file is loaded. It is sent as the
# 'licenseID' form field by get_from_calais.
LICENSE_ID = YAML::load_file('config/keys.yml')['calais']
|
7
|
+
# Parsed URI that get_from_calais posts to.
C_URI = URI.parse('http://api.opencalais.com/enlighten/rest/')
|
8
|
+
|
9
|
+
# Submits +content+ to the Calais endpoint (C_URI) and returns whatever
# post_to returns for that request.
#
# The form carries the licence key, the raw content and a paramsXML
# document asking for 'application/json' output.
def get_from_calais(content)
  form_fields = {
    'licenseID' => LICENSE_ID,
    'content'   => content,
    'paramsXML' => paramsXML('application/json')
  }
  post_to(C_URI, form_fields)
end
|
14
|
+
|
15
|
+
# Turns a Calais JSON response into an array of cleaned tag hashes.
#
# The top-level "doc" entry is discarded; every remaining value is passed
# through clean_unwanted_items_from_hash. When a block is given, each
# cleaned tag is also yielded to it.
#
# response - JSON text (String), or nil/blank when the request failed.
#
# Returns an Array of cleaned tags; an empty Array for a nil or blank
# response. Malformed JSON still raises JSON::ParserError.
def get_tag_from_json(response)
  # post_to returns nil after a rescued connection error; JSON.parse(nil)
  # would raise TypeError, so treat "no response" as "no tags".
  return [] if response.nil? || response.to_s.strip.empty?

  parsed = JSON.parse(response)
  parsed.delete("doc") # ditching the doc

  parsed.values.map do |tag|
    cleaned = clean_unwanted_items_from_hash(tag)
    yield cleaned if block_given?
    cleaned
  end
end
|
26
|
+
|
27
|
+
# jkl does not use these parts of the Calais response; blank values are removed as well
|
28
|
+
# Strips entries jkl has no use for from a single Calais tag hash.
#
# Removes the "relevance" and "instances" keys, plus any entry whose value
# is "N/A", an empty array or an empty string.
#
# h - the Hash to clean; it is modified in place.
#
# Returns the same Hash object, after cleaning.
def clean_unwanted_items_from_hash(h)
  # One traversal instead of five separate delete_if passes.
  h.delete_if do |k, v|
    k == "relevance" || k == "instances" || v == "N/A" || v == [] || v == ""
  end
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
# Builds the paramsXML document sent along with a Calais request.
#
# format - interpolated verbatim into the c:outputFormat attribute
#          (get_from_calais passes 'application/json').
#
# Returns the heredoc below as a String. The content type is fixed at
# "text/txt"; user directives and external metadata are left empty.
# NOTE(review): +format+ is not XML-escaped before interpolation - confirm
# callers only pass trusted values.
def paramsXML(format)
|
40
|
+
<<-paramsXML;
|
41
|
+
<c:params xmlns:c="http://s.opencalais.com/1/pred/"
|
42
|
+
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
43
|
+
<c:processingDirectives
|
44
|
+
c:contentType="text/txt"
|
45
|
+
c:outputFormat="#{format}">
|
46
|
+
</c:processingDirectives>
|
47
|
+
<c:userDirectives />
|
48
|
+
<c:externalMetadata />
|
49
|
+
</c:params>
|
50
|
+
paramsXML
|
51
|
+
end
|
52
|
+
end
|
data/lib/jkl_client.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'lib/rest_client.rb'
|
2
|
+
require 'lib/rss_client.rb'
|
3
|
+
require 'lib/calais_client.rb'
|
4
|
+
require 'lib/url_doc_handler.rb'
|
5
|
+
|
6
|
+
module Jkl
|
7
|
+
|
8
|
+
# Fetches the feed for +keyphrase+ as parsed XML.
#
# The feed URL is the 'topix' entry of config/config.yml followed by the
# CGI-escaped keyphrase; the result is whatever get_from_as_xml returns.
def headlines(keyphrase)
  settings = YAML::load_file('config/config.yml')
  feed_url = "#{settings['topix']}#{CGI::escape(keyphrase)}"
  get_from_as_xml(feed_url)
end
|
11
|
+
|
12
|
+
# Concatenates the description of every item found in +headlines+.
#
# Each description has its CDATA wrapper markers removed before being
# appended; nothing is inserted between descriptions.
#
# Returns the combined String ("" when there are no items).
def pages(headlines)
  combined = ""
  get_items_from(headlines).each do |entry|
    raw_description = attribute_from(entry, :description)
    without_open = raw_description.gsub("<![CDATA[", '')
    combined << without_open.gsub("]]>", '')
  end
  combined
end
|
20
|
+
|
21
|
+
# Sends +pages+ to Calais and returns the cleaned tags extracted from the
# JSON response (see get_from_calais and get_tag_from_json).
def tags(pages)
  get_tag_from_json(get_from_calais(pages))
end
|
25
|
+
|
26
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'couchrest'
|
2
|
+
|
3
|
+
module Jkl
|
4
|
+
|
5
|
+
# CouchRest database handle for the location stored under the 'db' entry of
# config/config.yml; evaluated when this file is loaded.
# NOTE(review): `database!` presumably creates the database when it does not
# exist yet - confirm against the CouchRest documentation.
SERVER = CouchRest.database! YAML::load_file('config/config.yml')['db']
|
6
|
+
|
7
|
+
# Deletes the database behind SERVER on a best-effort basis: any
# StandardError raised by the delete is swallowed and nil is returned.
def delete_db
  SERVER.delete!
rescue StandardError
  nil
end
|
10
|
+
|
11
|
+
# Document model stored in the SERVER database. It declares a single
# :name property, a view keyed on :name, and timestamps.
# NOTE(review): exact semantics of these macros come from
# CouchRest::ExtendedDocument - confirm against its documentation.
class Trend < CouchRest::ExtendedDocument
|
12
|
+
|
13
|
+
use_database SERVER
|
14
|
+
property :name
|
15
|
+
view_by :name
|
16
|
+
timestamps!
|
17
|
+
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
data/lib/rest_client.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'hpricot'
|
3
|
+
|
4
|
+
module Jkl
|
5
|
+
|
6
|
+
# Posts form-encoded arguments to a URI and returns the response body.
#
# uri       - a URI object identifying the target.
# post_args - Hash of form field names to values.
#
# Returns the response body as a String, or nil when the URI is invalid or
# the connection fails (a WARN line is printed in those cases).
def post_to(uri, post_args)
  # Net::HTTP.post_form returns a single Net::HTTPResponse. The previous
  # `resp, data = ...` destructuring only yielded the body through the
  # Ruby 1.8-era HTTPResponse#to_ary shim; on later Rubies `data` was
  # always nil, so ask the response for its body explicitly.
  Net::HTTP.post_form(uri, post_args).body
rescue URI::InvalidURIError => e
  puts("WARN: Invalid URI: #{e}")
rescue SocketError => e
  puts("WARN: Could not connect: #{e}")
rescue Errno::ECONNREFUSED => e
  puts("WARN: Connection refused: #{e}")
end
|
18
|
+
|
19
|
+
# Performs an HTTP GET on +uri+ (a String) and returns the response body.
#
# Returns nil after printing a WARN line when the URI cannot be parsed or
# the connection cannot be established.
def get_from(uri)
  target = URI.parse(uri)
  Net::HTTP.get_response(target).body
rescue URI::InvalidURIError => error
  puts("WARN: Invalid URI: #{error}")
rescue SocketError => error
  puts("WARN: Could not connect: #{error}")
rescue Errno::ECONNREFUSED => error
  puts("WARN: Connection refused: #{error}")
end
|
31
|
+
|
32
|
+
# Fetches +uri+ with get_from and hands the body to Hpricot's XML parser,
# returning the parsed document.
def get_from_as_xml(uri)
  body = get_from(uri)
  Hpricot.XML(body)
end
|
35
|
+
|
36
|
+
end
|
data/lib/url_doc_handler.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'hpricot'
|
2
|
+
require 'rest_client'
|
3
|
+
|
4
|
+
module Jkl
|
5
|
+
|
6
|
+
# Reduces marked-up text to its longer lines of plain text.
#
# text - anything responding to to_s.
#
# Returns a String built by concatenating, with no separator, every line
# that still contains at least five spaces after cleaning.
# NOTE(review): lines are split on "\r" only, so input that uses bare "\n"
# line endings is treated as one single line - confirm this is intended.
def sanitize(text)
|
7
|
+
str = ""
|
8
|
+
text.to_s.gsub(/<\/?[^>]*>/, "").split("\r").each do |l| # strip anything that looks like a tag, then split on carriage returns
|
9
|
+
l = l.chomp.gsub("\t",'').gsub(/\s{2,}/,'') # remove tabs and any run of two or more whitespace characters
|
10
|
+
str << l unless l.count(" ") < 5 # skip lines with fewer than five spaces - usually just navigation
|
11
|
+
end
|
12
|
+
str
|
13
|
+
end
|
14
|
+
|
15
|
+
# Parses +response+ with Hpricot and returns the resulting document.
#
# If one of the rescued connection-style errors is raised while parsing,
# a WARN line is printed and nil is returned instead.
def from_doc(response)
  Hpricot(response)
rescue URI::InvalidURIError => error
  puts("WARN: Problem with getting a connection: #{error}")
rescue SocketError => error
  puts("WARN: Could not connect to feed: #{error}")
rescue Errno::ECONNREFUSED => error
  puts("WARN: Connection refused: #{error}")
end
|
26
|
+
|
27
|
+
end
|
metadata
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sshingler-jkl
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- steven shingler
|
8
|
+
- richard vaughan
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2009-08-27 00:00:00 -07:00
|
14
|
+
default_executable:
|
15
|
+
dependencies: []
|
16
|
+
|
17
|
+
description: Jkl is a Ruby library and Sinatra app for dealing with information overload.
|
18
|
+
email: shingler@gmail.com
|
19
|
+
executables: []
|
20
|
+
|
21
|
+
extensions: []
|
22
|
+
|
23
|
+
extra_rdoc_files:
|
24
|
+
- README
|
25
|
+
- License.txt
|
26
|
+
files:
|
27
|
+
- lib/calais_client.rb
|
28
|
+
- lib/jkl_client.rb
|
29
|
+
- lib/persistence_client.rb
|
30
|
+
- lib/rest_client.rb
|
31
|
+
- lib/rss_client.rb
|
32
|
+
- lib/url_doc_handler.rb
|
33
|
+
- README
|
34
|
+
- License.txt
|
35
|
+
has_rdoc: false
|
36
|
+
homepage: http://github.com/sshingler/jkl
|
37
|
+
licenses:
|
38
|
+
post_install_message:
|
39
|
+
rdoc_options:
|
40
|
+
- --inline-source
|
41
|
+
- --charset=UTF-8
|
42
|
+
require_paths:
|
43
|
+
- lib
|
44
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: "0"
|
49
|
+
version:
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: "0"
|
55
|
+
version:
|
56
|
+
requirements: []
|
57
|
+
|
58
|
+
rubyforge_project:
|
59
|
+
rubygems_version: 1.3.5
|
60
|
+
signing_key:
|
61
|
+
specification_version: 2
|
62
|
+
summary: Jkl is a Ruby library and Sinatra app for dealing with information overload.
|
63
|
+
test_files: []
|
64
|
+
|