jakal 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -24,9 +24,3 @@ Feature: http features
24
24
  When I request some RSS
25
25
  Then I should get a response
26
26
  And I should receive some headlines
27
-
28
- Scenario: Work with RSS
29
- Given I have some RSS
30
- Then I should receive some headlines
31
-
32
-
@@ -13,7 +13,8 @@ Given /^I have a mock calais response$/ do
13
13
  end
14
14
 
15
15
  When /^I post to calais$/ do
16
- @response = Jkl::get_from_calais @text
16
+ key = YAML::load_file('config/keys.yml')['calais']
17
+ @response = Jkl::Extraction::get_from_calais(key, @text)
17
18
  end
18
19
 
19
20
  When /^I remove the unwanted items$/ do
@@ -44,7 +45,7 @@ end
44
45
 
45
46
  When /^I request the nested entities from calais$/ do
46
47
  key = YAML::load_file('config/keys.yml')['calais']
47
- @response = Jkl::tags key, @text
48
+ @response = Jkl::Extraction::tags key, @text
48
49
  end
49
50
 
50
51
  Then /^I should receive the entities grouped into categories$/ do
@@ -9,7 +9,7 @@ end
9
9
  When /^I request some RSS$/ do
10
10
  keyphrase = @keyphrase || "iraq"
11
11
  url = "#{YAML::load_file('config/config.yml')['topix']}#{CGI::escape(keyphrase)}"
12
- @response = Jkl::get_from_as_xml url
12
+ @response = Jkl::get_xml_from url
13
13
  end
14
14
 
15
15
  Given /^I have some RSS$/ do
@@ -28,17 +28,16 @@ When /^I request some trends$/ do
28
28
  @response = output['trends']
29
29
  end
30
30
 
31
-
32
31
  Then /^I should get a response$/ do
33
32
  @response.should_not == nil
34
33
  #puts @response.inspect
35
34
  end
36
35
 
37
36
  Then /^I should receive some headlines$/ do
38
- @items = Jkl::get_items_from @response
37
+ @items = Jkl::Rss::items @response
39
38
  @links = []
40
39
  @items.each do |item|
41
- @links << Jkl::attribute_from(item, :link)
40
+ @links << Jkl::Rss::attribute_from(item, :link)
42
41
  end
43
42
  @links.should_not == nil
44
43
  @links.length.should > 0
data/lib/jkl.rb CHANGED
@@ -1,56 +1,8 @@
1
- require "cgi"
2
1
  require "jkl/rest_client.rb"
3
2
  require "jkl/rss_client.rb"
4
3
  require "jkl/calais_client.rb"
5
4
  require "jkl/url_doc_handler.rb"
6
5
 
7
6
  module Jkl
8
-
9
- class << self
10
7
 
11
- def headlines(feed, keyphrase)
12
- get_from_as_xml "#{feed}#{keyphrase}"
13
- end
14
-
15
- def pages(headlines)
16
- items = get_items_from headlines
17
- descriptions = ""
18
- items.each do |item|
19
- descriptions << attribute_from(item, :description).gsub("<![CDATA[","").gsub("]]>","")
20
- end
21
- descriptions
22
- end
23
-
24
- def descriptions(headlines)
25
- items = get_items_from headlines
26
- descriptions = []
27
- items.each do |item|
28
- descriptions << attribute_from(item, :description).gsub("<![CDATA[","").gsub("]]>","")
29
- end
30
- descriptions
31
- end
32
-
33
- def links(headlines)
34
- items = get_items_from headlines
35
- links = []
36
- items.each do |item|
37
- links << attribute_from(item, :link)
38
- end
39
- links
40
- end
41
-
42
- def tags(key, text)
43
- nested_list = {}
44
- entities(key,text).each do |a|
45
- nested_list = nested_list.merge!(a){ |key,v1,v2| v1+v2 }
46
- end
47
- nested_list
48
- end
49
-
50
- def entities(key,text)
51
- calais_response(key, text).entities.map{|e| {e.type => [e.attributes["name"]]}}
52
- end
53
-
54
- end
55
-
56
8
  end
@@ -1,72 +1,81 @@
1
1
  require "json"
2
- require "rest_client"
3
2
  require "calais"
4
3
 
5
- module Jkl
4
+ require "rest_client"
6
5
 
7
- class << self
6
+ module Jkl
7
+ module Extraction
8
+ class << self
8
9
 
9
- #using the calais gem
10
- def calais_response(key, pages)
11
- Calais.process_document(
12
- :content => pages,
13
- :content_type => :text,
14
- :license_id => key
15
- )
16
- end
10
+ #using the calais gem
11
+ def calais_response(key, pages)
12
+ Calais.process_document(
13
+ :content => pages,
14
+ :content_type => :text,
15
+ :license_id => key
16
+ )
17
+ end
17
18
 
18
- def get_from_calais(content)
19
- begin
20
- license_id = YAML::load_file('config/keys.yml')['calais']
21
- c_uri = URI.parse('http://api.opencalais.com/enlighten/rest/')
22
- post_args = { 'licenseID' => license_id, 'content' => content,
23
- 'paramsXML' => paramsXML('application/json') }
24
- post_to(c_uri, post_args)
25
- rescue Exception => e
26
- puts e
19
+ def tags(key, text)
20
+ nested_list = {}
21
+ entities(key,text).each do |a|
22
+ nested_list = nested_list.merge!(a){ |key,v1,v2| v1+v2 }
23
+ end
24
+ nested_list
25
+ end
26
+
27
+ def entities(key,text)
28
+ calais_response(key, text).entities.map{|e| {e.type => [e.attributes["name"]]}}
29
+ end
30
+
31
+ #not using calais gem, experimenting with json response
32
+ def get_from_calais(key, content)
33
+ post_args = {
34
+ "licenseID" => key,
35
+ "content" => content,
36
+ "paramsXML" => paramsXML("application/json")
37
+ }
38
+ Jkl::post_to(URI.parse("http://api.opencalais.com/enlighten/rest/"), post_args)
27
39
  end
28
- end
29
40
 
30
- def get_tag_from_json(response)
31
- result = JSON.parse response
32
- result.delete_if {|key, value| key == "doc" } # ditching the doc
33
- cleaned_result = []
34
- result.each do |key,tag|
35
- tag = Jkl::clean_unwanted_items_from_hash tag
36
- cleaned_result << tag
37
- yield tag if block_given?
41
+ def get_tag_from_json(response)
42
+ result = JSON.parse response
43
+ result.delete_if {|key, value| key == "doc" } # ditching the doc
44
+ cleaned_result = []
45
+ result.each do |key,tag|
46
+ tag = Jkl::clean_unwanted_items_from_hash tag
47
+ cleaned_result << tag
48
+ yield tag if block_given?
49
+ end
50
+ cleaned_result
38
51
  end
39
52
 
40
- cleaned_result
41
- end
42
-
43
- #jkl doesn't work with these aspects of the calais response, also removing blanks
44
- def clean_unwanted_items_from_hash h
45
- h.delete_if {|k, v| k == "relevance" }
46
- h.delete_if {|k, v| k == "instances" }
47
- h.delete_if {|k, v| v == "N/A"}
48
- h.delete_if {|k, v| v == []}
49
- h.delete_if {|k, v| v == ""}
50
- h.delete_if {|k, v| k == "_typeGroup"}
51
- h
52
- end
53
+ def clean_unwanted_items_from_hash h
54
+ h.delete_if {|k, v| k == "relevance" }
55
+ h.delete_if {|k, v| k == "instances" }
56
+ h.delete_if {|k, v| v == "N/A"}
57
+ h.delete_if {|k, v| v == []}
58
+ h.delete_if {|k, v| v == ""}
59
+ h.delete_if {|k, v| k == "_typeGroup"}
60
+ h
61
+ end
53
62
 
54
- private
63
+ private
55
64
 
56
- def paramsXML(format)
57
- <<-paramsXML;
58
- <c:params xmlns:c="http://s.opencalais.com/1/pred/"
59
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
60
- <c:processingDirectives
61
- c:contentType="text/txt"
62
- c:outputFormat="#{format}">
63
- </c:processingDirectives>
64
- <c:userDirectives />
65
- <c:externalMetadata />
66
- </c:params>
67
- paramsXML
68
- end
65
+ def paramsXML(format)
66
+ <<-paramsXML;
67
+ <c:params xmlns:c="http://s.opencalais.com/1/pred/"
68
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
69
+ <c:processingDirectives
70
+ c:contentType="text/txt"
71
+ c:outputFormat="#{format}">
72
+ </c:processingDirectives>
73
+ <c:userDirectives />
74
+ <c:externalMetadata />
75
+ </c:params>
76
+ paramsXML
77
+ end
69
78
 
79
+ end
70
80
  end
71
-
72
81
  end
@@ -2,7 +2,6 @@ require 'net/http'
2
2
  require 'hpricot'
3
3
 
4
4
  module Jkl
5
-
6
5
  class << self
7
6
 
8
7
  def post_to(uri, post_args)
@@ -31,10 +30,9 @@ module Jkl
31
30
  end
32
31
  end
33
32
 
34
- def get_from_as_xml(uri)
33
+ def get_xml_from(uri)
35
34
  Hpricot.XML get_from uri
36
35
  end
37
36
 
38
37
  end
39
-
40
- end
38
+ end
@@ -1,19 +1,27 @@
1
1
  require 'hpricot'
2
2
 
3
3
  module Jkl
4
-
5
- class << self
4
+ module Rss
5
+ class << self
6
6
 
7
- def get_items_from(rssdoc)
8
- items = []
9
- (rssdoc/:item).each { |rssitem| items.push rssitem } unless rssdoc==nil
10
- items
11
- end
12
-
13
- def attribute_from(item, name)
14
- (item/name).inner_html
7
+ def items(rss_doc)
8
+ (rss_doc/:item)
9
+ end
10
+
11
+ def links(items)
12
+ items.map{|item| attribute_from(item,:link)}
13
+ end
14
+
15
+ def descriptions(items)
16
+ items.map do |item|
17
+ attribute_from(item, :description).gsub("<![CDATA[","").gsub("]]>","")
18
+ end
19
+ end
20
+
21
+ def attribute_from(item, name)
22
+ (item/name).inner_html
23
+ end
24
+
15
25
  end
16
-
17
26
  end
18
-
19
27
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jakal
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - sshingler