jakal 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,9 +24,3 @@ Feature: http features
24
24
  When I request some RSS
25
25
  Then I should get a response
26
26
  And I should receive some headlines
27
-
28
- Scenario: Work with RSS
29
- Given I have some RSS
30
- Then I should receive some headlines
31
-
32
-
@@ -13,7 +13,8 @@ Given /^I have a mock calais response$/ do
13
13
  end
14
14
 
15
15
  When /^I post to calais$/ do
16
- @response = Jkl::get_from_calais @text
16
+ key = YAML::load_file('config/keys.yml')['calais']
17
+ @response = Jkl::Extraction::get_from_calais(key, @text)
17
18
  end
18
19
 
19
20
  When /^I remove the unwanted items$/ do
@@ -44,7 +45,7 @@ end
44
45
 
45
46
  When /^I request the nested entities from calais$/ do
46
47
  key = YAML::load_file('config/keys.yml')['calais']
47
- @response = Jkl::tags key, @text
48
+ @response = Jkl::Extraction::tags key, @text
48
49
  end
49
50
 
50
51
  Then /^I should receive the entities grouped into categories$/ do
@@ -9,7 +9,7 @@ end
9
9
  When /^I request some RSS$/ do
10
10
  keyphrase = @keyphrase || "iraq"
11
11
  url = "#{YAML::load_file('config/config.yml')['topix']}#{CGI::escape(keyphrase)}"
12
- @response = Jkl::get_from_as_xml url
12
+ @response = Jkl::get_xml_from url
13
13
  end
14
14
 
15
15
  Given /^I have some RSS$/ do
@@ -28,17 +28,16 @@ When /^I request some trends$/ do
28
28
  @response = output['trends']
29
29
  end
30
30
 
31
-
32
31
  Then /^I should get a response$/ do
33
32
  @response.should_not == nil
34
33
  #puts @response.inspect
35
34
  end
36
35
 
37
36
  Then /^I should receive some headlines$/ do
38
- @items = Jkl::get_items_from @response
37
+ @items = Jkl::Rss::items @response
39
38
  @links = []
40
39
  @items.each do |item|
41
- @links << Jkl::attribute_from(item, :link)
40
+ @links << Jkl::Rss::attribute_from(item, :link)
42
41
  end
43
42
  @links.should_not == nil
44
43
  @links.length.should > 0
data/lib/jkl.rb CHANGED
@@ -1,56 +1,8 @@
1
- require "cgi"
2
1
  require "jkl/rest_client.rb"
3
2
  require "jkl/rss_client.rb"
4
3
  require "jkl/calais_client.rb"
5
4
  require "jkl/url_doc_handler.rb"
6
5
 
7
6
  module Jkl
8
-
9
- class << self
10
7
 
11
- def headlines(feed, keyphrase)
12
- get_from_as_xml "#{feed}#{keyphrase}"
13
- end
14
-
15
- def pages(headlines)
16
- items = get_items_from headlines
17
- descriptions = ""
18
- items.each do |item|
19
- descriptions << attribute_from(item, :description).gsub("<![CDATA[","").gsub("]]>","")
20
- end
21
- descriptions
22
- end
23
-
24
- def descriptions(headlines)
25
- items = get_items_from headlines
26
- descriptions = []
27
- items.each do |item|
28
- descriptions << attribute_from(item, :description).gsub("<![CDATA[","").gsub("]]>","")
29
- end
30
- descriptions
31
- end
32
-
33
- def links(headlines)
34
- items = get_items_from headlines
35
- links = []
36
- items.each do |item|
37
- links << attribute_from(item, :link)
38
- end
39
- links
40
- end
41
-
42
- def tags(key, text)
43
- nested_list = {}
44
- entities(key,text).each do |a|
45
- nested_list = nested_list.merge!(a){ |key,v1,v2| v1+v2 }
46
- end
47
- nested_list
48
- end
49
-
50
- def entities(key,text)
51
- calais_response(key, text).entities.map{|e| {e.type => [e.attributes["name"]]}}
52
- end
53
-
54
- end
55
-
56
8
  end
@@ -1,72 +1,81 @@
1
1
  require "json"
2
- require "rest_client"
3
2
  require "calais"
4
3
 
5
- module Jkl
4
+ require "rest_client"
6
5
 
7
- class << self
6
+ module Jkl
7
+ module Extraction
8
+ class << self
8
9
 
9
- #using the calais gem
10
- def calais_response(key, pages)
11
- Calais.process_document(
12
- :content => pages,
13
- :content_type => :text,
14
- :license_id => key
15
- )
16
- end
10
+ #using the calais gem
11
+ def calais_response(key, pages)
12
+ Calais.process_document(
13
+ :content => pages,
14
+ :content_type => :text,
15
+ :license_id => key
16
+ )
17
+ end
17
18
 
18
- def get_from_calais(content)
19
- begin
20
- license_id = YAML::load_file('config/keys.yml')['calais']
21
- c_uri = URI.parse('http://api.opencalais.com/enlighten/rest/')
22
- post_args = { 'licenseID' => license_id, 'content' => content,
23
- 'paramsXML' => paramsXML('application/json') }
24
- post_to(c_uri, post_args)
25
- rescue Exception => e
26
- puts e
19
+ def tags(key, text)
20
+ nested_list = {}
21
+ entities(key,text).each do |a|
22
+ nested_list = nested_list.merge!(a){ |key,v1,v2| v1+v2 }
23
+ end
24
+ nested_list
25
+ end
26
+
27
+ def entities(key,text)
28
+ calais_response(key, text).entities.map{|e| {e.type => [e.attributes["name"]]}}
29
+ end
30
+
31
+ #not using calais gem, experimenting with json response
32
+ def get_from_calais(key, content)
33
+ post_args = {
34
+ "licenseID" => key,
35
+ "content" => content,
36
+ "paramsXML" => paramsXML("application/json")
37
+ }
38
+ Jkl::post_to(URI.parse("http://api.opencalais.com/enlighten/rest/"), post_args)
27
39
  end
28
- end
29
40
 
30
- def get_tag_from_json(response)
31
- result = JSON.parse response
32
- result.delete_if {|key, value| key == "doc" } # ditching the doc
33
- cleaned_result = []
34
- result.each do |key,tag|
35
- tag = Jkl::clean_unwanted_items_from_hash tag
36
- cleaned_result << tag
37
- yield tag if block_given?
41
+ def get_tag_from_json(response)
42
+ result = JSON.parse response
43
+ result.delete_if {|key, value| key == "doc" } # ditching the doc
44
+ cleaned_result = []
45
+ result.each do |key,tag|
46
+ tag = Jkl::clean_unwanted_items_from_hash tag
47
+ cleaned_result << tag
48
+ yield tag if block_given?
49
+ end
50
+ cleaned_result
38
51
  end
39
52
 
40
- cleaned_result
41
- end
42
-
43
- #jkl doesn't work with these aspects of the calais response, also removing blanks
44
- def clean_unwanted_items_from_hash h
45
- h.delete_if {|k, v| k == "relevance" }
46
- h.delete_if {|k, v| k == "instances" }
47
- h.delete_if {|k, v| v == "N/A"}
48
- h.delete_if {|k, v| v == []}
49
- h.delete_if {|k, v| v == ""}
50
- h.delete_if {|k, v| k == "_typeGroup"}
51
- h
52
- end
53
+ def clean_unwanted_items_from_hash h
54
+ h.delete_if {|k, v| k == "relevance" }
55
+ h.delete_if {|k, v| k == "instances" }
56
+ h.delete_if {|k, v| v == "N/A"}
57
+ h.delete_if {|k, v| v == []}
58
+ h.delete_if {|k, v| v == ""}
59
+ h.delete_if {|k, v| k == "_typeGroup"}
60
+ h
61
+ end
53
62
 
54
- private
63
+ private
55
64
 
56
- def paramsXML(format)
57
- <<-paramsXML;
58
- <c:params xmlns:c="http://s.opencalais.com/1/pred/"
59
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
60
- <c:processingDirectives
61
- c:contentType="text/txt"
62
- c:outputFormat="#{format}">
63
- </c:processingDirectives>
64
- <c:userDirectives />
65
- <c:externalMetadata />
66
- </c:params>
67
- paramsXML
68
- end
65
+ def paramsXML(format)
66
+ <<-paramsXML;
67
+ <c:params xmlns:c="http://s.opencalais.com/1/pred/"
68
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
69
+ <c:processingDirectives
70
+ c:contentType="text/txt"
71
+ c:outputFormat="#{format}">
72
+ </c:processingDirectives>
73
+ <c:userDirectives />
74
+ <c:externalMetadata />
75
+ </c:params>
76
+ paramsXML
77
+ end
69
78
 
79
+ end
70
80
  end
71
-
72
81
  end
@@ -2,7 +2,6 @@ require 'net/http'
2
2
  require 'hpricot'
3
3
 
4
4
  module Jkl
5
-
6
5
  class << self
7
6
 
8
7
  def post_to(uri, post_args)
@@ -31,10 +30,9 @@ module Jkl
31
30
  end
32
31
  end
33
32
 
34
- def get_from_as_xml(uri)
33
+ def get_xml_from(uri)
35
34
  Hpricot.XML get_from uri
36
35
  end
37
36
 
38
37
  end
39
-
40
- end
38
+ end
@@ -1,19 +1,27 @@
1
1
  require 'hpricot'
2
2
 
3
3
  module Jkl
4
-
5
- class << self
4
+ module Rss
5
+ class << self
6
6
 
7
- def get_items_from(rssdoc)
8
- items = []
9
- (rssdoc/:item).each { |rssitem| items.push rssitem } unless rssdoc==nil
10
- items
11
- end
12
-
13
- def attribute_from(item, name)
14
- (item/name).inner_html
7
+ def items(rss_doc)
8
+ (rss_doc/:item)
9
+ end
10
+
11
+ def links(items)
12
+ items.map{|item| attribute_from(item,:link)}
13
+ end
14
+
15
+ def descriptions(items)
16
+ items.map do |item|
17
+ attribute_from(item, :description).gsub("<![CDATA[","").gsub("]]>","")
18
+ end
19
+ end
20
+
21
+ def attribute_from(item, name)
22
+ (item/name).inner_html
23
+ end
24
+
15
25
  end
16
-
17
26
  end
18
-
19
27
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jakal
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - sshingler