jakal 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -11,6 +11,10 @@ module Jkl
11
11
  )
12
12
  end
13
13
 
14
+ def entities(key,text)
15
+ calais_response(key, text).entities.map{|e| {e.type => [e.attributes["name"]]}}
16
+ end
17
+
14
18
  def tags(key, text)
15
19
  nested_list = {}
16
20
  entities(key,text).each do |a|
@@ -19,10 +23,6 @@ module Jkl
19
23
  nested_list
20
24
  end
21
25
 
22
- def entities(key,text)
23
- calais_response(key, text).entities.map{|e| {e.type => [e.attributes["name"]]}}
24
- end
25
-
26
26
  end
27
27
  end
28
28
  end
@@ -5,6 +5,7 @@ module Jkl
5
5
  def sanitize(text)
6
6
  remove_short_lines(strip_all_tags(remove_script_tags(text)))
7
7
  end
8
+ alias :clean :sanitize
8
9
 
9
10
  def strip_all_tags(text)
10
11
  text.gsub(/<\/?[^>]*>/, "")
data/lib/jkl.rb CHANGED
@@ -16,10 +16,19 @@ module Jkl
16
16
  end
17
17
  end
18
18
 
19
+ def topix_links(keyphrase)
20
+ url = YAML::load_file('config/config.yml')['topix']
21
+ links("#{url}#{keyphrase}")
22
+ end
23
+
19
24
  def tags(key, link)
20
25
  text = Jkl::Text::sanitize(Jkl::get_from(link))
21
26
  Jkl::Extraction::tags(key, text)
22
27
  end
23
28
 
29
+ def trends
30
+ url = YAML::load_file('config/config.yml')['twitter']
31
+ JSON.parse(Jkl::get_from(url))["trends"].map{|t| t["name"]}
32
+ end
24
33
  end
25
34
  end
File without changes
File without changes
@@ -0,0 +1,82 @@
1
+ require "test/unit"
2
+ require "shoulda"
3
+ require "webmock/test_unit"
4
+ require "yaml"
5
+ require "lib/jkl"
6
+
7
+ class JklTest < Test::Unit::TestCase
8
+ include WebMock
9
+
10
+ context "Using Jkl" do
11
+ setup do
12
+ stub_twitter
13
+ stub_topix
14
+ stub_news_article
15
+ end
16
+
17
+ should "GET trends" do
18
+ trends = Jkl::trends
19
+ assert trends.length == 10
20
+ assert trends[0] == "London"
21
+ end
22
+
23
+ should "GET news article URLS for a trend" do
24
+ articles = Jkl::topix_links(Jkl::trends[0])
25
+ assert articles.length == 2
26
+ assert articles[0] = "http://www.localnews8.com/Global/story.asp?S=10876507"
27
+ end
28
+
29
+ should "extract text from a news article" do
30
+ articles = Jkl::topix_links(Jkl::trends[0])
31
+ text = Jkl::Text::sanitize(Jkl::get_from(articles[0]))
32
+ assert_not_nil text
33
+ end
34
+
35
+ should "extract tags from some text" do
36
+ key = YAML::load_file('config/keys.yml')['calais']
37
+ text = <<-EOF
38
+ Barack Obama said today that he expects there
39
+ to be conflict within his new security team after
40
+ confirming Hillary Clinton as his choice for US Secretary of State."
41
+ EOF
42
+ tags = Jkl::Extraction::tags(key, text)
43
+ assert tags["Person"][0] == "Barack Obama"
44
+ puts Jkl::Extraction::entities(key,text)
45
+ end
46
+ end
47
+
48
+ private
49
+ def stub_twitter
50
+ url = YAML::load_file('config/config.yml')['twitter']
51
+ response = <<-EOF
52
+ {"trends":[
53
+ {"name":"London","url":"http://search.twitter.com/search?q=London"},
54
+ {"name":"Geneva","url":"http://search.twitter.com/search?q=Geneva"},
55
+ {"name":"Kabul","url":"http://search.twitter.com/search?q=Kabul"},
56
+ {"name":"Chicago","url":"http://search.twitter.com/search?q=Chicago"},
57
+ {"name":"Cannes","url":"http://search.twitter.com/search?q=Cannes"},
58
+ {"name":"Verona","url":"http://search.twitter.com/search?q=Verona"},
59
+ {"name":"Milan","url":"http://search.twitter.com/search?q=Milan"},
60
+ {"name":"New York","url":"http://search.twitter.com/search?q=New%20York"},
61
+ {"name":"Paris","url":"http://search.twitter.com/search?q=Paris"},
62
+ {"name":"Melbourne","url":"http://search.twitter.com/search?q=Melbourne"}
63
+ ],"as_of":"Sat, 1 Jan 1970 00:00:00 +0000"}
64
+ EOF
65
+ stub_request(:get, url).to_return(:body => response)
66
+ end
67
+ def stub_topix
68
+ url = YAML::load_file('config/config.yml')['topix']
69
+ response = raw = File.open('test/fixtures/topix_rss.xml','r') do |file|
70
+ file.readlines.to_s
71
+ end
72
+ stub_request(:get, "#{url}London").to_return(:body => response)
73
+ end
74
+ def stub_news_article
75
+ response = raw = File.open('test/fixtures/bbc_story.html','r') do |file|
76
+ file.readlines.to_s
77
+ end
78
+ stub_request(:get, "http://www.localnews8.com/Global/story.asp?S=10876507").to_return(
79
+ :body => response
80
+ )
81
+ end
82
+ end
@@ -0,0 +1,72 @@
1
+ require "test/unit"
2
+ require "shoulda"
3
+ require "webmock/test_unit"
4
+ require "yaml"
5
+ require "lib/jkl"
6
+
7
+ class TextCleaningTest < Test::Unit::TestCase
8
+ context "Cleaning Text" do
9
+
10
+ should "Remove short lines" do
11
+ input = <<-HTML
12
+ the cat sat on the mat
13
+ a short line
14
+ HTML
15
+ result = Jkl::Text::remove_short_lines input
16
+ assert result == "the cat sat on the mat"
17
+ end
18
+
19
+ should "Remove script tags" do
20
+ input = <<-HTML
21
+ the cat sat on the mat
22
+ <script type="text/javascript" charset="utf-8">
23
+ function nofunction(){var bob;}
24
+ </script>
25
+ a short line
26
+ HTML
27
+ result = Jkl::Text::remove_short_lines input
28
+ assert result == "the cat sat on the mat"
29
+ end
30
+
31
+ should "Remove html comments" do
32
+ input = <<-HTML
33
+ the cat sat on the mat
34
+ <!-- a comment-->
35
+ a short line
36
+ HTML
37
+ result = Jkl::Text::remove_short_lines input
38
+ assert result == "the cat sat on the mat"
39
+ end
40
+
41
+ should "Remove blank lines" do
42
+ input = <<-HTML
43
+ the cat sat on the mat
44
+
45
+ a short line
46
+ HTML
47
+ result = Jkl::Text::remove_short_lines input
48
+ assert result == "the cat sat on the mat"
49
+ end
50
+
51
+ should "Strip all tags" do
52
+ input = <<-HTML
53
+ <p>the cat sat on the mat</p>
54
+ HTML
55
+ result = Jkl::Text::strip_all_tags input
56
+ assert result == "the cat sat on the mat\n" #TODO fix carriage return
57
+ end
58
+
59
+ should "Clean text" do
60
+ input = <<-HTML
61
+ the cat sat on the mat
62
+ <script type="text/javascript" charset="utf-8">
63
+ function nofunction(){var bob;}
64
+ </script>
65
+ <p> some para stuff here </p>
66
+ some end stuff here
67
+ HTML
68
+ result = Jkl::Text::clean input
69
+ assert result == "the cat sat on the mat"
70
+ end
71
+ end
72
+ end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 6
9
- version: 0.1.6
8
+ - 7
9
+ version: 0.1.7
10
10
  platform: ruby
11
11
  authors:
12
12
  - sshingler
@@ -73,7 +73,7 @@ dependencies:
73
73
  version: 0.0.9
74
74
  type: :runtime
75
75
  version_requirements: *id004
76
- description: Jakal is a Ruby library which contains some utilities for connecting to internet based APIs.
76
+ description: Jakal is a Ruby library which contains some utilities for connecting to internet based APIs and cleaning text.
77
77
  email: "'shingler@gmail.com'"
78
78
  executables: []
79
79
 
@@ -88,20 +88,13 @@ files:
88
88
  - lib/jkl/rest_client.rb
89
89
  - lib/jkl/rss_client.rb
90
90
  - lib/jkl/text_client.rb
91
- - features/calais.feature
92
- - features/http.feature
93
- - features/sanitize-text.feature
94
- - features/mocks/bbc_story.html
95
- - features/mocks/calais.json
96
- - features/mocks/topix_rss.xml
97
- - features/step_definitions/calais_steps.rb
98
- - features/step_definitions/http_steps.rb
99
- - features/step_definitions/sanitize-text_steps.rb
100
- - features/step_definitions/twitter_steps.rb
101
- - features/support/env.rb
91
+ - test/fixtures/bbc_story.html
92
+ - test/fixtures/topix_rss.xml
93
+ - test/unit/jkl_test.rb
94
+ - test/unit/text_cleaning_test.rb
102
95
  - README.md
103
96
  - License.txt
104
- has_rdoc: false
97
+ has_rdoc: true
105
98
  homepage: http://github.com/sshingler/jkl
106
99
  licenses: []
107
100
 
@@ -131,6 +124,6 @@ rubyforge_project:
131
124
  rubygems_version: 1.3.6
132
125
  signing_key:
133
126
  specification_version: 2
134
- summary: Jakal is a Ruby library which contains some utilities for connecting to internet based APIs.
127
+ summary: Jakal is a Ruby library which contains some utilities for connecting to internet based APIs and cleaning text.
135
128
  test_files: []
136
129
 
@@ -1,10 +0,0 @@
1
- Feature: Calais-Specific features
2
- In order to use the Calais Meta-Tagging Service
3
- As a developer
4
- I want to make some requests and inspect some responses
5
-
6
- @live
7
- Scenario: Get nested tags from calais
8
- Given I have some text
9
- When I request the nested entities from calais
10
- Then I should receive the entities grouped into categories
@@ -1,20 +0,0 @@
1
- Feature: http features
2
- In order to use the utility client calls
3
- As a developer
4
- I want to make some requests and inspect some responses
5
-
6
- @live
7
- Scenario: Make a restful post to yahoo
8
- When I post some data to yahoo
9
- Then I should get a response
10
-
11
- @live
12
- Scenario: Make a restful get
13
- When I make a restful get request
14
- Then I should get a response
15
- And I should see some text
16
-
17
- @live
18
- Scenario: Get some trends
19
- When I request some twitter trends
20
- Then I should get a response