jakal 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/jkl/calais_client.rb +4 -4
- data/lib/jkl/text_client.rb +1 -0
- data/lib/jkl.rb +9 -0
- data/{features/mocks → test/fixtures}/bbc_story.html +0 -0
- data/{features/mocks → test/fixtures}/topix_rss.xml +0 -0
- data/test/unit/jkl_test.rb +82 -0
- data/test/unit/text_cleaning_test.rb +72 -0
- metadata +9 -16
- data/features/calais.feature +0 -10
- data/features/http.feature +0 -20
- data/features/mocks/calais.json +0 -2464
- data/features/sanitize-text.feature +0 -71
- data/features/step_definitions/calais_steps.rb +0 -13
- data/features/step_definitions/http_steps.rb +0 -38
- data/features/step_definitions/sanitize-text_steps.rb +0 -79
- data/features/step_definitions/twitter_steps.rb +0 -13
- data/features/support/env.rb +0 -17
data/lib/jkl/calais_client.rb
CHANGED
@@ -11,6 +11,10 @@ module Jkl
|
|
11
11
|
)
|
12
12
|
end
|
13
13
|
|
14
|
+
def entities(key,text)
|
15
|
+
calais_response(key, text).entities.map{|e| {e.type => [e.attributes["name"]]}}
|
16
|
+
end
|
17
|
+
|
14
18
|
def tags(key, text)
|
15
19
|
nested_list = {}
|
16
20
|
entities(key,text).each do |a|
|
@@ -19,10 +23,6 @@ module Jkl
|
|
19
23
|
nested_list
|
20
24
|
end
|
21
25
|
|
22
|
-
def entities(key,text)
|
23
|
-
calais_response(key, text).entities.map{|e| {e.type => [e.attributes["name"]]}}
|
24
|
-
end
|
25
|
-
|
26
26
|
end
|
27
27
|
end
|
28
28
|
end
|
data/lib/jkl/text_client.rb
CHANGED
data/lib/jkl.rb
CHANGED
@@ -16,10 +16,19 @@ module Jkl
|
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
+
def topix_links(keyphrase)
|
20
|
+
url = YAML::load_file('config/config.yml')['topix']
|
21
|
+
links("#{url}#{keyphrase}")
|
22
|
+
end
|
23
|
+
|
19
24
|
def tags(key, link)
|
20
25
|
text = Jkl::Text::sanitize(Jkl::get_from(link))
|
21
26
|
Jkl::Extraction::tags(key, text)
|
22
27
|
end
|
23
28
|
|
29
|
+
def trends
|
30
|
+
url = YAML::load_file('config/config.yml')['twitter']
|
31
|
+
JSON.parse(Jkl::get_from(url))["trends"].map{|t| t["name"]}
|
32
|
+
end
|
24
33
|
end
|
25
34
|
end
|
File without changes
|
File without changes
|
@@ -0,0 +1,82 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require "shoulda"
|
3
|
+
require "webmock/test_unit"
|
4
|
+
require "yaml"
|
5
|
+
require "lib/jkl"
|
6
|
+
|
7
|
+
class JklTest < Test::Unit::TestCase
|
8
|
+
include WebMock
|
9
|
+
|
10
|
+
context "Using Jkl" do
|
11
|
+
setup do
|
12
|
+
stub_twitter
|
13
|
+
stub_topix
|
14
|
+
stub_news_article
|
15
|
+
end
|
16
|
+
|
17
|
+
should "GET trends" do
|
18
|
+
trends = Jkl::trends
|
19
|
+
assert trends.length == 10
|
20
|
+
assert trends[0] == "London"
|
21
|
+
end
|
22
|
+
|
23
|
+
should "GET news article URLS for a trend" do
|
24
|
+
articles = Jkl::topix_links(Jkl::trends[0])
|
25
|
+
assert articles.length == 2
|
26
|
+
assert articles[0] = "http://www.localnews8.com/Global/story.asp?S=10876507"
|
27
|
+
end
|
28
|
+
|
29
|
+
should "extract text from a news article" do
|
30
|
+
articles = Jkl::topix_links(Jkl::trends[0])
|
31
|
+
text = Jkl::Text::sanitize(Jkl::get_from(articles[0]))
|
32
|
+
assert_not_nil text
|
33
|
+
end
|
34
|
+
|
35
|
+
should "extract tags from some text" do
|
36
|
+
key = YAML::load_file('config/keys.yml')['calais']
|
37
|
+
text = <<-EOF
|
38
|
+
Barack Obama said today that he expects there
|
39
|
+
to be conflict within his new security team after
|
40
|
+
confirming Hillary Clinton as his choice for US Secretary of State."
|
41
|
+
EOF
|
42
|
+
tags = Jkl::Extraction::tags(key, text)
|
43
|
+
assert tags["Person"][0] == "Barack Obama"
|
44
|
+
puts Jkl::Extraction::entities(key,text)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
def stub_twitter
|
50
|
+
url = YAML::load_file('config/config.yml')['twitter']
|
51
|
+
response = <<-EOF
|
52
|
+
{"trends":[
|
53
|
+
{"name":"London","url":"http://search.twitter.com/search?q=London"},
|
54
|
+
{"name":"Geneva","url":"http://search.twitter.com/search?q=Geneva"},
|
55
|
+
{"name":"Kabul","url":"http://search.twitter.com/search?q=Kabul"},
|
56
|
+
{"name":"Chicago","url":"http://search.twitter.com/search?q=Chicago"},
|
57
|
+
{"name":"Cannes","url":"http://search.twitter.com/search?q=Cannes"},
|
58
|
+
{"name":"Verona","url":"http://search.twitter.com/search?q=Verona"},
|
59
|
+
{"name":"Milan","url":"http://search.twitter.com/search?q=Milan"},
|
60
|
+
{"name":"New York","url":"http://search.twitter.com/search?q=New%20York"},
|
61
|
+
{"name":"Paris","url":"http://search.twitter.com/search?q=Paris"},
|
62
|
+
{"name":"Melbourne","url":"http://search.twitter.com/search?q=Melbourne"}
|
63
|
+
],"as_of":"Sat, 1 Jan 1970 00:00:00 +0000"}
|
64
|
+
EOF
|
65
|
+
stub_request(:get, url).to_return(:body => response)
|
66
|
+
end
|
67
|
+
def stub_topix
|
68
|
+
url = YAML::load_file('config/config.yml')['topix']
|
69
|
+
response = raw = File.open('test/fixtures/topix_rss.xml','r') do |file|
|
70
|
+
file.readlines.to_s
|
71
|
+
end
|
72
|
+
stub_request(:get, "#{url}London").to_return(:body => response)
|
73
|
+
end
|
74
|
+
def stub_news_article
|
75
|
+
response = raw = File.open('test/fixtures/bbc_story.html','r') do |file|
|
76
|
+
file.readlines.to_s
|
77
|
+
end
|
78
|
+
stub_request(:get, "http://www.localnews8.com/Global/story.asp?S=10876507").to_return(
|
79
|
+
:body => response
|
80
|
+
)
|
81
|
+
end
|
82
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require "shoulda"
|
3
|
+
require "webmock/test_unit"
|
4
|
+
require "yaml"
|
5
|
+
require "lib/jkl"
|
6
|
+
|
7
|
+
class TextCleaningTest < Test::Unit::TestCase
|
8
|
+
context "Cleaning Text" do
|
9
|
+
|
10
|
+
should "Remove short lines" do
|
11
|
+
input = <<-HTML
|
12
|
+
the cat sat on the mat
|
13
|
+
a short line
|
14
|
+
HTML
|
15
|
+
result = Jkl::Text::remove_short_lines input
|
16
|
+
assert result == "the cat sat on the mat"
|
17
|
+
end
|
18
|
+
|
19
|
+
should "Remove script tags" do
|
20
|
+
input = <<-HTML
|
21
|
+
the cat sat on the mat
|
22
|
+
<script type="text/javascript" charset="utf-8">
|
23
|
+
function nofunction(){var bob;}
|
24
|
+
</script>
|
25
|
+
a short line
|
26
|
+
HTML
|
27
|
+
result = Jkl::Text::remove_short_lines input
|
28
|
+
assert result == "the cat sat on the mat"
|
29
|
+
end
|
30
|
+
|
31
|
+
should "Remove html comments" do
|
32
|
+
input = <<-HTML
|
33
|
+
the cat sat on the mat
|
34
|
+
<!-- a comment-->
|
35
|
+
a short line
|
36
|
+
HTML
|
37
|
+
result = Jkl::Text::remove_short_lines input
|
38
|
+
assert result == "the cat sat on the mat"
|
39
|
+
end
|
40
|
+
|
41
|
+
should "Remove blank lines" do
|
42
|
+
input = <<-HTML
|
43
|
+
the cat sat on the mat
|
44
|
+
|
45
|
+
a short line
|
46
|
+
HTML
|
47
|
+
result = Jkl::Text::remove_short_lines input
|
48
|
+
assert result == "the cat sat on the mat"
|
49
|
+
end
|
50
|
+
|
51
|
+
should "Strip all tags" do
|
52
|
+
input = <<-HTML
|
53
|
+
<p>the cat sat on the mat</p>
|
54
|
+
HTML
|
55
|
+
result = Jkl::Text::strip_all_tags input
|
56
|
+
assert result == "the cat sat on the mat\n" #TODO fix carriage return
|
57
|
+
end
|
58
|
+
|
59
|
+
should "Clean text" do
|
60
|
+
input = <<-HTML
|
61
|
+
the cat sat on the mat
|
62
|
+
<script type="text/javascript" charset="utf-8">
|
63
|
+
function nofunction(){var bob;}
|
64
|
+
</script>
|
65
|
+
<p> some para stuff here </p>
|
66
|
+
some end stuff here
|
67
|
+
HTML
|
68
|
+
result = Jkl::Text::clean input
|
69
|
+
assert result == "the cat sat on the mat"
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
- 6
|
9
|
-
version: 0.1.6
|
8
|
+
- 7
|
9
|
+
version: 0.1.7
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- sshingler
|
@@ -73,7 +73,7 @@ dependencies:
|
|
73
73
|
version: 0.0.9
|
74
74
|
type: :runtime
|
75
75
|
version_requirements: *id004
|
76
|
-
description: Jakal is a Ruby library which contains some utilities for connecting to internet based APIs.
|
76
|
+
description: Jakal is a Ruby library which contains some utilities for connecting to internet based APIs and cleaning text.
|
77
77
|
email: "'shingler@gmail.com'"
|
78
78
|
executables: []
|
79
79
|
|
@@ -88,20 +88,13 @@ files:
|
|
88
88
|
- lib/jkl/rest_client.rb
|
89
89
|
- lib/jkl/rss_client.rb
|
90
90
|
- lib/jkl/text_client.rb
|
91
|
-
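- features/calais.feature
|
92
|
-
- features/http.feature
|
93
|
-
- features/sanitize-text.feature
|
94
|
-
- features/mocks/bbc_story.html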
|
95
|
-
- features/mocks/calais.json
|
96
|
-
- features/mocks/topix_rss.xml
|
97
|
-
- features/step_definitions/calais_steps.rb
|
98
|
-
- features/step_definitions/http_steps.rb
|
99
|
-
- features/step_definitions/sanitize-text_steps.rb
|
100
|
-
- features/step_definitions/twitter_steps.rb
|
101
|
-
- features/support/env.rb
|
91
|
+
- test/fixtures/bbc_story.html
|
92
|
+
- test/fixtures/topix_rss.xml
|
93
|
+
- test/unit/jkl_test.rb
|
94
|
+
- test/unit/text_cleaning_test.rb
|
102
95
|
- README.md
|
103
96
|
- License.txt
|
104
|
-
has_rdoc:
|
97
|
+
has_rdoc: true
|
105
98
|
homepage: http://github.com/sshingler/jkl
|
106
99
|
licenses: []
|
107
100
|
|
@@ -131,6 +124,6 @@ rubyforge_project:
|
|
131
124
|
rubygems_version: 1.3.6
|
132
125
|
signing_key:
|
133
126
|
specification_version: 2
|
134
|
-
summary: Jakal is a Ruby library which contains some utilities for connecting to internet based APIs.
|
127
|
+
summary: Jakal is a Ruby library which contains some utilities for connecting to internet based APIs and cleaning text.
|
135
128
|
test_files: []
|
136
129
|
|
data/features/calais.feature
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
Feature: Calais-Specific features
|
2
|
-
In order to use the Calais Meta-Tagging Service
|
3
|
-
As a developer
|
4
|
-
I want to make some requests and inspect some responses
|
5
|
-
|
6
|
-
@live
|
7
|
-
Scenario: Get nested tags from calais
|
8
|
-
Given I have some text
|
9
|
-
When I request the nested entities from calais
|
10
|
-
Then I should receive the entities grouped into categories
|
data/features/http.feature
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
Feature: http features
|
2
|
-
In order to use the utility client calls
|
3
|
-
As a developer
|
4
|
-
I want to make some requests and inspect some responses
|
5
|
-
|
6
|
-
@live
|
7
|
-
Scenario: Make a restful post to yahoo
|
8
|
-
When I post some data to yahoo
|
9
|
-
Then I should get a response
|
10
|
-
|
11
|
-
@live
|
12
|
-
Scenario: Make a restful get
|
13
|
-
When I make a restful get request
|
14
|
-
Then I should get a response
|
15
|
-
And I should see some text
|
16
|
-
|
17
|
-
@live
|
18
|
-
Scenario: Get some trends
|
19
|
-
When I request some twitter trends
|
20
|
-
Then I should get a response
|