sshingler-jkl 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/features/calais.feature +38 -0
- data/features/homepage.feature +20 -0
- data/features/http.feature +25 -0
- data/features/mocks/bbc_story.html +2863 -0
- data/features/mocks/calais.json +2464 -0
- data/features/mocks/topix_rss.xml +47 -0
- data/features/mocks/twitter.json +11 -0
- data/features/persistence.feature +10 -0
- data/features/processing.feature +16 -0
- data/features/sanitize-text.feature +47 -0
- data/features/step_definitions/calais_steps.rb +44 -0
- data/features/step_definitions/home_page_steps.rb +20 -0
- data/features/step_definitions/http_steps.rb +49 -0
- data/features/step_definitions/persistence_steps.rb +19 -0
- data/features/step_definitions/processing_steps.rb +30 -0
- data/features/step_definitions/require_steps.rb +14 -0
- data/features/step_definitions/sanitize-text_steps.rb +20 -0
- data/features/step_definitions/twitter_steps.rb +17 -0
- data/features/support/env.rb +36 -0
- data/features/twitter.feature +16 -0
- data/lib/jkl.rb +1 -0
- metadata +21 -1
@@ -0,0 +1,47 @@
|
|
1
|
+
<?xml version="1.0" encoding="ISO-8859-1"?>
|
2
|
+
<?xml-stylesheet href="/static/rss.3.xsl" type="text/xsl"?>
|
3
|
+
<rss xmlns:topix="http://www.topix.com/partners/rsscomment/" xmlns:georss="http://www.georss.org/georss" version="2.0">
|
4
|
+
<channel>
|
5
|
+
<title>Search for "london" </title>
|
6
|
+
<link>http://www.topix.com/search/article?q=london&x=0&y=0</link>
|
7
|
+
<topix:rsslink>http://www.topix.com/rss/search/article.xml?q=london&x=0&y=0</topix:rsslink>
|
8
|
+
<description>News continually updated from thousands of sources across the web</description>
|
9
|
+
<language>en-us</language>
|
10
|
+
<ttl>240</ttl>
|
11
|
+
<copyright>Copyright 2008, Topix</copyright>
|
12
|
+
<image>
|
13
|
+
<title>Topix</title>
|
14
|
+
<link>http://www.topix.com/</link>
|
15
|
+
<url>http://topix.cachefly.net/pics/topix_homepage_logo2.png</url>
|
16
|
+
</image>
|
17
|
+
<item>
|
18
|
+
<title>Major Michael Jackson tribute planned for Vienna</title>
|
19
|
+
<link>http://www.localnews8.com/Global/story.asp?S=10876507</link>
|
20
|
+
<description><![CDATA[The King of Pop will get a royal send-off next month in Vienna. Events promoter World Awards Media GmbH confirmed Monday that members of Michael Jackson's family and a "high-profile lineup of international stars" are planning a tribute concert in the Austrian capital.]]></description>
|
21
|
+
<source>KIFI</source>
|
22
|
+
<pubDate>Mon, 10 Aug 2009 15:21:58 GMT</pubDate>
|
23
|
+
<category>Jermaine Jackson</category>
|
24
|
+
<category>Michael Jackson</category>
|
25
|
+
<category>Pop/Rock</category>
|
26
|
+
<category>Black Entertainment</category>
|
27
|
+
<category>R-N-B</category>
|
28
|
+
<guid isPermaLink="false">C39RO2C8Q8NQR825</guid>
|
29
|
+
</item>
|
30
|
+
<item>
|
31
|
+
<title>LATEST: Man stabbed to death with machete in pub garden</title>
|
32
|
+
<link>http://www.thisislondon.co.uk/standard/article-23730262-details/LATEST%3A+Man+stabbed+to+death+with+machete+in+pub+garden/article.do</link>
|
33
|
+
<description><![CDATA[A man drinking with his girlfriend in a beer garden was hacked to death with machetes in front of horrified drinkers.]]></description>
|
34
|
+
<source>This is London</source>
|
35
|
+
<pubDate>Mon, 10 Aug 2009 15:21:51 GMT</pubDate>
|
36
|
+
<category>Greater London County, England</category>
|
37
|
+
<category>Cheshire County, England</category>
|
38
|
+
<category>England, United Kingdom</category>
|
39
|
+
<category>United Kingdom</category>
|
40
|
+
<category>Sutton, England</category>
|
41
|
+
<category>London, England</category>
|
42
|
+
<category>World News</category>
|
43
|
+
<category>Essex County, England</category>
|
44
|
+
<guid isPermaLink="false">KN03NCSH3KIVG416</guid>
|
45
|
+
</item>
|
46
|
+
</channel>
|
47
|
+
</rss>
|
@@ -0,0 +1,11 @@
|
|
1
|
+
{
|
2
|
+
"trends": [{
|
3
|
+
"name": "musicmonday",
|
4
|
+
"url": "http:\/\/search.twitter.com\/search?q=%23musicmonday"
|
5
|
+
},
|
6
|
+
{
|
7
|
+
"name": "GI Joe",
|
8
|
+
"url": "http:\/\/search.twitter.com\/search?q=%22GI+Joe%22+OR+Joe"
|
9
|
+
}],
|
10
|
+
"as_of": "Mon, 10 Aug 2009 15:04:54 +0000"
|
11
|
+
}
|
@@ -0,0 +1,10 @@
|
|
1
|
+
Feature: persistence features
|
2
|
+
In order to check out the couchdb persistence layer
|
3
|
+
As a developer
|
4
|
+
I want to make some requests and inspect some responses
|
5
|
+
|
6
|
+
@couchdb_needed
|
7
|
+
Scenario: add a Trend
|
8
|
+
When I persist a Trend "london restaurants"
|
9
|
+
Then I should be able to view that Trend
|
10
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
Feature: Processing features
|
2
|
+
In order to integrate our apps
|
3
|
+
As a developer
|
4
|
+
I want to make some requests and inspect some responses
|
5
|
+
|
6
|
+
@connection_needed
|
7
|
+
Scenario: end to end flow, keyphrase to tags
|
8
|
+
Given I have a keyphrase 'london restaurants'
|
9
|
+
When I request some RSS
|
10
|
+
Then I should receive some headlines
|
11
|
+
And I should be able to get the copy from the first headline
|
12
|
+
When I post to calais
|
13
|
+
Then I should receive some tags
|
14
|
+
And I should be able to persist these tags
|
15
|
+
When I generate a view of the recent keyword results
|
16
|
+
Then I should see a network graph
|
@@ -0,0 +1,47 @@
|
|
1
|
+
Feature: Sanitize text features
|
2
|
+
In order to integrate our apps
|
3
|
+
As a developer
|
4
|
+
I want to make some requests and inspect some responses
|
5
|
+
|
6
|
+
@unit @text
|
7
|
+
Scenario: Sanitize some ok text
|
8
|
+
Given I have a keyphrase 'the cat sat on the mat'
|
9
|
+
When I sanitize this text
|
10
|
+
Then it should be ok
|
11
|
+
And it should say 'the cat sat on the mat'
|
12
|
+
|
13
|
+
@unit @text
|
14
|
+
Scenario: Sanitize some short text
|
15
|
+
Given I have a keyphrase 'the cat sat'
|
16
|
+
When I sanitize this text
|
17
|
+
Then it should say ''
|
18
|
+
|
19
|
+
@unit @text
|
20
|
+
Scenario: Sanitize some text with tabs and spaces
|
21
|
+
Given I have a keyphrase 'the cat sat on the mat '
|
22
|
+
When I sanitize this text
|
23
|
+
Then it should say 'the cat sat on the mat'
|
24
|
+
|
25
|
+
@unit @text
|
26
|
+
Scenario: Sanitize some short text with tabs and spaces
|
27
|
+
Given I have a keyphrase 'the cat sat on '
|
28
|
+
When I sanitize this text
|
29
|
+
Then it should say ''
|
30
|
+
|
31
|
+
@unit @text
|
32
|
+
Scenario: Sanitize some tagged short text
|
33
|
+
Given I have a keyphrase '<a href="a-link.html>the cat sat</a>'
|
34
|
+
When I sanitize this text
|
35
|
+
Then it should say ''
|
36
|
+
|
37
|
+
@unit @text
|
38
|
+
Scenario: Sanitize some tagged text
|
39
|
+
Given I have a keyphrase '<a href="a-link.html>the cat sat on the mat</a>'
|
40
|
+
When I sanitize this text
|
41
|
+
Then it should be ok
|
42
|
+
Then it should say 'the cat sat on the mat'
|
43
|
+
|
44
|
+
Scenario: Clean a web page
|
45
|
+
Given I have a sample BBC story
|
46
|
+
When I sanitize this text
|
47
|
+
Then it should be ok
|
@@ -0,0 +1,44 @@
|
|
1
|
+
|
2
|
+
# Seeds @text with a fixed one-sentence news snippet for later steps to consume.
Given(/^I have some simple text$/) do
  @text = "Barack Obama said today that he expects there to be conflict within his new security team after confirming Hillary Clinton as his choice for US Secretary of State."
end
|
5
|
+
|
6
|
+
# Composite step: runs the "sample BBC story" step, then the "sanitize" step.
Given(/^I have a sanitized sample BBC story$/) do
  Given("I have a sample BBC story")
  When("I sanitize this text")
end
|
10
|
+
|
11
|
+
# Loads the canned Calais JSON fixture into @response as one string.
Given /^I have a mock calais response$/ do
  # File.read returns the raw file contents on every Ruby version.
  # `readlines.to_s` only did so on Ruby 1.8 (Array#to_s joined the lines);
  # on 1.9+ it yields the array's inspect form, which is not valid JSON.
  @response = File.read('features/mocks/calais.json')
end
|
14
|
+
|
15
|
+
# Sends the current @text through get_from_calais and keeps the result in @response.
When(/^I post to calais$/) do
  @response = get_from_calais(@text)
end
|
18
|
+
|
19
|
+
# Parses @response as JSON and stores the cleaned hash in @processed_json.
When(/^I remove the unwanted items$/) do
  parsed = JSON.parse(@response)
  @processed_json = clean_unwanted_items_from_hash(parsed)
end
|
22
|
+
|
23
|
+
# Asserts that the named key is absent (nil) in the cleaned hash.
Then(/^there should no longer be any "([^\"]*)"$/) do |key|
  @processed_json[key].should be_nil
end
|
26
|
+
|
27
|
+
# Asserts that every tag yielded by get_tag_from_json for @response is non-nil.
Then(/^I should receive some tags$/) do
  get_tag_from_json(@response) { |tag| tag.should_not be_nil }
end
|
32
|
+
|
33
|
+
# Prints the '_type' entry of every tag yielded for @response.
# NOTE(review): the captured tag type (arg1) is never used and nothing is
# asserted, so this step cannot fail on content - confirm whether an
# expectation against '_type' was intended.
Then(/^there should be some "([^\"]*)" tags$/) do |arg1|
  get_tag_from_json(@response) do |tag|
    tag.each do |k, v|
      puts "#{k} : #{v}" if k == '_type'
    end
  end
end
|
39
|
+
|
40
|
+
# Calls get_tag_from_json without a block and expects a non-empty result.
Then(/^I should be able to see the whole lot of tags as one block$/) do
  get_tag_from_json(@response).length.should > 0
end
|
44
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
|
2
|
+
# Visits the URL quoted in the step text.
When("I surf to '$url'") do |address|
  visit(address)
end
|
5
|
+
|
6
|
+
# Expects the response body to match the quoted text.
# The text is interpolated into a multiline regex, so regex metacharacters
# in the feature file are interpreted as a pattern rather than literally.
Then(/^I should see '(.*)'$/) do |text|
  pattern = /#{text}/m
  response_body.should contain(pattern)
end
|
9
|
+
|
10
|
+
# Expects the trends heading, an ol.tweets list, and exactly ten li.tweet items.
Then(/^I should see a list of 10 trends$/) do
  heading = "here are the top twitter trends right now"
  response_body.should contain(/#{heading}/m)
  response_body.should have_selector("ol.tweets")
  response_body.should have_selector('li.tweet', :count => 10)
end
|
16
|
+
|
17
|
+
# Fills the 'keyphrase' field with a fixed phrase and presses 'search'.
Given(/^I add my own trend$/) do
  fill_in('keyphrase', :with => 'london restaurants')
  click_button('search')
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Posts a fixed sample sentence to the Yahoo term-extraction endpoint and
# stores the result of post_to in @response.
When /^I post some data to yahoo$/ do
  @url = URI.parse('http://search.yahooapis.com/ContentAnalysisService/V1/termExtraction')
  # Keep the key in a local only. Assigning a LICENSE_ID constant here would
  # warn "already initialized constant" whenever the step runs twice and
  # would overwrite any LICENSE_ID defined elsewhere in the process.
  appid = YAML::load_file('config/keys.yml')['yahoo']
  # NOTE(review): URI.encode was removed in Ruby 3.0, and post_to may encode
  # form values again - confirm whether pre-encoding is needed at all.
  context = URI.encode('Italian sculptors and painters of the renaissance favored the Virgin Mary for inspiration')
  post_args = { 'appid' => appid, 'context' => context, 'output' => 'json' }
  @response = post_to @url, post_args
end
|
8
|
+
|
9
|
+
# Builds the Topix feed URL from config plus the escaped keyphrase
# (falling back to "iraq") and stores the fetched XML in @response.
When(/^I request some RSS$/) do
  phrase = @keyphrase || "iraq"
  base = YAML::load_file('config/config.yml')['topix']
  @response = get_from_as_xml("#{base}#{CGI::escape(phrase)}")
end
|
14
|
+
|
15
|
+
# Parses the canned Topix RSS fixture with Hpricot and stores it in @response.
Given /^I have some RSS$/ do
  # File.read yields the file text on every Ruby version; `readlines.to_s`
  # returns the array's inspect form on Ruby 1.9+, which corrupts the XML.
  raw = File.read('features/mocks/topix_rss.xml')
  @response = Hpricot.XML(raw)
end
|
19
|
+
|
20
|
+
# Fetches a fixed BBC News article URL via get_from into @response.
When(/^I make a restful get request$/) do
  @response = get_from("http://news.bbc.co.uk/1/hi/uk_politics/7677419.stm")
end
|
24
|
+
|
25
|
+
# Expects that an earlier step left something other than nil in @response.
Then(/^I should get a response$/) do
  @response.should_not == nil
end
|
29
|
+
|
30
|
+
# Extracts the items from @response, collects each item's :link attribute
# into @links, and expects at least one link.
Then(/^I should receive some headlines$/) do
  @items = get_items_from(@response)
  @links = []
  @items.each { |item| @links << attribute_from(item, :link) }
  @links.should_not == nil
  @links.length.should > 0
end
|
39
|
+
|
40
|
+
# Fetches the first collected link, expects a non-nil, non-empty response,
# and stores its sanitized form in @text.
Then(/^I should be able to get the copy from the first headline$/) do
  first_link = @links[0]
  @response = get_from(first_link)
  @response.should_not be_nil
  @response.should_not == ""
  @text = sanitize(@response)
end
|
46
|
+
|
47
|
+
# Expects @response to have a length greater than zero.
Then(/^I should see some text$/) do
  @response.length.should > 0
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Hook for scenarios tagged @couchdb_needed; intentionally performs no setup.
Before('@couchdb_needed') { }
|
3
|
+
|
4
|
+
# Remembers the quoted keyphrase in @keyphrase and saves a Trend named after it.
When(/^I persist a Trend "([^\"]*)"$/) do |keyphrase|
  @keyphrase = keyphrase
  Trend.new("name" => @keyphrase).save
end
|
9
|
+
|
10
|
+
# Looks up Trends by the remembered keyphrase and checks the first match's name.
Then /^I should be able to view that Trend$/ do
  trends = Trend.by_name :key => @keyphrase
  # Check for a non-empty result before indexing, so an empty lookup fails
  # with a clear expectation error instead of NoMethodError on nil.
  trends.length.should > 0
  trends[0]['name'].should == @keyphrase
end
|
16
|
+
|
17
|
+
# Calls delete_db after every scenario tagged @couchdb_needed.
After('@couchdb_needed') { delete_db }
|
@@ -0,0 +1,30 @@
|
|
1
|
+
|
2
|
+
############### pending steps below ################
|
3
|
+
|
4
|
+
|
5
|
+
# Sends @story to Calais once and prints every key/value pair of each tag.
When /^I request tags for the first story$/ do
  # One request only: a second get_from_calais call with the same story
  # would repeat the remote round-trip and discard its result.
  response = get_from_calais(@story)
  get_tag_from_json(response) do |tag|
    tag.each { |k, v| puts "#{k} : #{v}" }
  end
end
|
11
|
+
|
12
|
+
# Fetches the Topix feed for the fixed term 'london' into @response.
When(/^I request stories from Topix$/) do
  search_term = 'london'
  base = YAML::load_file('config/config.yml')['topix']
  @response = get_from_as_xml("#{base}#{search_term}")
end
|
17
|
+
|
18
|
+
# Fetches the Topix feed, follows the first item's link, and stores the
# sanitized story in @story.
When /^I get some news stories from the first keyword$/ do
  # The search term is pinned to 'london'. A value derived from
  # @trend['name'] was always overwritten before use, so computing it only
  # added a NoMethodError when @trend was unset.
  # NOTE(review): if the trend name should drive the search, use
  # @trend['name'].gsub('#','') here instead - confirm intent.
  search_term = 'london'
  url = "#{YAML::load_file('config/config.yml')['topix']}#{search_term}"
  items = get_items_from(get_from_as_xml(url))
  links = []
  items.each { |item| links << attribute_from(item, :link) }
  @story = sanitize(from_doc(get_from(links[0])))
end
|
30
|
+
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'hpricot'
|
2
|
+
require 'json'
|
3
|
+
require 'webrat'
|
4
|
+
require 'couchrest'
|
5
|
+
require 'restclient'
|
6
|
+
require 'haml'
|
7
|
+
|
8
|
+
require 'lib/jkl/calais_client.rb'
|
9
|
+
require 'lib/jkl/persistence_client.rb'
|
10
|
+
require 'lib/jkl/rest_client.rb'
|
11
|
+
require 'lib/jkl/rss_client.rb'
|
12
|
+
require 'lib/jkl/url_doc_handler.rb'
|
13
|
+
|
14
|
+
include Jkl
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Stores the quoted keyphrase from the step text in @text.
Given("I have a keyphrase '$text'") do |phrase|
  @text = phrase
end
|
4
|
+
|
5
|
+
# Loads the canned BBC story HTML fixture into @text as one string.
Given /^I have a sample BBC story$/ do
  # File.read returns the page verbatim on every Ruby version;
  # `readlines.to_s` produces the array's inspect form on Ruby 1.9+.
  @text = File.read('features/mocks/bbc_story.html')
end
|
8
|
+
|
9
|
+
# Replaces @text with the result of passing it through sanitize.
When(/^I sanitize this text$/) do
  @text = sanitize(@text)
end
|
12
|
+
|
13
|
+
# Expects @text to be neither nil nor the empty string.
Then(/^it should be ok$/) do
  @text.should_not be_nil
  @text.should_not == ""
end
|
17
|
+
|
18
|
+
# Expects @text to equal the quoted text from the step.
Then("it should say '$text'") do |expected|
  @text.should == expected
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Loads the canned Twitter trends JSON fixture into @response as one string.
Given /^I have a mock twitter response$/ do
  # File.read returns the raw JSON on every Ruby version; `readlines.to_s`
  # yields the array's inspect form on Ruby 1.9+, which JSON.parse rejects.
  @response = File.read('features/mocks/twitter.json')
end
|
4
|
+
|
5
|
+
# Reads the 'twitter' URL from config into @url and fetches it into @response.
When(/^I request trends data from twitter$/) do
  @url = YAML::load_file('config/config.yml')['twitter']
  @response = get_from(@url)
end
|
9
|
+
|
10
|
+
# Parses @response as JSON and expects every entry under 'trends' to have
# a 'name' and a 'url' longer than one character.
Then(/^I should see some trends$/) do
  JSON.parse(@response)['trends'].each do |trend|
    trend['name'].length.should > 1
    trend['url'].length.should > 1
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# See http://wiki.github.com/aslakhellesoy/cucumber/sinatra
|
2
|
+
# for more details about Sinatra with Cucumber
|
3
|
+
|
4
|
+
gem 'rack-test', '>=0.3.0'
|
5
|
+
gem 'aslakhellesoy-webrat', '=0.4.4.1'
|
6
|
+
gem 'sinatra', '=0.9.4'
|
7
|
+
|
8
|
+
# Sinatra
|
9
|
+
# Resolve app.rb two directories above this file and load it.
app_file = File.join(File.dirname(__FILE__), '..', '..', 'app.rb')
require app_file
# Set the application file explicitly because polyglot breaks Sinatra's auto-detection.
Sinatra::Application.app_file = app_file
|
13
|
+
|
14
|
+
require 'spec/expectations'
|
15
|
+
require 'rack/test'
|
16
|
+
require 'webrat'
|
17
|
+
|
18
|
+
#set :environment, :test
|
19
|
+
|
20
|
+
# Run Webrat in :rack mode.
Webrat.configure { |config| config.mode = :rack }
|
23
|
+
|
24
|
+
# World object for the scenarios: mixes in the Rack::Test and Webrat helpers
# and names the Sinatra application as the app under test.
class MyWorld
  include Rack::Test::Methods
  include Webrat::Methods
  include Webrat::Matchers

  # Have Webrat forward response_code and response_body to its session.
  Webrat::Methods.delegate_to_session(:response_code, :response_body)

  # Returns the application class the helpers should drive.
  def app
    return Sinatra::Application
  end
end
|
35
|
+
|
36
|
+
# Register a block with Cucumber's World that builds a new MyWorld.
World do
  MyWorld.new
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
Feature: twitter features
|
2
|
+
In order to grab some keywords from twitter
|
3
|
+
As a developer
|
4
|
+
I want to make some requests and inspect some responses
|
5
|
+
|
6
|
+
Scenario: Work with twitter trends
|
7
|
+
Given I have a mock twitter response
|
8
|
+
Then I should see some trends
|
9
|
+
|
10
|
+
@connection_needed
|
11
|
+
Scenario: Get some json from twitter
|
12
|
+
When I request trends data from twitter
|
13
|
+
Then I should get a response
|
14
|
+
And I should see some trends
|
15
|
+
|
16
|
+
|
data/lib/jkl.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sshingler-jkl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- steven shingler
|
@@ -30,6 +30,26 @@ files:
|
|
30
30
|
- lib/jkl/rest_client.rb
|
31
31
|
- lib/jkl/rss_client.rb
|
32
32
|
- lib/jkl/url_doc_handler.rb
|
33
|
+
- features/calais.feature
|
34
|
+
- features/homepage.feature
|
35
|
+
- features/http.feature
|
36
|
+
- features/persistence.feature
|
37
|
+
- features/processing.feature
|
38
|
+
- features/sanitize-text.feature
|
39
|
+
- features/twitter.feature
|
40
|
+
- features/mocks/bbc_story.html
|
41
|
+
- features/mocks/calais.json
|
42
|
+
- features/mocks/topix_rss.xml
|
43
|
+
- features/mocks/twitter.json
|
44
|
+
- features/step_definitions/calais_steps.rb
|
45
|
+
- features/step_definitions/home_page_steps.rb
|
46
|
+
- features/step_definitions/http_steps.rb
|
47
|
+
- features/step_definitions/persistence_steps.rb
|
48
|
+
- features/step_definitions/processing_steps.rb
|
49
|
+
- features/step_definitions/require_steps.rb
|
50
|
+
- features/step_definitions/sanitize-text_steps.rb
|
51
|
+
- features/step_definitions/twitter_steps.rb
|
52
|
+
- features/support/env.rb
|
33
53
|
- README.rdoc
|
34
54
|
- License.txt
|
35
55
|
has_rdoc: false
|