jakal 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/jkl/calais_client.rb +4 -4
- data/lib/jkl/text_client.rb +1 -0
- data/lib/jkl.rb +9 -0
- data/{features/mocks → test/fixtures}/bbc_story.html +0 -0
- data/{features/mocks → test/fixtures}/topix_rss.xml +0 -0
- data/test/unit/jkl_test.rb +82 -0
- data/test/unit/text_cleaning_test.rb +72 -0
- metadata +9 -16
- data/features/calais.feature +0 -10
- data/features/http.feature +0 -20
- data/features/mocks/calais.json +0 -2464
- data/features/sanitize-text.feature +0 -71
- data/features/step_definitions/calais_steps.rb +0 -13
- data/features/step_definitions/http_steps.rb +0 -38
- data/features/step_definitions/sanitize-text_steps.rb +0 -79
- data/features/step_definitions/twitter_steps.rb +0 -13
- data/features/support/env.rb +0 -17
data/features/sanitize-text.feature
DELETED
@@ -1,71 +0,0 @@
|
|
1
|
-
Feature: Processing features
|
2
|
-
In order to integrate our apps
|
3
|
-
As a developer
|
4
|
-
I want to make some requests and inspect some responses
|
5
|
-
|
6
|
-
@unit @text
|
7
|
-
Scenario: No changes needed
|
8
|
-
Given I have the text "the cat sat on the mat"
|
9
|
-
When I sanitize this text
|
10
|
-
Then there should be no script tags
|
11
|
-
And there should be no tags
|
12
|
-
And there should be no blank lines
|
13
|
-
And it should say "the cat sat on the mat"
|
14
|
-
|
15
|
-
@unit @text
|
16
|
-
Scenario: Remove simple tags
|
17
|
-
Given I have the text "<a href=\"a-link.html\">the cat sat on the mat</a>"
|
18
|
-
When I sanitize this text
|
19
|
-
Then there should be no script tags
|
20
|
-
And there should be no tags
|
21
|
-
And there should be no blank lines
|
22
|
-
Then it should say "the cat sat on the mat"
|
23
|
-
|
24
|
-
@unit @text @wip
|
25
|
-
Scenario: Remove script tags
|
26
|
-
Given I have some script tag data
|
27
|
-
When I sanitize this text
|
28
|
-
Then there should be no script tags
|
29
|
-
And there should be no tags
|
30
|
-
And there should be no blank lines
|
31
|
-
Then it should say "the cat sat on the mat"
|
32
|
-
|
33
|
-
@mock
|
34
|
-
Scenario: Remove script tags
|
35
|
-
Given I have a sample web page
|
36
|
-
When I remove the script tags
|
37
|
-
Then there should be no script tags
|
38
|
-
|
39
|
-
@mock
|
40
|
-
Scenario: Remove all tags
|
41
|
-
Given I have a sample web page
|
42
|
-
When I remove the script tags
|
43
|
-
And I strip all the tags
|
44
|
-
Then there should be no script tags
|
45
|
-
And there should be no tags
|
46
|
-
|
47
|
-
@mock
|
48
|
-
Scenario: Remove empty lines
|
49
|
-
Given a stripped web page
|
50
|
-
When I remove the blank lines
|
51
|
-
Then there should be no blank lines
|
52
|
-
|
53
|
-
@mock
|
54
|
-
Scenario: Remove a short line
|
55
|
-
Given I have the text "the cat sat on the"
|
56
|
-
When I remove the short lines
|
57
|
-
Then it should say ""
|
58
|
-
|
59
|
-
@mock
|
60
|
-
Scenario: Don't remove a long line
|
61
|
-
Given I have the text "the cat sat on the mat"
|
62
|
-
When I remove the short lines
|
63
|
-
Then it should say "the cat sat on the mat"
|
64
|
-
|
65
|
-
@mock
|
66
|
-
Scenario: Santize a sample BBC page
|
67
|
-
Given I have a sample BBC story
|
68
|
-
When I sanitize this text
|
69
|
-
Then there should be no script tags
|
70
|
-
And there should be no tags
|
71
|
-
And there should be no blank lines
|
data/features/step_definitions/calais_steps.rb
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
|
2
|
-
Given /^I have some text$/ do
|
3
|
-
@text = "Barack Obama said today that he expects there to be conflict within his new security team after confirming Hillary Clinton as his choice for US Secretary of State."
|
4
|
-
end
|
5
|
-
|
6
|
-
When /^I request the nested entities from calais$/ do
|
7
|
-
key = YAML::load_file('config/keys.yml')['calais']
|
8
|
-
@response = Jkl::Extraction::tags key, @text
|
9
|
-
end
|
10
|
-
|
11
|
-
Then /^I should receive the entities grouped into categories$/ do
|
12
|
-
@response.eql?({"Person"=>["Barack Obama", "Hillary Clinton"], "Position"=>["Secretary of State"]}).should == true
|
13
|
-
end
|
data/features/step_definitions/http_steps.rb
DELETED
@@ -1,38 +0,0 @@
|
|
1
|
-
When /^I post some data to yahoo$/ do
|
2
|
-
@url = URI.parse('http://search.yahooapis.com/ContentAnalysisService/V1/termExtraction')
|
3
|
-
appid = LICENSE_ID = YAML::load_file('config/keys.yml')['yahoo']
|
4
|
-
context = URI.encode('Italian sculptors and painters of the renaissance favored the Virgin Mary for inspiration')
|
5
|
-
post_args = { 'appid' => appid, 'context' => context, 'output' => 'json' }
|
6
|
-
@response = Jkl::post_to @url, post_args
|
7
|
-
end
|
8
|
-
|
9
|
-
Given /^I have some RSS$/ do
|
10
|
-
raw = File.open('features/mocks/topix_rss.xml','r') {|f| f.readlines.to_s}
|
11
|
-
@response = Hpricot.XML raw
|
12
|
-
end
|
13
|
-
|
14
|
-
When /^I make a restful get request$/ do
|
15
|
-
url = "http://news.bbc.co.uk/1/hi/uk_politics/7677419.stm"
|
16
|
-
@response = Jkl::get_from url
|
17
|
-
end
|
18
|
-
|
19
|
-
When /^I request some twitter trends$/ do
|
20
|
-
twitter_json_url = YAML::load_file('config/config.yml')['twitter']
|
21
|
-
output = JSON.parse Jkl::get_from twitter_json_url
|
22
|
-
@response = output['trends']
|
23
|
-
end
|
24
|
-
|
25
|
-
Then /^I should get a response$/ do
|
26
|
-
@response.should_not == nil
|
27
|
-
end
|
28
|
-
|
29
|
-
Then /^I should be able to get the copy from the first headline$/ do
|
30
|
-
@response = Jkl::get_from @links[0]
|
31
|
-
@response.should_not be_nil
|
32
|
-
@response.should_not == ""
|
33
|
-
@text = Jkl::sanitize @response
|
34
|
-
end
|
35
|
-
|
36
|
-
Then /^I should see some text$/ do
|
37
|
-
@response.length.should > 0
|
38
|
-
end
|
data/features/step_definitions/sanitize-text_steps.rb
DELETED
@@ -1,79 +0,0 @@
|
|
1
|
-
Given "I have the text \"$text\"" do |text|
|
2
|
-
@text = text
|
3
|
-
end
|
4
|
-
|
5
|
-
Given /^I have a sample BBC story$/ do
|
6
|
-
@text = File.open('features/mocks/bbc_story.html','r') {|f| f.readlines.to_s}
|
7
|
-
end
|
8
|
-
|
9
|
-
Given /^I have some script tag data$/ do
|
10
|
-
@text = <<-EOF;
|
11
|
-
the cat sat on the mat
|
12
|
-
<script type="text/javascript" charset="utf-8">
|
13
|
-
function nofunction(){var bob;}
|
14
|
-
</script>
|
15
|
-
<p> some para stuff here </p>
|
16
|
-
some end stuff here
|
17
|
-
EOF
|
18
|
-
end
|
19
|
-
|
20
|
-
Given /^I have a sample web page$/ do
|
21
|
-
@text = File.open('features/mocks/sample-web-page.html','r') {|f| f.readlines.to_s}
|
22
|
-
end
|
23
|
-
|
24
|
-
Given /^a stripped web page$/ do
|
25
|
-
Given "I have a sample web page"
|
26
|
-
When "I remove the script tags"
|
27
|
-
And "I strip all the tags"
|
28
|
-
Then "there should be no script tags"
|
29
|
-
And "there should be no tags"
|
30
|
-
end
|
31
|
-
|
32
|
-
When /^I sanitize this text$/ do
|
33
|
-
@text = Jkl::Text::sanitize @text
|
34
|
-
end
|
35
|
-
|
36
|
-
When /^I examine the text$/ do
|
37
|
-
text = Jkl::Text::remove_tabs @text
|
38
|
-
end
|
39
|
-
|
40
|
-
Then "it should say \"$text\"" do |text|
|
41
|
-
@text.to_s.should == text
|
42
|
-
end
|
43
|
-
|
44
|
-
Then /^I can read it$/ do
|
45
|
-
Jkl::Text::document_from(@response).should_not be_nil
|
46
|
-
end
|
47
|
-
|
48
|
-
When /^I remove the script tags$/ do
|
49
|
-
@text = Jkl::Text::remove_script_tags @text
|
50
|
-
end
|
51
|
-
|
52
|
-
When /^I remove the blank lines$/ do
|
53
|
-
@text = Jkl::Text::remove_blank_lines @text
|
54
|
-
end
|
55
|
-
|
56
|
-
When /^I remove the short lines$/ do
|
57
|
-
@text = Jkl::Text::remove_short_lines @text
|
58
|
-
end
|
59
|
-
|
60
|
-
When /^I clean it up$/ do
|
61
|
-
@text = Jkl::Text::remove_short_lines Jkl::Text:: strip_all_tags Jkl::Text::remove_script_tags @text
|
62
|
-
end
|
63
|
-
|
64
|
-
When /^I strip all the tags$/ do
|
65
|
-
@text = Jkl::Text::strip_all_tags @text
|
66
|
-
end
|
67
|
-
|
68
|
-
Then /^there should be no tags$/ do
|
69
|
-
@text.match(/</).should be_nil
|
70
|
-
end
|
71
|
-
|
72
|
-
Then /^there should be no script tags$/ do
|
73
|
-
@text.match(/<script/).should be_nil
|
74
|
-
end
|
75
|
-
|
76
|
-
Then /^there should be no blank lines$/ do
|
77
|
-
@text.match(/\r/).should be_nil
|
78
|
-
@text.match(/\n/).should be_nil
|
79
|
-
end
|
data/features/step_definitions/twitter_steps.rb
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
When /^I request trends data from twitter$/ do
|
2
|
-
@url = YAML::load_file('config/config.yml')['twitter']
|
3
|
-
@response = Jkl::get_from @url
|
4
|
-
end
|
5
|
-
|
6
|
-
Then /^I should see some trends$/ do
|
7
|
-
result = JSON.parse @response
|
8
|
-
trends = result['trends']
|
9
|
-
trends.each do |subject|
|
10
|
-
subject['name'].length.should > 1
|
11
|
-
subject['url'].length.should > 1
|
12
|
-
end
|
13
|
-
end
|
data/features/support/env.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
gem 'rack-test'
|
2
|
-
|
3
|
-
require 'spec/expectations'
|
4
|
-
require 'rack/test'
|
5
|
-
require 'hpricot'
|
6
|
-
require 'json'
|
7
|
-
require 'rest_client'
|
8
|
-
|
9
|
-
require 'lib/jkl'
|
10
|
-
include Jkl
|
11
|
-
|
12
|
-
|
13
|
-
class MyWorld
|
14
|
-
include Rack::Test::Methods
|
15
|
-
end
|
16
|
-
|
17
|
-
World{MyWorld.new}
|