alchemy_api 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +24 -0
- data/LICENSE +20 -0
- data/README.markdown +36 -0
- data/Rakefile +47 -0
- data/VERSION +1 -0
- data/alchemy_api.gemspec +92 -0
- data/lib/alchemy_api.rb +29 -0
- data/lib/alchemy_api/base.rb +37 -0
- data/lib/alchemy_api/categorization.rb +47 -0
- data/lib/alchemy_api/language_detection.rb +51 -0
- data/lib/alchemy_api/term_extraction.rb +56 -0
- data/lib/alchemy_api/text_extraction.rb +85 -0
- data/spec/alchemy_api/base_spec.rb +62 -0
- data/spec/alchemy_api/categorization_spec.rb +54 -0
- data/spec/alchemy_api/language_detection_spec.rb +72 -0
- data/spec/alchemy_api/term_extraction_spec.rb +70 -0
- data/spec/alchemy_api/text_extraction_spec.rb +94 -0
- data/spec/alchemy_api_spec.rb +10 -0
- data/spec/cache/categorization/get_categorization_from_html/ddc3cf50efe5bd5c2159abfb49121cfa2314ca88.cache +29 -0
- data/spec/cache/categorization/get_categorization_from_text/8b476a3b532afd2da646b145e9dde07570c27352.cache +29 -0
- data/spec/cache/categorization/get_categorization_from_url/7536a34e1d54a95d8ee07d2a98036362761e1621.cache +27 -0
- data/spec/cache/language_detection/get_language_from_html/0faf7be978647b611d9c59e1efa497dd76e542f5.cache +33 -0
- data/spec/cache/language_detection/get_language_from_text/1ad3f50c1fda37000e24c196f12212ea9d536cb4.cache +33 -0
- data/spec/cache/language_detection/get_language_from_url/d077a95e60be0876bb7650ad213f5f43e83454d4.cache +31 -0
- data/spec/cache/term_extraction/get_ranked_keywords_from_html/7718a0fbd03739e4213a4e66c32a79a10c3499c3.cache +50 -0
- data/spec/cache/term_extraction/get_ranked_keywords_from_text/6f49e68ee4a9150368e671e70b632dbdc40860bb.cache +51 -0
- data/spec/cache/term_extraction/get_ranked_keywords_from_url/b9c291523159563d2224d676ec43b7b79a902d21.cache +48 -0
- data/spec/cache/text_extraction/get_raw_text_from_html/9db19f848a798db1f9a8c6cce9074d03cf2637a8.cache +27 -0
- data/spec/cache/text_extraction/get_raw_text_from_html/e9c236b6e861b57d238c810bb3c307cada170cad.cache +17 -0
- data/spec/cache/text_extraction/get_raw_text_from_url/8f5dff27211163e41ea5e7c3c534acf7b87d2098.cache +25 -0
- data/spec/cache/text_extraction/get_text_from_html/e7e6dba4c8570a41dbcb05233793018fc5ae4e1e.cache +27 -0
- data/spec/cache/text_extraction/get_text_from_url/13facbfeae029d936c7dc18ecaff5d2764b94618.cache +25 -0
- data/spec/cache/text_extraction/get_title_from_html/2a526348db23f992fee293d34f94c087e77290c5.cache +27 -0
- data/spec/cache/text_extraction/get_title_from_url/e84c0c7c67668706ae0cf3eefcd88c0911cd2b65.cache +25 -0
- data/spec/fixtures/article.txt +9 -0
- data/spec/fixtures/bp_spill.html +929 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +20 -0
- metadata +136 -0
@@ -0,0 +1,85 @@
|
|
1
|
+
module AlchemyApi
  # Value object built from the 'url' and 'text' keys of a parsed response.
  ExtractedText = Struct.new(:url, :text)

  # Value object built from the 'url' and 'title' keys of a parsed response.
  ExtractedTitle = Struct.new(:url, :title)

  # Declares the text- and title-extraction requests.
  #
  # Every request is declared with the `post` macro (not defined in this file;
  # presumably provided by Base). Inside the block each request configures a
  # target uri, the request params, and a handler that converts the raw
  # response into one of the structs above.
  class TextExtraction < Base
    # Posts a URL to the URLGetText endpoint and returns an ExtractedText.
    #
    # Usage:
    #   AlchemyApi::TextExtraction.get_text_from_url(
    #     "http://google.com",
    #     :use_metadata => 1,
    #     :extract_links => 1)
    #
    # Options (optional trailing hash):
    #   :use_metadata  - sent as useMetadata, defaults to 1
    #   :extract_links - sent as extractLinks, defaults to 0
    post(:get_text_from_url) do |url, *args|
      options = args.first || {}
      uri "#{AlchemyApi.base_uri}/URLGetText"
      params :url => url,
             :useMetadata => options[:use_metadata] || 1,
             :extractLinks => options[:extract_links] || 0

      handler do |response|
        AlchemyApi::TextExtraction.get_text_from_url_handler(response)
      end
    end

    # Posts a URL to the URLGetRawText endpoint and returns an ExtractedText.
    # Takes no options.
    post(:get_raw_text_from_url) do |url|
      uri "#{AlchemyApi.base_uri}/URLGetRawText"
      params :url => url
      handler do |response|
        AlchemyApi::TextExtraction.get_text_from_url_handler(response)
      end
    end

    # Posts a URL to the URLGetTitle endpoint and returns an ExtractedTitle.
    #
    # Options (optional trailing hash):
    #   :use_metadata - sent as useMetadata, defaults to 1
    post(:get_title_from_url) do |url, *args|
      options = args.first || {}
      uri "#{AlchemyApi.base_uri}/URLGetTitle"
      params :url => url,
             :useMetadata => options[:use_metadata] || 1
      handler do |response|
        AlchemyApi::TextExtraction.get_title_from_url_handler(response)
      end
    end

    # Posts an HTML document to the HTMLGetText endpoint and returns an
    # ExtractedText.
    #
    # Options (optional trailing hash):
    #   :url           - sent as url, defaults to ''
    #   :use_metadata  - sent as useMetadata, defaults to 1
    #   :extract_links - sent as extractLinks, defaults to 0
    post(:get_text_from_html) do |html, *args|
      options = args.first || {}
      uri "#{AlchemyApi.base_html_uri}/HTMLGetText"
      params :html => html,
             :url => options[:url] || '',
             :useMetadata => options[:use_metadata] || 1,
             :extractLinks => options[:extract_links] || 0
      handler do |response|
        AlchemyApi::TextExtraction.get_text_from_url_handler(response)
      end
    end

    # Posts an HTML document to the HTMLGetRawText endpoint and returns an
    # ExtractedText.
    #
    # Options (optional trailing hash):
    #   :url - sent as url, defaults to ''
    post(:get_raw_text_from_html) do |html, *args|
      options = args.first || {}
      uri "#{AlchemyApi.base_html_uri}/HTMLGetRawText"
      params :html => html,
             :url => options[:url] || ''
      handler do |response|
        AlchemyApi::TextExtraction.get_text_from_url_handler(response)
      end
    end

    # Posts an HTML document to the HTMLGetTitle endpoint and returns an
    # ExtractedTitle.
    #
    # Options (optional trailing hash):
    #   :url          - sent as url, defaults to ''
    #   :use_metadata - sent as useMetadata, defaults to 1
    post(:get_title_from_html) do |html, *args|
      options = args.first || {}
      uri "#{AlchemyApi.base_html_uri}/HTMLGetTitle"
      params :html => html,
             :url => options[:url] || '',
             :useMetadata => options[:use_metadata] || 1
      handler do |response|
        AlchemyApi::TextExtraction.get_title_from_url_handler(response)
      end
    end

    # Converts a title-endpoint response into an ExtractedTitle.
    #
    # Runs the same error check as get_text_from_url_handler, so an error
    # payload raises instead of being wrapped in an ExtractedTitle as if the
    # call had succeeded.
    #
    # response - the raw response object passed to the request handler.
    #
    # Returns an ExtractedTitle holding json['url'] and json['title'].
    # Raises whatever check_json_for_errors_and_raise! raises for the payload.
    def self.get_title_from_url_handler(response)
      json = get_json(response)
      check_json_for_errors_and_raise!(json)
      ExtractedTitle.new(json['url'], json['title'])
    end

    # Converts a text-endpoint response into an ExtractedText. Despite its
    # name it is shared by the URL and HTML text/raw-text requests above.
    #
    # response - the raw response object passed to the request handler.
    #
    # Returns an ExtractedText holding json['url'] and json['text'].
    # Raises whatever check_json_for_errors_and_raise! raises for the payload.
    def self.get_text_from_url_handler(response)
      json = get_json(response)
      check_json_for_errors_and_raise!(json)
      ExtractedText.new(json['url'], json['text'])
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../spec_helper'
|
2
|
+
|
3
|
+
describe AlchemyApi::Base do
  describe "#check_json_for_errors_and_raise!" do
    # One row per example:
    #   [example description, 'statusInfo' value, expected error class name]
    # Error classes are named by symbol and looked up inside the example, so a
    # missing constant fails only that example rather than the file load.
    error_cases = [
      ["should raise an error if the API key is invalid",
       'invalid-api-key', :InvalidApiKeyError],
      ["should raise an error if the page is not retrievable",
       'cannot-retrieve', :CannotRetrieveUrlError],
      ["should raise an error if the page is not valid HTML",
       'page-is-not-html', :PageIsNotValidHtmlError],
      ["should raise an error if the sent HTML was not valid",
       'invalid-html', :InvalidHtmlError],
      ["should raise an error if the content exceeds the max limit",
       'content-exceeds-size-limit', :ContentExceedsMaxLimitError],
      ["should raise an error if the content cannot be retrieve due to redirection limit",
       'cannot-retrieve:http-redirect-limit', :RedirectionLimitError],
      ["should raise an UnknownError if we get something we don't recognize",
       'fdsafdsfdsafdskjldklfdad', :UnknownError]
    ]

    error_cases.each do |description, status_info, error_name|
      it description do
        # Error payload shaped like the API's JSON; only statusInfo varies.
        payload = {
          'status'     => 'ERROR',
          'url'        => 'http://google.com',
          'statusInfo' => status_info
        }
        expected_error = AlchemyApi.const_get(error_name)

        lambda {
          AlchemyApi::Base.check_json_for_errors_and_raise!(payload)
        }.should raise_error(expected_error)
      end
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../spec_helper"
|
2
|
+
|
3
|
+
describe AlchemyApi::Categorization do
  # NOTE(review): typhoeus_spec_cache and fixture_for come from spec_helper
  # (not shown here); presumably they replay cached HTTP responses from the
  # given directory and read a file from spec/fixtures -- confirm.

  # Categorizing plain text read from a fixture.
  typhoeus_spec_cache('spec/cache/categorization/get_categorization_from_text') do |_hydra|
    describe "#get_categorization_from_text" do
      before(:each) do
        @url = "http://test.com"
        article = fixture_for('article.txt')

        @category = AlchemyApi::Categorization.get_categorization_from_text(article)
      end

      it("should return a category name") { @category.name.should_not be_nil }
    end
  end

  # Categorizing a page identified by its URL.
  typhoeus_spec_cache('spec/cache/categorization/get_categorization_from_url') do |_hydra|
    describe "#get_categorization_from_url" do
      before(:each) do
        @url = 'http://www.macrumors.com/2010/04/30/apples-discontinuation-of-lala-streaming-music-service-not-likely-leading-to-imminent-launch-of-web-focused-itunes/'
        request_options = { :source_text => 'cleaned_or_raw' }
        @category = AlchemyApi::Categorization.get_categorization_from_url(@url, request_options)
      end

      it("should return a category name") { @category.name.should_not be_nil }

      it("should return a url") { @category.url.should_not be_nil }
    end
  end

  # Categorizing an HTML document supplied together with its source URL.
  typhoeus_spec_cache('spec/cache/categorization/get_categorization_from_html') do |_hydra|
    describe "#get_categorization_from_html" do
      before(:each) do
        @url = "http://www.businessweek.com/news/2010-04-29/bp-spill-may-alter-obama-s-offshore-drilling-plans-update1-.html"
        @html = fixture_for('bp_spill.html')
        request_options = { :url => @url, :source_text => 'cleaned_or_raw' }
        @category = AlchemyApi::Categorization.get_categorization_from_html(@html, request_options)
      end

      it("should return a category") { @category.name.should_not be_nil }
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../spec_helper"
|
2
|
+
|
3
|
+
describe AlchemyApi::LanguageDetection do
  # Expectations shared by every detection entry point. Including groups must
  # assign the detection result to @result in a before block.
  shared_examples_for 'a language detector' do
    it("should get a language") { @result.language.should_not be_nil }

    it "should get ISO codes" do
      [:iso_639_1, :iso_639_2, :iso_639_3].each do |iso_field|
        @result.send(iso_field).should_not be_nil
      end
    end

    it("should get the ethnologue URL") { @result.ethnologue_url.should_not be_nil }

    it("should get a native speaker count") { @result.native_speakers.should_not be_nil }

    it("should get a wikipedia URL") { @result.wikipedia_url.should_not be_nil }
  end

  # NOTE(review): typhoeus_spec_cache and fixture_for come from spec_helper
  # (not shown here); presumably they replay cached HTTP responses and load
  # fixture files -- confirm.

  # Detection for a page identified by its URL.
  typhoeus_spec_cache('spec/cache/language_detection/get_language_from_url') do |_hydra|
    describe "#get_language_from_url" do
      before(:each) do
        @url = 'http://www.humboldtbrews.com/2010_index_music.htm'
        request_options = { :source_text => 'cleaned_or_raw' }
        @result = AlchemyApi::LanguageDetection.get_language_from_url(@url, request_options)
      end

      it_should_behave_like 'a language detector'

      it("should get the URL") { @result.url.should == @url }
    end
  end

  # Detection for plain text read from a fixture.
  typhoeus_spec_cache('spec/cache/language_detection/get_language_from_text') do |_hydra|
    describe "#get_language_from_text" do
      before(:each) do
        @url = "http://test.com"
        article = fixture_for('article.txt')
        @result = AlchemyApi::LanguageDetection.get_language_from_text(article, :url => @url)
      end

      it_should_behave_like 'a language detector'
    end
  end

  # Detection for an HTML document supplied together with its source URL.
  typhoeus_spec_cache('spec/cache/language_detection/get_language_from_html') do |_hydra|
    describe "#get_language_from_html" do
      before(:each) do
        @url = "http://www.businessweek.com/news/2010-04-29/bp-spill-may-alter-obama-s-offshore-drilling-plans-update1-.html"
        @html = fixture_for('bp_spill.html')
        request_options = { :url => @url, :source_text => 'cleaned_or_raw' }
        @result = AlchemyApi::LanguageDetection.get_language_from_html(@html, request_options)
      end

      it_should_behave_like 'a language detector'
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../spec_helper"
|
2
|
+
|
3
|
+
describe AlchemyApi::TermExtraction do
  # Expectations shared by every keyword-ranking entry point. Including groups
  # must assign the ranking result to @result and request 5 keywords with the
  # source text included.
  shared_examples_for 'a keyword ranker' do
    it("should return source text") { @result.source_text.should_not be_nil }

    it("should return 5 keywords") { @result.keywords.should have(5).things }

    it "should have relevance scores for the keywords" do
      @result.keywords.each do |keyword|
        score = keyword.relevance
        score.should >= 0.0
        score.should <= 1.0
      end
    end
  end

  # NOTE(review): typhoeus_spec_cache and fixture_for come from spec_helper
  # (not shown here); presumably they replay cached HTTP responses and load
  # fixture files -- confirm.

  # Ranking keywords of an HTML document supplied with its source URL.
  typhoeus_spec_cache('spec/cache/term_extraction/get_ranked_keywords_from_html') do |_hydra|
    describe "#get_ranked_keywords_from_html" do
      before(:each) do
        @url = "http://www.businessweek.com/news/2010-04-29/bp-spill-may-alter-obama-s-offshore-drilling-plans-update1-.html"
        @html = fixture_for('bp_spill.html')
        request_options = {
          :url => @url,
          :max_retrieve => 5,
          :show_source_text => true
        }
        @result = AlchemyApi::TermExtraction.get_ranked_keywords_from_html(@html, request_options)
      end

      it_should_behave_like 'a keyword ranker'
    end
  end

  # Ranking keywords of plain text read from a fixture.
  typhoeus_spec_cache('spec/cache/term_extraction/get_ranked_keywords_from_text') do |_hydra|
    describe "#get_ranked_keywords_from_text" do
      before(:each) do
        @url = "http://test.com"
        article = fixture_for('article.txt')
        request_options = {
          :url => @url,
          :max_retrieve => 5,
          :show_source_text => true
        }
        @result = AlchemyApi::TermExtraction.get_ranked_keywords_from_text(article, request_options)
      end

      it_should_behave_like 'a keyword ranker'
    end
  end

  # Ranking keywords of a page identified by its URL.
  typhoeus_spec_cache('spec/cache/term_extraction/get_ranked_keywords_from_url') do |_hydra|
    describe "#get_ranked_keywords_from_url" do
      before(:each) do
        @url = 'http://www.businessweek.com/news/2010-05-02/bp-spill-threatens-gulf-of-mexico-oil-gas-operations-update1-.html'
        request_options = {
          :max_retrieve => 5,
          :show_source_text => true
        }
        @result = AlchemyApi::TermExtraction.get_ranked_keywords_from_url(@url, request_options)
      end

      it("should return the given URL") { @result.url.should == @url }

      it_should_behave_like 'a keyword ranker'
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../spec_helper"
|
2
|
+
|
3
|
+
describe AlchemyApi::TextExtraction do
  # NOTE(review): typhoeus_spec_cache and fixture_for come from spec_helper
  # (not shown here); presumably they replay cached HTTP responses and load
  # fixture files -- confirm.

  # Cleaned text for a page identified by its URL.
  typhoeus_spec_cache('spec/cache/text_extraction/get_text_from_url') do |_hydra|
    describe "#get_text_from_url" do
      it "should extract text" do
        page_url = "http://www.chron.com/disp/story.mpl/business/6981685.html"
        extracted = AlchemyApi::TextExtraction.get_text_from_url(page_url)

        extracted.url.should == page_url
        extracted.text.should_not be_empty
      end
    end
  end

  # Raw text for a page identified by its URL.
  typhoeus_spec_cache('spec/cache/text_extraction/get_raw_text_from_url') do |_hydra|
    describe "#get_raw_text_from_url" do
      before(:each) do
        @url = "http://www.chron.com/disp/story.mpl/business/6981685.html"
        @result = AlchemyApi::TextExtraction.get_raw_text_from_url(@url)
      end

      it("should extract url") { @result.url.should == @url }

      it("should get text back") { @result.text.should_not be_empty }
    end
  end

  # Title for a page identified by its URL.
  typhoeus_spec_cache('spec/cache/text_extraction/get_title_from_url') do |_hydra|
    describe "#get_title_from_url" do
      before(:each) do
        @url = "http://www.businessweek.com/news/2010-04-29/bp-spill-may-alter-obama-s-offshore-drilling-plans-update1-.html"
        @result = AlchemyApi::TextExtraction.get_title_from_url(@url)
      end

      it("should extract url") { @result.url.should == @url }

      # The description says "text" but the assertion is on the title.
      it("should get text back") { @result.title.should =~ /BP Spill/ }
    end
  end

  # Title for an HTML document supplied together with its source URL.
  typhoeus_spec_cache('spec/cache/text_extraction/get_title_from_html') do |_hydra|
    describe "#get_title_from_html" do
      before(:each) do
        @url = "http://www.businessweek.com/news/2010-04-29/bp-spill-may-alter-obama-s-offshore-drilling-plans-update1-.html"
        @html = fixture_for('bp_spill.html')
        @result = AlchemyApi::TextExtraction.get_title_from_html(@html, :url => @url)
      end

      it("should get title back") { @result.title.should =~ /BP Spill/ }
    end
  end

  # Raw text for an HTML document supplied together with its source URL.
  typhoeus_spec_cache('spec/cache/text_extraction/get_raw_text_from_html') do |_hydra|
    describe "#get_raw_text_from_html" do
      before(:each) do
        @url = "http://www.businessweek.com/news/2010-04-29/bp-spill-may-alter-obama-s-offshore-drilling-plans-update1-.html"
        @html = fixture_for('bp_spill.html')
        @result = AlchemyApi::TextExtraction.get_raw_text_from_html(@html, :url => @url)
      end

      it("should get text back") { @result.text.should_not be_empty }
    end
  end

  # Cleaned text for an HTML document supplied together with its source URL.
  typhoeus_spec_cache('spec/cache/text_extraction/get_text_from_html') do |_hydra|
    describe "#get_text_from_html" do
      before(:each) do
        @url = "http://www.businessweek.com/news/2010-04-29/bp-spill-may-alter-obama-s-offshore-drilling-plans-update1-.html"
        @html = fixture_for('bp_spill.html')
        @result = AlchemyApi::TextExtraction.get_text_from_html(@html, :url => @url)
      end

      it("should get text back") { @result.text.should_not be_empty }
    end
  end
end
|