textract 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
@@ -15,7 +15,7 @@ describe Textract do
15
15
  url = "http://gawker.com/1694508525"
16
16
  article = Textract.get_text(url)
17
17
  expect(article.text.include?("Import")).to eq true
18
- expect(article.md5).to eq "c11a810a3e73f24aac78fd3e39e69f87"
18
+ expect(article.md5).to eq "9cc00fcdeb4bc41e0649d0776cbb2157"
19
19
  expect(article.author).to eq "Hamilton Nolan"
20
20
  end
21
21
  end
@@ -29,6 +29,19 @@ describe Textract do
29
29
  end
30
30
  end
31
31
 
32
+ it "returns the canonical url if available" do
33
+ VCR.use_cassette("hamno") do
34
+ url = "http://gawker.com/1694508525"
35
+ article = Textract.get_text(url)
36
+ expect(article.url).to eq "http://gawker.com/there-are-no-candidates-for-the-middle-class-1694508525"
37
+ end
38
+ VCR.use_cassette("buzzfeed hash") do
39
+ url = "http://www.buzzfeed.com/katenocera/rand-paul-is-on-his-own-this-time#.sseGm85KG"
40
+ article = Textract.get_text(url)
41
+ expect(article.url).to eq "http://www.buzzfeed.com/katenocera/rand-paul-is-on-his-own-this-time"
42
+ end
43
+ end
44
+
32
45
  it "returns article text based on opengraph description" do
33
46
  VCR.use_cassette('og') do
34
47
  url = "http://www.tedcruz.org/record/our-standard-the-constitution/"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Pash
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-27 00:00:00.000000000 Z
11
+ date: 2015-05-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opengraph_parser
@@ -166,6 +166,7 @@ files:
166
166
  - lib/textract.rb
167
167
  - lib/textract/version.rb
168
168
  - spec/fixtures/vcr_cassettes/bad_frisky.yml
169
+ - spec/fixtures/vcr_cassettes/buzzfeed_hash.yml
169
170
  - spec/fixtures/vcr_cassettes/cruz.yml
170
171
  - spec/fixtures/vcr_cassettes/hamno.yml
171
172
  - spec/fixtures/vcr_cassettes/imgs.yml
@@ -201,6 +202,7 @@ specification_version: 4
201
202
  summary: Extracts article text from a URL
202
203
  test_files:
203
204
  - spec/fixtures/vcr_cassettes/bad_frisky.yml
205
+ - spec/fixtures/vcr_cassettes/buzzfeed_hash.yml
204
206
  - spec/fixtures/vcr_cassettes/cruz.yml
205
207
  - spec/fixtures/vcr_cassettes/hamno.yml
206
208
  - spec/fixtures/vcr_cassettes/imgs.yml