textract 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/textract.rb +1 -1
- data/lib/textract/version.rb +1 -1
- data/spec/fixtures/vcr_cassettes/buzzfeed_hash.yml +2158 -0
- data/spec/lib/textract_spec.rb +14 -1
- metadata +4 -2
data/spec/lib/textract_spec.rb
CHANGED
@@ -15,7 +15,7 @@ describe Textract do
|
|
15
15
|
url = "http://gawker.com/1694508525"
|
16
16
|
article = Textract.get_text(url)
|
17
17
|
expect(article.text.include?("Import")).to eq true
|
18
|
-
expect(article.md5).to eq "
|
18
|
+
expect(article.md5).to eq "9cc00fcdeb4bc41e0649d0776cbb2157"
|
19
19
|
expect(article.author).to eq "Hamilton Nolan"
|
20
20
|
end
|
21
21
|
end
|
@@ -29,6 +29,19 @@ describe Textract do
|
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
|
+
it "returns the canonical url if available" do
|
33
|
+
VCR.use_cassette("hamno") do
|
34
|
+
url = "http://gawker.com/1694508525"
|
35
|
+
article = Textract.get_text(url)
|
36
|
+
expect(article.url).to eq "http://gawker.com/there-are-no-candidates-for-the-middle-class-1694508525"
|
37
|
+
end
|
38
|
+
VCR.use_cassette("buzzfeed hash") do
|
39
|
+
url = "http://www.buzzfeed.com/katenocera/rand-paul-is-on-his-own-this-time#.sseGm85KG"
|
40
|
+
article = Textract.get_text(url)
|
41
|
+
expect(article.url).to eq "http://www.buzzfeed.com/katenocera/rand-paul-is-on-his-own-this-time"
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
32
45
|
it "returns article text based on opengraph description" do
|
33
46
|
VCR.use_cassette('og') do
|
34
47
|
url = "http://www.tedcruz.org/record/our-standard-the-constitution/"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Pash
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: opengraph_parser
|
@@ -166,6 +166,7 @@ files:
|
|
166
166
|
- lib/textract.rb
|
167
167
|
- lib/textract/version.rb
|
168
168
|
- spec/fixtures/vcr_cassettes/bad_frisky.yml
|
169
|
+
- spec/fixtures/vcr_cassettes/buzzfeed_hash.yml
|
169
170
|
- spec/fixtures/vcr_cassettes/cruz.yml
|
170
171
|
- spec/fixtures/vcr_cassettes/hamno.yml
|
171
172
|
- spec/fixtures/vcr_cassettes/imgs.yml
|
@@ -201,6 +202,7 @@ specification_version: 4
|
|
201
202
|
summary: Extracts article text from a URL
|
202
203
|
test_files:
|
203
204
|
- spec/fixtures/vcr_cassettes/bad_frisky.yml
|
205
|
+
- spec/fixtures/vcr_cassettes/buzzfeed_hash.yml
|
204
206
|
- spec/fixtures/vcr_cassettes/cruz.yml
|
205
207
|
- spec/fixtures/vcr_cassettes/hamno.yml
|
206
208
|
- spec/fixtures/vcr_cassettes/imgs.yml
|