textract 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
data/spec/lib/textract_spec.rb CHANGED
@@ -1,43 +1,49 @@
1
- require_relative '../../lib/textract'
1
+ require 'spec_helper'
2
+ require 'textract'
2
3
 
3
- RSpec.configure do |c|
4
- # filter_run is short-form alias for filter_run_including
5
- c.filter_run :focus => true
6
- end
7
-
8
- describe Textract, :focus do
4
+ describe Textract do
9
5
  it "initializes with the get_text method" do
10
- url = "http://www.tedcruz.org/about/"
11
- article = Textract.get_text(url)
12
- expect(article).to be_a_kind_of Textract::Client
6
+ VCR.use_cassette("cruz") do
7
+ url = "http://www.tedcruz.org/about/"
8
+ article = Textract.get_text(url)
9
+ expect(article).to be_a_kind_of Textract::Client
10
+ end
13
11
  end
14
12
 
15
13
  it "returns article text based on article tag" do
16
- url = "http://gawker.com/1694508525"
17
- article = Textract.get_text(url)
18
- expect(article.text.include?("Import")).to eq true
19
- expect(article.md5).to eq "c11a810a3e73f24aac78fd3e39e69f87"
20
- expect(article.author).to eq "Hamilton Nolan"
14
+ VCR.use_cassette("hamno") do
15
+ url = "http://gawker.com/1694508525"
16
+ article = Textract.get_text(url)
17
+ expect(article.text.include?("Import")).to eq true
18
+ expect(article.md5).to eq "c11a810a3e73f24aac78fd3e39e69f87"
19
+ expect(article.author).to eq "Hamilton Nolan"
20
+ end
21
21
  end
22
22
 
23
23
  it "also includes images" do
24
- url = "http://gawker.com/1696731611"
25
- img = "http://i.kinja-img.com/gawker-media/image/upload/s--fWYFlEv6--/c_fit,fl_progressive,q_80,w_636/l3sjlg0ariqomd4ubtl6.jpg"
26
- article = Textract.get_text(url)
27
- expect(article.text.include?(img)).to be true
24
+ VCR.use_cassette('imgs') do
25
+ url = "http://gawker.com/1696731611"
26
+ img = "http://i.kinja-img.com/gawker-media/image/upload/s--fWYFlEv6--/c_fit,fl_progressive,q_80,w_636/l3sjlg0ariqomd4ubtl6.jpg"
27
+ article = Textract.get_text(url)
28
+ expect(article.text.include?(img)).to be true
29
+ end
28
30
  end
29
31
 
30
32
  it "returns article text based on opengraph description" do
31
- url = "http://www.tedcruz.org/record/our-standard-the-constitution/"
32
- article = Textract.get_text(url)
33
- expect(article.text.include?("Ted Cruz")).to eq true
33
+ VCR.use_cassette('og') do
34
+ url = "http://www.tedcruz.org/record/our-standard-the-constitution/"
35
+ article = Textract.get_text(url)
36
+ expect(article.text.include?("Ted Cruz")).to eq true
37
+ end
34
38
  end
35
39
 
36
40
  it "can find a twitter profile given a selector" do
37
- url = "https://twitter.com/lifehacker"
38
- article = Textract.get_text(url, 'p.ProfileHeaderCard-bio.u-dir')
39
- expect(article.text.strip).to eq "Don't live to geek; geek to live."
40
- expect(article.title).to eq "Lifehacker (@lifehacker) | Twitter"
41
+ VCR.use_cassette('selector') do
42
+ url = "https://twitter.com/lifehacker"
43
+ article = Textract.get_text(url, 'p.ProfileHeaderCard-bio.u-dir')
44
+ expect(article.text.strip).to eq "Don't live to geek; geek to live."
45
+ expect(article.title).to eq "Lifehacker (@lifehacker) | Twitter"
46
+ end
41
47
  end
42
48
 
43
49
  it "gets the page title from the title tag" do
@@ -51,9 +57,19 @@ describe Textract, :focus do
51
57
  end
52
58
 
53
59
  it "converts itself to json" do
54
- url = "http://gawker.com/1694508525"
55
- article = Textract.get_text(url)
56
- expect(article.to_json).to be_a_kind_of String
60
+ VCR.use_cassette('json') do
61
+ url = "http://gawker.com/1694508525"
62
+ article = Textract.get_text(url)
63
+ expect(article.to_json).to be_a_kind_of String
64
+ end
65
+ end
66
+
67
+ it "handles problem urls" do
68
+ VCR.use_cassette('bad frisky') do
69
+ url = "http://www.thefrisky.com/2015-04-22/10-things-i-was-irrationally-jealous-of-in-high-school-and-admittedly-still-am/"
70
+ article = Textract.get_text(url)
71
+ expect(article.to_json).to be_a_kind_of String
72
+ end
57
73
  end
58
74
 
59
75
  end
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,12 @@
1
+ RSpec.configure do |c|
2
+ c.filter_run_including :focus => true
3
+ c.run_all_when_everything_filtered = true
4
+ end
5
+
6
+ require 'vcr'
7
+ VCR.configure do |config|
8
+ config.cassette_library_dir = "spec/fixtures/vcr_cassettes"
9
+ config.hook_into :webmock
10
+ config.allow_http_connections_when_no_cassette = true
11
+ end
12
+
data/textract.gemspec CHANGED
@@ -27,4 +27,6 @@ Gem::Specification.new do |spec|
27
27
  spec.add_development_dependency "rake", "~> 10.0"
28
28
  spec.add_development_dependency "rspec"
29
29
  spec.add_development_dependency "guard-rspec"
30
+ spec.add_development_dependency "vcr"
31
+ spec.add_development_dependency "webmock"
30
32
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Pash
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-10 00:00:00.000000000 Z
11
+ date: 2015-04-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: opengraph_parser
@@ -122,6 +122,34 @@ dependencies:
122
122
  - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: vcr
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: webmock
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
125
153
  description: Extracts article text from a URL
126
154
  email:
127
155
  - adam.pash@gmail.com
@@ -137,7 +165,15 @@ files:
137
165
  - Rakefile
138
166
  - lib/textract.rb
139
167
  - lib/textract/version.rb
168
+ - spec/fixtures/vcr_cassettes/bad_frisky.yml
169
+ - spec/fixtures/vcr_cassettes/cruz.yml
170
+ - spec/fixtures/vcr_cassettes/hamno.yml
171
+ - spec/fixtures/vcr_cassettes/imgs.yml
172
+ - spec/fixtures/vcr_cassettes/json.yml
173
+ - spec/fixtures/vcr_cassettes/og.yml
174
+ - spec/fixtures/vcr_cassettes/selector.yml
140
175
  - spec/lib/textract_spec.rb
176
+ - spec/spec_helper.rb
141
177
  - textract.gemspec
142
178
  homepage: ''
143
179
  licenses:
@@ -164,4 +200,12 @@ signing_key:
164
200
  specification_version: 4
165
201
  summary: Extracts article text from a URL
166
202
  test_files:
203
+ - spec/fixtures/vcr_cassettes/bad_frisky.yml
204
+ - spec/fixtures/vcr_cassettes/cruz.yml
205
+ - spec/fixtures/vcr_cassettes/hamno.yml
206
+ - spec/fixtures/vcr_cassettes/imgs.yml
207
+ - spec/fixtures/vcr_cassettes/json.yml
208
+ - spec/fixtures/vcr_cassettes/og.yml
209
+ - spec/fixtures/vcr_cassettes/selector.yml
167
210
  - spec/lib/textract_spec.rb
211
+ - spec/spec_helper.rb