RubyGems - textract - Versions diffs - 0.0.8 → 0.0.9 - Mend

textract 0.0.8 → 0.0.9

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (14)

checksums.yaml +4 -4
data/lib/textract/version.rb +1 -1
data/lib/textract.rb +11 -7
data/spec/fixtures/vcr_cassettes/bad_frisky.yml +1866 -0
data/spec/fixtures/vcr_cassettes/cruz.yml +642 -0
data/spec/fixtures/vcr_cassettes/hamno.yml +632 -0
data/spec/fixtures/vcr_cassettes/imgs.yml +753 -0
data/spec/fixtures/vcr_cassettes/json.yml +632 -0
data/spec/fixtures/vcr_cassettes/og.yml +622 -0
data/spec/fixtures/vcr_cassettes/selector.yml +684 -0
data/spec/lib/textract_spec.rb +45 -29
data/spec/spec_helper.rb +12 -0
data/textract.gemspec +2 -0
metadata +46 -2

data/spec/lib/textract_spec.rb CHANGED Viewed

@@ -1,43 +1,49 @@
-require_relative '../../lib/textract'
+require 'spec_helper'
+require 'textract'
-RSpec.configure do |c|
-  # filter_run is short-form alias for filter_run_including
-  c.filter_run :focus => true
-end
-describe Textract, :focus do
+describe Textract do
   it "initializes with the get_text method" do
-    url = "http://www.tedcruz.org/about/"
-    article = Textract.get_text(url)
-    expect(article).to be_a_kind_of Textract::Client
+    VCR.use_cassette("cruz") do
+      url = "http://www.tedcruz.org/about/"
+      article = Textract.get_text(url)
+      expect(article).to be_a_kind_of Textract::Client
+    end
   end
   it "returns article text based on article tag" do
-    url = "http://gawker.com/1694508525"
-    article = Textract.get_text(url)
-    expect(article.text.include?("Import")).to eq true
-    expect(article.md5).to eq "c11a810a3e73f24aac78fd3e39e69f87"
-    expect(article.author).to eq "Hamilton Nolan"
+    VCR.use_cassette("hamno") do
+      url = "http://gawker.com/1694508525"
+      article = Textract.get_text(url)
+      expect(article.text.include?("Import")).to eq true
+      expect(article.md5).to eq "c11a810a3e73f24aac78fd3e39e69f87"
+      expect(article.author).to eq "Hamilton Nolan"
+    end
   end
   it "also includes images" do
-    url = "http://gawker.com/1696731611"
-    img = "http://i.kinja-img.com/gawker-media/image/upload/s--fWYFlEv6--/c_fit,fl_progressive,q_80,w_636/l3sjlg0ariqomd4ubtl6.jpg"
-    article = Textract.get_text(url)
-    expect(article.text.include?(img)).to be true
+    VCR.use_cassette('imgs') do
+      url = "http://gawker.com/1696731611"
+      img = "http://i.kinja-img.com/gawker-media/image/upload/s--fWYFlEv6--/c_fit,fl_progressive,q_80,w_636/l3sjlg0ariqomd4ubtl6.jpg"
+      article = Textract.get_text(url)
+      expect(article.text.include?(img)).to be true
+    end
   end
   it "returns article text based on opengraph description" do
-    url = "http://www.tedcruz.org/record/our-standard-the-constitution/"
-    article = Textract.get_text(url)
-    expect(article.text.include?("Ted Cruz")).to eq true
+    VCR.use_cassette('og') do
+      url = "http://www.tedcruz.org/record/our-standard-the-constitution/"
+      article = Textract.get_text(url)
+      expect(article.text.include?("Ted Cruz")).to eq true
+    end
   end
   it "can find a twitter profile given a selector" do
-    url = "https://twitter.com/lifehacker"
-    article = Textract.get_text(url, 'p.ProfileHeaderCard-bio.u-dir')
-    expect(article.text.strip).to eq "Don't live to geek; geek to live."
-    expect(article.title).to eq "Lifehacker (@lifehacker) | Twitter"
+    VCR.use_cassette('selector') do
+      url = "https://twitter.com/lifehacker"
+      article = Textract.get_text(url, 'p.ProfileHeaderCard-bio.u-dir')
+      expect(article.text.strip).to eq "Don't live to geek; geek to live."
+      expect(article.title).to eq "Lifehacker (@lifehacker) | Twitter"
+    end
   end
   it "gets the page title from the title tag" do
@@ -51,9 +57,19 @@ describe Textract, :focus do
   end
   it "converts itself to json" do
-    url = "http://gawker.com/1694508525"
-    article = Textract.get_text(url)
-    expect(article.to_json).to be_a_kind_of String
+    VCR.use_cassette('json') do
+      url = "http://gawker.com/1694508525"
+      article = Textract.get_text(url)
+      expect(article.to_json).to be_a_kind_of String
+    end
+  end
+  it "handles problem urls" do
+    VCR.use_cassette('bad frisky') do
+      url = "http://www.thefrisky.com/2015-04-22/10-things-i-was-irrationally-jealous-of-in-high-school-and-admittedly-still-am/"
+      article = Textract.get_text(url)
+      expect(article.to_json).to be_a_kind_of String
+    end
   end
 end

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,12 @@
+RSpec.configure do |c|
+  c.filter_run_including :focus => true
+  c.run_all_when_everything_filtered = true
+end
+require 'vcr'
+VCR.configure do |config|
+  config.cassette_library_dir = "spec/fixtures/vcr_cassettes"
+  config.hook_into :webmock
+  config.allow_http_connections_when_no_cassette = true
+end

data/textract.gemspec CHANGED Viewed

@@ -27,4 +27,6 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "rake", "~> 10.0"
   spec.add_development_dependency "rspec"
   spec.add_development_dependency "guard-rspec"
+  spec.add_development_dependency "vcr"
+  spec.add_development_dependency "webmock"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: textract
 version: !ruby/object:Gem::Version
-  version: 0.0.8
+  version: 0.0.9
 platform: ruby
 authors:
 - Adam Pash
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-04-10 00:00:00.000000000 Z
+date: 2015-04-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: opengraph_parser
@@ -122,6 +122,34 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: vcr
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: webmock
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description: Extracts article text from a URL
 email:
 - adam.pash@gmail.com
@@ -137,7 +165,15 @@ files:
 - Rakefile
 - lib/textract.rb
 - lib/textract/version.rb
+- spec/fixtures/vcr_cassettes/bad_frisky.yml
+- spec/fixtures/vcr_cassettes/cruz.yml
+- spec/fixtures/vcr_cassettes/hamno.yml
+- spec/fixtures/vcr_cassettes/imgs.yml
+- spec/fixtures/vcr_cassettes/json.yml
+- spec/fixtures/vcr_cassettes/og.yml
+- spec/fixtures/vcr_cassettes/selector.yml
 - spec/lib/textract_spec.rb
+- spec/spec_helper.rb
 - textract.gemspec
 homepage: ''
 licenses:
@@ -164,4 +200,12 @@ signing_key:
 specification_version: 4
 summary: Extracts article text from a URL
 test_files:
+- spec/fixtures/vcr_cassettes/bad_frisky.yml
+- spec/fixtures/vcr_cassettes/cruz.yml
+- spec/fixtures/vcr_cassettes/hamno.yml
+- spec/fixtures/vcr_cassettes/imgs.yml
+- spec/fixtures/vcr_cassettes/json.yml
+- spec/fixtures/vcr_cassettes/og.yml
+- spec/fixtures/vcr_cassettes/selector.yml
 - spec/lib/textract_spec.rb
+- spec/spec_helper.rb