textract 0.0.6.3 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2f02b4589b24d89ca5c034359a0c9aca74eaede6
4
- data.tar.gz: b25b6790a3db36784ac7cc6a9a15d51e42fb8ccd
3
+ metadata.gz: a0d8ffc9fc175116ed6a851cc12a992b4c2eb95b
4
+ data.tar.gz: d6093955a7cf136b0430246eebd62773f0c36d6f
5
5
  SHA512:
6
- metadata.gz: 9e7f3888e302abb385cf7d43b1e2e35650303dcb22c4c6294eb31d5af9e1aa3ad3b674e8d913b1ffd7f6cf89611729ed039132566c663abaa2589f20aa7ca953
7
- data.tar.gz: bebd451831bea49a01887ea79c03d0ec5058e5640429950dcd0adee35145ba5eb49cb9834cf290e745258230c8c7e0c683d35d85a43c226ba8bcba4e3bcc1e58
6
+ metadata.gz: 2ed2d11aa3988cbc81b31b7da07e9d74d09a7f1b057ca450d7fa78d1bd919a4f4dc83caf691a81e9bdb16fd58c848b723a1a27c77f0be3e503d8bce65bc8b5ad
7
+ data.tar.gz: 377327580500633d4f21ead9adfc610ce7fba65b752525ca5c6879e615e412732b228fd489789f5166f8c1971e5b8ec75207aa722fae96467af41becf67acfda
@@ -1,3 +1,3 @@
1
1
  module Textract
2
- VERSION = "0.0.6.3"
2
+ VERSION = "0.0.7"
3
3
  end
data/lib/textract.rb CHANGED
@@ -42,10 +42,10 @@ module Textract
42
42
  else
43
43
  article_el = doc
44
44
  end
45
- article = Readability::Document.new(article_el.to_s,
45
+ Readability::Document.new(article_el.to_s,
46
46
  tags: %w[div span p a img ul ol li blockquote table tr td h1 h2 h3 h4 h5 b em i strong],
47
47
  attributes: %w[src href],
48
- remove_empty_nodes: true,
48
+ remove_empty_nodes: false,
49
49
  )
50
50
  end
51
51
 
@@ -1,4 +1,10 @@
1
1
  require_relative '../../lib/textract'
2
+
3
+ RSpec.configure do |c|
4
+ # filter_run is short-form alias for filter_run_including
5
+ c.filter_run :focus => true
6
+ end
7
+
2
8
  describe Textract do
3
9
  it "initializes with the get_text method" do
4
10
  url = "http://www.tedcruz.org/about/"
@@ -14,6 +20,13 @@ describe Textract do
14
20
  expect(article.author).to eq "Hamilton Nolan"
15
21
  end
16
22
 
23
+ it "also includes images", :focus do
24
+ url = "http://gawker.com/1696731611"
25
+ img = "http://i.kinja-img.com/gawker-media/image/upload/s--fWYFlEv6--/c_fit,fl_progressive,q_80,w_636/l3sjlg0ariqomd4ubtl6.jpg"
26
+ article = Textract.get_text(url)
27
+ expect(article.text.include?(img)).to be true
28
+ end
29
+
17
30
  it "returns article text based on opengraph description" do
18
31
  url = "http://www.tedcruz.org/record/our-standard-the-constitution/"
19
32
  article = Textract.get_text(url)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6.3
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Pash