textract 0.0.6.3 → 0.0.7

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2f02b4589b24d89ca5c034359a0c9aca74eaede6
4
- data.tar.gz: b25b6790a3db36784ac7cc6a9a15d51e42fb8ccd
3
+ metadata.gz: a0d8ffc9fc175116ed6a851cc12a992b4c2eb95b
4
+ data.tar.gz: d6093955a7cf136b0430246eebd62773f0c36d6f
5
5
  SHA512:
6
- metadata.gz: 9e7f3888e302abb385cf7d43b1e2e35650303dcb22c4c6294eb31d5af9e1aa3ad3b674e8d913b1ffd7f6cf89611729ed039132566c663abaa2589f20aa7ca953
7
- data.tar.gz: bebd451831bea49a01887ea79c03d0ec5058e5640429950dcd0adee35145ba5eb49cb9834cf290e745258230c8c7e0c683d35d85a43c226ba8bcba4e3bcc1e58
6
+ metadata.gz: 2ed2d11aa3988cbc81b31b7da07e9d74d09a7f1b057ca450d7fa78d1bd919a4f4dc83caf691a81e9bdb16fd58c848b723a1a27c77f0be3e503d8bce65bc8b5ad
7
+ data.tar.gz: 377327580500633d4f21ead9adfc610ce7fba65b752525ca5c6879e615e412732b228fd489789f5166f8c1971e5b8ec75207aa722fae96467af41becf67acfda
@@ -1,3 +1,3 @@
1
1
  module Textract
2
- VERSION = "0.0.6.3"
2
+ VERSION = "0.0.7"
3
3
  end
data/lib/textract.rb CHANGED
@@ -42,10 +42,10 @@ module Textract
42
42
  else
43
43
  article_el = doc
44
44
  end
45
- article = Readability::Document.new(article_el.to_s,
45
+ Readability::Document.new(article_el.to_s,
46
46
  tags: %w[div span p a img ul ol li blockquote table tr td h1 h2 h3 h4 h5 b em i strong],
47
47
  attributes: %w[src href],
48
- remove_empty_nodes: true,
48
+ remove_empty_nodes: false,
49
49
  )
50
50
  end
51
51
 
@@ -1,4 +1,10 @@
1
1
  require_relative '../../lib/textract'
2
+
3
+ RSpec.configure do |c|
4
+ # filter_run is short-form alias for filter_run_including
5
+ c.filter_run :focus => true
6
+ end
7
+
2
8
  describe Textract do
3
9
  it "initializes with the get_text method" do
4
10
  url = "http://www.tedcruz.org/about/"
@@ -14,6 +20,13 @@ describe Textract do
14
20
  expect(article.author).to eq "Hamilton Nolan"
15
21
  end
16
22
 
23
+ it "also includes images", :focus do
24
+ url = "http://gawker.com/1696731611"
25
+ img = "http://i.kinja-img.com/gawker-media/image/upload/s--fWYFlEv6--/c_fit,fl_progressive,q_80,w_636/l3sjlg0ariqomd4ubtl6.jpg"
26
+ article = Textract.get_text(url)
27
+ expect(article.text.include?(img)).to be true
28
+ end
29
+
17
30
  it "returns article text based on opengraph description" do
18
31
  url = "http://www.tedcruz.org/record/our-standard-the-constitution/"
19
32
  article = Textract.get_text(url)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6.3
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Pash