textract 0.0.12 → 0.0.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/textract.rb +3 -0
- data/lib/textract/version.rb +1 -1
- data/spec/lib/textract_spec.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f140e3ce80676270090e5e7ecb1f23991972dd1f
|
4
|
+
data.tar.gz: 5869e5d9155f5078d2daf614bbe2a6c1b2c7d4ea
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8ca2130f5ed77a70dae4862c60fc655da7ecac888cdff7ec9c4776efa49f7053452bb8871dfec1d3fe65dbc1744c0f70c334d6eaeb8343fd6da00ec093cbf380
|
7
|
+
data.tar.gz: 659c783534efc4f08fe882c0418d0b775e3e93465312e44af183518f4e9908ac6e989f8e05ae2346ac43d1ff377c9e647a95ed4bf312df0dfc0c731a4d20487c
|
data/lib/textract.rb
CHANGED
@@ -106,6 +106,9 @@ module Textract
|
|
106
106
|
@md5 = Textract.generate_hash @text
|
107
107
|
@author = @article.author || Textract.get_author(@html)
|
108
108
|
@title = @tags.title || Textract.get_page_title(@html)
|
109
|
+
if @url.match(/\/robots.txt$/) and @title = @text
|
110
|
+
@title = @url
|
111
|
+
end
|
109
112
|
end
|
110
113
|
|
111
114
|
def as_json
|
data/lib/textract/version.rb
CHANGED
data/spec/lib/textract_spec.rb
CHANGED
@@ -97,8 +97,8 @@ describe Textract do
|
|
97
97
|
VCR.use_cassette('robots') do
|
98
98
|
url = "http://www.buzzfeed.com/robots.txt"
|
99
99
|
text = Textract.get_text(url)
|
100
|
-
expect(text.to_json).to be_a_kind_of String
|
101
100
|
expect(text.url).to eq url
|
101
|
+
expect(text.title).to eq url
|
102
102
|
end
|
103
103
|
end
|
104
104
|
|