textract 0.0.15 → 0.0.16
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/textract.rb +7 -2
- data/lib/textract/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6df62961d3d6f6fb6fb88d189bc657111ea04f65
|
4
|
+
data.tar.gz: 400fe86f14b624c7d280719d66b705ac5cea6b41
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e3797cea30cc5ddc90e672c75b4c700a900c6bbe23bb916b504b2ea2e5ecaef1bc5619d90293c9574b85dea2fc8769d8585b13ae2615ac2bbb9806ffdfab13c5
|
7
|
+
data.tar.gz: b3aa7a586c9ab8a679f2d89e0fef6e9847612ab8c945420388e6dec8769ed0dd993158321cb470f9e99e6750f0bab17201795765da1d17c57e72fe5e9c1acdb3
|
data/lib/textract.rb
CHANGED
@@ -78,9 +78,14 @@ module Textract
|
|
78
78
|
twitter_meta.attribute('content').value unless twitter_meta.empty?
|
79
79
|
end
|
80
80
|
|
81
|
-
def self.build_site(html)
|
81
|
+
def self.build_site(url, html)
|
82
82
|
site_twitter = Nokogiri::HTML(html).search('meta[name="twitter:site"]')
|
83
83
|
site_name = Nokogiri::HTML(html).search('meta[property="og:site_name"]')
|
84
|
+
if site_name.empty?
|
85
|
+
site = url.match(/(http|ftp)s?:\/\/((\w+\.)?(\w+\.)(\w+))\//)
|
86
|
+
site = site[2] unless site[2].nil?
|
87
|
+
site = site.sub(/^www\./, '').capitalize!
|
88
|
+
end
|
84
89
|
{
|
85
90
|
name: site_name.empty? ? nil : site_name.attribute('content').value,
|
86
91
|
twitter: site_twitter.empty? ? nil : site_twitter.attribute('content').value,
|
@@ -131,7 +136,7 @@ module Textract
|
|
131
136
|
end
|
132
137
|
@md5 = Textract.generate_hash @text
|
133
138
|
@author = Textract.build_author @article, @html
|
134
|
-
@site = Textract.build_site @html
|
139
|
+
@site = Textract.build_site @url, @html
|
135
140
|
@title = @tags.title || Textract.get_page_title(@html)
|
136
141
|
if @url.match(/\/robots.txt$/) and @title = @text
|
137
142
|
@title = @url
|
data/lib/textract/version.rb
CHANGED