textract 0.0.15 → 0.0.16

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 67ed83a055c856909a5b9e3d5735c640c4310741
4
- data.tar.gz: 4bd80a7d4249935ccc66d79543195e90d6f5d14e
3
+ metadata.gz: 6df62961d3d6f6fb6fb88d189bc657111ea04f65
4
+ data.tar.gz: 400fe86f14b624c7d280719d66b705ac5cea6b41
5
5
  SHA512:
6
- metadata.gz: f57ad83703e136ebdf5d101026a1422b1f84d22d04bedce80db3efff0dc068d67c05d00810ad0b55a8e5dd0c1ce8c60e340771300ae7f2c8f50f7a7389eaaa07
7
- data.tar.gz: 9b9e1a55b948a838e19c11b878b114c18368eab053684552ad9027cef45fc91a67abccd89ed20d2ffd11244c87489004304e1236cb1cd1976afbde32ab63af7f
6
+ metadata.gz: e3797cea30cc5ddc90e672c75b4c700a900c6bbe23bb916b504b2ea2e5ecaef1bc5619d90293c9574b85dea2fc8769d8585b13ae2615ac2bbb9806ffdfab13c5
7
+ data.tar.gz: b3aa7a586c9ab8a679f2d89e0fef6e9847612ab8c945420388e6dec8769ed0dd993158321cb470f9e99e6750f0bab17201795765da1d17c57e72fe5e9c1acdb3
data/lib/textract.rb CHANGED
@@ -78,9 +78,14 @@ module Textract
78
78
  twitter_meta.attribute('content').value unless twitter_meta.empty?
79
79
  end
80
80
 
81
- def self.build_site(html)
81
+ def self.build_site(url, html)
82
82
  site_twitter = Nokogiri::HTML(html).search('meta[name="twitter:site"]')
83
83
  site_name = Nokogiri::HTML(html).search('meta[property="og:site_name"]')
84
+ if site_name.empty?
85
+ site = url.match(/(http|ftp)s?:\/\/((\w+\.)?(\w+\.)(\w+))\//)
86
+ site = site[2] unless site[2].nil?
87
+ site = site.sub(/^www\./, '').capitalize!
88
+ end
84
89
  {
85
90
  name: site_name.empty? ? nil : site_name.attribute('content').value,
86
91
  twitter: site_twitter.empty? ? nil : site_twitter.attribute('content').value,
@@ -131,7 +136,7 @@ module Textract
131
136
  end
132
137
  @md5 = Textract.generate_hash @text
133
138
  @author = Textract.build_author @article, @html
134
- @site = Textract.build_site @html
139
+ @site = Textract.build_site @url, @html
135
140
  @title = @tags.title || Textract.get_page_title(@html)
136
141
  if @url.match(/\/robots.txt$/) and @title = @text
137
142
  @title = @url
@@ -1,3 +1,3 @@
1
1
  module Textract
2
- VERSION = "0.0.15"
2
+ VERSION = "0.0.16"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.15
4
+ version: 0.0.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Pash