textract 0.0.13 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f140e3ce80676270090e5e7ecb1f23991972dd1f
4
- data.tar.gz: 5869e5d9155f5078d2daf614bbe2a6c1b2c7d4ea
3
+ metadata.gz: 0336cf0be51c8b9b98137b67d6d58cab27cbde14
4
+ data.tar.gz: 572f3955c091883512b94071264c89d3dbfba137
5
5
  SHA512:
6
- metadata.gz: 8ca2130f5ed77a70dae4862c60fc655da7ecac888cdff7ec9c4776efa49f7053452bb8871dfec1d3fe65dbc1744c0f70c334d6eaeb8343fd6da00ec093cbf380
7
- data.tar.gz: 659c783534efc4f08fe882c0418d0b775e3e93465312e44af183518f4e9908ac6e989f8e05ae2346ac43d1ff377c9e647a95ed4bf312df0dfc0c731a4d20487c
6
+ metadata.gz: bdd093f12bd58d70275e3cbbdb5fb61a6b7a336afcc7e92697d7ec2d2f3301b36ea9d5be222748b97f4251e4887f84110282737d09b1e8ec4b86ce23718879bd
7
+ data.tar.gz: 29729f554d34cca2a374c8ad5941d09a0f1ee62c9cfc25902af15cf17c1bec5c45e9e30edb088d6ef135ee33315a8c9e9073adc18acde2bf5aefcf1753d81d71
@@ -1,3 +1,3 @@
1
1
  module Textract
2
- VERSION = "0.0.13"
2
+ VERSION = "0.0.14"
3
3
  end
data/lib/textract.rb CHANGED
@@ -67,9 +67,26 @@ module Textract
67
67
 
68
68
  def self.get_author(html)
69
69
  name_meta = Nokogiri::HTML(html).search('meta[name="author"]')
70
+ if name_meta.empty?
71
+ name_meta = Nokogiri::HTML(html).search('meta[property="author"]')
72
+ end
70
73
  name_meta.attribute('content').value unless name_meta.empty?
71
74
  end
72
75
 
76
+ def self.get_twitter(html)
77
+ twitter_meta = Nokogiri::HTML(html).search('meta[name="twitter:creator"]')
78
+ twitter_meta.attribute('content').value unless twitter_meta.empty?
79
+ end
80
+
81
+ def self.build_author(article, html)
82
+ {
83
+ name: article.author || get_author(html),
84
+ twitter: get_twitter(html),
85
+ }
86
+ end
87
+
88
+ # def build_site
89
+
73
90
  def self.generate_hash(text)
74
91
  Digest::MD5.hexdigest text
75
92
  end
@@ -104,7 +121,7 @@ module Textract
104
121
  end
105
122
  end
106
123
  @md5 = Textract.generate_hash @text
107
- @author = @article.author || Textract.get_author(@html)
124
+ @author = Textract.build_author @article, @html
108
125
  @title = @tags.title || Textract.get_page_title(@html)
109
126
  if @url.match(/\/robots.txt$/) and @title = @text
110
127
  @title = @url