textract 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 746f24065d2e6b1062f2b4e8c0386957098119a1
4
- data.tar.gz: b14700f3c1ff7a1cdc5d20d34444cf096516c288
3
+ metadata.gz: 1329ef4f0beb75631d3a34160d9e5e525efa6792
4
+ data.tar.gz: 633208f798be1ed7dc2a3898ecfca96ae32c69db
5
5
  SHA512:
6
- metadata.gz: 92a531364daa3afffc42bad5de554e8c04334191d3584dceffad502273a124d4d447e1b3884a19d3143247386df06e7fde1143a6744f0573be838e52a681d13f
7
- data.tar.gz: 9df332865ada80fa1fae38562d8c7934916787cc0410cf80909ecdbea8fe8cf72620f397c07aa3b6d82ade4352bce9abc18ed9dd9f51eb53e2d7aacbb7da2bf1
6
+ metadata.gz: b34cb339fb187ce2247ec8f46a498d368f92b755f830730f645d5a29e77cfdf2901b6b94daeca3185390e35e55d418ae3a2f5d5092c46064d965d7c00fb0ce30
7
+ data.tar.gz: be8ab847fcdddd3fcb176113579395f922b07fd20ff4ed41fbd3dab655e6500caf7268c28d94479e13bc9edd67d25ee24541f309075fe015d20add28712a51cd
@@ -1,3 +1,3 @@
1
1
  module Textract
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
data/lib/textract.rb CHANGED
@@ -58,12 +58,18 @@ module Textract
58
58
  name_meta.attribute('content').value unless name_meta.empty?
59
59
  end
60
60
 
61
+ def self.generate_hash(text)
62
+ # require 'pry'; binding.pry
63
+ Digest::MD5.hexdigest text
64
+ end
65
+
61
66
  class Client
62
67
  attr_reader :html
63
68
  attr_reader :url
64
69
  attr_reader :tags
65
70
  attr_reader :title
66
71
  attr_reader :text
72
+ attr_reader :md5
67
73
  attr_reader :author
68
74
 
69
75
  def initialize(url, selectors)
@@ -75,6 +81,7 @@ module Textract
75
81
 
76
82
  @article = Textract.get_text_from_description(@html, @tags.description, selectors)
77
83
  @text = ReverseMarkdown.convert @article.content, unknown_tags: :bypass
84
+ @md5 = Textract.generate_hash @text
78
85
  @author = @article.author || Textract.get_author(@html)
79
86
  @title = @tags.title || Textract.get_page_title(@html)
80
87
  end
@@ -10,6 +10,7 @@ describe Textract do
10
10
  url = "http://gawker.com/1694508525"
11
11
  article = Textract.get_text(url)
12
12
  expect(article.text[0..5]).to eq "Import"
13
+ expect(article.md5).to eq "ae57104339fbd6455a91f8ebdc94b90c"
13
14
  expect(article.author).to eq "Hamilton Nolan"
14
15
  end
15
16
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Pash