textract 0.0.1 → 0.0.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 052757f993bc3c87948e1d650ea837a48fef8c73
4
- data.tar.gz: a52c485fb6af0fcafcc1e9f743c617223e181fc9
3
+ metadata.gz: 6c0f4e826cede5d69f4c35d74129c33ffe6fd59a
4
+ data.tar.gz: 8884dac18c1ddf501ccabbfbde7d5f50c24e37d3
5
5
  SHA512:
6
- metadata.gz: 39c8b5162e22ca62fa182791324ae2e828712cde3d0e5a131212d4af105a40bbfe07ca6808b337900e4bc3bd492e1b210f8e0d0653035ab8f7c56d7b4e226172
7
- data.tar.gz: 74c00abd7a1d54dd0e8bf1153aceb9b98751b070c2ced92b8257d79a7836a292ee7ba646a984e1d3aaae0b35d20c874821d6fdaf37f33ac3c019e253f77c77de
6
+ metadata.gz: 43d1097a7252e581849883b43c4581757b029e9a8f2e8cca754c396a1ff519cd92e538d984fd163c7733b5a8fdeb1211465cc24a29bffd34918c06d9bce68ce7
7
+ data.tar.gz: 8658b5de346c5e47214bcc294627c9d9c2d0bf40446af26192836e8925429dd029c9a91cd75c54d2d3be62db32417d05a85a11c05b0786e80d8c868006b57bbb
data/lib/textract.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  require "textract/version"
2
- require 'httparty'
2
+ require 'mechanize'
3
3
  require 'nokogiri'
4
4
  require 'opengraph_parser'
5
5
  require 'reverse_markdown'
@@ -21,6 +21,7 @@ module Textract
21
21
  if selectors.nil?
22
22
  article = doc.search('article')
23
23
  else
24
+ require 'pry'; binding.pry
24
25
  article = doc.search(selectors)
25
26
  end
26
27
  if article.count == 1
@@ -30,7 +31,6 @@ module Textract
30
31
  i = 1
31
32
  until els.count < 2
32
33
  search_text = description.split(" ")[0..i].join(" ")
33
- puts search_text
34
34
  els = doc.search "[text()*='#{search_text}']"
35
35
  i += 1
36
36
  end
@@ -66,7 +66,9 @@ module Textract
66
66
 
67
67
  def initialize(url, selectors)
68
68
  @url = url
69
- @html = HTTParty.get url
69
+ agent = Mechanize.new
70
+ agent.user_agent_alias = 'Mac Safari'
71
+ @html = agent.get(url).content
70
72
  @tags = Textract.get_og_tags(@html)
71
73
  if @tags.nil? or @tags.description.nil?
72
74
  # use readability method
data/lib/textract/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Textract
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/textract.gemspec CHANGED
@@ -19,9 +19,9 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ["lib"]
20
20
 
21
21
  spec.add_dependency "opengraph_parser"
22
- spec.add_dependency "httparty"
23
22
  spec.add_dependency "reverse_markdown"
24
23
  spec.add_dependency "ruby-readability"
24
+ spec.add_dependency "mechanize"
25
25
 
26
26
  spec.add_development_dependency "bundler", "~> 1.7"
27
27
  spec.add_development_dependency "rake", "~> 10.0"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Adam Pash
@@ -25,7 +25,7 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
- name: httparty
28
+ name: reverse_markdown
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: reverse_markdown
42
+ name: ruby-readability
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -53,7 +53,7 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: ruby-readability
56
+ name: mechanize
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - ">="