scrapify 0.0.8 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -99,4 +99,10 @@ end
99
99
  get 'pizzas/:id' => pizza_api
100
100
  ```
101
101
 
102
- Jsonify scraps url and exposes index and show urls as JSON APIs
102
+ Jsonify scraps url and exposes index and show urls as JSON APIs
103
+
104
+ ## License
105
+
106
+ ScrApify is released under the MIT license:
107
+
108
+ http://www.opensource.org/licenses/MIT
data/Rakefile CHANGED
@@ -1,4 +1,4 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
2
2
  require 'rspec/core/rake_task'
3
3
 
4
4
  RSpec::Core::RakeTask.new('spec')
@@ -0,0 +1,9 @@
1
+ class HackerNews
2
+ include Scrapify::Base
3
+ html "http://news.ycombinator.com/news"
4
+
5
+ attribute :rank, css: "tr:nth-child(3n+1) td.title:nth-child(1)"
6
+ attribute :title, css: "tr:nth-child(3n+1) td.title:nth-child(3)"
7
+ attribute :url, xpath: "//tr[position() mod 3 = 1]/td[@class='title'][position()=2]//@href"
8
+ key :rank
9
+ end
@@ -4,8 +4,10 @@ require 'active_support/core_ext/hash/keys'
4
4
  require 'nokogiri'
5
5
  require 'uri'
6
6
  require 'net/http'
7
+ require "net/https"
8
+ require "uri"
7
9
  require 'scrapify/base'
8
10
  require 'scrapify/scraper'
9
11
  require 'scrapify/exceptions'
10
12
  require 'json'
11
- require 'jsonify'
13
+ require 'jsonify'
@@ -79,7 +79,18 @@ module Scrapify
79
79
  end
80
80
 
81
81
  def http_response
82
- @http_response ||= Net::HTTP.get_response URI(url)
82
+ @http_response ||= get_response(url, url =~ /^https/i)
83
+ end
84
+
85
+ def get_response(url, secure=false)
86
+ uri = URI.parse(url)
87
+ http = Net::HTTP.new(uri.host, uri.port)
88
+ if secure
89
+ http.use_ssl = true
90
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
91
+ end
92
+ request = Net::HTTP::Get.new(uri.request_uri)
93
+ response = http.request(request)
83
94
  end
84
95
 
85
96
  def http_header
@@ -109,7 +120,8 @@ module Scrapify
109
120
  self.new(attributes)
110
121
  end
111
122
 
112
- define_singleton_method :where do |conditions = {}|
123
+ define_singleton_method :where do |conditions|
124
+ conditions ||= {}
113
125
  raise Scrapify::AttributeDoesNotExist.new(conditions.keys - attribute_names) unless conditions.keys.all?{|key| attribute_names.include?(key) }
114
126
  indices = conditions.collect do |attribute, value|
115
127
  send("#{attribute}_values").each_with_index.find_all{|attr_val, index| attr_val == value}.collect(&:last)
@@ -1,3 +1,3 @@
1
1
  module Scrapify
2
- VERSION = "0.0.8"
2
+ VERSION = "0.0.9"
3
3
  end
metadata CHANGED
@@ -1,94 +1,115 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: scrapify
3
- version: !ruby/object:Gem::Version
4
- version: 0.0.8
5
- prerelease:
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 9
9
+ version: 0.0.9
6
10
  platform: ruby
7
- authors:
11
+ authors:
8
12
  - Sathish & Shakiel
9
13
  autorequire:
10
14
  bindir: bin
11
15
  cert_chain: []
12
- date: 2012-07-11 00:00:00.000000000Z
13
- dependencies:
14
- - !ruby/object:Gem::Dependency
16
+
17
+ date: 2012-10-02 00:00:00 +05:30
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
15
21
  name: rspec
16
- requirement: &70278707139780 !ruby/object:Gem::Requirement
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
17
24
  none: false
18
- requirements:
19
- - - ! '>='
20
- - !ruby/object:Gem::Version
21
- version: '0'
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ version: "0"
22
31
  type: :development
23
- prerelease: false
24
- version_requirements: *70278707139780
25
- - !ruby/object:Gem::Dependency
32
+ version_requirements: *id001
33
+ - !ruby/object:Gem::Dependency
26
34
  name: mocha
27
- requirement: &70278707139300 !ruby/object:Gem::Requirement
35
+ prerelease: false
36
+ requirement: &id002 !ruby/object:Gem::Requirement
28
37
  none: false
29
- requirements:
30
- - - ! '>='
31
- - !ruby/object:Gem::Version
32
- version: '0'
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ segments:
42
+ - 0
43
+ version: "0"
33
44
  type: :development
34
- prerelease: false
35
- version_requirements: *70278707139300
36
- - !ruby/object:Gem::Dependency
45
+ version_requirements: *id002
46
+ - !ruby/object:Gem::Dependency
37
47
  name: fakeweb
38
- requirement: &70278707138800 !ruby/object:Gem::Requirement
48
+ prerelease: false
49
+ requirement: &id003 !ruby/object:Gem::Requirement
39
50
  none: false
40
- requirements:
41
- - - ! '>='
42
- - !ruby/object:Gem::Version
43
- version: '0'
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ segments:
55
+ - 0
56
+ version: "0"
44
57
  type: :development
45
- prerelease: false
46
- version_requirements: *70278707138800
47
- - !ruby/object:Gem::Dependency
58
+ version_requirements: *id003
59
+ - !ruby/object:Gem::Dependency
48
60
  name: nokogiri
49
- requirement: &70278707138260 !ruby/object:Gem::Requirement
61
+ prerelease: false
62
+ requirement: &id004 !ruby/object:Gem::Requirement
50
63
  none: false
51
- requirements:
52
- - - ! '>='
53
- - !ruby/object:Gem::Version
54
- version: '0'
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ segments:
68
+ - 0
69
+ version: "0"
55
70
  type: :runtime
56
- prerelease: false
57
- version_requirements: *70278707138260
58
- - !ruby/object:Gem::Dependency
71
+ version_requirements: *id004
72
+ - !ruby/object:Gem::Dependency
59
73
  name: activesupport
60
- requirement: &70278707137740 !ruby/object:Gem::Requirement
74
+ prerelease: false
75
+ requirement: &id005 !ruby/object:Gem::Requirement
61
76
  none: false
62
- requirements:
63
- - - ! '>='
64
- - !ruby/object:Gem::Version
65
- version: '0'
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ segments:
81
+ - 0
82
+ version: "0"
66
83
  type: :runtime
67
- prerelease: false
68
- version_requirements: *70278707137740
69
- - !ruby/object:Gem::Dependency
84
+ version_requirements: *id005
85
+ - !ruby/object:Gem::Dependency
70
86
  name: json
71
- requirement: &70278707137260 !ruby/object:Gem::Requirement
87
+ prerelease: false
88
+ requirement: &id006 !ruby/object:Gem::Requirement
72
89
  none: false
73
- requirements:
74
- - - ! '>='
75
- - !ruby/object:Gem::Version
76
- version: '0'
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ segments:
94
+ - 0
95
+ version: "0"
77
96
  type: :runtime
78
- prerelease: false
79
- version_requirements: *70278707137260
97
+ version_requirements: *id006
80
98
  description: ScrApify scraps static html sites to RESTlike APIs
81
- email:
99
+ email:
82
100
  - sathish316@gmail.com
83
101
  executables: []
102
+
84
103
  extensions: []
104
+
85
105
  extra_rdoc_files: []
86
- files:
106
+
107
+ files:
87
108
  - .gitignore
88
109
  - Gemfile
89
110
  - README.md
90
111
  - Rakefile
91
- - examples/pizza.rb
112
+ - examples/hacker_news.rb
92
113
  - lib/jsonify.rb
93
114
  - lib/scrapify.rb
94
115
  - lib/scrapify/base.rb
@@ -104,31 +125,39 @@ files:
104
125
  - spec/shared/scrapify.rb
105
126
  - spec/spec_helper.rb
106
127
  - spec/test_models.rb
128
+ has_rdoc: true
107
129
  homepage: http://www.github.com/sathish316/scrapify
108
130
  licenses: []
131
+
109
132
  post_install_message:
110
133
  rdoc_options: []
111
- require_paths:
134
+
135
+ require_paths:
112
136
  - lib
113
- required_ruby_version: !ruby/object:Gem::Requirement
137
+ required_ruby_version: !ruby/object:Gem::Requirement
114
138
  none: false
115
- requirements:
116
- - - ! '>='
117
- - !ruby/object:Gem::Version
118
- version: '0'
119
- required_rubygems_version: !ruby/object:Gem::Requirement
139
+ requirements:
140
+ - - ">="
141
+ - !ruby/object:Gem::Version
142
+ segments:
143
+ - 0
144
+ version: "0"
145
+ required_rubygems_version: !ruby/object:Gem::Requirement
120
146
  none: false
121
- requirements:
122
- - - ! '>='
123
- - !ruby/object:Gem::Version
124
- version: '0'
147
+ requirements:
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ segments:
151
+ - 0
152
+ version: "0"
125
153
  requirements: []
154
+
126
155
  rubyforge_project: scrapify
127
- rubygems_version: 1.8.10
156
+ rubygems_version: 1.3.7
128
157
  signing_key:
129
158
  specification_version: 3
130
159
  summary: ScrApify scraps static html sites to scraESTlike APIs
131
- test_files:
160
+ test_files:
132
161
  - spec/jsonify_spec.rb
133
162
  - spec/pizza.rb
134
163
  - spec/scraper_spec.rb
@@ -1,10 +0,0 @@
1
- class Pizza
2
- include Scrapify::Base
3
- html "http://www.dominos.co.in/menuDetails_ajx.php?catgId=1"
4
-
5
- attribute :name, css: ".menu_lft li a"
6
- attribute :image_url, xpath: "//li//input//@value"
7
- attribute :price, css: ".price", regex: /([\d\.]+)/
8
-
9
- key :name
10
- end