scrapify 0.0.8 → 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +7 -1
- data/Rakefile +1 -1
- data/examples/hacker_news.rb +9 -0
- data/lib/scrapify.rb +3 -1
- data/lib/scrapify/base.rb +14 -2
- data/lib/scrapify/version.rb +1 -1
- metadata +100 -71
- data/examples/pizza.rb +0 -10
data/README.md
CHANGED
@@ -99,4 +99,10 @@ end
|
|
99
99
|
get 'pizzas/:id' => pizza_api
|
100
100
|
```
|
101
101
|
|
102
|
-
Jsonify scraps url and exposes index and show urls as JSON APIs
|
102
|
+
Jsonify scraps url and exposes index and show urls as JSON APIs
|
103
|
+
|
104
|
+
## License
|
105
|
+
|
106
|
+
ScrApify is released under the MIT license:
|
107
|
+
|
108
|
+
http://www.opensource.org/licenses/MIT
|
data/Rakefile
CHANGED
data/examples/hacker_news.rb
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
class HackerNews
|
2
|
+
include Scrapify::Base
|
3
|
+
html "http://news.ycombinator.com/news"
|
4
|
+
|
5
|
+
attribute :rank, css: "tr:nth-child(3n+1) td.title:nth-child(1)"
|
6
|
+
attribute :title, css: "tr:nth-child(3n+1) td.title:nth-child(3)"
|
7
|
+
attribute :url, xpath: "//tr[position() mod 3 = 1]/td[@class='title'][position()=2]//@href"
|
8
|
+
key :rank
|
9
|
+
end
|
data/lib/scrapify.rb
CHANGED
@@ -4,8 +4,10 @@ require 'active_support/core_ext/hash/keys'
|
|
4
4
|
require 'nokogiri'
|
5
5
|
require 'uri'
|
6
6
|
require 'net/http'
|
7
|
+
require "net/https"
|
8
|
+
require "uri"
|
7
9
|
require 'scrapify/base'
|
8
10
|
require 'scrapify/scraper'
|
9
11
|
require 'scrapify/exceptions'
|
10
12
|
require 'json'
|
11
|
-
require 'jsonify'
|
13
|
+
require 'jsonify'
|
data/lib/scrapify/base.rb
CHANGED
@@ -79,7 +79,18 @@ module Scrapify
|
|
79
79
|
end
|
80
80
|
|
81
81
|
def http_response
|
82
|
-
@http_response ||=
|
82
|
+
@http_response ||= get_response(url, url =~ /^https/i)
|
83
|
+
end
|
84
|
+
|
85
|
+
def get_response(url, secure=false)
|
86
|
+
uri = URI.parse(url)
|
87
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
88
|
+
if secure
|
89
|
+
http.use_ssl = true
|
90
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
91
|
+
end
|
92
|
+
request = Net::HTTP::Get.new(uri.request_uri)
|
93
|
+
response = http.request(request)
|
83
94
|
end
|
84
95
|
|
85
96
|
def http_header
|
@@ -109,7 +120,8 @@ module Scrapify
|
|
109
120
|
self.new(attributes)
|
110
121
|
end
|
111
122
|
|
112
|
-
define_singleton_method :where do |conditions
|
123
|
+
define_singleton_method :where do |conditions|
|
124
|
+
conditions ||= {}
|
113
125
|
raise Scrapify::AttributeDoesNotExist.new(conditions.keys - attribute_names) unless conditions.keys.all?{|key| attribute_names.include?(key) }
|
114
126
|
indices = conditions.collect do |attribute, value|
|
115
127
|
send("#{attribute}_values").each_with_index.find_all{|attr_val, index| attr_val == value}.collect(&:last)
|
data/lib/scrapify/version.rb
CHANGED
metadata
CHANGED
@@ -1,94 +1,115 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrapify
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 9
|
9
|
+
version: 0.0.9
|
6
10
|
platform: ruby
|
7
|
-
authors:
|
11
|
+
authors:
|
8
12
|
- Sathish & Shakiel
|
9
13
|
autorequire:
|
10
14
|
bindir: bin
|
11
15
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
16
|
+
|
17
|
+
date: 2012-10-02 00:00:00 +05:30
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
15
21
|
name: rspec
|
16
|
-
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
17
24
|
none: false
|
18
|
-
requirements:
|
19
|
-
- -
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
22
31
|
type: :development
|
23
|
-
|
24
|
-
|
25
|
-
- !ruby/object:Gem::Dependency
|
32
|
+
version_requirements: *id001
|
33
|
+
- !ruby/object:Gem::Dependency
|
26
34
|
name: mocha
|
27
|
-
|
35
|
+
prerelease: false
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
28
37
|
none: false
|
29
|
-
requirements:
|
30
|
-
- -
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 0
|
43
|
+
version: "0"
|
33
44
|
type: :development
|
34
|
-
|
35
|
-
|
36
|
-
- !ruby/object:Gem::Dependency
|
45
|
+
version_requirements: *id002
|
46
|
+
- !ruby/object:Gem::Dependency
|
37
47
|
name: fakeweb
|
38
|
-
|
48
|
+
prerelease: false
|
49
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
39
50
|
none: false
|
40
|
-
requirements:
|
41
|
-
- -
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
segments:
|
55
|
+
- 0
|
56
|
+
version: "0"
|
44
57
|
type: :development
|
45
|
-
|
46
|
-
|
47
|
-
- !ruby/object:Gem::Dependency
|
58
|
+
version_requirements: *id003
|
59
|
+
- !ruby/object:Gem::Dependency
|
48
60
|
name: nokogiri
|
49
|
-
|
61
|
+
prerelease: false
|
62
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
50
63
|
none: false
|
51
|
-
requirements:
|
52
|
-
- -
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
segments:
|
68
|
+
- 0
|
69
|
+
version: "0"
|
55
70
|
type: :runtime
|
56
|
-
|
57
|
-
|
58
|
-
- !ruby/object:Gem::Dependency
|
71
|
+
version_requirements: *id004
|
72
|
+
- !ruby/object:Gem::Dependency
|
59
73
|
name: activesupport
|
60
|
-
|
74
|
+
prerelease: false
|
75
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
61
76
|
none: false
|
62
|
-
requirements:
|
63
|
-
- -
|
64
|
-
- !ruby/object:Gem::Version
|
65
|
-
|
77
|
+
requirements:
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
segments:
|
81
|
+
- 0
|
82
|
+
version: "0"
|
66
83
|
type: :runtime
|
67
|
-
|
68
|
-
|
69
|
-
- !ruby/object:Gem::Dependency
|
84
|
+
version_requirements: *id005
|
85
|
+
- !ruby/object:Gem::Dependency
|
70
86
|
name: json
|
71
|
-
|
87
|
+
prerelease: false
|
88
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
72
89
|
none: false
|
73
|
-
requirements:
|
74
|
-
- -
|
75
|
-
- !ruby/object:Gem::Version
|
76
|
-
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
segments:
|
94
|
+
- 0
|
95
|
+
version: "0"
|
77
96
|
type: :runtime
|
78
|
-
|
79
|
-
version_requirements: *70278707137260
|
97
|
+
version_requirements: *id006
|
80
98
|
description: ScrApify scraps static html sites to RESTlike APIs
|
81
|
-
email:
|
99
|
+
email:
|
82
100
|
- sathish316@gmail.com
|
83
101
|
executables: []
|
102
|
+
|
84
103
|
extensions: []
|
104
|
+
|
85
105
|
extra_rdoc_files: []
|
86
|
-
|
106
|
+
|
107
|
+
files:
|
87
108
|
- .gitignore
|
88
109
|
- Gemfile
|
89
110
|
- README.md
|
90
111
|
- Rakefile
|
91
|
-
- examples/
|
112
|
+
- examples/hacker_news.rb
|
92
113
|
- lib/jsonify.rb
|
93
114
|
- lib/scrapify.rb
|
94
115
|
- lib/scrapify/base.rb
|
@@ -104,31 +125,39 @@ files:
|
|
104
125
|
- spec/shared/scrapify.rb
|
105
126
|
- spec/spec_helper.rb
|
106
127
|
- spec/test_models.rb
|
128
|
+
has_rdoc: true
|
107
129
|
homepage: http://www.github.com/sathish316/scrapify
|
108
130
|
licenses: []
|
131
|
+
|
109
132
|
post_install_message:
|
110
133
|
rdoc_options: []
|
111
|
-
|
134
|
+
|
135
|
+
require_paths:
|
112
136
|
- lib
|
113
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
137
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
114
138
|
none: false
|
115
|
-
requirements:
|
116
|
-
- -
|
117
|
-
- !ruby/object:Gem::Version
|
118
|
-
|
119
|
-
|
139
|
+
requirements:
|
140
|
+
- - ">="
|
141
|
+
- !ruby/object:Gem::Version
|
142
|
+
segments:
|
143
|
+
- 0
|
144
|
+
version: "0"
|
145
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
120
146
|
none: false
|
121
|
-
requirements:
|
122
|
-
- -
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
|
147
|
+
requirements:
|
148
|
+
- - ">="
|
149
|
+
- !ruby/object:Gem::Version
|
150
|
+
segments:
|
151
|
+
- 0
|
152
|
+
version: "0"
|
125
153
|
requirements: []
|
154
|
+
|
126
155
|
rubyforge_project: scrapify
|
127
|
-
rubygems_version: 1.
|
156
|
+
rubygems_version: 1.3.7
|
128
157
|
signing_key:
|
129
158
|
specification_version: 3
|
130
159
|
summary: ScrApify scraps static html sites to scraESTlike APIs
|
131
|
-
test_files:
|
160
|
+
test_files:
|
132
161
|
- spec/jsonify_spec.rb
|
133
162
|
- spec/pizza.rb
|
134
163
|
- spec/scraper_spec.rb
|
data/examples/pizza.rb
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
class Pizza
|
2
|
-
include Scrapify::Base
|
3
|
-
html "http://www.dominos.co.in/menuDetails_ajx.php?catgId=1"
|
4
|
-
|
5
|
-
attribute :name, css: ".menu_lft li a"
|
6
|
-
attribute :image_url, xpath: "//li//input//@value"
|
7
|
-
attribute :price, css: ".price", regex: /([\d\.]+)/
|
8
|
-
|
9
|
-
key :name
|
10
|
-
end
|