scrapify 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +7 -1
- data/Rakefile +1 -1
- data/examples/hacker_news.rb +9 -0
- data/lib/scrapify.rb +3 -1
- data/lib/scrapify/base.rb +14 -2
- data/lib/scrapify/version.rb +1 -1
- metadata +100 -71
- data/examples/pizza.rb +0 -10
data/README.md
CHANGED
@@ -99,4 +99,10 @@ end
|
|
99
99
|
get 'pizzas/:id' => pizza_api
|
100
100
|
```
|
101
101
|
|
102
|
-
Jsonify scraps url and exposes index and show urls as JSON APIs
|
102
|
+
Jsonify scraps url and exposes index and show urls as JSON APIs
|
103
|
+
|
104
|
+
## License
|
105
|
+
|
106
|
+
ScrApify is released under the MIT license:
|
107
|
+
|
108
|
+
http://www.opensource.org/licenses/MIT
|
data/Rakefile
CHANGED
data/examples/hacker_news.rb
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
class HackerNews
|
2
|
+
include Scrapify::Base
|
3
|
+
html "http://news.ycombinator.com/news"
|
4
|
+
|
5
|
+
attribute :rank, css: "tr:nth-child(3n+1) td.title:nth-child(1)"
|
6
|
+
attribute :title, css: "tr:nth-child(3n+1) td.title:nth-child(3)"
|
7
|
+
attribute :url, xpath: "//tr[position() mod 3 = 1]/td[@class='title'][position()=2]//@href"
|
8
|
+
key :rank
|
9
|
+
end
|
data/lib/scrapify.rb
CHANGED
@@ -4,8 +4,10 @@ require 'active_support/core_ext/hash/keys'
|
|
4
4
|
require 'nokogiri'
|
5
5
|
require 'uri'
|
6
6
|
require 'net/http'
|
7
|
+
require "net/https"
|
8
|
+
require "uri"
|
7
9
|
require 'scrapify/base'
|
8
10
|
require 'scrapify/scraper'
|
9
11
|
require 'scrapify/exceptions'
|
10
12
|
require 'json'
|
11
|
-
require 'jsonify'
|
13
|
+
require 'jsonify'
|
data/lib/scrapify/base.rb
CHANGED
@@ -79,7 +79,18 @@ module Scrapify
|
|
79
79
|
end
|
80
80
|
|
81
81
|
def http_response
|
82
|
-
@http_response ||=
|
82
|
+
@http_response ||= get_response(url, url =~ /^https/i)
|
83
|
+
end
|
84
|
+
|
85
|
+
def get_response(url, secure=false)
|
86
|
+
uri = URI.parse(url)
|
87
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
88
|
+
if secure
|
89
|
+
http.use_ssl = true
|
90
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
91
|
+
end
|
92
|
+
request = Net::HTTP::Get.new(uri.request_uri)
|
93
|
+
response = http.request(request)
|
83
94
|
end
|
84
95
|
|
85
96
|
def http_header
|
@@ -109,7 +120,8 @@ module Scrapify
|
|
109
120
|
self.new(attributes)
|
110
121
|
end
|
111
122
|
|
112
|
-
define_singleton_method :where do |conditions
|
123
|
+
define_singleton_method :where do |conditions|
|
124
|
+
conditions ||= {}
|
113
125
|
raise Scrapify::AttributeDoesNotExist.new(conditions.keys - attribute_names) unless conditions.keys.all?{|key| attribute_names.include?(key) }
|
114
126
|
indices = conditions.collect do |attribute, value|
|
115
127
|
send("#{attribute}_values").each_with_index.find_all{|attr_val, index| attr_val == value}.collect(&:last)
|
data/lib/scrapify/version.rb
CHANGED
metadata
CHANGED
@@ -1,94 +1,115 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrapify
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 9
|
9
|
+
version: 0.0.9
|
6
10
|
platform: ruby
|
7
|
-
authors:
|
11
|
+
authors:
|
8
12
|
- Sathish & Shakiel
|
9
13
|
autorequire:
|
10
14
|
bindir: bin
|
11
15
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
16
|
+
|
17
|
+
date: 2012-10-02 00:00:00 +05:30
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
15
21
|
name: rspec
|
16
|
-
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
17
24
|
none: false
|
18
|
-
requirements:
|
19
|
-
- -
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
22
31
|
type: :development
|
23
|
-
|
24
|
-
|
25
|
-
- !ruby/object:Gem::Dependency
|
32
|
+
version_requirements: *id001
|
33
|
+
- !ruby/object:Gem::Dependency
|
26
34
|
name: mocha
|
27
|
-
|
35
|
+
prerelease: false
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
28
37
|
none: false
|
29
|
-
requirements:
|
30
|
-
- -
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 0
|
43
|
+
version: "0"
|
33
44
|
type: :development
|
34
|
-
|
35
|
-
|
36
|
-
- !ruby/object:Gem::Dependency
|
45
|
+
version_requirements: *id002
|
46
|
+
- !ruby/object:Gem::Dependency
|
37
47
|
name: fakeweb
|
38
|
-
|
48
|
+
prerelease: false
|
49
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
39
50
|
none: false
|
40
|
-
requirements:
|
41
|
-
- -
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
segments:
|
55
|
+
- 0
|
56
|
+
version: "0"
|
44
57
|
type: :development
|
45
|
-
|
46
|
-
|
47
|
-
- !ruby/object:Gem::Dependency
|
58
|
+
version_requirements: *id003
|
59
|
+
- !ruby/object:Gem::Dependency
|
48
60
|
name: nokogiri
|
49
|
-
|
61
|
+
prerelease: false
|
62
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
50
63
|
none: false
|
51
|
-
requirements:
|
52
|
-
- -
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
segments:
|
68
|
+
- 0
|
69
|
+
version: "0"
|
55
70
|
type: :runtime
|
56
|
-
|
57
|
-
|
58
|
-
- !ruby/object:Gem::Dependency
|
71
|
+
version_requirements: *id004
|
72
|
+
- !ruby/object:Gem::Dependency
|
59
73
|
name: activesupport
|
60
|
-
|
74
|
+
prerelease: false
|
75
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
61
76
|
none: false
|
62
|
-
requirements:
|
63
|
-
- -
|
64
|
-
- !ruby/object:Gem::Version
|
65
|
-
|
77
|
+
requirements:
|
78
|
+
- - ">="
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
segments:
|
81
|
+
- 0
|
82
|
+
version: "0"
|
66
83
|
type: :runtime
|
67
|
-
|
68
|
-
|
69
|
-
- !ruby/object:Gem::Dependency
|
84
|
+
version_requirements: *id005
|
85
|
+
- !ruby/object:Gem::Dependency
|
70
86
|
name: json
|
71
|
-
|
87
|
+
prerelease: false
|
88
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
72
89
|
none: false
|
73
|
-
requirements:
|
74
|
-
- -
|
75
|
-
- !ruby/object:Gem::Version
|
76
|
-
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
segments:
|
94
|
+
- 0
|
95
|
+
version: "0"
|
77
96
|
type: :runtime
|
78
|
-
|
79
|
-
version_requirements: *70278707137260
|
97
|
+
version_requirements: *id006
|
80
98
|
description: ScrApify scraps static html sites to RESTlike APIs
|
81
|
-
email:
|
99
|
+
email:
|
82
100
|
- sathish316@gmail.com
|
83
101
|
executables: []
|
102
|
+
|
84
103
|
extensions: []
|
104
|
+
|
85
105
|
extra_rdoc_files: []
|
86
|
-
|
106
|
+
|
107
|
+
files:
|
87
108
|
- .gitignore
|
88
109
|
- Gemfile
|
89
110
|
- README.md
|
90
111
|
- Rakefile
|
91
|
-
- examples/
|
112
|
+
- examples/hacker_news.rb
|
92
113
|
- lib/jsonify.rb
|
93
114
|
- lib/scrapify.rb
|
94
115
|
- lib/scrapify/base.rb
|
@@ -104,31 +125,39 @@ files:
|
|
104
125
|
- spec/shared/scrapify.rb
|
105
126
|
- spec/spec_helper.rb
|
106
127
|
- spec/test_models.rb
|
128
|
+
has_rdoc: true
|
107
129
|
homepage: http://www.github.com/sathish316/scrapify
|
108
130
|
licenses: []
|
131
|
+
|
109
132
|
post_install_message:
|
110
133
|
rdoc_options: []
|
111
|
-
|
134
|
+
|
135
|
+
require_paths:
|
112
136
|
- lib
|
113
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
137
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
114
138
|
none: false
|
115
|
-
requirements:
|
116
|
-
- -
|
117
|
-
- !ruby/object:Gem::Version
|
118
|
-
|
119
|
-
|
139
|
+
requirements:
|
140
|
+
- - ">="
|
141
|
+
- !ruby/object:Gem::Version
|
142
|
+
segments:
|
143
|
+
- 0
|
144
|
+
version: "0"
|
145
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
120
146
|
none: false
|
121
|
-
requirements:
|
122
|
-
- -
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
|
147
|
+
requirements:
|
148
|
+
- - ">="
|
149
|
+
- !ruby/object:Gem::Version
|
150
|
+
segments:
|
151
|
+
- 0
|
152
|
+
version: "0"
|
125
153
|
requirements: []
|
154
|
+
|
126
155
|
rubyforge_project: scrapify
|
127
|
-
rubygems_version: 1.
|
156
|
+
rubygems_version: 1.3.7
|
128
157
|
signing_key:
|
129
158
|
specification_version: 3
|
130
159
|
summary: ScrApify scraps static html sites to scraESTlike APIs
|
131
|
-
test_files:
|
160
|
+
test_files:
|
132
161
|
- spec/jsonify_spec.rb
|
133
162
|
- spec/pizza.rb
|
134
163
|
- spec/scraper_spec.rb
|
data/examples/pizza.rb
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
class Pizza
|
2
|
-
include Scrapify::Base
|
3
|
-
html "http://www.dominos.co.in/menuDetails_ajx.php?catgId=1"
|
4
|
-
|
5
|
-
attribute :name, css: ".menu_lft li a"
|
6
|
-
attribute :image_url, xpath: "//li//input//@value"
|
7
|
-
attribute :price, css: ".price", regex: /([\d\.]+)/
|
8
|
-
|
9
|
-
key :name
|
10
|
-
end
|