recipe_crawler 2.0.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/lib/recipe_crawler.rb +52 -2
- data/lib/recipe_crawler/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a08ccb046d6fcc71e07e3f232e0e62b3f7298224
|
4
|
+
data.tar.gz: 472292684fa8ed1f27df25dfeea003a92489b284
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c292442813ea2a0ae37400641b0bd9d98a9db25a4e22164c1f546e592c6946aecc3f3a3dcaf52236db36694ad36ecb4fb8b5da0c05e6b3945115ecb59902797
|
7
|
+
data.tar.gz: 19d6862ab5faaa1c5b7812ba864ca69529bf59951d33578ac1eb8c28d09628c4ee9f1b1027118ffc9e98173fb176bdab0b03accc43276dd5c4d6d5ad26ad94c6
|
data/README.md
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
# RecipeCrawler
|
2
2
|
|
3
|
-
A web scrawler to get
|
3
|
+
A web crawler that fetches recipe data from a recipe's web URL. **RecipeCrawler** supports:
|
4
|
+
|
5
|
+
* [marmiton.org](http://www.marmiton.org/)
|
6
|
+
* [750g.com](http://www.750g.com)
|
7
|
+
* [cuisineaz.com](http://www.cuisineaz.com)
|
4
8
|
|
5
9
|
You'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/recipe_crawler`. To experiment with that code, run `bin/console` for an interactive prompt.
|
6
10
|
|
data/lib/recipe_crawler.rb
CHANGED
@@ -14,7 +14,8 @@ module RecipeCrawler
|
|
14
14
|
attr_reader :title, :preptime, :cooktime , :ingredients, :steps, :image
|
15
15
|
|
16
16
|
MARMITON_HOST = {desktop: 'http://www.marmiton.org/', mobile: 'http://m.marmiton.org/'}
|
17
|
-
G750_HOST = 'http://www.750g.com'
|
17
|
+
G750_HOST = {desktop: 'http://www.750g.com'}
|
18
|
+
CUISINEAZ_HOST = {desktop: 'http://www.cuisineaz.com/'}
|
18
19
|
|
19
20
|
|
20
21
|
|
@@ -24,8 +25,13 @@ module RecipeCrawler
|
|
24
25
|
def initialize url
|
25
26
|
if marmiton_host? url
|
26
27
|
fetch_from_marmiton url
|
28
|
+
|
27
29
|
elsif g750_host? url
|
28
30
|
fetch_from_g750 url
|
31
|
+
|
32
|
+
elsif cuisineaz_host? url
|
33
|
+
fetch_from_cuisineaz url
|
34
|
+
|
29
35
|
else
|
30
36
|
raise ArgumentError, "Instantiation cancelled (Host not supported)."
|
31
37
|
end
|
@@ -67,6 +73,7 @@ module RecipeCrawler
|
|
67
73
|
return text
|
68
74
|
end
|
69
75
|
|
76
|
+
|
70
77
|
# test if url is from a valid marmiton.org host
|
71
78
|
#
|
72
79
|
# @param url [String] representing an url
|
@@ -81,7 +88,16 @@ module RecipeCrawler
|
|
81
88
|
# @param url [String] representing an url
|
82
89
|
# @return [Boolean] true if url contains the 750g.com host
|
83
90
|
def g750_host? url
|
84
|
-
return url.include? G750_HOST
|
91
|
+
return url.include? G750_HOST[:desktop]
|
92
|
+
end
|
93
|
+
|
94
|
+
|
95
|
+
# test if url is from a valid cuisineaz.com host
|
96
|
+
#
|
97
|
+
# @param url [String] representing an url
|
98
|
+
# @return [Boolean] true if url contains the cuisineaz.com host
|
99
|
+
def cuisineaz_host? url
|
100
|
+
return url.include? CUISINEAZ_HOST[:desktop]
|
85
101
|
end
|
86
102
|
|
87
103
|
|
@@ -154,6 +170,40 @@ module RecipeCrawler
|
|
154
170
|
raise ArgumentError, "Instantiation cancelled (ulr not from #{G750_HOST})."
|
155
171
|
end
|
156
172
|
end
|
173
|
+
|
174
|
+
|
175
|
+
# fill object properties from a cuisineaz.com url
|
176
|
+
#
|
177
|
+
# @param url [String] representing an url
|
178
|
+
def fetch_from_cuisineaz url
|
179
|
+
if cuisineaz_host? url
|
180
|
+
page = Nokogiri::HTML(open(url).read)
|
181
|
+
@title = page.css('#ficheRecette h1.fs36').text
|
182
|
+
|
183
|
+
# get times
|
184
|
+
@preptime = page.css('#ctl00_ContentPlaceHolder_LblRecetteTempsPrepa').text.to_i
|
185
|
+
@cooktime = page.css('#ctl00_ContentPlaceHolder_LblRecetteTempsCuisson').text.to_i
|
186
|
+
|
187
|
+
|
188
|
+
@steps = []
|
189
|
+
page.css("#preparation span p.fs17").each { |step_node|
|
190
|
+
@steps << sanitize(step_node.text)
|
191
|
+
}
|
192
|
+
|
193
|
+
@ingredients = []
|
194
|
+
page.css("#ingredients ul li span").each { |ing_node|
|
195
|
+
@ingredients << sanitize(ing_node.text)
|
196
|
+
}
|
197
|
+
|
198
|
+
begin
|
199
|
+
@image = page.css('#shareimg').attr('src').to_s
|
200
|
+
rescue NoMethodError => e
|
201
|
+
end
|
202
|
+
|
203
|
+
else
|
204
|
+
raise ArgumentError, "Instantiation cancelled (ulr not from #{G750_HOST})."
|
205
|
+
end
|
206
|
+
end
|
157
207
|
|
158
208
|
end
|
159
209
|
|