recipe_crawler 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/lib/recipe_crawler.rb +52 -2
- data/lib/recipe_crawler/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a08ccb046d6fcc71e07e3f232e0e62b3f7298224
|
4
|
+
data.tar.gz: 472292684fa8ed1f27df25dfeea003a92489b284
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c292442813ea2a0ae37400641b0bd9d98a9db25a4e22164c1f546e592c6946aecc3f3a3dcaf52236db36694ad36ecb4fb8b5da0c05e6b3945115ecb59902797
|
7
|
+
data.tar.gz: 19d6862ab5faaa1c5b7812ba864ca69529bf59951d33578ac1eb8c28d09628c4ee9f1b1027118ffc9e98173fb176bdab0b03accc43276dd5c4d6d5ad26ad94c6
|
data/README.md
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
# RecipeCrawler
|
2
2
|
|
3
|
-
A web scrawler to get
|
3
|
+
A web crawler to get recipe data just by its web URL. **RecipeCrawler** supports:
|
4
|
+
|
5
|
+
* [marmiton.org](http://www.marmiton.org/)
|
6
|
+
* [750g.com](http://www.750g.com)
|
7
|
+
* [cuisineaz.com](http://www.cuisineaz.com)
|
4
8
|
|
5
9
|
You'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/recipe_crawler`. To experiment with that code, run `bin/console` for an interactive prompt.
|
6
10
|
|
data/lib/recipe_crawler.rb
CHANGED
@@ -14,7 +14,8 @@ module RecipeCrawler
|
|
14
14
|
attr_reader :title, :preptime, :cooktime , :ingredients, :steps, :image
|
15
15
|
|
16
16
|
MARMITON_HOST = {desktop: 'http://www.marmiton.org/', mobile: 'http://m.marmiton.org/'}
|
17
|
-
G750_HOST = 'http://www.750g.com'
|
17
|
+
G750_HOST = {desktop: 'http://www.750g.com'}
|
18
|
+
CUISINEAZ_HOST = {desktop: 'http://www.cuisineaz.com/'}
|
18
19
|
|
19
20
|
|
20
21
|
|
@@ -24,8 +25,13 @@ module RecipeCrawler
|
|
24
25
|
def initialize url
|
25
26
|
if marmiton_host? url
|
26
27
|
fetch_from_marmiton url
|
28
|
+
|
27
29
|
elsif g750_host? url
|
28
30
|
fetch_from_g750 url
|
31
|
+
|
32
|
+
elsif cuisineaz_host? url
|
33
|
+
fetch_from_cuisineaz url
|
34
|
+
|
29
35
|
else
|
30
36
|
raise ArgumentError, "Instantiation cancelled (Host not supported)."
|
31
37
|
end
|
@@ -67,6 +73,7 @@ module RecipeCrawler
|
|
67
73
|
return text
|
68
74
|
end
|
69
75
|
|
76
|
+
|
70
77
|
# test if url is from a valid marmiton.org host
|
71
78
|
#
|
72
79
|
# @param url [String] representing an url
|
@@ -81,7 +88,16 @@ module RecipeCrawler
|
|
81
88
|
# @param url [String] representing an url
|
82
89
|
# @return [Boolean] as true if corresponding to a valid url
|
83
90
|
def g750_host? url
|
84
|
-
return url.include? G750_HOST
|
91
|
+
return url.include? G750_HOST[:desktop]
|
92
|
+
end
|
93
|
+
|
94
|
+
|
95
|
+
# test if url is from a valid cuisineaz.com host
|
96
|
+
#
|
97
|
+
# @param url [String] representing an url
|
98
|
+
# @return [Boolean] as true if corresponding to a valid url
|
99
|
+
def cuisineaz_host? url
|
100
|
+
return url.include? CUISINEAZ_HOST[:desktop]
|
85
101
|
end
|
86
102
|
|
87
103
|
|
@@ -154,6 +170,40 @@ module RecipeCrawler
|
|
154
170
|
raise ArgumentError, "Instantiation cancelled (ulr not from #{G750_HOST})."
|
155
171
|
end
|
156
172
|
end
|
173
|
+
|
174
|
+
|
175
|
+
# fill object properties from a cuisineaz url
|
176
|
+
#
|
177
|
+
# @param url [String] representing an url
|
178
|
+
def fetch_from_cuisineaz url
|
179
|
+
if cuisineaz_host? url
|
180
|
+
page = Nokogiri::HTML(open(url).read)
|
181
|
+
@title = page.css('#ficheRecette h1.fs36').text
|
182
|
+
|
183
|
+
# get times
|
184
|
+
@preptime = page.css('#ctl00_ContentPlaceHolder_LblRecetteTempsPrepa').text.to_i
|
185
|
+
@cooktime = page.css('#ctl00_ContentPlaceHolder_LblRecetteTempsCuisson').text.to_i
|
186
|
+
|
187
|
+
|
188
|
+
@steps = []
|
189
|
+
page.css("#preparation span p.fs17").each { |step_node|
|
190
|
+
@steps << sanitize(step_node.text)
|
191
|
+
}
|
192
|
+
|
193
|
+
@ingredients = []
|
194
|
+
page.css("#ingredients ul li span").each { |ing_node|
|
195
|
+
@ingredients << sanitize(ing_node.text)
|
196
|
+
}
|
197
|
+
|
198
|
+
begin
|
199
|
+
@image = page.css('#shareimg').attr('src').to_s
|
200
|
+
rescue NoMethodError => e
|
201
|
+
end
|
202
|
+
|
203
|
+
else
|
204
|
+
raise ArgumentError, "Instantiation cancelled (ulr not from #{G750_HOST})."
|
205
|
+
end
|
206
|
+
end
|
157
207
|
|
158
208
|
end
|
159
209
|
|