recipe_scraper 2.2.2 → 2.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +2 -0
- data/lib/recipe_scraper.rb +42 -58
- data/lib/recipe_scraper/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ff6587e185de963070fe2e3b9e8afdd909d4ee989d59535dcf7eb8d55da2e847
|
4
|
+
data.tar.gz: a3d694b2b551ac1521586bf60cd21109607a8a48d893a7dce518b1bd90f63659
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ddcdbf5ea74b7842f04e714fe1eb491a2e98bb9575111e0b06c7bad5eecca196c987a410079076f7348cd446f69108d50dec2367977b2974ff47def52612f18a
|
7
|
+
data.tar.gz: d4fc03e5d42d6dcbfbcf19162dad7d24030d31c53011ec94ddb56bfb338971dac63ce14b40d0757915d1ec4cb00f862f7faf26b7527456a58398b5c02e309d13
|
data/README.md
CHANGED
data/lib/recipe_scraper.rb
CHANGED
@@ -1,29 +1,22 @@
|
|
1
|
-
# encoding: UTF-8
|
2
1
|
require 'json'
|
3
2
|
require 'open-uri'
|
4
3
|
require 'nokogiri'
|
5
4
|
|
6
|
-
require
|
7
|
-
|
5
|
+
require 'recipe_scraper/version'
|
8
6
|
|
9
7
|
module RecipeScraper
|
10
|
-
|
11
8
|
# Represents a recipe fetched from a URL
|
12
9
|
class Recipe
|
10
|
+
attr_reader :title, :preptime, :cooktime, :ingredients, :steps, :image
|
13
11
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
G750_HOST = {desktop: 'http://www.750g.com'}
|
18
|
-
CUISINEAZ_HOST = {desktop: 'http://www.cuisineaz.com/'}
|
19
|
-
|
20
|
-
|
12
|
+
MARMITON_HOST = { desktop: 'marmiton.org/', mobile: 'm.marmiton.org/' }.freeze
|
13
|
+
G750_HOST = { desktop: '750g.com' }.freeze
|
14
|
+
CUISINEAZ_HOST = { desktop: 'cuisineaz.com/' }.freeze
|
21
15
|
|
22
16
|
# Instantiate a Recipe object with data crawled from a URL
|
23
17
|
#
|
24
18
|
# @param url [String] representing a URL from the Marmiton, 750g or cuisineaz website
|
25
|
-
def initialize
|
26
|
-
|
19
|
+
def initialize(url)
|
27
20
|
if marmiton_host? url
|
28
21
|
fetch_from_marmiton url
|
29
22
|
|
@@ -34,16 +27,15 @@ module RecipeScraper
|
|
34
27
|
fetch_from_cuisineaz url
|
35
28
|
|
36
29
|
else
|
37
|
-
raise ArgumentError,
|
30
|
+
raise ArgumentError, 'Instantiation cancelled (Host not supported).'
|
38
31
|
end
|
39
32
|
end
|
40
33
|
|
41
|
-
|
42
34
|
# export object properties to hash
|
43
35
|
#
|
44
36
|
# @return [Hash] as object's properties
|
45
37
|
def to_hash
|
46
|
-
attrs =
|
38
|
+
attrs = {}
|
47
39
|
instance_variables.each do |var|
|
48
40
|
str = var.to_s.gsub /^@/, ''
|
49
41
|
attrs[str.to_sym] = instance_variable_get(var)
|
@@ -51,69 +43,61 @@ module RecipeScraper
|
|
51
43
|
attrs
|
52
44
|
end
|
53
45
|
|
54
|
-
|
55
46
|
# convert object properties to json
|
56
47
|
#
|
57
48
|
# @return [String] data formatted in JSON
|
58
49
|
def to_json
|
59
|
-
|
50
|
+
to_hash.to_json
|
60
51
|
end
|
61
52
|
|
62
|
-
|
63
53
|
private
|
64
54
|
|
65
|
-
|
66
55
|
# remove `\r\n` & unwanted spaces
|
67
56
|
#
|
68
57
|
# @param text [String] a text to sanitize
|
69
58
|
# @return [String] the correctly formatted text
|
70
|
-
def sanitize
|
71
|
-
[' ', '\r\n', "\r\n", "\n", "\r", "\t", / ^/, / $+/, /^ /, /^ /].each
|
72
|
-
text.gsub!(text_to_remove,'')
|
73
|
-
|
74
|
-
|
59
|
+
def sanitize(text)
|
60
|
+
[' ', '\r\n', "\r\n", "\n", "\r", "\t", / ^/, / $+/, /^ /, /^ /, /Etape [0-9]/, 'Icone horloge', 'Icone casserole '].each do |text_to_remove|
|
61
|
+
text.gsub!(text_to_remove, '')
|
62
|
+
end
|
63
|
+
text
|
75
64
|
end
|
76
65
|
|
77
|
-
|
78
66
|
# test if url is from a valid marmiton.org host
|
79
67
|
#
|
80
68
|
# @param url [String] representing an url
|
81
69
|
# @return [Boolean] true if corresponding to a valid url
|
82
|
-
def marmiton_host?
|
83
|
-
|
70
|
+
def marmiton_host?(url)
|
71
|
+
url.include?(MARMITON_HOST[:desktop]) || url.include?(MARMITON_HOST[:mobile])
|
84
72
|
end
|
85
73
|
|
86
|
-
|
87
74
|
# test if url is from a valid 750g.com host
|
88
75
|
#
|
89
76
|
# @param url [String] representing an url
|
90
77
|
# @return [Boolean] true if corresponding to a valid url
|
91
|
-
def g750_host?
|
92
|
-
|
78
|
+
def g750_host?(url)
|
79
|
+
url.include? G750_HOST[:desktop]
|
93
80
|
end
|
94
81
|
|
95
|
-
|
96
82
|
# test if url is from a valid cuisineaz.com host
|
97
83
|
#
|
98
84
|
# @param url [String] representing an url
|
99
85
|
# @return [Boolean] true if corresponding to a valid url
|
100
|
-
def cuisineaz_host?
|
101
|
-
|
86
|
+
def cuisineaz_host?(url)
|
87
|
+
url.include? CUISINEAZ_HOST[:desktop]
|
102
88
|
end
|
103
89
|
|
104
|
-
|
105
90
|
# fill object properties from a Marmiton url
|
106
91
|
#
|
107
92
|
# @param url [String] representing an url
|
108
|
-
def fetch_from_marmiton
|
93
|
+
def fetch_from_marmiton(url)
|
109
94
|
if marmiton_host? url
|
110
95
|
|
111
96
|
url.gsub! MARMITON_HOST[:mobile], MARMITON_HOST[:desktop]
|
112
97
|
|
113
|
-
page =
|
98
|
+
page = Nokogiri::HTML(open(url).read)
|
114
99
|
@title = page.css('h1').text
|
115
100
|
|
116
|
-
|
117
101
|
# get times
|
118
102
|
@preptime = page.css('div.recipe-infos__timmings__preparation > span.recipe-infos__timmings__value').text.to_i
|
119
103
|
@cooktime = page.css('div.recipe-infos__timmings__cooking > span.recipe-infos__timmings__value').text.to_i
|
@@ -131,20 +115,23 @@ module RecipeScraper
|
|
131
115
|
end
|
132
116
|
|
133
117
|
# get image
|
134
|
-
@image =
|
118
|
+
@image = begin
|
119
|
+
page.css('#af-diapo-desktop-0_img').attr('src').to_s
|
120
|
+
rescue StandardError
|
121
|
+
NoMethodError
|
122
|
+
end
|
135
123
|
|
136
124
|
else
|
137
125
|
raise ArgumentError, "Instantiation cancelled (ulr not from #{MARMITON_HOST})."
|
138
126
|
end
|
139
127
|
end
|
140
128
|
|
141
|
-
|
142
129
|
# fill object properties from a 750g url
|
143
130
|
#
|
144
131
|
# @param url [String] representing an url
|
145
|
-
def fetch_from_g750
|
132
|
+
def fetch_from_g750(url)
|
146
133
|
if g750_host? url
|
147
|
-
page =
|
134
|
+
page = Nokogiri::HTML(open(url).read)
|
148
135
|
@title = page.css('h1.c-article__title').text
|
149
136
|
|
150
137
|
# get times
|
@@ -152,14 +139,14 @@ module RecipeScraper
|
|
152
139
|
@cooktime = sanitize(page.css('ul.c-recipe-summary > li.c-recipe-summary__rating[title="Temps de cuisson"]').text).to_i
|
153
140
|
|
154
141
|
@steps = []
|
155
|
-
css_step =
|
142
|
+
css_step = 'div[itemprop=recipeInstructions] p'
|
156
143
|
@steps = page.css(css_step).text.split /[( ),(<br>)]/
|
157
144
|
|
158
145
|
@ingredients = []
|
159
|
-
css_ingredient =
|
160
|
-
page.css(css_ingredient).each
|
146
|
+
css_ingredient = 'ul.c-recipe-ingredients__list li'
|
147
|
+
page.css(css_ingredient).each do |ing_node|
|
161
148
|
@ingredients << sanitize(ing_node.text)
|
162
|
-
|
149
|
+
end
|
163
150
|
|
164
151
|
# get image
|
165
152
|
css_image = 'div.swiper-wrapper img.photo'
|
@@ -173,31 +160,30 @@ module RecipeScraper
|
|
173
160
|
end
|
174
161
|
end
|
175
162
|
|
176
|
-
|
177
163
|
# fill object properties from a cuisineaz url
|
178
164
|
#
|
179
165
|
# @param url [String] representing an url
|
180
|
-
def fetch_from_cuisineaz
|
166
|
+
def fetch_from_cuisineaz(url)
|
181
167
|
if cuisineaz_host? url
|
182
|
-
page =
|
168
|
+
page = Nokogiri::HTML(open(url).read)
|
183
169
|
@title = page.css('h1').text
|
184
170
|
|
185
171
|
# get times
|
186
|
-
@preptime = page.css('#
|
187
|
-
@cooktime = page.css('#
|
172
|
+
@preptime = page.css('#ContentPlaceHolder_LblRecetteTempsPrepa').text.to_i
|
173
|
+
@cooktime = page.css('#ContentPlaceHolder_LblRecetteTempsCuisson').text.to_i
|
188
174
|
|
189
175
|
@steps = []
|
190
|
-
page.css(
|
176
|
+
page.css('#preparation p').each do |step_node|
|
191
177
|
@steps << sanitize(step_node.text)
|
192
|
-
|
178
|
+
end
|
193
179
|
|
194
180
|
@ingredients = []
|
195
|
-
page.css(
|
181
|
+
page.css('section.recipe_ingredients li').each do |ing_node|
|
196
182
|
@ingredients << sanitize(ing_node.text)
|
197
|
-
|
183
|
+
end
|
198
184
|
|
199
185
|
begin
|
200
|
-
@image = page.css('#
|
186
|
+
@image = page.css('#ContentPlaceHolder_recipeImg').attr('data-src').to_s
|
201
187
|
rescue NoMethodError => e
|
202
188
|
end
|
203
189
|
|
@@ -205,7 +191,5 @@ module RecipeScraper
|
|
205
191
|
raise ArgumentError, "Instantiation cancelled (ulr not from #{G750_HOST})."
|
206
192
|
end
|
207
193
|
end
|
208
|
-
|
209
194
|
end
|
210
|
-
|
211
195
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: recipe_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.
|
4
|
+
version: 2.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- madeindjs
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-12-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -107,7 +107,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
107
107
|
version: '0'
|
108
108
|
requirements: []
|
109
109
|
rubyforge_project:
|
110
|
-
rubygems_version: 2.
|
110
|
+
rubygems_version: 2.7.7
|
111
111
|
signing_key:
|
112
112
|
specification_version: 4
|
113
113
|
summary: A web scrawler to get a Marmiton's, cuisineaz or 750g recipe
|