recipe_scraper 2.2.2 → 2.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +2 -0
- data/lib/recipe_scraper.rb +42 -58
- data/lib/recipe_scraper/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ff6587e185de963070fe2e3b9e8afdd909d4ee989d59535dcf7eb8d55da2e847
|
4
|
+
data.tar.gz: a3d694b2b551ac1521586bf60cd21109607a8a48d893a7dce518b1bd90f63659
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ddcdbf5ea74b7842f04e714fe1eb491a2e98bb9575111e0b06c7bad5eecca196c987a410079076f7348cd446f69108d50dec2367977b2974ff47def52612f18a
|
7
|
+
data.tar.gz: d4fc03e5d42d6dcbfbcf19162dad7d24030d31c53011ec94ddb56bfb338971dac63ce14b40d0757915d1ec4cb00f862f7faf26b7527456a58398b5c02e309d13
|
data/README.md
CHANGED
data/lib/recipe_scraper.rb
CHANGED
@@ -1,29 +1,22 @@
|
|
1
|
-
# encoding: UTF-8
|
2
1
|
require 'json'
|
3
2
|
require 'open-uri'
|
4
3
|
require 'nokogiri'
|
5
4
|
|
6
|
-
require
|
7
|
-
|
5
|
+
require 'recipe_scraper/version'
|
8
6
|
|
9
7
|
module RecipeScraper
|
10
|
-
|
11
8
|
# Represents a recipe fetched from a URL
|
12
9
|
class Recipe
|
10
|
+
attr_reader :title, :preptime, :cooktime, :ingredients, :steps, :image
|
13
11
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
G750_HOST = {desktop: 'http://www.750g.com'}
|
18
|
-
CUISINEAZ_HOST = {desktop: 'http://www.cuisineaz.com/'}
|
19
|
-
|
20
|
-
|
12
|
+
MARMITON_HOST = { desktop: 'marmiton.org/', mobile: 'm.marmiton.org/' }.freeze
|
13
|
+
G750_HOST = { desktop: '750g.com' }.freeze
|
14
|
+
CUISINEAZ_HOST = { desktop: 'cuisineaz.com/' }.freeze
|
21
15
|
|
22
16
|
# Instantiate a Recipe object with data crawled from a URL
|
23
17
|
#
|
24
18
|
# @param url [String] representing a URL from the Marmiton, 750g or cuisineaz website
|
25
|
-
def initialize
|
26
|
-
|
19
|
+
def initialize(url)
|
27
20
|
if marmiton_host? url
|
28
21
|
fetch_from_marmiton url
|
29
22
|
|
@@ -34,16 +27,15 @@ module RecipeScraper
|
|
34
27
|
fetch_from_cuisineaz url
|
35
28
|
|
36
29
|
else
|
37
|
-
raise ArgumentError,
|
30
|
+
raise ArgumentError, 'Instantiation cancelled (Host not supported).'
|
38
31
|
end
|
39
32
|
end
|
40
33
|
|
41
|
-
|
42
34
|
# export object properties to hash
|
43
35
|
#
|
44
36
|
# @return [Hash] as object's properties
|
45
37
|
def to_hash
|
46
|
-
attrs =
|
38
|
+
attrs = {}
|
47
39
|
instance_variables.each do |var|
|
48
40
|
str = var.to_s.gsub /^@/, ''
|
49
41
|
attrs[str.to_sym] = instance_variable_get(var)
|
@@ -51,69 +43,61 @@ module RecipeScraper
|
|
51
43
|
attrs
|
52
44
|
end
|
53
45
|
|
54
|
-
|
55
46
|
# convert object properties to json
|
56
47
|
#
|
57
48
|
# @return [String] data formatted in JSON
|
58
49
|
def to_json
|
59
|
-
|
50
|
+
to_hash.to_json
|
60
51
|
end
|
61
52
|
|
62
|
-
|
63
53
|
private
|
64
54
|
|
65
|
-
|
66
55
|
# remove `\r\n` & unwanted spaces
|
67
56
|
#
|
68
57
|
# @param text [String] a text to sanitize
|
69
58
|
# @return [String] the correctly formatted text
|
70
|
-
def sanitize
|
71
|
-
[' ', '\r\n', "\r\n", "\n", "\r", "\t", / ^/, / $+/, /^ /, /^ /].each
|
72
|
-
text.gsub!(text_to_remove,'')
|
73
|
-
|
74
|
-
|
59
|
+
def sanitize(text)
|
60
|
+
[' ', '\r\n', "\r\n", "\n", "\r", "\t", / ^/, / $+/, /^ /, /^ /, /Etape [0-9]/, 'Icone horloge', 'Icone casserole '].each do |text_to_remove|
|
61
|
+
text.gsub!(text_to_remove, '')
|
62
|
+
end
|
63
|
+
text
|
75
64
|
end
|
76
65
|
|
77
|
-
|
78
66
|
# test if url is from a valid marmiton.org host
|
79
67
|
#
|
80
68
|
# @param url [String] representing an url
|
81
69
|
# @return [Boolean] true if the url corresponds to a valid host
|
82
|
-
def marmiton_host?
|
83
|
-
|
70
|
+
def marmiton_host?(url)
|
71
|
+
url.include?(MARMITON_HOST[:desktop]) || url.include?(MARMITON_HOST[:mobile])
|
84
72
|
end
|
85
73
|
|
86
|
-
|
87
74
|
# test if url is from a valid 750g.com host
|
88
75
|
#
|
89
76
|
# @param url [String] representing an url
|
90
77
|
# @return [Boolean] true if the url corresponds to a valid host
|
91
|
-
def g750_host?
|
92
|
-
|
78
|
+
def g750_host?(url)
|
79
|
+
url.include? G750_HOST[:desktop]
|
93
80
|
end
|
94
81
|
|
95
|
-
|
96
82
|
# test if url is from a valid cuisineaz.com host
|
97
83
|
#
|
98
84
|
# @param url [String] representing an url
|
99
85
|
# @return [Boolean] true if the url corresponds to a valid host
|
100
|
-
def cuisineaz_host?
|
101
|
-
|
86
|
+
def cuisineaz_host?(url)
|
87
|
+
url.include? CUISINEAZ_HOST[:desktop]
|
102
88
|
end
|
103
89
|
|
104
|
-
|
105
90
|
# fill object properties from a Marmiton url
|
106
91
|
#
|
107
92
|
# @param url [String] representing an url
|
108
|
-
def fetch_from_marmiton
|
93
|
+
def fetch_from_marmiton(url)
|
109
94
|
if marmiton_host? url
|
110
95
|
|
111
96
|
url.gsub! MARMITON_HOST[:mobile], MARMITON_HOST[:desktop]
|
112
97
|
|
113
|
-
page =
|
98
|
+
page = Nokogiri::HTML(open(url).read)
|
114
99
|
@title = page.css('h1').text
|
115
100
|
|
116
|
-
|
117
101
|
# get times
|
118
102
|
@preptime = page.css('div.recipe-infos__timmings__preparation > span.recipe-infos__timmings__value').text.to_i
|
119
103
|
@cooktime = page.css('div.recipe-infos__timmings__cooking > span.recipe-infos__timmings__value').text.to_i
|
@@ -131,20 +115,23 @@ module RecipeScraper
|
|
131
115
|
end
|
132
116
|
|
133
117
|
# get image
|
134
|
-
@image =
|
118
|
+
@image = begin
|
119
|
+
page.css('#af-diapo-desktop-0_img').attr('src').to_s
|
120
|
+
rescue StandardError
|
121
|
+
NoMethodError
|
122
|
+
end
|
135
123
|
|
136
124
|
else
|
137
125
|
raise ArgumentError, "Instantiation cancelled (ulr not from #{MARMITON_HOST})."
|
138
126
|
end
|
139
127
|
end
|
140
128
|
|
141
|
-
|
142
129
|
# fill object properties from a 750g url
|
143
130
|
#
|
144
131
|
# @param url [String] representing an url
|
145
|
-
def fetch_from_g750
|
132
|
+
def fetch_from_g750(url)
|
146
133
|
if g750_host? url
|
147
|
-
page =
|
134
|
+
page = Nokogiri::HTML(open(url).read)
|
148
135
|
@title = page.css('h1.c-article__title').text
|
149
136
|
|
150
137
|
# get times
|
@@ -152,14 +139,14 @@ module RecipeScraper
|
|
152
139
|
@cooktime = sanitize(page.css('ul.c-recipe-summary > li.c-recipe-summary__rating[title="Temps de cuisson"]').text).to_i
|
153
140
|
|
154
141
|
@steps = []
|
155
|
-
css_step =
|
142
|
+
css_step = 'div[itemprop=recipeInstructions] p'
|
156
143
|
@steps = page.css(css_step).text.split /[( ),(<br>)]/
|
157
144
|
|
158
145
|
@ingredients = []
|
159
|
-
css_ingredient =
|
160
|
-
page.css(css_ingredient).each
|
146
|
+
css_ingredient = 'ul.c-recipe-ingredients__list li'
|
147
|
+
page.css(css_ingredient).each do |ing_node|
|
161
148
|
@ingredients << sanitize(ing_node.text)
|
162
|
-
|
149
|
+
end
|
163
150
|
|
164
151
|
# get image
|
165
152
|
css_image = 'div.swiper-wrapper img.photo'
|
@@ -173,31 +160,30 @@ module RecipeScraper
|
|
173
160
|
end
|
174
161
|
end
|
175
162
|
|
176
|
-
|
177
163
|
# fill object properties from a cuisineaz url
|
178
164
|
#
|
179
165
|
# @param url [String] representing an url
|
180
|
-
def fetch_from_cuisineaz
|
166
|
+
def fetch_from_cuisineaz(url)
|
181
167
|
if cuisineaz_host? url
|
182
|
-
page =
|
168
|
+
page = Nokogiri::HTML(open(url).read)
|
183
169
|
@title = page.css('h1').text
|
184
170
|
|
185
171
|
# get times
|
186
|
-
@preptime = page.css('#
|
187
|
-
@cooktime = page.css('#
|
172
|
+
@preptime = page.css('#ContentPlaceHolder_LblRecetteTempsPrepa').text.to_i
|
173
|
+
@cooktime = page.css('#ContentPlaceHolder_LblRecetteTempsCuisson').text.to_i
|
188
174
|
|
189
175
|
@steps = []
|
190
|
-
page.css(
|
176
|
+
page.css('#preparation p').each do |step_node|
|
191
177
|
@steps << sanitize(step_node.text)
|
192
|
-
|
178
|
+
end
|
193
179
|
|
194
180
|
@ingredients = []
|
195
|
-
page.css(
|
181
|
+
page.css('section.recipe_ingredients li').each do |ing_node|
|
196
182
|
@ingredients << sanitize(ing_node.text)
|
197
|
-
|
183
|
+
end
|
198
184
|
|
199
185
|
begin
|
200
|
-
@image = page.css('#
|
186
|
+
@image = page.css('#ContentPlaceHolder_recipeImg').attr('data-src').to_s
|
201
187
|
rescue NoMethodError => e
|
202
188
|
end
|
203
189
|
|
@@ -205,7 +191,5 @@ module RecipeScraper
|
|
205
191
|
raise ArgumentError, "Instantiation cancelled (ulr not from #{G750_HOST})."
|
206
192
|
end
|
207
193
|
end
|
208
|
-
|
209
194
|
end
|
210
|
-
|
211
195
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: recipe_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.
|
4
|
+
version: 2.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- madeindjs
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-12-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -107,7 +107,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
107
107
|
version: '0'
|
108
108
|
requirements: []
|
109
109
|
rubyforge_project:
|
110
|
-
rubygems_version: 2.
|
110
|
+
rubygems_version: 2.7.7
|
111
111
|
signing_key:
|
112
112
|
specification_version: 4
|
113
113
|
summary: A web scrawler to get a Marmiton's, cuisineaz or 750g recipe
|