recipe_scraper 2.2.2 → 2.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: e025c453bfd951c2b67abaf3d27a269af4d6dd00
4
- data.tar.gz: f32ea1856cdc9caea8caed07153d76450dc94ca3
2
+ SHA256:
3
+ metadata.gz: ff6587e185de963070fe2e3b9e8afdd909d4ee989d59535dcf7eb8d55da2e847
4
+ data.tar.gz: a3d694b2b551ac1521586bf60cd21109607a8a48d893a7dce518b1bd90f63659
5
5
  SHA512:
6
- metadata.gz: 6d2ab974e447358d9f2f90476caea4f3cee4236c17a5dceef9e3a470ad89653c121297b725fe646f71a7dc3bbc54750a36fe90dd076952d52b4fbf704158aff4
7
- data.tar.gz: 2aaff08dbb6c1a794d4e0d9e9ff009832802dca967d104f1b786d42e5ed90e0f6840ecf5d7071e743d8434b14989a10b7340d0069fc8923bb18e50926e250280
6
+ metadata.gz: ddcdbf5ea74b7842f04e714fe1eb491a2e98bb9575111e0b06c7bad5eecca196c987a410079076f7348cd446f69108d50dec2367977b2974ff47def52612f18a
7
+ data.tar.gz: d4fc03e5d42d6dcbfbcf19162dad7d24030d31c53011ec94ddb56bfb338971dac63ce14b40d0757915d1ec4cb00f862f7faf26b7527456a58398b5c02e309d13
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # RecipeScraper
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/recipe_scraper.svg)](https://rubygems.org/gems/recipe_scraper)
4
+
3
5
  A web scrawler to get recipe data just by its web url: **RecipeScraper** support:
4
6
 
5
7
  * [marmiton.org](http://www.marmiton.org/)
@@ -1,29 +1,22 @@
1
- # encoding: UTF-8
2
1
  require 'json'
3
2
  require 'open-uri'
4
3
  require 'nokogiri'
5
4
 
6
- require "recipe_scraper/version"
7
-
5
+ require 'recipe_scraper/version'
8
6
 
9
7
  module RecipeScraper
10
-
11
8
  # represent a recipe fetched from an Url
12
9
  class Recipe
10
+ attr_reader :title, :preptime, :cooktime, :ingredients, :steps, :image
13
11
 
14
- attr_reader :title, :preptime, :cooktime , :ingredients, :steps, :image
15
-
16
- MARMITON_HOST = {desktop: 'http://www.marmiton.org/', mobile: 'http://m.marmiton.org/'}
17
- G750_HOST = {desktop: 'http://www.750g.com'}
18
- CUISINEAZ_HOST = {desktop: 'http://www.cuisineaz.com/'}
19
-
20
-
12
+ MARMITON_HOST = { desktop: 'marmiton.org/', mobile: 'm.marmiton.org/' }.freeze
13
+ G750_HOST = { desktop: '750g.com' }.freeze
14
+ CUISINEAZ_HOST = { desktop: 'cuisineaz.com/' }.freeze
21
15
 
22
16
  # Instanciate a Recipe object with data crawled from an url
23
17
  #
24
18
  # @param url [String] representing an url from Marmiton or 750g website
25
- def initialize url
26
-
19
+ def initialize(url)
27
20
  if marmiton_host? url
28
21
  fetch_from_marmiton url
29
22
 
@@ -34,16 +27,15 @@ module RecipeScraper
34
27
  fetch_from_cuisineaz url
35
28
 
36
29
  else
37
- raise ArgumentError, "Instantiation cancelled (Host not supported)."
30
+ raise ArgumentError, 'Instantiation cancelled (Host not supported).'
38
31
  end
39
32
  end
40
33
 
41
-
42
34
  # export object properties to hash
43
35
  #
44
36
  # @return [Hash] as object's properties
45
37
  def to_hash
46
- attrs = Hash.new
38
+ attrs = {}
47
39
  instance_variables.each do |var|
48
40
  str = var.to_s.gsub /^@/, ''
49
41
  attrs[str.to_sym] = instance_variable_get(var)
@@ -51,69 +43,61 @@ module RecipeScraper
51
43
  attrs
52
44
  end
53
45
 
54
-
55
46
  # convert object properties to json
56
47
  #
57
48
  # @return [String] data formated in JSON
58
49
  def to_json
59
- return self.to_hash.to_json
50
+ to_hash.to_json
60
51
  end
61
52
 
62
-
63
53
  private
64
54
 
65
-
66
55
  # remove `\r\n` & unwanted espaces
67
56
  #
68
57
  # @param text [String] a text to sanitize
69
58
  # @return [String] as text corrected formated
70
- def sanitize text
71
- [' ', '\r\n', "\r\n", "\n", "\r", "\t", / ^/, / $+/, /^  /, /^ /].each { |text_to_remove|
72
- text.gsub!(text_to_remove,'')
73
- }
74
- return text
59
+ def sanitize(text)
60
+ [' ', '\r\n', "\r\n", "\n", "\r", "\t", / ^/, / $+/, /^  /, /^ /, /Etape [0-9]/, 'Icone horloge', 'Icone casserole '].each do |text_to_remove|
61
+ text.gsub!(text_to_remove, '')
62
+ end
63
+ text
75
64
  end
76
65
 
77
-
78
66
  # test if url is from a valid marmiton.org host
79
67
  #
80
68
  # @param url [String] representing an url
81
69
  # @return [Boolean] as true if coresponding to a valid url
82
- def marmiton_host? url
83
- return url.include?(MARMITON_HOST[:desktop]) || url.include?(MARMITON_HOST[:mobile])
70
+ def marmiton_host?(url)
71
+ url.include?(MARMITON_HOST[:desktop]) || url.include?(MARMITON_HOST[:mobile])
84
72
  end
85
73
 
86
-
87
74
  # test if url is from a valid 750g.com host
88
75
  #
89
76
  # @param url [String] representing an url
90
77
  # @return [Boolean] as true if coresponding to a valid url
91
- def g750_host? url
92
- return url.include? G750_HOST[:desktop]
78
+ def g750_host?(url)
79
+ url.include? G750_HOST[:desktop]
93
80
  end
94
81
 
95
-
96
82
  # test if url is from a valid cuisineaz.com host
97
83
  #
98
84
  # @param url [String] representing an url
99
85
  # @return [Boolean] as true if coresponding to a valid url
100
- def cuisineaz_host? url
101
- return url.include? CUISINEAZ_HOST[:desktop]
86
+ def cuisineaz_host?(url)
87
+ url.include? CUISINEAZ_HOST[:desktop]
102
88
  end
103
89
 
104
-
105
90
  # fill object properties from a Marmiton url
106
91
  #
107
92
  # @param url [String] representing an url
108
- def fetch_from_marmiton url
93
+ def fetch_from_marmiton(url)
109
94
  if marmiton_host? url
110
95
 
111
96
  url.gsub! MARMITON_HOST[:mobile], MARMITON_HOST[:desktop]
112
97
 
113
- page = Nokogiri::HTML(open(url).read)
98
+ page = Nokogiri::HTML(open(url).read)
114
99
  @title = page.css('h1').text
115
100
 
116
-
117
101
  # get times
118
102
  @preptime = page.css('div.recipe-infos__timmings__preparation > span.recipe-infos__timmings__value').text.to_i
119
103
  @cooktime = page.css('div.recipe-infos__timmings__cooking > span.recipe-infos__timmings__value').text.to_i
@@ -131,20 +115,23 @@ module RecipeScraper
131
115
  end
132
116
 
133
117
  # get image
134
- @image = page.css('#af-diapo-desktop-0_img').attr('src').to_s rescue NoMethodError
118
+ @image = begin
119
+ page.css('#af-diapo-desktop-0_img').attr('src').to_s
120
+ rescue StandardError
121
+ NoMethodError
122
+ end
135
123
 
136
124
  else
137
125
  raise ArgumentError, "Instantiation cancelled (ulr not from #{MARMITON_HOST})."
138
126
  end
139
127
  end
140
128
 
141
-
142
129
  # fill object properties from a 750g url
143
130
  #
144
131
  # @param url [String] representing an url
145
- def fetch_from_g750 url
132
+ def fetch_from_g750(url)
146
133
  if g750_host? url
147
- page = Nokogiri::HTML(open(url).read)
134
+ page = Nokogiri::HTML(open(url).read)
148
135
  @title = page.css('h1.c-article__title').text
149
136
 
150
137
  # get times
@@ -152,14 +139,14 @@ module RecipeScraper
152
139
  @cooktime = sanitize(page.css('ul.c-recipe-summary > li.c-recipe-summary__rating[title="Temps de cuisson"]').text).to_i
153
140
 
154
141
  @steps = []
155
- css_step = "div[itemprop=recipeInstructions] p"
142
+ css_step = 'div[itemprop=recipeInstructions] p'
156
143
  @steps = page.css(css_step).text.split /[( ),(<br>)]/
157
144
 
158
145
  @ingredients = []
159
- css_ingredient = "div.c-recipe-ingredients ul.c-recipe-ingredients__list li.ingredient"
160
- page.css(css_ingredient).each { |ing_node|
146
+ css_ingredient = 'ul.c-recipe-ingredients__list li'
147
+ page.css(css_ingredient).each do |ing_node|
161
148
  @ingredients << sanitize(ing_node.text)
162
- }
149
+ end
163
150
 
164
151
  # get image
165
152
  css_image = 'div.swiper-wrapper img.photo'
@@ -173,31 +160,30 @@ module RecipeScraper
173
160
  end
174
161
  end
175
162
 
176
-
177
163
  # fill object properties from a 750g url
178
164
  #
179
165
  # @param url [String] representing an url
180
- def fetch_from_cuisineaz url
166
+ def fetch_from_cuisineaz(url)
181
167
  if cuisineaz_host? url
182
- page = Nokogiri::HTML(open(url).read)
168
+ page = Nokogiri::HTML(open(url).read)
183
169
  @title = page.css('h1').text
184
170
 
185
171
  # get times
186
- @preptime = page.css('#ctl00_ContentPlaceHolder_LblRecetteTempsPrepa').text.to_i
187
- @cooktime = page.css('#ctl00_ContentPlaceHolder_LblRecetteTempsCuisson').text.to_i
172
+ @preptime = page.css('#ContentPlaceHolder_LblRecetteTempsPrepa').text.to_i
173
+ @cooktime = page.css('#ContentPlaceHolder_LblRecetteTempsCuisson').text.to_i
188
174
 
189
175
  @steps = []
190
- page.css("#preparation p").each { |step_node|
176
+ page.css('#preparation p').each do |step_node|
191
177
  @steps << sanitize(step_node.text)
192
- }
178
+ end
193
179
 
194
180
  @ingredients = []
195
- page.css("section.recipe_ingredients li").each { |ing_node|
181
+ page.css('section.recipe_ingredients li').each do |ing_node|
196
182
  @ingredients << sanitize(ing_node.text)
197
- }
183
+ end
198
184
 
199
185
  begin
200
- @image = page.css('#ctl00_ContentPlaceHolder_recipeImgLarge').attr('src').to_s
186
+ @image = page.css('#ContentPlaceHolder_recipeImg').attr('data-src').to_s
201
187
  rescue NoMethodError => e
202
188
  end
203
189
 
@@ -205,7 +191,5 @@ module RecipeScraper
205
191
  raise ArgumentError, "Instantiation cancelled (ulr not from #{G750_HOST})."
206
192
  end
207
193
  end
208
-
209
194
  end
210
-
211
195
  end
@@ -1,3 +1,3 @@
1
1
  module RecipeScraper
2
- VERSION = "2.2.2"
2
+ VERSION = '2.2.3'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: recipe_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.2
4
+ version: 2.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - madeindjs
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-07-09 00:00:00.000000000 Z
11
+ date: 2018-12-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -107,7 +107,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
107
107
  version: '0'
108
108
  requirements: []
109
109
  rubyforge_project:
110
- rubygems_version: 2.6.11
110
+ rubygems_version: 2.7.7
111
111
  signing_key:
112
112
  specification_version: 4
113
113
  summary: A web scrawler to get a Marmiton's, cuisineaz or 750g recipe