hangry 0.0.10 → 0.0.11
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/hangry/canonical_url_parser.rb +6 -1
- data/lib/hangry/parser_class_selecter.rb +5 -1
- data/lib/hangry/parsers/non_standard/eating_well_parser.rb +46 -0
- data/lib/hangry/recipe_parser.rb +10 -0
- data/lib/hangry/version.rb +1 -1
- data/spec/fixtures/eatingwell.com.html +1743 -0
- data/spec/real_examples/eating_well_spec.rb +67 -0
- metadata +19 -14
@@ -15,7 +15,12 @@ module Hangry
|
|
15
15
|
end
|
16
16
|
|
17
17
|
# Returns the last two dot-separated labels of the canonical URL's host,
# lower-cased and joined with a dot (e.g. "www.Example.com" => "example.com").
#
# Note that this is a purely textual rule: a host such as
# "www.example.co.uk" yields "co.uk".
#
# @return [String, nil] the two-label domain, or nil when there is no
#   canonical URL, the URL cannot be parsed, it has no host, or the host
#   has fewer than two labels.
def canonical_domain
  return nil unless canonical_url

  full_domain = URI.parse(canonical_url).host
  return nil unless full_domain

  # Host names are case-insensitive, so normalise before comparing/returning.
  # Slicing with [-2..-1] yields nil when fewer than two labels are present.
  base_domain_fragments = full_domain.downcase.split('.')[-2..-1]
  return nil unless base_domain_fragments

  base_domain_fragments.join('.')
rescue URI::InvalidURIError
  # A malformed canonical URL in the page must not abort domain detection;
  # treat it the same as "no canonical domain".
  nil
end
|
20
25
|
|
21
26
|
def canonical_url
|
@@ -4,6 +4,7 @@ require 'hangry/hrecipe_parser'
|
|
4
4
|
require 'hangry/schema_org_recipe_parser'
|
5
5
|
require 'hangry/data_vocabulary_recipe_parser'
|
6
6
|
require 'hangry/parsers/non_standard/all_recipes_parser'
|
7
|
+
require 'hangry/parsers/non_standard/eating_well_parser'
|
7
8
|
|
8
9
|
module Hangry
|
9
10
|
class ParserClassSelecter
|
@@ -13,7 +14,10 @@ module Hangry
|
|
13
14
|
|
14
15
|
def parser_class
|
15
16
|
# Prefer the more specific parsers
|
16
|
-
parser_classes = [
|
17
|
+
parser_classes = [
|
18
|
+
Parsers::NonStandard::AllRecipesParser,
|
19
|
+
Parsers::NonStandard::EatingWellParser
|
20
|
+
]
|
17
21
|
parser_classes += [SchemaOrgRecipeParser, HRecipeParser, DataVocabularyRecipeParser]
|
18
22
|
parser_classes << DefaultRecipeParser
|
19
23
|
parser_classes.detect { |p| p.can_parse?(@html) }
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Hangry
  module Parsers
    module NonStandard
      # Site-specific recipe parser selected for pages whose canonical URL
      # domain is 'eatingwell.com'. It builds on SchemaOrgRecipeParser and
      # overrides how instructions, nutrition and yield are extracted.
      class EatingWellParser < SchemaOrgRecipeParser
        # CSS attribute selector used to locate the recipe root element.
        # NOTE(review): the itemtype is spelled with a lower-case "recipe";
        # presumably this matches the site's markup -- confirm against the fixture.
        def self.root_selector
          '[itemtype="http://schema.org/recipe"]'
        end

        # True when the canonical-URL domain check for 'eatingwell.com'
        # succeeds for the given HTML.
        def self.can_parse?(html)
          canonical_url_matches_domain?(html, 'eatingwell.com')
        end

        # Joins the content of every recipeinstructions node with newlines and
        # cleans the result while asking clean_string to preserve newlines.
        def parse_instructions
          steps = nodes_with_itemprop(:recipeinstructions).map { |node| node.content }
          clean_string(steps.join("\n"), preserve_newlines: true)
        end

        # Fills the recipe's nutrition hash from the corresponding nutrition
        # itemprops and returns that hash.
        def parse_nutrition
          facts = recipe.nutrition
          # [nutrition key, itemprop name] pairs, assigned in this order.
          [
            [:calories,            :calories],
            [:cholesterol,         :cholesterolcontent],
            [:fiber,               :fibercontent],
            [:protein,             :proteincontent],
            [:saturated_fat,       :saturatedfatcontent],
            [:sodium,              :sodiumcontent],
            [:sugar,               :sugarcontent],
            [:total_carbohydrates, :carbohydratecontent],
            [:total_fat,           :fatcontent],
            [:trans_fat,           :transfatcontent],
            [:unsaturated_fat,     :unsaturatedfatcontent]
          ].each do |key, itemprop|
            facts[key] = nutrition_property_value(itemprop)
          end
          facts
        end

        # Cleans the content of the recipeyield node, falling back to a
        # NullObject when value(...) returns nothing truthy.
        def parse_yield
          yield_text = value(node_with_itemprop(:recipeyield).content)
          clean_string(yield_text || NullObject.new)
        end
      end
    end
  end
end
|
46
|
+
|
data/lib/hangry/recipe_parser.rb
CHANGED
@@ -18,6 +18,9 @@ module Hangry
|
|
18
18
|
RECIPE_ATTRIBUTES.each do |attribute|
|
19
19
|
attr_value = value(send("parse_#{attribute}"))
|
20
20
|
recipe.public_send("#{attribute}=", attr_value)
|
21
|
+
next unless recipe.public_send(attribute).present?
|
22
|
+
|
23
|
+
send("clean_#{attribute}", recipe) if respond_to? "clean_#{attribute}"
|
21
24
|
end
|
22
25
|
recipe
|
23
26
|
end
|
@@ -30,6 +33,13 @@ module Hangry
|
|
30
33
|
CanonicalUrlParser.new(html).canonical_domain == domain
|
31
34
|
end
|
32
35
|
|
36
|
+
# Passes every truthy nutrition value of +recipe+ through clean_string and
# stores the result back under the same key. Entries whose value is nil or
# false are left as they are.
def clean_nutrition(recipe)
  recipe.nutrition.each do |nutrient, raw|
    recipe.nutrition[nutrient] = clean_string(raw) if raw
  end
end
|
42
|
+
|
33
43
|
private
|
34
44
|
|
35
45
|
class NullObject
|
data/lib/hangry/version.rb
CHANGED