hangry 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/hangry/canonical_url_parser.rb +6 -1
- data/lib/hangry/parser_class_selecter.rb +5 -1
- data/lib/hangry/parsers/non_standard/eating_well_parser.rb +46 -0
- data/lib/hangry/recipe_parser.rb +10 -0
- data/lib/hangry/version.rb +1 -1
- data/spec/fixtures/eatingwell.com.html +1743 -0
- data/spec/real_examples/eating_well_spec.rb +67 -0
- metadata +19 -14
@@ -15,7 +15,12 @@ module Hangry
|
|
15
15
|
end
|
16
16
|
|
17
17
|
# Returns the last two dot-separated labels of the canonical URL's host,
# e.g. "www.eatingwell.com" => "eatingwell.com".
#
# Returns nil when:
# * there is no canonical URL,
# * the canonical URL is not a parseable URI,
# * the URL has no host (for instance a relative URL), or
# * the host has fewer than two labels (for instance "localhost").
#
# Note that only the final two labels are kept, so a host such as
# "www.example.co.uk" yields "co.uk".
#
# @return [String, nil]
def canonical_domain
  url = canonical_url
  return nil unless url

  host = URI.parse(url).host
  return nil unless host

  labels = host.split('.')
  return nil if labels.size < 2

  labels.last(2).join('.')
rescue URI::InvalidURIError
  # The canonical link comes from scraped markup and may be malformed; treat
  # it like any other unusable URL instead of aborting parser selection.
  nil
end
|
20
25
|
|
21
26
|
def canonical_url
|
@@ -4,6 +4,7 @@ require 'hangry/hrecipe_parser'
|
|
4
4
|
require 'hangry/schema_org_recipe_parser'
|
5
5
|
require 'hangry/data_vocabulary_recipe_parser'
|
6
6
|
require 'hangry/parsers/non_standard/all_recipes_parser'
|
7
|
+
require 'hangry/parsers/non_standard/eating_well_parser'
|
7
8
|
|
8
9
|
module Hangry
|
9
10
|
class ParserClassSelecter
|
@@ -13,7 +14,10 @@ module Hangry
|
|
13
14
|
|
14
15
|
def parser_class
|
15
16
|
# Prefer the more specific parsers
|
16
|
-
parser_classes = [
|
17
|
+
parser_classes = [
|
18
|
+
Parsers::NonStandard::AllRecipesParser,
|
19
|
+
Parsers::NonStandard::EatingWellParser
|
20
|
+
]
|
17
21
|
parser_classes += [SchemaOrgRecipeParser, HRecipeParser, DataVocabularyRecipeParser]
|
18
22
|
parser_classes << DefaultRecipeParser
|
19
23
|
parser_classes.detect { |p| p.can_parse?(@html) }
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Hangry
  module Parsers
    module NonStandard
      # Recipe parser for pages whose canonical URL is on eatingwell.com.
      #
      # Builds on SchemaOrgRecipeParser, overriding the root selector and the
      # way instructions, nutrition and yield are read from the markup.
      class EatingWellParser < SchemaOrgRecipeParser
        # Maps each recipe nutrition key to the itemprop it is read from.
        # Order matches the order in which the values are assigned.
        NUTRITION_ITEMPROPS = {
          calories: :calories,
          cholesterol: :cholesterolcontent,
          fiber: :fibercontent,
          protein: :proteincontent,
          saturated_fat: :saturatedfatcontent,
          sodium: :sodiumcontent,
          sugar: :sugarcontent,
          total_carbohydrates: :carbohydratecontent,
          total_fat: :fatcontent,
          trans_fat: :transfatcontent,
          unsaturated_fat: :unsaturatedfatcontent
        }.freeze
        private_constant :NUTRITION_ITEMPROPS

        class << self
          # CSS selector for the recipe root node. The itemtype is matched
          # with a lowercase "recipe" (presumably how the site spells it;
          # confirm against the fixture).
          def root_selector
            '[itemtype="http://schema.org/recipe"]'
          end

          # True when the document's canonical URL is on eatingwell.com.
          def can_parse?(html)
            canonical_url_matches_domain?(html, 'eatingwell.com')
          end
        end

        # Joins the content of every recipeinstructions node with newlines
        # and cleans the result while keeping those newlines.
        def parse_instructions
          steps = nodes_with_itemprop(:recipeinstructions).map { |node| node.content }
          clean_string(steps.join("\n"), preserve_newlines: true)
        end

        # Fills the recipe's nutrition hash from the nutrition itemprops and
        # returns that hash.
        def parse_nutrition
          facts = recipe.nutrition
          NUTRITION_ITEMPROPS.each do |field, itemprop|
            facts[field] = nutrition_property_value(itemprop)
          end
          facts
        end

        # Cleans the content of the recipeyield node, falling back to a
        # NullObject when no value is available.
        def parse_yield
          raw_yield = value(node_with_itemprop(:recipeyield).content)
          clean_string(raw_yield || NullObject.new)
        end
      end
    end
  end
end
|
46
|
+
|
data/lib/hangry/recipe_parser.rb
CHANGED
@@ -18,6 +18,9 @@ module Hangry
|
|
18
18
|
RECIPE_ATTRIBUTES.each do |attribute|
|
19
19
|
attr_value = value(send("parse_#{attribute}"))
|
20
20
|
recipe.public_send("#{attribute}=", attr_value)
|
21
|
+
next unless recipe.public_send(attribute).present?
|
22
|
+
|
23
|
+
send("clean_#{attribute}", recipe) if respond_to? "clean_#{attribute}"
|
21
24
|
end
|
22
25
|
recipe
|
23
26
|
end
|
@@ -30,6 +33,13 @@ module Hangry
|
|
30
33
|
CanonicalUrlParser.new(html).canonical_domain == domain
|
31
34
|
end
|
32
35
|
|
36
|
+
# Runs every truthy nutrition value on the recipe through clean_string,
# writing the result back under the same key. Nil/false values are left
# untouched. Returns the nutrition collection.
def clean_nutrition(recipe)
  facts = recipe.nutrition
  facts.each do |name, raw|
    facts[name] = clean_string(raw) if raw
  end
end
|
42
|
+
|
33
43
|
private
|
34
44
|
|
35
45
|
class NullObject
|
data/lib/hangry/version.rb
CHANGED