hangry 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +6 -0
- data/README.md +14 -4
- data/Rakefile +6 -0
- data/hangry.gemspec +1 -0
- data/lib/hangry/data_vocabulary_recipe_parser.rb +38 -0
- data/lib/hangry/hrecipe_parser.rb +21 -5
- data/lib/hangry/recipe_parser.rb +12 -1
- data/lib/hangry/schema_org_recipe_parser.rb +31 -3
- data/lib/hangry/version.rb +1 -1
- data/lib/hangry.rb +17 -1
- data/spec/fixtures/bigoven.html +1566 -0
- data/spec/fixtures/epicurious.html +1779 -3433
- data/spec/fixtures/myrecipes.com.html +1104 -0
- data/spec/real_examples/all_recipes_spec.rb +15 -0
- data/spec/real_examples/big_oven_spec.rb +45 -0
- data/spec/real_examples/epicurious_spec.rb +1 -1
- data/spec/real_examples/myrecipes_com_spec.rb +43 -0
- metadata +33 -12
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -4,10 +4,11 @@ Parses microformatted recipe HTML into a plain-old-ruby Recipe object.
|
|
4
4
|
|
5
5
|
Currently supported microformats:
|
6
6
|
- http://schema.org/Recipe
|
7
|
-
|
8
|
-
Microformats to support in future versions:
|
7
|
+
- http://data-vocabulary.org/Recipe
|
9
8
|
- http://microformats.org/wiki/hrecipe
|
10
9
|
|
10
|
+
## Build Status [![Build Status](https://travis-ci.org/iancanderson/hangry.png?branch=master)](http://travis-ci.org/iancanderson/hangry)
|
11
|
+
|
11
12
|
## Installation
|
12
13
|
|
13
14
|
Add this line to your application's Gemfile:
|
@@ -29,9 +30,18 @@ require 'open-uri'
|
|
29
30
|
recipe_url = "http://www.foodnetwork.com/recipes/rachael-ray/spinach-and-mushroom-stuffed-chicken-breasts-recipe/index.html"
|
30
31
|
recipe_html_string = open(recipe_url).read
|
31
32
|
|
33
|
+
require 'hangry'
|
32
34
|
recipe = Hangry.parse(recipe_html_string)
|
33
|
-
recipe.
|
34
|
-
recipe.
|
35
|
+
recipe.author # "Rachael Ray"
|
36
|
+
recipe.cook_time # 20
|
37
|
+
recipe.description # nil
|
38
|
+
recipe.ingredients # ["4 boneless, skinless chicken breasts, 6 ounces", "Large plastic food storage bags or waxed paper", "1 package, 10 ounces, frozen chopped spinach", "2 tablespoons butter", "12 small mushroom caps, crimini or button", "2 cloves garlic, cracked", "1 small shallot, quartered", "Salt and freshly ground black pepper", "1 cup part skim ricotta cheese", "1/2 cup grated Parmigiano or Romano, a couple of handfuls", "1/2 teaspoon fresh grated or ground nutmeg", "Toothpicks", "2 tablespoons extra-virgin olive oil", "2 tablespoons butter", "2 tablespoons flour", "1/2 cup white wine", "1 cup chicken broth"]
|
39
|
+
recipe.instructions # "Place breasts in the center of a plastic food storage..."
|
40
|
+
recipe.name # "Spinach and Mushroom Stuffed Chicken Breasts"
|
41
|
+
recipe.prep_time # 15
|
42
|
+
recipe.published_date # #<Date: 2013-02-06 >
|
43
|
+
recipe.total_time # 35
|
44
|
+
recipe.yield # "4 servings"
|
35
45
|
# etc..
|
36
46
|
```
|
37
47
|
|
data/Rakefile
CHANGED
data/hangry.gemspec
CHANGED
@@ -11,6 +11,7 @@ Gem::Specification.new do |gem|
|
|
11
11
|
gem.files = `git ls-files`.split($\)
|
12
12
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
13
13
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
14
|
+
gem.add_development_dependency('rake')
|
14
15
|
gem.add_development_dependency('rspec')
|
15
16
|
gem.add_development_dependency('pry')
|
16
17
|
gem.add_dependency('activesupport', '~> 3.0')
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Hangry
  # Recipe parser for pages marked up with the data-vocabulary.org Recipe
  # microdata vocabulary. It reuses SchemaOrgRecipeParser and overrides only
  # the selectors and the itemprop names that differ between the two
  # vocabularies (ingredient, summary, instructions, published, yield).
  class DataVocabularyRecipeParser < SchemaOrgRecipeParser

    # CSS selector locating the root element of a data-vocabulary.org recipe.
    def self.root_selector
      '[itemtype="http://data-vocabulary.org/Recipe"]'
    end

    # CSS selector locating the nutrition element inside the recipe root.
    def self.nutrition_selector
      '[itemtype="http://data-vocabulary.org/NutritionInformation"]'
    end

    # itemprop name used for a single ingredient in this vocabulary.
    def self.ingredient_itemprop
      :ingredient
    end

    private

    # Whitespace-normalised text of the `summary` itemprop.
    def parse_description
      clean_string node_with_itemprop(:summary).content
    end

    # Whitespace-normalised text of the `instructions` itemprop.
    def parse_instructions
      clean_string node_with_itemprop(:instructions).content
    end

    # Parses the `published` itemprop into a Date.
    #
    # Returns nil when the content is blank or when it cannot be parsed as a
    # date; Date.parse raises ArgumentError (Date::Error on newer Rubies) for
    # such input, and one malformed date should not abort the whole parse.
    def parse_published_date
      content = node_with_itemprop(:published).content
      return nil if content.blank?

      begin
        Date.parse(content)
      rescue ArgumentError
        nil
      end
    end

    # Whitespace-normalised text of the `yield` itemprop.
    def parse_yield
      clean_string node_with_itemprop(:yield).content
    end

  end

end
|
38
|
+
|
@@ -1,8 +1,12 @@
|
|
1
1
|
module Hangry
|
2
2
|
class HRecipeParser < RecipeParser
|
3
3
|
|
4
|
+
def self.nutrition_selector
|
5
|
+
'.nutrition'
|
6
|
+
end
|
7
|
+
|
4
8
|
def self.root_selector
|
5
|
-
'.hrecipe'
|
9
|
+
'.hrecipe, .hRecipe'
|
6
10
|
end
|
7
11
|
|
8
12
|
private
|
@@ -20,7 +24,7 @@ module Hangry
|
|
20
24
|
end
|
21
25
|
|
22
26
|
# Cook time in minutes, read from the `title` attribute of the first
# `.value-title` element under the `cookTime` node and converted by
# parse_duration.
#
# Returns nil when no `.value-title` element is present, instead of
# raising NoMethodError on nil.
def parse_cook_time
  value_title = node_with_class(:cookTime).css('.value-title').first
  return nil unless value_title

  parse_duration value_title['title']
end
|
25
29
|
|
26
30
|
def parse_description
|
@@ -28,7 +32,14 @@ module Hangry
|
|
28
32
|
end
|
29
33
|
|
30
34
|
def parse_ingredients
|
31
|
-
nodes_with_class(:ingredient).map
|
35
|
+
nodes_with_class(:ingredient).map { |ingredient_node|
|
36
|
+
# Instead of calling content on the node itself,
|
37
|
+
# join together the content of the nodes' children.
|
38
|
+
# This is to support BigOven's janky usage of spans with margin-lefts...
|
39
|
+
ingredient_node.children.map { |c| c.content }.join(' ')
|
40
|
+
}.map { |ingredient|
|
41
|
+
clean_string ingredient
|
42
|
+
}
|
32
43
|
end
|
33
44
|
|
34
45
|
def parse_instructions
|
@@ -39,16 +50,21 @@ module Hangry
|
|
39
50
|
clean_string node_with_class(:fn).content
|
40
51
|
end
|
41
52
|
|
42
|
-
def
|
53
|
+
def parse_nutrition
|
43
54
|
#TODO
|
44
55
|
end
|
45
56
|
|
57
|
+
# Prep time in minutes, read from the `title` attribute of the first
# `.value-title` element under the `prepTime` node and converted by
# parse_duration.
#
# Returns nil when no `.value-title` element is present, instead of
# raising NoMethodError on nil.
def parse_prep_time
  value_title = node_with_class(:prepTime).css('.value-title').first
  return nil unless value_title

  parse_duration value_title['title']
end
|
60
|
+
|
46
61
|
def parse_published_date
|
47
62
|
#TODO
|
48
63
|
end
|
49
64
|
|
50
65
|
# Total time in minutes. Looks for a `duration` node first and falls back
# to `totalTime`, then reads the `title` attribute of the first
# `.value-title` element beneath it and converts it with parse_duration.
#
# Returns nil when neither node is found or when the node has no
# `.value-title` element, instead of raising NoMethodError on nil.
# NOTE(review): `value` is defined elsewhere; this assumes it returns a
# falsy result for a missing node - confirm.
def parse_total_time
  node = value(node_with_class(:duration)) || value(node_with_class(:totalTime))
  return nil unless node

  value_title = node.css('.value-title').first
  return nil unless value_title

  parse_duration value_title['title']
end
|
53
69
|
|
54
70
|
def parse_yield
|
data/lib/hangry/recipe_parser.rb
CHANGED
@@ -1,13 +1,15 @@
|
|
1
1
|
module Hangry
|
2
2
|
class RecipeParser
|
3
3
|
attr_reader :recipe_html
|
4
|
-
attr_accessor :recipe_ast, :recipe
|
4
|
+
attr_accessor :nutrition_ast, :recipe_ast, :recipe
|
5
5
|
|
6
6
|
def initialize(recipe_html)
|
7
7
|
@recipe_html = recipe_html
|
8
8
|
@recipe = Recipe.new
|
9
|
+
initialize_nutrition
|
9
10
|
doc = Nokogiri::HTML(recipe_html)
|
10
11
|
self.recipe_ast = doc.css(self.class.root_selector).first
|
12
|
+
self.nutrition_ast = recipe_ast && recipe_ast.css(self.class.nutrition_selector)
|
11
13
|
end
|
12
14
|
|
13
15
|
def parse
|
@@ -49,9 +51,18 @@ module Hangry
|
|
49
51
|
string.strip.gsub(/\s+/, ' ')
|
50
52
|
end
|
51
53
|
|
54
|
+
# Resets recipe.nutrition to a fresh hash holding one nil entry per name
# in NUTRITION_ATTRIBUTES, so every nutrition key exists before parsing.
def initialize_nutrition
  recipe.nutrition = {}
  NUTRITION_ATTRIBUTES.each { |name| recipe.nutrition.store(name, nil) }
end
|
60
|
+
|
52
61
|
# Converts an ISO 8601 duration string into a number of minutes.
#
# Only the hours and minutes components are read (hours * 60 + minutes).
# Returns nil when ISO8601 raises UnknownPattern for the string.
def parse_duration(iso8601_string)
  begin
    parsed = ISO8601::Duration.new(iso8601_string)
    (parsed.hours.to_i * 60) + parsed.minutes.to_i
  rescue ISO8601::Errors::UnknownPattern
    nil
  end
end
|
56
67
|
|
57
68
|
end
|
@@ -5,6 +5,14 @@ module Hangry
|
|
5
5
|
'[itemtype="http://schema.org/Recipe"]'
|
6
6
|
end
|
7
7
|
|
8
|
+
def self.nutrition_selector
|
9
|
+
'[itemtype="http://schema.org/NutritionInformation"]'
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.ingredient_itemprop
|
13
|
+
:ingredients
|
14
|
+
end
|
15
|
+
|
8
16
|
private
|
9
17
|
|
10
18
|
def node_with_itemprop(itemprop)
|
@@ -13,6 +21,13 @@ module Hangry
|
|
13
21
|
def nodes_with_itemprop(itemprop)
|
14
22
|
recipe_ast ? recipe_ast.css("[itemprop = \"#{itemprop}\"]") : NullObject.new
|
15
23
|
end
|
24
|
+
# First node under nutrition_ast whose itemprop attribute equals
# +itemprop+. Falls back to a NullObject when nutrition_ast is not set or
# when nothing matches.
def nutrition_node_with_itemprop(itemprop)
  if nutrition_ast
    selector = %([itemprop = "#{itemprop}"])
    match = nutrition_ast.css(selector).first
    match || NullObject.new
  else
    NullObject.new
  end
end
|
28
|
+
# Looks up the nutrition node for +itemprop+ and passes its text content
# through the `value` helper.
def nutrition_property_value(itemprop)
  node = nutrition_node_with_itemprop(itemprop)
  raw_text = node.content
  value(raw_text)
end
|
16
31
|
def parse_author
|
17
32
|
author_node = node_with_itemprop(:author)
|
18
33
|
author = if author_node['itemtype'] == "http://schema.org/Person"
|
@@ -29,7 +44,7 @@ module Hangry
|
|
29
44
|
clean_string node_with_itemprop(:description).content
|
30
45
|
end
|
31
46
|
def parse_ingredients
|
32
|
-
nodes_with_itemprop(
|
47
|
+
nodes_with_itemprop(self.class.ingredient_itemprop).map(&:content).map do |ingredient|
|
33
48
|
# remove newlines and excess whitespace from ingredients
|
34
49
|
clean_string ingredient
|
35
50
|
end
|
@@ -40,6 +55,21 @@ module Hangry
|
|
40
55
|
def parse_name
|
41
56
|
clean_string node_with_itemprop(:name).content
|
42
57
|
end
|
58
|
+
# Fills recipe.nutrition from the nutrition itemprops and returns that
# hash. Each pair below is [key in recipe.nutrition, itemprop name read
# from the markup]; entries are assigned in the order listed.
def parse_nutrition
  itemprops_by_key = [
    [:calories,            :calories],
    [:cholesterol,         :cholesterolContent],
    [:fiber,               :fiberContent],
    [:protein,             :proteinContent],
    [:saturated_fat,       :saturatedFatContent],
    [:sodium,              :sodiumContent],
    [:sugar,               :sugarContent],
    [:total_carbohydrates, :carbohydrateContent],
    [:total_fat,           :fatContent],
    [:trans_fat,           :transFatContent],
    [:unsaturated_fat,     :unsaturatedFatContent]
  ]

  nutrition = recipe.nutrition
  itemprops_by_key.each do |key, itemprop|
    nutrition[key] = nutrition_property_value(itemprop)
  end
  nutrition
end
|
43
73
|
def parse_prep_time
|
44
74
|
parse_time(:prepTime)
|
45
75
|
end
|
@@ -55,8 +85,6 @@ module Hangry
|
|
55
85
|
node['datetime'] # allrecipes.com
|
56
86
|
end
|
57
87
|
parse_duration(iso8601_string)
|
58
|
-
rescue ISO8601::Errors::UnknownPattern
|
59
|
-
nil
|
60
88
|
end
|
61
89
|
def parse_total_time
|
62
90
|
parse_time(:totalTime)
|
data/lib/hangry/version.rb
CHANGED
data/lib/hangry.rb
CHANGED
@@ -2,6 +2,7 @@ require "hangry/version"
|
|
2
2
|
require 'hangry/recipe_parser'
|
3
3
|
require 'hangry/hrecipe_parser'
|
4
4
|
require 'hangry/schema_org_recipe_parser'
|
5
|
+
require 'hangry/data_vocabulary_recipe_parser'
|
5
6
|
require 'active_support/core_ext/object/blank'
|
6
7
|
require 'date'
|
7
8
|
require 'iso8601'
|
@@ -15,16 +16,31 @@ module Hangry
|
|
15
16
|
:ingredients,
|
16
17
|
:instructions,
|
17
18
|
:name,
|
19
|
+
:nutrition,
|
18
20
|
:prep_time,
|
19
21
|
:published_date,
|
20
22
|
:total_time,
|
21
23
|
:yield
|
22
24
|
]
|
23
25
|
|
26
|
+
# Names of the entries kept in a recipe's nutrition hash. Frozen so the
# shared list cannot be modified in place at runtime.
NUTRITION_ATTRIBUTES = [
  :calories,
  :cholesterol,
  :fiber,
  :protein,
  :saturated_fat,
  :sodium,
  :sugar,
  :total_carbohydrates,
  :total_fat,
  :trans_fat,
  :unsaturated_fat
].freeze
|
39
|
+
|
24
40
|
Recipe = Struct.new(*RECIPE_ATTRIBUTES)
|
25
41
|
|
26
42
|
def self.parse(html)
|
27
|
-
parser_classes = [SchemaOrgRecipeParser, HRecipeParser]
|
43
|
+
parser_classes = [SchemaOrgRecipeParser, HRecipeParser, DataVocabularyRecipeParser]
|
28
44
|
parsers = parser_classes.map { |klass| klass.new(html) }
|
29
45
|
parser = parsers.detect { |p| p.can_parse? }
|
30
46
|
|