hangry 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/.travis.yml +1 -1
- data/README.md +3 -3
- data/lib/hangry/canonical_url_parser.rb +31 -0
- data/lib/hangry/default_recipe_parser.rb +14 -0
- data/lib/hangry/parser_class_selecter.rb +23 -0
- data/lib/hangry/parsers/non_standard/all_recipes_parser.rb +18 -0
- data/lib/hangry/recipe_parser.rb +15 -5
- data/lib/hangry/version.rb +1 -1
- data/lib/hangry.rb +4 -9
- data/spec/real_examples/all_recipes_spec.rb +18 -2
- data/spec/real_examples/big_oven_spec.rb +1 -0
- data/spec/real_examples/epicurious_spec.rb +1 -0
- data/spec/real_examples/food_network_spec.rb +1 -0
- data/spec/real_examples/myrecipes_com_spec.rb +1 -0
- metadata +18 -14
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -7,7 +7,9 @@ Currently supported microformats:
|
|
7
7
|
- http://data-vocabulary.org/Recipe
|
8
8
|
- http://microformats.org/wiki/hrecipe
|
9
9
|
|
10
|
-
##
|
10
|
+
## Pieces of Flair
|
11
|
+
- [![Build Status](https://travis-ci.org/iancanderson/hangry.png?branch=master)](http://travis-ci.org/iancanderson/hangry)
|
12
|
+
- [![Code Climate](https://codeclimate.com/github/iancanderson/hangry.png)](https://codeclimate.com/github/iancanderson/hangry)
|
11
13
|
|
12
14
|
## Installation
|
13
15
|
|
@@ -45,8 +47,6 @@ recipe.yield # "4 servings"
|
|
45
47
|
# etc..
|
46
48
|
```
|
47
49
|
|
48
|
-
Here's a live demo: http://hangry.herokuapp.com/
|
49
|
-
|
50
50
|
## Contributing
|
51
51
|
|
52
52
|
1. Fork it
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Hangry
|
2
|
+
class CanonicalUrlParser
|
3
|
+
|
4
|
+
attr_accessor :nokogiri_doc
|
5
|
+
|
6
|
+
def initialize(html_or_nokogiri_doc)
|
7
|
+
self.nokogiri_doc = case html_or_nokogiri_doc
|
8
|
+
when String
|
9
|
+
Nokogiri::HTML(html_or_nokogiri_doc)
|
10
|
+
when Nokogiri::HTML::Document
|
11
|
+
html_or_nokogiri_doc
|
12
|
+
else
|
13
|
+
raise ArgumentError
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def canonical_domain
|
18
|
+
canonical_url ? URI.parse(canonical_url).host : nil
|
19
|
+
end
|
20
|
+
|
21
|
+
def canonical_url
|
22
|
+
node = nokogiri_doc.css('link[rel="canonical"]').first
|
23
|
+
return node['href'] if node
|
24
|
+
|
25
|
+
# Fall back to open graph URL (see food network example)
|
26
|
+
node = nokogiri_doc.css('meta[property="og:url"]').first
|
27
|
+
node ? node['content'] : nil
|
28
|
+
end
|
29
|
+
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'hangry/recipe_parser'
|
2
|
+
require 'hangry/default_recipe_parser'
|
3
|
+
require 'hangry/hrecipe_parser'
|
4
|
+
require 'hangry/schema_org_recipe_parser'
|
5
|
+
require 'hangry/data_vocabulary_recipe_parser'
|
6
|
+
require 'hangry/parsers/non_standard/all_recipes_parser'
|
7
|
+
|
8
|
+
module Hangry
|
9
|
+
class ParserClassSelecter
|
10
|
+
def initialize(html)
|
11
|
+
@html = html
|
12
|
+
end
|
13
|
+
|
14
|
+
def parser_class
|
15
|
+
# Prefer the more specific parsers
|
16
|
+
parser_classes = [Parsers::NonStandard::AllRecipesParser]
|
17
|
+
parser_classes += [SchemaOrgRecipeParser, HRecipeParser, DataVocabularyRecipeParser]
|
18
|
+
parser_classes << DefaultRecipeParser
|
19
|
+
parser_classes.detect { |p| p.can_parse?(@html) }
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Hangry
|
2
|
+
module Parsers
|
3
|
+
module NonStandard
|
4
|
+
class AllRecipesParser < SchemaOrgRecipeParser
|
5
|
+
|
6
|
+
def self.can_parse?(html)
|
7
|
+
canonical_url_matches_domain?(html, 'allrecipes.com')
|
8
|
+
end
|
9
|
+
|
10
|
+
def parse_instructions
|
11
|
+
content = recipe_ast.css('.directions ol').first.content
|
12
|
+
clean_string content, preserve_newlines: true
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/hangry/recipe_parser.rb
CHANGED
@@ -1,14 +1,16 @@
|
|
1
|
+
require 'hangry/canonical_url_parser'
|
2
|
+
|
1
3
|
module Hangry
|
2
4
|
class RecipeParser
|
3
5
|
attr_reader :recipe_html
|
4
|
-
attr_accessor :nutrition_ast, :recipe_ast, :recipe
|
6
|
+
attr_accessor :nokogiri_doc, :nutrition_ast, :recipe_ast, :recipe
|
5
7
|
|
6
8
|
def initialize(recipe_html)
|
7
9
|
@recipe_html = recipe_html
|
8
10
|
@recipe = Recipe.new
|
9
11
|
initialize_nutrition
|
10
|
-
|
11
|
-
self.recipe_ast = Nokogiri::HTML(recipe_html).css(self.class.root_selector).first
|
12
|
+
self.nokogiri_doc = Nokogiri::HTML(recipe_html)
|
13
|
+
self.recipe_ast = nokogiri_doc.css(self.class.root_selector).first
|
12
14
|
self.nutrition_ast = recipe_ast && recipe_ast.css(self.class.nutrition_selector)
|
13
15
|
end
|
14
16
|
|
@@ -20,8 +22,12 @@ module Hangry
|
|
20
22
|
recipe
|
21
23
|
end
|
22
24
|
|
23
|
-
def can_parse?
|
24
|
-
recipe_ast
|
25
|
+
def self.can_parse?(html)
|
26
|
+
new(html).recipe_ast
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.canonical_url_matches_domain?(html, domain)
|
30
|
+
CanonicalUrlParser.new(html).canonical_domain == domain
|
25
31
|
end
|
26
32
|
|
27
33
|
private
|
@@ -67,6 +73,10 @@ module Hangry
|
|
67
73
|
end
|
68
74
|
end
|
69
75
|
|
76
|
+
def parse_canonical_url
|
77
|
+
CanonicalUrlParser.new(nokogiri_doc).canonical_url
|
78
|
+
end
|
79
|
+
|
70
80
|
def parse_duration(iso8601_string)
|
71
81
|
duration = ISO8601::Duration.new(iso8601_string)
|
72
82
|
duration.hours.to_i * 60 + duration.minutes.to_i
|
data/lib/hangry/version.rb
CHANGED
data/lib/hangry.rb
CHANGED
@@ -1,8 +1,5 @@
|
|
1
1
|
require "hangry/version"
|
2
|
-
require 'hangry/recipe_parser'
|
3
|
-
require 'hangry/hrecipe_parser'
|
4
|
-
require 'hangry/schema_org_recipe_parser'
|
5
|
-
require 'hangry/data_vocabulary_recipe_parser'
|
2
|
+
require 'hangry/parser_class_selecter'
|
6
3
|
require 'active_support/core_ext/object/blank'
|
7
4
|
require 'date'
|
8
5
|
require 'iso8601'
|
@@ -11,6 +8,7 @@ require "nokogiri"
|
|
11
8
|
module Hangry
|
12
9
|
RECIPE_ATTRIBUTES = [
|
13
10
|
:author,
|
11
|
+
:canonical_url,
|
14
12
|
:cook_time,
|
15
13
|
:description,
|
16
14
|
:ingredients,
|
@@ -40,11 +38,8 @@ module Hangry
|
|
40
38
|
Recipe = Struct.new(*RECIPE_ATTRIBUTES)
|
41
39
|
|
42
40
|
def self.parse(html)
|
43
|
-
|
44
|
-
|
45
|
-
parser = parsers.detect { |p| p.can_parse? }
|
46
|
-
|
47
|
-
parser ? parser.parse : Recipe.new
|
41
|
+
parser_class = ParserClassSelecter.new(html).parser_class
|
42
|
+
parser_class.new(html).parse
|
48
43
|
end
|
49
44
|
|
50
45
|
end
|
@@ -3,9 +3,15 @@ require 'hangry'
|
|
3
3
|
describe Hangry do
|
4
4
|
|
5
5
|
context "allrecipes.com recipe" do
|
6
|
-
|
6
|
+
let(:html) { File.read("spec/fixtures/allrecipes.html") }
|
7
|
+
subject { Hangry.parse(html) }
|
8
|
+
|
9
|
+
it "should use the correct parser" do
|
10
|
+
Hangry::ParserClassSelecter.new(html).parser_class.should == Hangry::Parsers::NonStandard::AllRecipesParser
|
11
|
+
end
|
7
12
|
|
8
13
|
its(:author) { should == "United Soybean Board" }
|
14
|
+
its(:canonical_url) { should == "http://allrecipes.com/recipe/roasted-vegetable-and-couscous-salad/" }
|
9
15
|
its(:cook_time) { should == 15 }
|
10
16
|
its(:description) { should == "\"This better-for-you main-dish salad is quick, colorful and full of satisfying texture. To explore a variety of grains, substitute 3 cups cooked regular couscous, brown rice or quinoa.\"" }
|
11
17
|
its(:ingredients) {
|
@@ -36,7 +42,17 @@ describe Hangry do
|
|
36
42
|
unsaturated_fat: nil
|
37
43
|
}
|
38
44
|
end
|
39
|
-
its(:instructions) {
|
45
|
+
its(:instructions) {
|
46
|
+
instructions = <<-EOS
|
47
|
+
Preheat oven to 425 degrees F.
|
48
|
+
Toss broccoli, peppers and onions with 2 tablespoons soybean oil and 1/4 teaspoon each salt and pepper. Place on foil-lined baking sheet.
|
49
|
+
Bake for 15 minutes until vegetables are tender and lightly browned.
|
50
|
+
Meanwhile, cook couscous according to package directions.
|
51
|
+
Place cooked couscous and roasted vegetables in large bowl. Pour vinegar and remaining soybean oil over salad and sprinkle with remaining salt and pepper; toss lightly until combined.
|
52
|
+
You may substitute 3 cups cooked regular couscous, brown rice or quinoa.
|
53
|
+
EOS
|
54
|
+
should == instructions.strip
|
55
|
+
}
|
40
56
|
its(:prep_time) { should == 15 }
|
41
57
|
its(:published_date) { should == nil }
|
42
58
|
its(:total_time) { should == 30 }
|
@@ -10,6 +10,7 @@ describe Hangry do
|
|
10
10
|
# BigOven puts the author element outside of the hRecipe element...
|
11
11
|
should == nil
|
12
12
|
end
|
13
|
+
its(:canonical_url) { should == "http://www.bigoven.com/recipe/178920/steves-fish-tacos" }
|
13
14
|
its(:cook_time) { should == nil }
|
14
15
|
its(:description) { should == "I had never tried fish tacos until my son, fresh out of boot camp, asked me to make them. I found a basic recipe, then adapted it from there, and now it's one of my favorite things to eat!" }
|
15
16
|
its(:ingredients) {
|
@@ -6,6 +6,7 @@ describe Hangry do
|
|
6
6
|
subject { Hangry.parse(File.read("spec/fixtures/epicurious.html")) }
|
7
7
|
|
8
8
|
its(:author) { should == "by Janet Taylor McCracken" }
|
9
|
+
its(:canonical_url) { should == "http://www.epicurious.com/articlesguides/bestof/toprecipes/bestburgerrecipes/recipes/food/views/Grilled-Turkey-Burgers-with-Cheddar-and-Smoky-Aioli-354289" }
|
9
10
|
its(:cook_time) { should == nil }
|
10
11
|
its(:description) { should == "A simple Moroccan-spiced aioli is mixed in with the ground turkey to keep the burgers moist and give them tons of flavor. Smoked paprika is available in the spice aisle of most supermarkets." }
|
11
12
|
its(:prep_time) { should == 40 }
|
@@ -5,6 +5,7 @@ describe Hangry do
|
|
5
5
|
context "food network recipe" do
|
6
6
|
subject { Hangry.parse(File.read("spec/fixtures/food_network_schema_org.html")) }
|
7
7
|
|
8
|
+
its(:canonical_url) { should == 'http://www.foodnetwork.com/recipes/rachael-ray/spinach-and-mushroom-stuffed-chicken-breasts-recipe/index.html' }
|
8
9
|
its(:cook_time) { should == 20 }
|
9
10
|
its(:description) { should == nil }
|
10
11
|
its(:ingredients) do
|
@@ -7,6 +7,7 @@ describe Hangry do
|
|
7
7
|
subject { Hangry.parse(File.read("spec/fixtures/myrecipes.com.html")) }
|
8
8
|
|
9
9
|
its(:author) { should == "Southern Living" }
|
10
|
+
its(:canonical_url) { should == 'http://www.myrecipes.com/recipe/best-carrot-cake-10000000257583/' }
|
10
11
|
its(:cook_time) { should == nil }
|
11
12
|
its(:description) { should == "" } # not valid HTML to have a p inside of an h2...
|
12
13
|
its(:ingredients) {
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hangry
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-25 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
16
|
-
requirement: &
|
16
|
+
requirement: &70314683800080 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70314683800080
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &70314683799600 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '0'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70314683799600
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: pry
|
38
|
-
requirement: &
|
38
|
+
requirement: &70314683799140 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ! '>='
|
@@ -43,10 +43,10 @@ dependencies:
|
|
43
43
|
version: '0'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70314683799140
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: activesupport
|
49
|
-
requirement: &
|
49
|
+
requirement: &70314683798640 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
@@ -54,10 +54,10 @@ dependencies:
|
|
54
54
|
version: '3.0'
|
55
55
|
type: :runtime
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70314683798640
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: iso8601
|
60
|
-
requirement: &
|
60
|
+
requirement: &70314683798120 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,10 +65,10 @@ dependencies:
|
|
65
65
|
version: 0.4.0
|
66
66
|
type: :runtime
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70314683798120
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: nokogiri
|
71
|
-
requirement: &
|
71
|
+
requirement: &70314683797640 !ruby/object:Gem::Requirement
|
72
72
|
none: false
|
73
73
|
requirements:
|
74
74
|
- - ~>
|
@@ -76,7 +76,7 @@ dependencies:
|
|
76
76
|
version: '1.5'
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
|
-
version_requirements: *
|
79
|
+
version_requirements: *70314683797640
|
80
80
|
description: A recipe microformat parser.
|
81
81
|
email:
|
82
82
|
- ian@iancanderson.com
|
@@ -92,8 +92,12 @@ files:
|
|
92
92
|
- Rakefile
|
93
93
|
- hangry.gemspec
|
94
94
|
- lib/hangry.rb
|
95
|
+
- lib/hangry/canonical_url_parser.rb
|
95
96
|
- lib/hangry/data_vocabulary_recipe_parser.rb
|
97
|
+
- lib/hangry/default_recipe_parser.rb
|
96
98
|
- lib/hangry/hrecipe_parser.rb
|
99
|
+
- lib/hangry/parser_class_selecter.rb
|
100
|
+
- lib/hangry/parsers/non_standard/all_recipes_parser.rb
|
97
101
|
- lib/hangry/recipe_parser.rb
|
98
102
|
- lib/hangry/schema_org_recipe_parser.rb
|
99
103
|
- lib/hangry/version.rb
|