hangry 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -35,6 +35,8 @@ recipe.yield # "4 servings"
35
35
  # etc..
36
36
  ```
37
37
 
38
+ Here's a live demo: http://hangry.herokuapp.com/
39
+
38
40
  ## Contributing
39
41
 
40
42
  1. Fork it
@@ -0,0 +1,61 @@
1
module Hangry
  # Recipe parser for markup whose root element carries the `hrecipe` class.
  # Each attribute is looked up by CSS class name underneath that root node.
  class HRecipeParser < RecipeParser

    # CSS selector used by RecipeParser#initialize to locate the recipe root.
    def self.root_selector
      '.hrecipe'
    end

    private

    # First descendant of the recipe root with the given CSS class, or a
    # NullObject when there is none.
    def node_with_class(klass)
      nodes_with_class(klass).first || NullObject.new
    end

    # All descendants of the recipe root with the given CSS class. Returns a
    # NullObject when no recipe root was found, mirroring
    # SchemaOrgRecipeParser#nodes_with_itemprop, so lookups never hit nil.
    def nodes_with_class(klass)
      recipe_ast ? recipe_ast.css(".#{klass}") : NullObject.new
    end

    def parse_author
      clean_string node_with_class(:author).content
    end

    def parse_cook_time
      # TODO: not implemented yet; always nil.
      nil
    end

    def parse_description
      clean_string node_with_class(:summary).content
    end

    def parse_ingredients
      nodes_with_class(:ingredient).map { |node| clean_string node.content }
    end

    def parse_instructions
      clean_string node_with_class(:instructions).content
    end

    def parse_name
      clean_string node_with_class(:fn).content
    end

    def parse_prep_time
      # TODO: not implemented yet; always nil.
      nil
    end

    def parse_published_date
      # TODO: not implemented yet; always nil.
      nil
    end

    # Total time in minutes, read from the `title` attribute of the first
    # `.value-title` element inside the `.duration` node. Returns nil when
    # that element or attribute is missing, or when the value is not a
    # duration the ISO8601 library recognises.
    def parse_total_time
      value_title = node_with_class(:duration).css('.value-title').first
      return nil if value_title.nil?

      title = value_title['title']
      # Covers nil, empty strings and NullObject (whose to_s is "").
      return nil if title.to_s.strip.empty?

      parse_duration(title)
    rescue ISO8601::Errors::UnknownPattern
      nil
    end

    def parse_yield
      clean_string node_with_class(:yield).content
    end

  end

end
61
+
@@ -0,0 +1,61 @@
1
module Hangry
  # Base class for recipe parsers. Subclasses supply `self.root_selector`
  # (a CSS selector for the recipe's root node) and one private
  # `parse_<attribute>` method per entry in RECIPE_ATTRIBUTES.
  class RecipeParser
    attr_reader :recipe_html
    attr_accessor :recipe_ast, :recipe

    # Parses +recipe_html+ with Nokogiri and stores the first node matching
    # the subclass's root selector in +recipe_ast+ (nil when nothing matches).
    def initialize(recipe_html)
      @recipe_html = recipe_html
      @recipe = Recipe.new
      doc = Nokogiri::HTML(recipe_html)
      self.recipe_ast = doc.css(self.class.root_selector).first
    end

    # Fills +recipe+ by calling `parse_<attribute>` for every attribute in
    # RECIPE_ATTRIBUTES, storing nil wherever a NullObject came back.
    # Returns the populated recipe.
    def parse
      RECIPE_ATTRIBUTES.each do |attribute|
        attr_value = value(send("parse_#{attribute}"))
        recipe.public_send("#{attribute}=", attr_value)
      end
      recipe
    end

    # True when a recipe root node was found in the document.
    # Always returns a real boolean rather than the node itself.
    def can_parse?
      !recipe_ast.nil?
    end

    private

    # Stand-in for a missing node: any undefined method returns the same
    # NullObject, so lookup chains on absent markup do not raise.
    class NullObject
      def method_missing(*args, &block)
        self
      end
      def blank?; true; end
      def present?; false; end
      def to_a; []; end
      def to_ary; []; end
      def to_s; ""; end
      def to_str; ""; end
      def to_f; 0.0; end
      def to_i; 0; end
    end

    # Maps a NullObject to nil; every other object is returned unchanged.
    def value(object)
      case object
      when NullObject then nil
      else object
      end
    end

    # Strips surrounding whitespace and collapses each internal whitespace
    # run to a single space. nil is passed through as nil so a missing
    # attribute value does not raise; a NullObject is returned as-is because
    # its method_missing answers both calls with itself.
    def clean_string(string)
      return nil if string.nil?

      string.strip.gsub(/\s+/, ' ')
    end

    # Converts an ISO 8601 duration string to minutes, counting only its
    # hours and minutes components. Errors raised by ISO8601::Duration.new
    # propagate to the caller.
    def parse_duration(iso8601_string)
      duration = ISO8601::Duration.new(iso8601_string)
      duration.hours.to_i * 60 + duration.minutes.to_i
    end

  end

end
60
+
61
+
@@ -0,0 +1,70 @@
1
module Hangry
  # Recipe parser for markup whose root element has
  # itemtype="http://schema.org/Recipe". Attributes are looked up by their
  # `itemprop` value underneath that root node.
  class SchemaOrgRecipeParser < RecipeParser

    # CSS selector used by RecipeParser#initialize to locate the recipe root.
    def self.root_selector
      '[itemtype="http://schema.org/Recipe"]'
    end

    private

    # First descendant of the recipe root with the given itemprop, or a
    # NullObject when there is none.
    def node_with_itemprop(itemprop)
      nodes_with_itemprop(itemprop).first || NullObject.new
    end

    # All descendants of the recipe root with the given itemprop; a
    # NullObject when no recipe root was found.
    def nodes_with_itemprop(itemprop)
      recipe_ast ? recipe_ast.css("[itemprop = \"#{itemprop}\"]") : NullObject.new
    end

    # Author name. For a nested schema.org Person the name comes from the
    # inner `name` itemprop; otherwise the author node's own text is used.
    def parse_author
      author_node = node_with_itemprop(:author)
      author = if author_node['itemtype'] == "http://schema.org/Person"
        person_name(author_node)
      else
        author_node.content
      end
      clean_string author
    end

    # Name of a Person node: the `content` attribute of its `name` itemprop
    # when set, else that node's text, else the Person node's own text.
    # The fallbacks keep pages without a `content` attribute or without a
    # nested name node from raising.
    def person_name(person_node)
      name_node = person_node.css('[itemprop = "name"]').first
      return person_node.content if name_node.nil?

      name_attr = name_node['content']
      name_attr.to_s.strip.empty? ? name_node.content : name_attr
    end

    def parse_cook_time
      parse_time(:cookTime)
    end

    def parse_description
      clean_string node_with_itemprop(:description).content
    end

    def parse_ingredients
      nodes_with_itemprop(:ingredients).map(&:content).map do |ingredient|
        # remove newlines and excess whitespace from ingredients
        clean_string ingredient
      end
    end

    def parse_instructions
      clean_string node_with_itemprop(:recipeInstructions).content
    end

    def parse_name
      clean_string node_with_itemprop(:name).content
    end

    def parse_prep_time
      parse_time(:prepTime)
    end

    # Publication date parsed from the `content` attribute of the
    # datePublished node; nil when that attribute is blank or absent.
    def parse_published_date
      content = node_with_itemprop(:datePublished)['content']
      content.blank? ? nil : Date.parse(content)
    end

    # Minutes for the given time itemprop (:cookTime, :prepTime, :totalTime).
    # Reads the `content` attribute when present, else `datetime`; returns
    # nil when the ISO8601 library does not recognise the value.
    def parse_time(type)
      node = node_with_itemprop(type)
      iso8601_string = if node['content'].present?
        node['content'] # foodnetwork.com
      else
        node['datetime'] # allrecipes.com
      end
      parse_duration(iso8601_string)
    rescue ISO8601::Errors::UnknownPattern
      nil
    end

    def parse_total_time
      parse_time(:totalTime)
    end

    def parse_yield
      clean_string node_with_itemprop(:recipeYield).content
    end

  end

end
@@ -1,3 +1,3 @@
1
1
  module Hangry
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
data/lib/hangry.rb CHANGED
@@ -1,14 +1,13 @@
1
1
  require "hangry/version"
2
+ require 'hangry/recipe_parser'
3
+ require 'hangry/hrecipe_parser'
4
+ require 'hangry/schema_org_recipe_parser'
2
5
  require 'active_support/core_ext/object/blank'
3
6
  require 'date'
4
7
  require 'iso8601'
5
8
  require "nokogiri"
6
9
 
7
10
  module Hangry
8
- def self.parse(html)
9
- parse_schema_org_recipe(html)
10
- end
11
-
12
11
  RECIPE_ATTRIBUTES = [
13
12
  :author,
14
13
  :cook_time,
@@ -24,110 +23,13 @@ module Hangry
24
23
 
25
24
  Recipe = Struct.new(*RECIPE_ATTRIBUTES)
26
25
 
27
- def self.parse_schema_org_recipe(html)
28
- SchemaOrgRecipeParser.new(html).parse
29
- end
30
-
31
- class SchemaOrgRecipeParser
32
- attr_reader :recipe_html
33
- attr_accessor :recipe_ast, :recipe
34
-
35
- def initialize(recipe_html)
36
- @recipe_html = recipe_html
37
- @recipe = Recipe.new
38
- doc = Nokogiri::HTML(recipe_html)
39
- self.recipe_ast = doc.css('[itemtype="http://schema.org/Recipe"]').first
40
- end
41
-
42
- def parse
43
- RECIPE_ATTRIBUTES.each do |attribute|
44
- attr_value = value(send("parse_#{attribute}"))
45
- recipe.public_send("#{attribute}=", attr_value)
46
- end
47
- recipe
48
- end
49
-
50
- private
51
-
52
- class NullObject
53
- def method_missing(*args, &block)
54
- self
55
- end
56
- def blank?; true; end
57
- def present?; false; end
58
- def to_a; []; end
59
- def to_ary; []; end
60
- def to_s; ""; end
61
- def to_str; ""; end
62
- def to_f; 0.0; end
63
- def to_i; 0; end
64
- end
65
-
66
- def value(object)
67
- case object
68
- when NullObject then nil
69
- else object
70
- end
71
- end
72
-
73
- def node_with_itemprop(itemprop)
74
- nodes_with_itemprop(itemprop).first || NullObject.new
75
- end
76
- def nodes_with_itemprop(itemprop)
77
- recipe_ast ? recipe_ast.css("[itemprop = \"#{itemprop}\"]") : NullObject.new
78
- end
79
- def parse_author
80
- author_node = node_with_itemprop(:author)
81
- if author_node['itemtype'] == "http://schema.org/Person"
82
- author_node.css('[itemprop = "name"]').first['content']
83
- else
84
- author_node.content
85
- end
86
- end
87
- def parse_cook_time
88
- parse_time(:cookTime)
89
- end
90
- def parse_description
91
- node_with_itemprop(:description).content
92
- end
93
- def parse_ingredients
94
- nodes_with_itemprop(:ingredients).map(&:content).map do |ingredient|
95
- # remove newlines and excess whitespace from ingredients
96
- ingredient.strip.gsub(/\s+/, ' ')
97
- end
98
- end
99
- def parse_instructions
100
- node_with_itemprop(:recipeInstructions).content.strip
101
- end
102
- def parse_name
103
- node_with_itemprop(:name).content
104
- end
105
- def parse_prep_time
106
- parse_time(:prepTime)
107
- end
108
- def parse_published_date
109
- content = node_with_itemprop(:datePublished)['content']
110
- content.blank? ? nil : Date.parse(content)
111
- end
112
- def parse_time(type)
113
- node = node_with_itemprop(type)
114
- iso8601_string = if node['content'].present?
115
- node['content'] # foodnetwork.com
116
- else
117
- node['datetime'] # allrecipes.com
118
- end
119
- duration = ISO8601::Duration.new(iso8601_string)
120
- duration.hours.to_i * 60 + duration.minutes.to_i
121
- rescue ISO8601::Errors::UnknownPattern
122
- nil
123
- end
124
- def parse_total_time
125
- parse_time(:totalTime)
126
- end
127
- def parse_yield
128
- node_with_itemprop(:recipeYield).content
129
- end
26
# Parses +html+ with the first parser that finds its recipe root, trying
# SchemaOrgRecipeParser before HRecipeParser. Parsers are built one at a
# time, so a later parser is never constructed (and the HTML is not parsed
# again) once an earlier one matches. Returns an empty Recipe when no
# parser can handle the markup.
def self.parse(html)
  [SchemaOrgRecipeParser, HRecipeParser].each do |parser_class|
    parser = parser_class.new(html)
    return parser.parse if parser.can_parse?
  end

  Recipe.new
end
33
+
132
34
  end
133
35