hangry 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -35,6 +35,8 @@ recipe.yield # "4 servings"
35
35
  # etc..
36
36
  ```
37
37
 
38
+ Here's a live demo: http://hangry.herokuapp.com/
39
+
38
40
  ## Contributing
39
41
 
40
42
  1. Fork it
@@ -0,0 +1,61 @@
1
module Hangry
  # Recipe parser for pages marked up with hRecipe-style class names.
  #
  # The recipe root is the first element matching `.hrecipe`; every field is
  # looked up by class name underneath that root.
  class HRecipeParser < RecipeParser

    # CSS selector the base class uses to locate the recipe root element.
    def self.root_selector
      '.hrecipe'
    end

    private

    # First node under the recipe root carrying the given class, or a
    # NullObject when nothing matches so call chains stay safe.
    def node_with_class(klass)
      nodes_with_class(klass).first || NullObject.new
    end

    # All nodes under the recipe root carrying the given class. Falls back to
    # a NullObject when no recipe root was found, instead of calling `css` on
    # nil.
    def nodes_with_class(klass)
      recipe_ast ? recipe_ast.css(".#{klass}") : NullObject.new
    end

    def parse_author
      clean_string node_with_class(:author).content
    end

    def parse_cook_time
      # TODO: not implemented yet; returns nil.
    end

    def parse_description
      clean_string node_with_class(:summary).content
    end

    # One cleaned string per `.ingredient` node.
    def parse_ingredients
      nodes_with_class(:ingredient).map(&:content).map { |ingredient| clean_string ingredient }
    end

    def parse_instructions
      clean_string node_with_class(:instructions).content
    end

    def parse_name
      clean_string node_with_class(:fn).content
    end

    def parse_prep_time
      # TODO: not implemented yet; returns nil.
    end

    def parse_published_date
      # TODO: not implemented yet; returns nil.
    end

    # Total time in minutes, read from the `title` attribute of the
    # `.value-title` element inside `.duration`. Returns nil when the markup
    # is missing, the attribute is empty, or the value is not a recognised
    # ISO 8601 duration.
    def parse_total_time
      value_title = node_with_class(:duration).css('.value-title').first
      # `.duration` exists but has no `.value-title` child.
      return nil if value_title.nil?

      title = value_title['title']
      # Covers nil (attribute absent), "" and the NullObject placeholder.
      return nil if title.to_s.strip.empty?

      parse_duration(title)
    rescue ISO8601::Errors::UnknownPattern
      nil
    end

    def parse_yield
      clean_string node_with_class(:yield).content
    end

  end

end
61
+
@@ -0,0 +1,61 @@
1
module Hangry
  # Base class for recipe parsers.
  #
  # Subclasses provide `self.root_selector` (a CSS selector for the recipe
  # root element) and one private `parse_<attribute>` method for every entry
  # in RECIPE_ATTRIBUTES.
  class RecipeParser
    attr_reader :recipe_html
    attr_accessor :recipe_ast, :recipe

    # Parses the HTML and remembers the first element matching the subclass's
    # root selector (nil when the page has no such element).
    def initialize(recipe_html)
      @recipe_html = recipe_html
      @recipe = Recipe.new
      doc = Nokogiri::HTML(recipe_html)
      self.recipe_ast = doc.css(self.class.root_selector).first
    end

    # Fills the recipe by calling `parse_<attribute>` for each recipe
    # attribute and returns it. When no recipe root was found the recipe is
    # returned untouched rather than running the field parsers against nil.
    def parse
      return recipe unless can_parse?

      RECIPE_ATTRIBUTES.each do |attribute|
        attr_value = value(send("parse_#{attribute}"))
        recipe.public_send("#{attribute}=", attr_value)
      end
      recipe
    end

    # True when a recipe root element was found in the document.
    def can_parse?
      !recipe_ast.nil?
    end

    private

    # Placeholder returned for missing nodes: any unknown message returns the
    # placeholder itself, so lookups can be chained without nil checks.
    # `value` turns it back into nil before it reaches the recipe.
    class NullObject
      def method_missing(*args, &block)
        self
      end
      def blank?; true; end
      def present?; false; end
      def to_a; []; end
      def to_ary; []; end
      def to_s; ""; end
      def to_str; ""; end
      def to_f; 0.0; end
      def to_i; 0; end
    end

    # Converts the NullObject placeholder to nil; passes anything else through.
    def value(object)
      case object
      when NullObject then nil
      else object
      end
    end

    # Trims the string and collapses each whitespace run into one space.
    # nil (e.g. a missing HTML attribute) is passed through unchanged.
    def clean_string(string)
      return nil if string.nil?

      string.strip.gsub(/\s+/, ' ')
    end

    # Converts an ISO 8601 duration string to minutes, using its hours and
    # minutes components.
    def parse_duration(iso8601_string)
      duration = ISO8601::Duration.new(iso8601_string)
      duration.hours.to_i * 60 + duration.minutes.to_i
    end

  end

end
60
+
61
+
@@ -0,0 +1,70 @@
1
module Hangry
  # Recipe parser for schema.org/Recipe microdata.
  #
  # The recipe root is the first element whose itemtype is
  # http://schema.org/Recipe; fields are looked up by `itemprop` beneath it.
  class SchemaOrgRecipeParser < RecipeParser

    # CSS selector the base class uses to locate the recipe root element.
    def self.root_selector
      '[itemtype="http://schema.org/Recipe"]'
    end

    private

    # First node under the recipe root with the given itemprop, or a
    # NullObject when nothing matches so call chains stay safe.
    def node_with_itemprop(itemprop)
      nodes_with_itemprop(itemprop).first || NullObject.new
    end

    # All nodes under the recipe root with the given itemprop (a NullObject
    # when no recipe root was found).
    def nodes_with_itemprop(itemprop)
      recipe_ast ? recipe_ast.css("[itemprop = \"#{itemprop}\"]") : NullObject.new
    end

    # Author name. For a nested schema.org/Person the nested "name" item is
    # used, preferring its `content` attribute and falling back to its text;
    # otherwise the author node's own text is used. Returns nil when a Person
    # has no name item.
    def parse_author
      author_node = node_with_itemprop(:author)
      author =
        if author_node['itemtype'] == "http://schema.org/Person"
          name_node = author_node.css('[itemprop = "name"]').first
          if name_node
            # The name may be given as an attribute or as element text.
            name_node['content'].present? ? name_node['content'] : name_node.content
          end
        else
          author_node.content
        end
      author.nil? ? nil : clean_string(author)
    end

    def parse_cook_time
      parse_time(:cookTime)
    end

    def parse_description
      clean_string node_with_itemprop(:description).content
    end

    def parse_ingredients
      nodes_with_itemprop(:ingredients).map(&:content).map do |ingredient|
        # remove newlines and excess whitespace from ingredients
        clean_string ingredient
      end
    end

    def parse_instructions
      clean_string node_with_itemprop(:recipeInstructions).content
    end

    def parse_name
      clean_string node_with_itemprop(:name).content
    end

    def parse_prep_time
      parse_time(:prepTime)
    end

    # Publication date from the `content` attribute of datePublished.
    # Returns nil when the attribute is missing, blank, or not a parseable
    # date (best-effort, like parse_time).
    def parse_published_date
      content = node_with_itemprop(:datePublished)['content']
      content.blank? ? nil : Date.parse(content)
    rescue ArgumentError
      nil
    end

    # Duration in minutes for the given itemprop, or nil when the node or its
    # duration attribute is missing or is not a recognised ISO 8601 duration.
    def parse_time(type)
      node = node_with_itemprop(type)
      iso8601_string = if node['content'].present?
        node['content'] # foodnetwork.com
      else
        node['datetime'] # allrecipes.com
      end
      # Neither attribute present: nothing to hand to the duration parser.
      return nil if iso8601_string.blank?

      parse_duration(iso8601_string)
    rescue ISO8601::Errors::UnknownPattern
      nil
    end

    def parse_total_time
      parse_time(:totalTime)
    end

    def parse_yield
      clean_string node_with_itemprop(:recipeYield).content
    end

  end

end
@@ -1,3 +1,3 @@
1
1
  module Hangry
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
data/lib/hangry.rb CHANGED
@@ -1,14 +1,13 @@
1
1
  require "hangry/version"
2
+ require 'hangry/recipe_parser'
3
+ require 'hangry/hrecipe_parser'
4
+ require 'hangry/schema_org_recipe_parser'
2
5
  require 'active_support/core_ext/object/blank'
3
6
  require 'date'
4
7
  require 'iso8601'
5
8
  require "nokogiri"
6
9
 
7
10
  module Hangry
8
- def self.parse(html)
9
- parse_schema_org_recipe(html)
10
- end
11
-
12
11
  RECIPE_ATTRIBUTES = [
13
12
  :author,
14
13
  :cook_time,
@@ -24,110 +23,13 @@ module Hangry
24
23
 
25
24
  Recipe = Struct.new(*RECIPE_ATTRIBUTES)
26
25
 
27
- def self.parse_schema_org_recipe(html)
28
- SchemaOrgRecipeParser.new(html).parse
29
- end
30
-
31
- class SchemaOrgRecipeParser
32
- attr_reader :recipe_html
33
- attr_accessor :recipe_ast, :recipe
34
-
35
- def initialize(recipe_html)
36
- @recipe_html = recipe_html
37
- @recipe = Recipe.new
38
- doc = Nokogiri::HTML(recipe_html)
39
- self.recipe_ast = doc.css('[itemtype="http://schema.org/Recipe"]').first
40
- end
41
-
42
- def parse
43
- RECIPE_ATTRIBUTES.each do |attribute|
44
- attr_value = value(send("parse_#{attribute}"))
45
- recipe.public_send("#{attribute}=", attr_value)
46
- end
47
- recipe
48
- end
49
-
50
- private
51
-
52
- class NullObject
53
- def method_missing(*args, &block)
54
- self
55
- end
56
- def blank?; true; end
57
- def present?; false; end
58
- def to_a; []; end
59
- def to_ary; []; end
60
- def to_s; ""; end
61
- def to_str; ""; end
62
- def to_f; 0.0; end
63
- def to_i; 0; end
64
- end
65
-
66
- def value(object)
67
- case object
68
- when NullObject then nil
69
- else object
70
- end
71
- end
72
-
73
- def node_with_itemprop(itemprop)
74
- nodes_with_itemprop(itemprop).first || NullObject.new
75
- end
76
- def nodes_with_itemprop(itemprop)
77
- recipe_ast ? recipe_ast.css("[itemprop = \"#{itemprop}\"]") : NullObject.new
78
- end
79
- def parse_author
80
- author_node = node_with_itemprop(:author)
81
- if author_node['itemtype'] == "http://schema.org/Person"
82
- author_node.css('[itemprop = "name"]').first['content']
83
- else
84
- author_node.content
85
- end
86
- end
87
- def parse_cook_time
88
- parse_time(:cookTime)
89
- end
90
- def parse_description
91
- node_with_itemprop(:description).content
92
- end
93
- def parse_ingredients
94
- nodes_with_itemprop(:ingredients).map(&:content).map do |ingredient|
95
- # remove newlines and excess whitespace from ingredients
96
- ingredient.strip.gsub(/\s+/, ' ')
97
- end
98
- end
99
- def parse_instructions
100
- node_with_itemprop(:recipeInstructions).content.strip
101
- end
102
- def parse_name
103
- node_with_itemprop(:name).content
104
- end
105
- def parse_prep_time
106
- parse_time(:prepTime)
107
- end
108
- def parse_published_date
109
- content = node_with_itemprop(:datePublished)['content']
110
- content.blank? ? nil : Date.parse(content)
111
- end
112
- def parse_time(type)
113
- node = node_with_itemprop(type)
114
- iso8601_string = if node['content'].present?
115
- node['content'] # foodnetwork.com
116
- else
117
- node['datetime'] # allrecipes.com
118
- end
119
- duration = ISO8601::Duration.new(iso8601_string)
120
- duration.hours.to_i * 60 + duration.minutes.to_i
121
- rescue ISO8601::Errors::UnknownPattern
122
- nil
123
- end
124
- def parse_total_time
125
- parse_time(:totalTime)
126
- end
127
- def parse_yield
128
- node_with_itemprop(:recipeYield).content
129
- end
26
# Parses a recipe out of the given HTML.
#
# Parsers are tried in order (schema.org microdata first, then hRecipe) and
# the first one that finds its recipe root is used. Each parser builds its
# own document from the HTML, so later parsers are only instantiated when
# the earlier ones cannot handle the page. Returns an empty Recipe when no
# parser applies.
def self.parse(html)
  [SchemaOrgRecipeParser, HRecipeParser].each do |parser_class|
    parser = parser_class.new(html)
    return parser.parse if parser.can_parse?
  end

  Recipe.new
end
33
+
132
34
  end
133
35