tychus 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bb7646d41e612da74f91b9fd838925f632fe1621
4
+ data.tar.gz: 45b6939c694f5a9221bc3be47accc4cd966eae0f
5
+ SHA512:
6
+ metadata.gz: 68ed88a988f294e3880b9ed7ad4ae9813fe89da7eac26979ef3a7ffb0209345a89d98fd011d17e513ec445a8529b631e4f25f50828be5c7b96664c8f45228f64
7
+ data.tar.gz: 7860d9a43959274dd1aadf56a1b67335a3900f3c4d74a872c49e8063a84df2704d889064447e0c090d6b32611bf9b0c35ec219677b826c22c2f1273ee7d91a4c
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --require spec_helper
3
+ --format documentation
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in tychus.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Wayne Yang
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,49 @@
1
+ # Tychus
2
+
3
+ Recipe parser supporting microformats for:
4
+
5
+ * [Schema.org/Recipe](https://support.google.com/webmasters/answer/173379?hl=en)
6
+
7
+ Compatible with:
8
+
9
+ * [Allrecipes](http://allrecipes.com)
10
+ * [Food Network](http://www.foodnetwork.com)
11
+ * [Kraft Recipes](http://www.kraftrecipes.com)
12
+
13
+ ## Installation
14
+
15
+ Add this line to your application's Gemfile:
16
+
17
+ gem 'tychus'
18
+
19
+ And then execute:
20
+
21
+ $ bundle
22
+
23
+ Or install it yourself as:
24
+
25
+ $ gem install tychus
26
+
27
+ ## Usage
28
+
29
+ ```
30
+ require 'tychus'
31
+ recipe = Tychus.parse('http://allrecipes.com/Recipe/Chicken-Pot-Pie-IX/Detail.aspx?soid=recs_recipe_2')
32
+
33
+ recipe.name
34
+ => "Chicken Pot Pie IX"
35
+ recipe.author
36
+ => "Robbie Rice"
37
+ recipe.description
38
+ => "\"A delicious chicken pie made from scratch with carrots, peas and celery.\""
39
+ recipe.ingredients
40
+ => ["1 pound skinless, boneless chicken breast halves - cubed", "1 cup sliced carrots", "1 cup frozen green peas", "1/2 cup sliced celery", "1/3 cup butter", "1/3 cup chopped onion", "1/3 cup all-purpose flour", "1/2 teaspoon salt", "1/4 teaspoon black pepper", "1/4 teaspoon celery seed", "1 3/4 cups chicken broth", "2/3 cup milk", "2 (9 inch) unbaked pie crusts"]
41
+ ```
42
+
43
+ ## Contributing
44
+
45
+ 1. Fork it ( https://github.com/[my-github-username]/tychus/fork )
46
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
47
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
48
+ 4. Push to the branch (`git push origin my-new-feature`)
49
+ 5. Create a new Pull Request
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,12 @@
1
+ require "tychus/version"
2
+ require "tychus/parsers"
3
+ require "tychus/parser_selector"
4
+ require "tychus/uri_resolver"
5
+
6
module Tychus
  # Parses the recipe found at +uri+.
  #
  # The URI is first resolved through URIResolver, the result is handed to
  # ParserSelector to pick a parser class, and that class is instantiated
  # with the original +uri+ and asked to parse it.
  #
  # uri - location of the recipe page, passed unchanged to URIResolver and
  #       to the selected parser.
  #
  # Returns whatever the selected parser's #parse returns.
  # Raises ArgumentError when no registered parser matches the resolved host
  # (previously this surfaced as a NoMethodError on nil).
  def self.parse(uri)
    host = URIResolver.new(uri).resolve_uri
    parser = ParserSelector.resolve_parser(host)

    unless parser
      raise ArgumentError,
            "No recipe parser available for #{uri.inspect} (resolved host: #{host.inspect})"
    end

    parser.new(uri).parse
  end
end
@@ -0,0 +1,17 @@
1
+ module Tychus
2
# Chooses the parser class responsible for a given host.
class ParserSelector
  # Parser classes consulted in order; each must respond to +uri_host+.
  PARSERS = [
    Tychus::Parsers::AllrecipesParser,
    Tychus::Parsers::FoodNetworkParser,
    Tychus::Parsers::KraftRecipesParser
  ]

  # Finds the first parser whose +uri_host+ occurs inside +host+.
  #
  # The comparison is a literal, case-insensitive substring test. The
  # previous implementation interpolated +uri_host+ into a regular
  # expression without escaping it, so the "." in "allrecipes.com" matched
  # any character.
  #
  # host - the resolved host (anything responding to #to_s; nil is allowed).
  #
  # Returns the matching parser class, or nil when none matches.
  def self.resolve_parser(host)
    normalized_host = host.to_s.downcase

    PARSERS.find do |parser|
      normalized_host.include?(parser.uri_host.downcase)
    end
  end
end
16
+ end
17
+
@@ -0,0 +1,17 @@
1
+ require_relative 'parsers/base'
2
+ require_relative 'parsers/schema_org_parser'
3
+ require_relative 'parsers/allrecipes_parser'
4
+ require_relative 'parsers/food_network_parser'
5
+ require_relative 'parsers/kraft_recipes_parser'
6
+
7
+ module Tychus
8
+ module Parsers
9
+
10
# Plain value object holding one member per Base.recipe_attributes entry.
# Shorter reader names are provided for the two "recipe_"-prefixed members.
Recipe = Struct.new(*Base.recipe_attributes) do
  { yield: :recipe_yield, instructions: :recipe_instructions }.each do |short_name, member|
    alias_method short_name, member
  end
end
14
+
15
+ end
16
+ end
17
+
@@ -0,0 +1,18 @@
1
+ module Tychus
2
+ module Parsers
3
+
4
+ # Allrecipes uses schema.org's recipe microformat
5
class AllrecipesParser < SchemaOrgParser
  # Host fragment used by ParserSelector to route URIs to this parser.
  def self.uri_host
    "allrecipes.com"
  end

  # Drops the final entry of +instructions+, which on Allrecipes pages is
  # the "Kitchen Friendly View" element rather than a cooking step.
  def clean_instructions(instructions)
    instructions[0...-1]
  end
end
15
+
16
+ end
17
+ end
18
+
@@ -0,0 +1,157 @@
1
+ require 'active_support/core_ext/object/blank.rb'
2
+
3
+ module Tychus
4
+ module Parsers
5
+
6
# Abstract scaffold shared by the site-specific recipe parsers.
#
# Opens +uri+, parses it with Nokogiri and narrows the document to the
# node(s) matched by the subclass-provided +root_doc+ CSS selector. Each
# recipe attribute is then read by the matching +parse_<attribute>+ method.
#
# Subclasses must supply +self.root_doc+ and +itemprop_node_for+; neither is
# defined in this class.
class Base
  attr_reader :uri, :doc, :recipe_doc, :recipe

  # Names of the attributes every parser fills in, in parsing order.
  def self.recipe_attributes
    # TODO: clear up these attributes. Are they used? Real example to
    # verify?
    # recipeType
    # photo
    # published
    # summary
    # review - see schema.org/Review
    %i[
      name
      author
      description
      prep_time
      cook_time
      total_time
      recipe_yield
      ingredients
      recipe_instructions
      image
    ]
  end

  # uri - location of the recipe page to open and parse.
  def initialize(uri)
    @uri = uri
    @recipe = Recipe.new
    # Block form so the underlying IO is closed once Nokogiri has read it.
    @doc = open_document(uri) { |io| Nokogiri::HTML(io) }
    @recipe_doc = @doc.css(self.class.root_doc)
  end

  # Runs every parse_<attribute> method and stores the result on the recipe,
  # converting NullObject placeholders to nil.
  #
  # Returns the populated Recipe.
  def parse
    recipe_attributes.each do |attr|
      property_value = __send__("parse_#{attr}")
      recipe.__send__("#{attr}=", Value(property_value))
    end
    recipe
  end

  def parse_author
    # is it always first?
    itemprop_node_for(:author).content
  end

  def parse_description
    # is it always first?
    itemprop_node_for(:description).content
  end

  # Collects the text of each child of the recipeInstructions node,
  # strips empty strings, drops trailing whitespace and splits on the
  # carriage-return separator.
  #
  # Allrecipes: <li><span>lorem ipsum</span></li>
  # FoodNetwork: <p>lorem ipsum</p>
  def parse_recipe_instructions
    # reject headers such as "Directions" and divs such as .categories for Foodnetwork recipes
    reject_regex = /^(h.|div)$/

    steps = itemprop_node_for(:recipeInstructions)
            .element_children
            .reject { |node| node.name =~ reject_regex }
            .map { |node| node.content.squeeze(" ").rstrip.split("\r\n\s\r\n\s") }
            .flatten
            .reject(&:blank?)

    clean_instructions(steps)
  end

  def parse_name
    # is it always first?
    itemprop_node_for(:name).content
  end

  def parse_cook_time
    # is it always first?
    # leverage iso8601
    parse_duration(itemprop_node_for(:cookTime))
  end

  def parse_image
    # is it always first?
    itemprop_node_for(:image).attr('src')
  end

  # Joins the text of each ingredient node's children with a space and
  # drops blank results.
  def parse_ingredients
    # NOT FIRST
    recipe_doc
      .css('[itemprop="ingredients"]')
      .map { |ingredient_node| ingredient_node.element_children.map(&:content).join(" ") }
      .reject(&:blank?)
  end

  def parse_prep_time
    # is it always first?
    # leverage iso8601
    parse_duration(itemprop_node_for(:prepTime))
  end

  # Reads a duration from +node+ according to its element name.
  #
  # Allrecipes - 'time' element
  # Foodnetwork - 'meta' element (std according to Schema.org/Recipe)
  #
  # Returns the 'content' attribute for meta/span nodes, the 'datetime'
  # attribute for time nodes, and a NullObject for anything else.
  def parse_duration(node)
    case node.name
    when "meta", "span"
      node.attr('content')
    when "time"
      node.attr('datetime')
    else
      NullObject.new
    end
  end

  def parse_recipe_yield
    # is it always first?
    itemprop_node_for(:recipeYield).content
  end

  def parse_total_time
    # is it always first?
    # leverage iso8601
    parse_duration(itemprop_node_for(:totalTime))
  end

  def recipe_attributes
    self.class.recipe_attributes
  end

  # Hook for subclasses to post-process parsed instructions; identity here.
  def clean_instructions(obj)
    obj
  end

  # Converts a NullObject placeholder to nil; any other value passes through.
  def Value(obj)
    case obj
    when NullObject then nil
    else obj
    end
  end

  private

  # Opens +uri+ and yields the resulting IO to the block, returning the
  # block's value.
  #
  # Kernel#open stopped handling URLs in Ruby 3.0, so URI.open is used
  # whenever open-uri provides it; otherwise this falls back to Kernel#open
  # exactly as before.
  #
  # NOTE(review): both paths still end in Kernel#open for strings that are
  # not URLs, so a value starting with "|" would spawn a process. Validate
  # +uri+ upstream if it can come from untrusted input.
  def open_document(uri, &block)
    if defined?(URI) && URI.respond_to?(:open)
      URI.open(uri, &block)
    else
      open(uri, &block)
    end
  end
end
149
+
150
# Placeholder returned when a document node is missing.
#
# Every message that is not otherwise defined returns the receiver itself,
# so call chains on a missing node collapse to a single NullObject instead
# of raising NoMethodError.
class NullObject
  # Swallows any undefined message and returns self so chaining continues.
  def method_missing(*args, &block)
    self
  end

  # Keeps respond_to? and method consistent with method_missing, which
  # answers every message.
  def respond_to_missing?(_name, _include_private = false)
    true
  end
end
155
+
156
+ end
157
+ end
@@ -0,0 +1,38 @@
1
+ module Tychus
2
+ module Parsers
3
# Parser for Food Network recipe pages.
class FoodNetworkParser < SchemaOrgParser
  # Host fragment used by ParserSelector to route URIs to this parser.
  def self.uri_host
    "foodnetwork.com"
  end

  # Reads the author from the first <span> inside the author node
  # (in the case of an author advertising her TV show). Falls back to the
  # author node itself when it has no <span>, instead of calling #content
  # on nil.
  def parse_author
    author_node = itemprop_node_for(:author)
    name_node = author_node.css('span').first || author_node
    name_node.content
  end

  # Foodnetwork does not use the description in its recipe body, so resort
  # to opengraph to pull the description out of the head.
  #
  # Returns a NullObject when the og:description meta tag is absent.
  # TODO: pull this func out for an opengraph parser?
  def parse_description
    meta = @doc.css('meta[property="og:description"]').first
    meta ? meta.attr('content') : NullObject.new
  end

  # Returns the text of every ingredient node with leading whitespace
  # removed, runs of spaces collapsed and one trailing newline chomped.
  def parse_ingredients
    # NOT FIRST
    recipe_doc
      .css('[itemprop="ingredients"]')
      .map { |node| node.content.lstrip.squeeze(" ").chomp }
  end

  # Returns the instructions unchanged.
  def clean_instructions(obj)
    #TODO: what is best pattern to share this behavior?
    obj
  end
end
36
+ end
37
+ end
38
+
@@ -0,0 +1,55 @@
1
+ module Tychus
2
+ module Parsers
3
+
4
# Parser for Kraft Recipes pages.
class KraftRecipesParser < SchemaOrgParser
  # Host fragment used by ParserSelector to route URIs to this parser.
  def self.uri_host
    "kraftrecipes.com"
  end

  # description can be found in .recipeDesc or meta tag in header; the meta
  # tag is used here.
  #
  # Returns a NullObject when the meta tag is absent, instead of calling
  # #attr on nil.
  # TODO: pull out meta tag parsing into own methods/class?
  def parse_description
    meta = doc.css('meta[name="description"]').first
    meta ? meta.attr('content') : NullObject.new
  end

  # Removes the carriage returns, newlines and tabs around the name, e.g.
  # "\r\n\tSweet BBQ Chicken Kabobs\r\n\t".
  def parse_name
    super.gsub(/(\r|\n|\t)/, '')
  end

  # Splits the text of each child of the recipeInstructions node on
  # "\r\n\t", removes remaining tabs and drops blank entries.
  def parse_recipe_instructions
    itemprop_node_for(:recipeInstructions)
      .element_children
      .map do |node|
        node.content
            .squeeze(" ")
            .rstrip
            .split("\r\n\t")
            .map { |step| step.gsub(/\t/, '') }
      end.flatten.reject(&:blank?)
  end

  # Cleans each child of every ingredient node and joins them with a space.
  # Raw values look like
  # "1 lb.\r\n\t\t\t\t\t\t\t\t boneless skinless chicken breasts, ...".
  def parse_ingredients
    # NOT FIRST
    recipe_doc
      .css('[itemprop="ingredients"]')
      .map do |ingredient_node|
        ingredient_node
          .element_children
          .map { |node| node.content.strip.squeeze(" ").gsub(/(\r|\n|\t)/, '') }
          .join(" ")
      end.reject(&:blank?)
  end
end
52
+
53
+ end
54
+ end
55
+
@@ -0,0 +1,41 @@
1
+ module Tychus
2
+ module Parsers
3
+
4
# Parser for pages marked up with the schema.org Recipe microformat.
class SchemaOrgParser < Base
  # CSS selector for the recipe root node.
  def self.root_doc
    '[itemtype="http://schema.org/Recipe"]'
  end

  # CSS selector for embedded review markup.
  def self.review_doc
    '[itemtype="http://schema.org/Review"]'
  end

  # CSS selector for embedded video markup.
  def self.video_object_doc
    '[itemtype="http://www.schema.org/VideoObject"]'
  end

  # Builds the parser, then removes review and video nodes from the recipe
  # document.
  def initialize(uri)
    super
    strip_review_microformat
    strip_video_object_microformat
  end

  # Returns the first node in the recipe document carrying the given
  # itemprop, or a NullObject when there is none.
  def itemprop_node_for(property)
    first_match = recipe_doc.css("[itemprop='#{property}']").first
    first_match || NullObject.new
  end

  def strip_review_microformat
    review_nodes = recipe_doc.css(self.class.review_doc)
    review_nodes.remove
  end

  def strip_video_object_microformat
    video_nodes = recipe_doc.css(self.class.video_object_doc)
    video_nodes.remove
  end
end
38
+
39
+ end
40
+ end
41
+