tychus 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. checksums.yaml +4 -4
  2. data/lib/tychus/meta_parser.rb +55 -0
  3. data/lib/tychus/parser_selector.rb +4 -3
  4. data/lib/tychus/parsers/campbells_kitchen_parser.rb +10 -0
  5. data/lib/tychus/parsers/open_graph_protocol_parser.rb +35 -0
  6. data/lib/tychus/parsers/schema_org_parser.rb +19 -19
  7. data/lib/tychus/parsers.rb +2 -0
  8. data/lib/tychus/uri_resolver.rb +3 -3
  9. data/lib/tychus/version.rb +1 -1
  10. data/lib/tychus.rb +3 -2
  11. data/spec/fixtures/cassettes/allrecipes_1.yml +14403 -0
  12. data/spec/fixtures/cassettes/campbells_kitchen_1.yml +3364 -0
  13. data/spec/fixtures/cassettes/food_network_single_ingredients_group_1.yml +4884 -0
  14. data/spec/fixtures/cassettes/kraft_recipes_1.yml +3638 -0
  15. data/spec/fixtures/cassettes/meta_parser_og_protocol_uri.yml +6733 -0
  16. data/spec/fixtures/cassettes/meta_parser_schema_org_microformat_uri.yml +9605 -0
  17. data/spec/meta_parser.rb +36 -0
  18. data/spec/parsers/allrecipes_parser_spec.rb +7 -6
  19. data/spec/parsers/campbells_kitchen_parser_spec.rb +71 -0
  20. data/spec/parsers/food_network_parser_spec.rb +7 -4
  21. data/spec/parsers/kraft_recipes_parser_spec.rb +6 -2
  22. data/spec/parsers/schema_org_parser_spec.rb +6 -2
  23. data/spec/spec_helper.rb +1 -1
  24. data/spec/uri_resolver_spec.rb +48 -10
  25. metadata +20 -11
  26. data/spec/fixtures/allrecipes.html +0 -3003
  27. data/spec/fixtures/campbellskitchen.html +0 -2190
  28. data/spec/fixtures/food_network_double_ingredients_group.html +0 -3725
  29. data/spec/fixtures/food_network_single_ingredients_group.html +0 -4930
  30. data/spec/fixtures/kraftrecipes.html +0 -2722
@@ -0,0 +1,36 @@
1
+ describe Tychus::MetaParser do
2
+ subject do
3
+ VCR.use_cassette("meta_parser_#{uri}") do
4
+ Tychus::MetaParser.new(__send__(uri)).parse
5
+ end
6
+ end
7
+
8
+ shared_examples_for "a_parsable_uri" do
9
+ its(:uri_object) { is_expected.to be_present }
10
+ its(:uri) { is_expected.to be_present }
11
+ its(:host) { is_expected.to be_present }
12
+ end
13
+
14
+ context "when uri uses fb:og protocol" do
15
+ let(:uri) { "og_protocol_uri" }
16
+ let(:og_protocol_uri) { "http://www.campbellskitchen.com/recipes/squash-casserole-24122" }
17
+
18
+ it_behaves_like "a_parsable_uri"
19
+
20
+ its(:schema_org_microformat?) { is_expected.to be_falsy }
21
+ its(:open_graph_protocol) { is_expected.to be_present }
22
+ its(:open_graph_protocol?) { is_expected.to be_truthy }
23
+ end
24
+
25
+ context "when uri uses schema.org microformat" do
26
+ let(:uri) { "schema_org_microformat_uri" }
27
+ let(:schema_org_microformat_uri) { "http://allrecipes.com/Recipe/Chicken-Pot-Pie-IX/Detail.aspx?soid=recs_recipe_2" }
28
+
29
+ it_behaves_like "a_parsable_uri"
30
+
31
+ its(:schema_org_microformat?) { is_expected.to be_truthy }
32
+ its(:open_graph_protocol) { is_expected.to be_nil }
33
+ its(:open_graph_protocol?) { is_expected.to be_falsy }
34
+ end
35
+ end
36
+
@@ -1,8 +1,10 @@
1
- describe Tychus::Parsers::AllrecipesParser do
2
- subject { Tychus.parse(allrecipes_uri) }
3
- # 7/26/14 source:
4
- # http://allrecipes.com/Recipe/Chicken-Pot-Pie-IX/Detail.aspx?soid=recs_recipe_2
5
- let(:allrecipes_uri) { File.expand_path("../../fixtures/allrecipes.html", __FILE__) }
1
+ describe Tychus::Parsers::AllrecipesParser, :vcr do
2
+ subject do
3
+ VCR.use_cassette("allrecipes_1") do
4
+ Tychus.parse(allrecipes_uri)
5
+ end
6
+ end
7
+ let(:allrecipes_uri) { 'http://allrecipes.com/Recipe/Chicken-Pot-Pie-IX/Detail.aspx' }
6
8
  let(:ingredients) {
7
9
  [
8
10
  "1 pound skinless, boneless chicken breast halves - cubed",
@@ -41,4 +43,3 @@ describe Tychus::Parsers::AllrecipesParser do
41
43
  its(:image) { is_expected.to eq("http://images.media-allrecipes.com/userphotos/250x250/00/14/23/142350.jpg") }
42
44
  its(:description) { is_expected.to eq("\"A delicious chicken pie made from scratch with carrots, peas and celery.\"") }
43
45
  end
44
-
@@ -0,0 +1,71 @@
1
+ describe Tychus::Parsers::CampbellsKitchenParser do
2
+ before { pending "Does NOT use schema.org microformat" }
3
+ subject do
4
+ VCR.use_cassette("campbells_kitchen_1") do
5
+ Tychus::Parsers::CampbellsKitchenParser.new(campbells_kitchen_uri)
6
+ end
7
+ end
8
+
9
+ let(:campbells_kitchen_uri) { "http://www.campbellskitchen.com/recipes/squash-casserole-24122" }
10
+ let(:ingredients) {
11
+ [
12
+ "3 cups Pepperidge Farm® Cornbread Stuffing",
13
+ "1/4 cup butter, melted (1/2 stick)",
14
+ "1 can (10 3/4 ounces) Campbell's® Condensed Cream of Chicken Soup (Regular or 98% Fat Free)",
15
+ "1/2 cup sour cream",
16
+ "2 small yellow squash, shredded (about 2 cups)",
17
+ "2 small zucchini, shredded (about 2 cups)",
18
+ "1 small carrot, shredded (about 1/3 cup)",
19
+ "1/2 cup shredded Cheddar cheese (about 2 ounces)"
20
+ ]
21
+ }
22
+
23
+ let(:instructions) {
24
+ [
25
+ "Stir the stuffing and butter in a large bowl. Reserve 1/2 cup of the stuffing mixture and spoon the remaining stuffing mixture into a 2-quart shallow baking dish.",
26
+ "Stir the soup, sour cream, yellow squash, zucchini, carrot and cheese in a medium bowl. Spread the mixture over the stuffing mixture and sprinkle with the reserved stuffing mixture.",
27
+ "Bake at 350°F. for 40 minutes or until the mixture is hot and bubbling."
28
+ ]
29
+ }
30
+
31
+ it "parses the name of the recipe" do
32
+ expect(subject.parse_name).to eq("Squash Casserole")
33
+ end
34
+
35
+ it "parses the author" do
36
+ expect(subject.parse_author).to eq(nil)
37
+ end
38
+
39
+ it "parses the prep time" do
40
+ expect(subject.parse_prep_time).to eq("PT15M")
41
+ end
42
+
43
+ it "parses the cook time" do
44
+ expect(subject.parse_cook_time).to eq("PT40M")
45
+ end
46
+
47
+ it "parses the total time" do
48
+ expect(subject.parse_total_time).to eq("PT55M")
49
+ end
50
+
51
+ it "parses the yield" do
52
+ expect(subject.parse_recipe_yield).to eq("8")
53
+ end
54
+
55
+ it "parses the image" do
56
+ expect(subject.parse_image).to eq("http://www.campbellskitchen.com/recipeimages/squash-casserole-large-24122.jpg")
57
+ end
58
+
59
+ it "parses the ingredients" do
60
+ expect(subject.parse_ingredients).to eq(ingredients)
61
+ end
62
+
63
+ it "parses the instructions" do
64
+ expect(subject.parse_recipe_instructions).to eq(instructions)
65
+ end
66
+
67
+ it "parses the description" do
68
+ expect(subject.parse_description).to eq("This creamy, crowd-pleasing side dish features summer squash, carrots, stuffing mix and cheese baked in a creamy sauce. Crispy on top and creamy in the center, it's a winner on any menu.")
69
+ end
70
+ end
71
+
@@ -1,14 +1,17 @@
1
1
  describe Tychus::Parsers::FoodNetworkParser do
2
- subject { Tychus::Parsers::FoodNetworkParser.new(food_network_uri) }
3
-
4
2
  context "When the page has a single ingredients group" do
5
3
  # NOTE: this specific uri has an author who's hidden in an anchor
6
4
  # tag that also references her TV show and episode the recipe
7
5
  # appeared. #parse_author may be complex for this edge case
8
6
  # NOTE: author is formatted using schema.org/Person
9
7
 
10
- # 7/26/14 source: http://www.foodnetwork.com/recipes/ina-garten/grilled-panzanella-recipe.html?ic1=obinsite^
11
- let(:food_network_uri) { File.expand_path("../../fixtures/food_network_single_ingredients_group.html", __FILE__) }
8
+ subject do
9
+ VCR.use_cassette("food_network_single_ingredients_group_1") do
10
+ Tychus::Parsers::FoodNetworkParser.new(food_network_uri)
11
+ end
12
+ end
13
+
14
+ let(:food_network_uri) { "http://www.foodnetwork.com/recipes/ina-garten/grilled-panzanella-recipe.html?ic1=obinsite" }
12
15
  let(:ingredients) {
13
16
  [
14
17
  "Good olive oil",
@@ -3,9 +3,13 @@ describe Tychus::Parsers::KraftRecipesParser do
3
3
  Tychus::Parsers::NullObject
4
4
  end
5
5
 
6
- subject { Tychus::Parsers::KraftRecipesParser.new(kraft_recipes_uri) }
6
+ subject do
7
+ VCR.use_cassette("kraft_recipes_1") do
8
+ Tychus::Parsers::KraftRecipesParser.new(kraft_recipes_uri)
9
+ end
10
+ end
7
11
 
8
- let(:kraft_recipes_uri) { File.expand_path("../../fixtures/kraftrecipes.html", __FILE__) }
12
+ let(:kraft_recipes_uri) { "http://www.kraftrecipes.com/recipes/sweet-bbq-chicken-kabobs-92092.aspx" }
9
13
 
10
14
  let(:ingredients) {
11
15
  [
@@ -1,7 +1,11 @@
1
1
  describe Tychus::Parsers::SchemaOrgParser do
2
2
  context "on creation" do
3
- let(:allrecipes_uri) { File.expand_path("../../fixtures/allrecipes.html", __FILE__) }
4
- let(:parser) { Tychus::Parsers::AllrecipesParser.new(allrecipes_uri) }
3
+ let(:allrecipes_uri) { 'http://allrecipes.com/Recipe/Chicken-Pot-Pie-IX/Detail.aspx' }
4
+ let(:parser) do
5
+ VCR.use_cassette("allrecipes_1") do
6
+ Tychus::Parsers::AllrecipesParser.new(allrecipes_uri)
7
+ end
8
+ end
5
9
 
6
10
  it "strips the Review microformat from node to prevent name collisions with item properties of different microformats" do
7
11
  expect(parser.recipe_doc.css(parser.review_doc)).to be_empty
data/spec/spec_helper.rb CHANGED
@@ -5,7 +5,7 @@ require 'vcr'
5
5
  require 'pry'
6
6
 
7
7
  VCR.configure do |c|
8
- c.cassette_library_dir = 'fixtures/cassettes'
8
+ c.cassette_library_dir = 'spec/fixtures/cassettes'
9
9
  c.hook_into :webmock
10
10
  end
11
11
 
@@ -1,25 +1,63 @@
1
- # at this point intime, allrecipes/campbellskitchen/kraftrecipes
2
- # fixtures all contain both schema.org/recipe canonical uri elements as
1
+ # at this point intime, allrecipes/campbellskitchen/kraftrecipes all contain both schema.org/recipe canonical uri elements as
3
2
  # well as open graph canonical uri elements
4
3
  #
5
4
  describe Tychus::URIResolver do
5
+
6
6
  context "when schema.org and opengraph canonical uri properties exist" do
7
- let(:allrecipes_uri) { File.expand_path("../fixtures/allrecipes.html", __FILE__) }
8
- let(:campbells_kitchen_uri) { File.expand_path("../fixtures/campbellskitchen.html", __FILE__) }
9
- let(:kraft_recipes_uri) { File.expand_path("../fixtures/kraftrecipes.html", __FILE__) }
7
+ let(:allrecipes_uri) { 'http://allrecipes.com/Recipe/Chicken-Pot-Pie-IX/Detail.aspx' }
8
+ let(:kraft_recipes_uri) { "http://www.kraftrecipes.com/recipes/sweet-bbq-chicken-kabobs-92092.aspx" }
10
9
  let(:examples) { [
11
- [allrecipes_uri, 'allrecipes.com'],
12
- [campbells_kitchen_uri, 'campbellskitchen.com'],
13
- [kraft_recipes_uri, 'kraftrecipes.com']
10
+ [allrecipes_uri, 'allrecipes.com', 'allrecipes_1'],
11
+ [kraft_recipes_uri, 'kraftrecipes.com', 'kraft_recipes_1']
14
12
  ]}
15
13
 
16
- it "able to retrieve it" do
14
+ it "retrieves the host" do
17
15
  examples.each do |html|
18
16
  page = html[0]
19
17
  host = Regexp.new(html[1])
20
- expect(Tychus::URIResolver.new(page).resolve_uri).to match_regex(host)
18
+
19
+ VCR.use_cassette(html[2]) do
20
+ expect(Tychus::URIResolver.new(page).resolve_uri.host).to match_regex(host)
21
+ end
22
+
21
23
  end
22
24
  end
25
+
23
26
  end
27
+
28
+ context "when only opengraph canonical uri properties exist" do
29
+ let(:campbells_kitchen_uri) { "http://www.campbellskitchen.com/recipes/squash-casserole-24122" }
30
+ let(:examples) {
31
+ [
32
+ [campbells_kitchen_uri, 'campbellskitchen.com', 'campbells_kitchen_1']
33
+ ]
34
+ }
35
+
36
+ it "is missing a schema org microformat" do
37
+ examples.each do |html|
38
+ page = html[0]
39
+ cassette = html[2]
40
+
41
+ VCR.use_cassette(cassette) do
42
+ node = Nokogiri::HTML(open(page))
43
+ expect(node.css('[itemprop="http://schema.org/Recipe"]')).to be_empty
44
+ end
45
+ end
46
+ end
47
+
48
+ it "retrieves the host" do
49
+ examples.each do |html|
50
+ page = html[0]
51
+ host = Regexp.new(html[1])
52
+ cassette = html[2]
53
+
54
+ VCR.use_cassette(cassette) do
55
+ expect(Tychus::URIResolver.new(page).resolve_uri.host).to match_regex(host)
56
+ end
57
+ end
58
+
59
+ end
60
+ end
61
+
24
62
  end
25
63
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tychus
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Wayne Yang
@@ -164,24 +164,30 @@ files:
164
164
  - README.md
165
165
  - Rakefile
166
166
  - lib/tychus.rb
167
+ - lib/tychus/meta_parser.rb
167
168
  - lib/tychus/parser_selector.rb
168
169
  - lib/tychus/parsers.rb
169
170
  - lib/tychus/parsers/allrecipes_parser.rb
170
171
  - lib/tychus/parsers/base.rb
172
+ - lib/tychus/parsers/campbells_kitchen_parser.rb
171
173
  - lib/tychus/parsers/food_network_parser.rb
172
174
  - lib/tychus/parsers/kraft_recipes_parser.rb
175
+ - lib/tychus/parsers/open_graph_protocol_parser.rb
173
176
  - lib/tychus/parsers/schema_org_parser.rb
174
177
  - lib/tychus/uri_resolver.rb
175
178
  - lib/tychus/version.rb
176
179
  - questions.md
177
- - spec/fixtures/allrecipes.html
178
- - spec/fixtures/campbellskitchen.html
179
- - spec/fixtures/food_network_double_ingredients_group.html
180
- - spec/fixtures/food_network_single_ingredients_group.html
181
- - spec/fixtures/kraftrecipes.html
180
+ - spec/fixtures/cassettes/allrecipes_1.yml
181
+ - spec/fixtures/cassettes/campbells_kitchen_1.yml
182
+ - spec/fixtures/cassettes/food_network_single_ingredients_group_1.yml
183
+ - spec/fixtures/cassettes/kraft_recipes_1.yml
184
+ - spec/fixtures/cassettes/meta_parser_og_protocol_uri.yml
185
+ - spec/fixtures/cassettes/meta_parser_schema_org_microformat_uri.yml
186
+ - spec/meta_parser.rb
182
187
  - spec/parser_selector.rb
183
188
  - spec/parsers/allrecipes_parser_spec.rb
184
189
  - spec/parsers/base_spec.rb
190
+ - spec/parsers/campbells_kitchen_parser_spec.rb
185
191
  - spec/parsers/food_network_parser_spec.rb
186
192
  - spec/parsers/kraft_recipes_parser_spec.rb
187
193
  - spec/parsers/schema_org_parser_spec.rb
@@ -214,14 +220,17 @@ signing_key:
214
220
  specification_version: 4
215
221
  summary: Web recipe parser
216
222
  test_files:
217
- - spec/fixtures/allrecipes.html
218
- - spec/fixtures/campbellskitchen.html
219
- - spec/fixtures/food_network_double_ingredients_group.html
220
- - spec/fixtures/food_network_single_ingredients_group.html
221
- - spec/fixtures/kraftrecipes.html
223
+ - spec/fixtures/cassettes/allrecipes_1.yml
224
+ - spec/fixtures/cassettes/campbells_kitchen_1.yml
225
+ - spec/fixtures/cassettes/food_network_single_ingredients_group_1.yml
226
+ - spec/fixtures/cassettes/kraft_recipes_1.yml
227
+ - spec/fixtures/cassettes/meta_parser_og_protocol_uri.yml
228
+ - spec/fixtures/cassettes/meta_parser_schema_org_microformat_uri.yml
229
+ - spec/meta_parser.rb
222
230
  - spec/parser_selector.rb
223
231
  - spec/parsers/allrecipes_parser_spec.rb
224
232
  - spec/parsers/base_spec.rb
233
+ - spec/parsers/campbells_kitchen_parser_spec.rb
225
234
  - spec/parsers/food_network_parser_spec.rb
226
235
  - spec/parsers/kraft_recipes_parser_spec.rb
227
236
  - spec/parsers/schema_org_parser_spec.rb