tychus 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tychus/meta_parser.rb +55 -0
- data/lib/tychus/parser_selector.rb +4 -3
- data/lib/tychus/parsers/campbells_kitchen_parser.rb +10 -0
- data/lib/tychus/parsers/open_graph_protocol_parser.rb +35 -0
- data/lib/tychus/parsers/schema_org_parser.rb +19 -19
- data/lib/tychus/parsers.rb +2 -0
- data/lib/tychus/uri_resolver.rb +3 -3
- data/lib/tychus/version.rb +1 -1
- data/lib/tychus.rb +3 -2
- data/spec/fixtures/cassettes/allrecipes_1.yml +14403 -0
- data/spec/fixtures/cassettes/campbells_kitchen_1.yml +3364 -0
- data/spec/fixtures/cassettes/food_network_single_ingredients_group_1.yml +4884 -0
- data/spec/fixtures/cassettes/kraft_recipes_1.yml +3638 -0
- data/spec/fixtures/cassettes/meta_parser_og_protocol_uri.yml +6733 -0
- data/spec/fixtures/cassettes/meta_parser_schema_org_microformat_uri.yml +9605 -0
- data/spec/meta_parser.rb +36 -0
- data/spec/parsers/allrecipes_parser_spec.rb +7 -6
- data/spec/parsers/campbells_kitchen_parser_spec.rb +71 -0
- data/spec/parsers/food_network_parser_spec.rb +7 -4
- data/spec/parsers/kraft_recipes_parser_spec.rb +6 -2
- data/spec/parsers/schema_org_parser_spec.rb +6 -2
- data/spec/spec_helper.rb +1 -1
- data/spec/uri_resolver_spec.rb +48 -10
- metadata +20 -11
- data/spec/fixtures/allrecipes.html +0 -3003
- data/spec/fixtures/campbellskitchen.html +0 -2190
- data/spec/fixtures/food_network_double_ingredients_group.html +0 -3725
- data/spec/fixtures/food_network_single_ingredients_group.html +0 -4930
- data/spec/fixtures/kraftrecipes.html +0 -2722
data/spec/meta_parser.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
describe Tychus::MetaParser do
|
2
|
+
subject do
|
3
|
+
VCR.use_cassette("meta_parser_#{uri}") do
|
4
|
+
Tychus::MetaParser.new(__send__(uri)).parse
|
5
|
+
end
|
6
|
+
end
|
7
|
+
|
8
|
+
shared_examples_for "a_parsable_uri" do
|
9
|
+
its(:uri_object) { is_expected.to be_present }
|
10
|
+
its(:uri) { is_expected.to be_present }
|
11
|
+
its(:host) { is_expected.to be_present }
|
12
|
+
end
|
13
|
+
|
14
|
+
context "when uri uses fb:og protocol" do
|
15
|
+
let(:uri) { "og_protocol_uri" }
|
16
|
+
let(:og_protocol_uri) { "http://www.campbellskitchen.com/recipes/squash-casserole-24122" }
|
17
|
+
|
18
|
+
it_behaves_like "a_parsable_uri"
|
19
|
+
|
20
|
+
its(:schema_org_microformat?) { is_expected.to be_falsy }
|
21
|
+
its(:open_graph_protocol) { is_expected.to be_present }
|
22
|
+
its(:open_graph_protocol?) { is_expected.to be_truthy }
|
23
|
+
end
|
24
|
+
|
25
|
+
context "when uri uses schema.org microformat" do
|
26
|
+
let(:uri) { "schema_org_microformat_uri" }
|
27
|
+
let(:schema_org_microformat_uri) { "http://allrecipes.com/Recipe/Chicken-Pot-Pie-IX/Detail.aspx?soid=recs_recipe_2" }
|
28
|
+
|
29
|
+
it_behaves_like "a_parsable_uri"
|
30
|
+
|
31
|
+
its(:schema_org_microformat?) { is_expected.to be_truthy }
|
32
|
+
its(:open_graph_protocol) { is_expected.to be_nil }
|
33
|
+
its(:open_graph_protocol?) { is_expected.to be_falsy }
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
@@ -1,8 +1,10 @@
|
|
1
|
-
describe Tychus::Parsers::AllrecipesParser do
|
2
|
-
subject
|
3
|
-
|
4
|
-
|
5
|
-
|
1
|
+
describe Tychus::Parsers::AllrecipesParser, :vcr do
|
2
|
+
subject do
|
3
|
+
VCR.use_cassette("allrecipes_1") do
|
4
|
+
Tychus.parse(allrecipes_uri)
|
5
|
+
end
|
6
|
+
end
|
7
|
+
let(:allrecipes_uri) { 'http://allrecipes.com/Recipe/Chicken-Pot-Pie-IX/Detail.aspx' }
|
6
8
|
let(:ingredients) {
|
7
9
|
[
|
8
10
|
"1 pound skinless, boneless chicken breast halves - cubed",
|
@@ -41,4 +43,3 @@ describe Tychus::Parsers::AllrecipesParser do
|
|
41
43
|
its(:image) { is_expected.to eq("http://images.media-allrecipes.com/userphotos/250x250/00/14/23/142350.jpg") }
|
42
44
|
its(:description) { is_expected.to eq("\"A delicious chicken pie made from scratch with carrots, peas and celery.\"") }
|
43
45
|
end
|
44
|
-
|
@@ -0,0 +1,71 @@
|
|
1
|
+
describe Tychus::Parsers::CampbellsKitchenParser do
|
2
|
+
before { pending "Does NOT use schema.org microformat" }
|
3
|
+
subject do
|
4
|
+
VCR.use_cassette("campbells_kitchen_1") do
|
5
|
+
Tychus::Parsers::CampbellsKitchenParser.new(campbells_kitchen_uri)
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
let(:campbells_kitchen_uri) { "http://www.campbellskitchen.com/recipes/squash-casserole-24122" }
|
10
|
+
let(:ingredients) {
|
11
|
+
[
|
12
|
+
"3 cups Pepperidge Farm® Cornbread Stuffing",
|
13
|
+
"1/4 cup butter, melted (1/2 stick)",
|
14
|
+
"1 can (10 3/4 ounces) Campbell's® Condensed Cream of Chicken Soup (Regular or 98% Fat Free)",
|
15
|
+
"1/2 cup sour cream",
|
16
|
+
"2 small yellow squash, shredded (about 2 cups)",
|
17
|
+
"2 small zucchini, shredded (about 2 cups)",
|
18
|
+
"1 small carrot, shredded (about 1/3 cup)",
|
19
|
+
"1/2 cup shredded Cheddar cheese (about 2 ounces)"
|
20
|
+
]
|
21
|
+
}
|
22
|
+
|
23
|
+
let(:instructions) {
|
24
|
+
[
|
25
|
+
"Stir the stuffing and butter in a large bowl. Reserve 1/2 cup of the stuffing mixture and spoon the remaining stuffing mixture into a 2-quart shallow baking dish.",
|
26
|
+
"Stir the soup, sour cream, yellow squash, zucchini, carrot and cheese in a medium bowl. Spread the mixture over the stuffing mixture and sprinkle with the reserved stuffing mixture.",
|
27
|
+
"Bake at 350°F. for 40 minutes or until the mixture is hot and bubbling."
|
28
|
+
]
|
29
|
+
}
|
30
|
+
|
31
|
+
it "parses the name of the recipe" do
|
32
|
+
expect(subject.parse_name).to eq("Squash Casserole")
|
33
|
+
end
|
34
|
+
|
35
|
+
it "parses the author" do
|
36
|
+
expect(subject.parse_author).to eq(nil)
|
37
|
+
end
|
38
|
+
|
39
|
+
it "parses the prep time" do
|
40
|
+
expect(subject.parse_prep_time).to eq("PT15M")
|
41
|
+
end
|
42
|
+
|
43
|
+
it "parses the cook time" do
|
44
|
+
expect(subject.parse_cook_time).to eq("PT40M")
|
45
|
+
end
|
46
|
+
|
47
|
+
it "parses the total time" do
|
48
|
+
expect(subject.parse_total_time).to eq("PT55M")
|
49
|
+
end
|
50
|
+
|
51
|
+
it "parses the yield" do
|
52
|
+
expect(subject.parse_recipe_yield).to eq("8")
|
53
|
+
end
|
54
|
+
|
55
|
+
it "parses the image" do
|
56
|
+
expect(subject.parse_image).to eq("http://www.campbellskitchen.com/recipeimages/squash-casserole-large-24122.jpg")
|
57
|
+
end
|
58
|
+
|
59
|
+
it "parses the ingredients" do
|
60
|
+
expect(subject.parse_ingredients).to eq(ingredients)
|
61
|
+
end
|
62
|
+
|
63
|
+
it "parses the instructions" do
|
64
|
+
expect(subject.parse_recipe_instructions).to eq(instructions)
|
65
|
+
end
|
66
|
+
|
67
|
+
it "parses the description" do
|
68
|
+
expect(subject.parse_description).to eq("This creamy, crowd-pleasing side dish features summer squash, carrots, stuffing mix and cheese baked in a creamy sauce. Crispy on top and creamy in the center, it's a winner on any menu.")
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
@@ -1,14 +1,17 @@
|
|
1
1
|
describe Tychus::Parsers::FoodNetworkParser do
|
2
|
-
subject { Tychus::Parsers::FoodNetworkParser.new(food_network_uri) }
|
3
|
-
|
4
2
|
context "When the page has a single ingredients group" do
|
5
3
|
# NOTE: this specific uri has an author who's hidden in an anchor
|
6
4
|
# tag that also references her TV show and episode the recipe
|
7
5
|
# appeared. #parse_author may be complex for this edge case
|
8
6
|
# NOTE: author is formatted using schema.org/Person
|
9
7
|
|
10
|
-
|
11
|
-
|
8
|
+
subject do
|
9
|
+
VCR.use_cassette("food_network_single_ingredients_group_1") do
|
10
|
+
Tychus::Parsers::FoodNetworkParser.new(food_network_uri)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
let(:food_network_uri) { "http://www.foodnetwork.com/recipes/ina-garten/grilled-panzanella-recipe.html?ic1=obinsite" }
|
12
15
|
let(:ingredients) {
|
13
16
|
[
|
14
17
|
"Good olive oil",
|
@@ -3,9 +3,13 @@ describe Tychus::Parsers::KraftRecipesParser do
|
|
3
3
|
Tychus::Parsers::NullObject
|
4
4
|
end
|
5
5
|
|
6
|
-
subject
|
6
|
+
subject do
|
7
|
+
VCR.use_cassette("kraft_recipes_1") do
|
8
|
+
Tychus::Parsers::KraftRecipesParser.new(kraft_recipes_uri)
|
9
|
+
end
|
10
|
+
end
|
7
11
|
|
8
|
-
let(:kraft_recipes_uri) {
|
12
|
+
let(:kraft_recipes_uri) { "http://www.kraftrecipes.com/recipes/sweet-bbq-chicken-kabobs-92092.aspx" }
|
9
13
|
|
10
14
|
let(:ingredients) {
|
11
15
|
[
|
@@ -1,7 +1,11 @@
|
|
1
1
|
describe Tychus::Parsers::SchemaOrgParser do
|
2
2
|
context "on creation" do
|
3
|
-
let(:allrecipes_uri) {
|
4
|
-
let(:parser)
|
3
|
+
let(:allrecipes_uri) { 'http://allrecipes.com/Recipe/Chicken-Pot-Pie-IX/Detail.aspx' }
|
4
|
+
let(:parser) do
|
5
|
+
VCR.use_cassette("allrecipes_1") do
|
6
|
+
Tychus::Parsers::AllrecipesParser.new(allrecipes_uri)
|
7
|
+
end
|
8
|
+
end
|
5
9
|
|
6
10
|
it "strips the Review microformat from node to prevent name collisions with item properties of different microformats" do
|
7
11
|
expect(parser.recipe_doc.css(parser.review_doc)).to be_empty
|
data/spec/spec_helper.rb
CHANGED
data/spec/uri_resolver_spec.rb
CHANGED
@@ -1,25 +1,63 @@
|
|
1
|
-
# at this point intime, allrecipes/campbellskitchen/kraftrecipes
|
2
|
-
# fixtures all contain both schema.org/recipe canonical uri elements as
|
1
|
+
# at this point in time, allrecipes/campbellskitchen/kraftrecipes all contain both schema.org/recipe canonical uri elements as
|
3
2
|
# well as open graph canonical uri elements
|
4
3
|
#
|
5
4
|
describe Tychus::URIResolver do
|
5
|
+
|
6
6
|
context "when schema.org and opengraph canonical uri properties exist" do
|
7
|
-
let(:allrecipes_uri) {
|
8
|
-
let(:
|
9
|
-
let(:kraft_recipes_uri) { File.expand_path("../fixtures/kraftrecipes.html", __FILE__) }
|
7
|
+
let(:allrecipes_uri) { 'http://allrecipes.com/Recipe/Chicken-Pot-Pie-IX/Detail.aspx' }
|
8
|
+
let(:kraft_recipes_uri) { "http://www.kraftrecipes.com/recipes/sweet-bbq-chicken-kabobs-92092.aspx" }
|
10
9
|
let(:examples) { [
|
11
|
-
[allrecipes_uri, 'allrecipes.com'],
|
12
|
-
[
|
13
|
-
[kraft_recipes_uri, 'kraftrecipes.com']
|
10
|
+
[allrecipes_uri, 'allrecipes.com', 'allrecipes_1'],
|
11
|
+
[kraft_recipes_uri, 'kraftrecipes.com', 'kraft_recipes_1']
|
14
12
|
]}
|
15
13
|
|
16
|
-
it "
|
14
|
+
it "retrieves the host" do
|
17
15
|
examples.each do |html|
|
18
16
|
page = html[0]
|
19
17
|
host = Regexp.new(html[1])
|
20
|
-
|
18
|
+
|
19
|
+
VCR.use_cassette(html[2]) do
|
20
|
+
expect(Tychus::URIResolver.new(page).resolve_uri.host).to match_regex(host)
|
21
|
+
end
|
22
|
+
|
21
23
|
end
|
22
24
|
end
|
25
|
+
|
23
26
|
end
|
27
|
+
|
28
|
+
context "when only opengraph canonical uri properties exist" do
|
29
|
+
let(:campbells_kitchen_uri) { "http://www.campbellskitchen.com/recipes/squash-casserole-24122" }
|
30
|
+
let(:examples) {
|
31
|
+
[
|
32
|
+
[campbells_kitchen_uri, 'campbellskitchen.com', 'campbells_kitchen_1']
|
33
|
+
]
|
34
|
+
}
|
35
|
+
|
36
|
+
it "is missing a schema org microformat" do
|
37
|
+
examples.each do |html|
|
38
|
+
page = html[0]
|
39
|
+
cassette = html[2]
|
40
|
+
|
41
|
+
VCR.use_cassette(cassette) do
|
42
|
+
node = Nokogiri::HTML(open(page))
|
43
|
+
expect(node.css('[itemprop="http://schema.org/Recipe"]')).to be_empty
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
it "retrieves the host" do
|
49
|
+
examples.each do |html|
|
50
|
+
page = html[0]
|
51
|
+
host = Regexp.new(html[1])
|
52
|
+
cassette = html[2]
|
53
|
+
|
54
|
+
VCR.use_cassette(cassette) do
|
55
|
+
expect(Tychus::URIResolver.new(page).resolve_uri.host).to match_regex(host)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
24
62
|
end
|
25
63
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tychus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Wayne Yang
|
@@ -164,24 +164,30 @@ files:
|
|
164
164
|
- README.md
|
165
165
|
- Rakefile
|
166
166
|
- lib/tychus.rb
|
167
|
+
- lib/tychus/meta_parser.rb
|
167
168
|
- lib/tychus/parser_selector.rb
|
168
169
|
- lib/tychus/parsers.rb
|
169
170
|
- lib/tychus/parsers/allrecipes_parser.rb
|
170
171
|
- lib/tychus/parsers/base.rb
|
172
|
+
- lib/tychus/parsers/campbells_kitchen_parser.rb
|
171
173
|
- lib/tychus/parsers/food_network_parser.rb
|
172
174
|
- lib/tychus/parsers/kraft_recipes_parser.rb
|
175
|
+
- lib/tychus/parsers/open_graph_protocol_parser.rb
|
173
176
|
- lib/tychus/parsers/schema_org_parser.rb
|
174
177
|
- lib/tychus/uri_resolver.rb
|
175
178
|
- lib/tychus/version.rb
|
176
179
|
- questions.md
|
177
|
-
- spec/fixtures/
|
178
|
-
- spec/fixtures/
|
179
|
-
- spec/fixtures/
|
180
|
-
- spec/fixtures/
|
181
|
-
- spec/fixtures/
|
180
|
+
- spec/fixtures/cassettes/allrecipes_1.yml
|
181
|
+
- spec/fixtures/cassettes/campbells_kitchen_1.yml
|
182
|
+
- spec/fixtures/cassettes/food_network_single_ingredients_group_1.yml
|
183
|
+
- spec/fixtures/cassettes/kraft_recipes_1.yml
|
184
|
+
- spec/fixtures/cassettes/meta_parser_og_protocol_uri.yml
|
185
|
+
- spec/fixtures/cassettes/meta_parser_schema_org_microformat_uri.yml
|
186
|
+
- spec/meta_parser.rb
|
182
187
|
- spec/parser_selector.rb
|
183
188
|
- spec/parsers/allrecipes_parser_spec.rb
|
184
189
|
- spec/parsers/base_spec.rb
|
190
|
+
- spec/parsers/campbells_kitchen_parser_spec.rb
|
185
191
|
- spec/parsers/food_network_parser_spec.rb
|
186
192
|
- spec/parsers/kraft_recipes_parser_spec.rb
|
187
193
|
- spec/parsers/schema_org_parser_spec.rb
|
@@ -214,14 +220,17 @@ signing_key:
|
|
214
220
|
specification_version: 4
|
215
221
|
summary: Web recipe parser
|
216
222
|
test_files:
|
217
|
-
- spec/fixtures/
|
218
|
-
- spec/fixtures/
|
219
|
-
- spec/fixtures/
|
220
|
-
- spec/fixtures/
|
221
|
-
- spec/fixtures/
|
223
|
+
- spec/fixtures/cassettes/allrecipes_1.yml
|
224
|
+
- spec/fixtures/cassettes/campbells_kitchen_1.yml
|
225
|
+
- spec/fixtures/cassettes/food_network_single_ingredients_group_1.yml
|
226
|
+
- spec/fixtures/cassettes/kraft_recipes_1.yml
|
227
|
+
- spec/fixtures/cassettes/meta_parser_og_protocol_uri.yml
|
228
|
+
- spec/fixtures/cassettes/meta_parser_schema_org_microformat_uri.yml
|
229
|
+
- spec/meta_parser.rb
|
222
230
|
- spec/parser_selector.rb
|
223
231
|
- spec/parsers/allrecipes_parser_spec.rb
|
224
232
|
- spec/parsers/base_spec.rb
|
233
|
+
- spec/parsers/campbells_kitchen_parser_spec.rb
|
225
234
|
- spec/parsers/food_network_parser_spec.rb
|
226
235
|
- spec/parsers/kraft_recipes_parser_spec.rb
|
227
236
|
- spec/parsers/schema_org_parser_spec.rb
|