diffbot 0.1.15 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/diffbot.gemspec +1 -1
- data/lib/diffbot/frontpage.rb +7 -3
- data/test/fixtures/frontpage.json +1 -0
- data/test/frontpage_test.rb +23 -0
- metadata +3 -2
- data/lib/diffbot/frontpage/dml_parser.rb +0 -92
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 62bd8b70e92af249cc9d7eed3fdd2273346b8957
|
|
4
|
+
data.tar.gz: 01b3e6ceae1c9c19910fa4509d14a193e5c68d29
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6466e8f042508d444b32822e446a084264ec55964d0368ebd442a623fdd4052e6f436fa817e4e57fd33e3ec27403bcf19a0b7c5742284431c7fb0a3a893f0c15
|
|
7
|
+
data.tar.gz: 1d6f64a55b526964130f0ea9e81a30a993f551504d3c56d03bed6f78f077302e8906393ce7de130b7abf224318547fd43445dfb0e23c184a8b63670209e14529
|
data/Gemfile.lock
CHANGED
data/diffbot.gemspec
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Gem::Specification.new do |s|
|
|
2
2
|
s.name = "diffbot"
|
|
3
|
-
s.version = "0.1.15"
|
|
3
|
+
s.version = "0.1.16"
|
|
4
4
|
s.description = "Diffbot provides a concise API for analyzing and extracting semantic information from web pages using Diffbot (http://www.diffbot.com)."
|
|
5
5
|
s.summary = "Ruby interface to the Diffbot API "
|
|
6
6
|
s.authors = ["Nicolas Sanguinetti", "Roman Greshny"]
|
data/lib/diffbot/frontpage.rb
CHANGED
|
@@ -23,7 +23,7 @@ module Diffbot
|
|
|
23
23
|
# frontpage = Diffbot::Frontpage.fetch(url, api_token)
|
|
24
24
|
#
|
|
25
25
|
# Returns a Diffbot::Frontpage.
|
|
26
|
-
def self.fetch(url, token=Diffbot.token, parser=
|
|
26
|
+
def self.fetch(url, token=Diffbot.token, parser=Yajl::Parser.method(:parse))
|
|
27
27
|
request = Diffbot::Request.new(token)
|
|
28
28
|
response = request.perform(:get, endpoint) do |req|
|
|
29
29
|
req[:query][:url] = url
|
|
@@ -39,6 +39,12 @@ module Diffbot
|
|
|
39
39
|
"http://www.diffbot.com/api/frontpage"
|
|
40
40
|
end
|
|
41
41
|
|
|
42
|
+
property :type
|
|
43
|
+
|
|
44
|
+
property :url
|
|
45
|
+
|
|
46
|
+
property :sections
|
|
47
|
+
|
|
42
48
|
# Public: The title of the page.
|
|
43
49
|
property :title
|
|
44
50
|
|
|
@@ -57,5 +63,3 @@ module Diffbot
|
|
|
57
63
|
coerce_property :items, collection: Item
|
|
58
64
|
end
|
|
59
65
|
end
|
|
60
|
-
|
|
61
|
-
require "diffbot/frontpage/dml_parser"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"icon":"http://s.m.huffpost.com/assets/favicon-4e39cb9f22f072465b467122478c9cb0db0ba45ba7f9ae415ac7f55194515fc6.ico","type":"frontpage","title":"Breaking News and Opinion on The Huffington Post","url":"http://www.huffingtonpost.com/","sections":[{"type":"nav","items":[{"title":"Advertise","url":"http://advertising.aol.com/properties"},{"title":"Careers","url":"https://careers.smartrecruiters.com/Aol/huffington-post/"},{"title":"User Agreement","url":"http://www.huffingtonpost.com/terms.html"},{"title":"Privacy","url":"http://privacy.aol.com"},{"title":"Comment Policy","url":"http://www.huffingtonpost.com/comment/policy/"},{"title":"About Us","url":"http://www.huffingtonpost.com/p/huffington-post.html"},{"title":"About Our Ads","url":"http://adinfo.aol.com/about-our-ads/"},{"title":"Contact Us","url":"http://www.huffingtonpost.com/contact/"},{"title":"Archive","url":"http://www.huffingtonpost.com/archive/"}]},{"type":"profile","items":[{"title":"9.05 M","url":"http://facebook.com/HuffingtonPost"},{"title":"9.15 M","url":"https://twitter.com/HuffingtonPost"},{"title":"470 K","url":"http://pinterest.com/huffingtonpost"},{"title":"Podcast","url":"https://itunes.apple.com/us/artist/the-huffington-post/id908797543?mt=2"}]},{"type":"location","items":[{"title":"\u2018Jeopardy!\u2019 Contestant Gets Away With Flipping Off America","url":"http://www.huffingtonpost.com/entry/jeopardy-contestant-gets-away-with-flipping-off-america_us_58ada4ede4b03d80af7101f4?a8vewl1bj2kpzj8aor&ncid=inblnkushpmg00000009"},{"title":"Jamie Foxx Target Of Racial Slur While Out Dining","url":"http://www.huffingtonpost.com/entry/jamie-foxx-racist-slur-croatia_us_58ad936ae4b04a0b274e6570?76j63hroe191qncdi&ncid=inblnkushpmg00000009"},{"title":"Horror Film About Racism Earns Coveted 100 Percent On Rotten Tomatoes","url":"http://www.huffingtonpost.com/entry/jordan-peele-get-out-rotten-tomatoes_us_58ae08dbe4b01406012f7905?37mtxl0nvaafxhia4i&ncid=inblnkushpmg00000009"},{"title":"The List Of Cities That 
Want The 2024 Olympics Is Down To Two","url":"http://www.huffingtonpost.com/entry/2024-olympics-two-cities_us_58adee80e4b05ca474a03bc0?qlv04nvoq5j5v4e7b9&ncid=inblnkushpmg00000009"},{"title":"An Up-Close Look At A Delaware-Sized Iceberg About To Break From Antarctica","url":"http://www.huffingtonpost.com/entry/larsen-c-video-iceberg-antartica_us_58ae03f4e4b057efdce8b3de?kbj2lib9c2384cxr&ncid=inblnkushpmg00000009"},{"title":"Andy Warhol\u2019s Death Might Not Have Been As \u2018Routine\u2019 As First Thought","url":"http://www.huffingtonpost.com/entry/andy-warhol-death_us_58adbb38e4b04a0b274eb96d?h6xp2k1zrcw61or§ion=us_arts&ncid=inblnkushpmg00000009"},{"title":"Met Director Fears Elimination Of NEA Marks \u2018New Assault\u2019 On Art","url":"http://www.huffingtonpost.com/entry/met-director-fears-elimination-of-nea-marks-new-assault-on-art_us_58ab6476e4b0f077b3ed1677?qbyf4qnc11akwvzpvi?clear&ncid=inblnkushpmg00000009"},{"title":"This Is What The Oscars Looked Like 30 Years Ago","url":"http://www.huffingtonpost.com/entry/the-oscars-1987_us_58ab3ad1e4b07028b702d678?ncid=inblnkushpmg00000009"},{"title":"Baseball Is Making One Of Its Biggest Rule Changes Ever","url":"http://www.huffingtonpost.com/entry/baseball-intentional-walk-change_us_58ad128ee4b04a0b274e172e?ncid=inblnkushpmg00000009"},{"title":"Accused Drunk Driver Does Cartwheels During Sobriety Test","url":"http://www.huffingtonpost.com/entry/cartwheels-sobriety-test_us_58ae0bc1e4b05ca474a06dc0?ropw4s4i&ncid=inblnkushpmg00000009"},{"title":"It\u2019s Getting Better, But Let\u2019s Not Forget Hollywood Still Has A Diversity Issue","url":"http://www.huffingtonpost.com/entry/2017-hollywood-diversity-report_us_58ac6c0ce4b06b61e61e2db5?ncid=inblnkushpmg00000009"},{"title":"Viral Photos Show Black Women Simply Unbothered By Police 
Intimidation","url":"http://www.huffingtonpost.com/entry/photos-shows-black-women-simply-unbothered-by-police-intimidation_us_58adbfe4e4b03d80af713f1d?ncid=inblnkushpmg00000009"},{"title":"Muslim Activists\u2019 Fundraiser For Vandalized Jewish Cemetery Soars Past Initial Goal","url":"http://www.huffingtonpost.com/entry/muslim-activists-fundraiser-for-desecrated-jewish-cemetery-soars-past-initial-goal_us_58adbb6ce4b04a0b274eb9c9?y29uugggmf1xajor&ncid=inblnkushpmg00000009"},{"title":"This Feminist Film Got Snubbed By The Oscars, But You Should Watch It Anyway","url":"http://www.huffingtonpost.com/entry/this-feminist-film-got-snubbed-by-the-oscars-you-should-watch-it-anyway_us_58ab6b2ce4b0f077b3ed2022?md72s5iwqy9pgy14i§ion=us_arts&ncid=inblnkushpmg00000009"},{"title":"An Open Letter To Attorney General Jeff Sessions, From A Fellow Alabamian","url":"http://www.huffingtonpost.com/entry/open-letter-to-jeff-sessions_us_58ae1920e4b01406012f93e6?ncid=inblnkushpmg00000009"},{"title":"Liberals Are Not \u2018Helping\u2019 Trump, And Supporting Him Isn\u2019t Like Being Gay","url":"http://www.huffingtonpost.com/entry/no-liberals-are-not-helping-trump-and-supporting_us_58acc68be4b0d818c4f0a340?ncid=inblnkushpmg00000009"},{"title":"Michigan Teen Believes Shirt Was Left On Her Car Windshield As A Trap","url":"http://www.huffingtonpost.com/entry/michigan-teen-shirt-windshiel_us_58adb942e4b0d0a6ef46d75a?aprwfuw6w4dvx6r&ncid=inblnkushpmg00000009"},{"title":"With Growth Of The Gig Economy, States Rethink How Workers Get Benefits","url":"http://www.huffingtonpost.com/entry/with-growth-of-the-gig-economy-states-rethink-how_us_58ada76be4b0d818c4f0a422?ncid=inblnkushpmg00000009"},{"title":"Whatever You Do, Don\u2019t Ask Anna Kendrick To Be A Bridesmaid","url":"http://www.huffingtonpost.com/entry/anna-kendrick-will-not-be-a-bridesmaid_us_58ade350e4b03d80af71bd99?76fl4eci333plow29&ncid=inblnkushpmg00000009"},{"title":"Having A Cat Does Not Cause Mental Illness, New Study 
Suggests","url":"http://www.huffingtonpost.com/entry/cats-mental-illness-toxoplasma_us_58adc06ae4b03d80af714072?ncid=inblnkushpmg00000009"},{"title":"James Corden Delights In Tearing Lifetime\u2019s Britney Biopic To Shreds","url":"http://www.huffingtonpost.com/entry/james-corden-delights-in-tearing-lifetimes-britney-spears-biopic-to-shreds_us_58add5c2e4b04a0b274f195b?v6j2nf2cmrpjsjor&ncid=inblnkushpmg00000009"},{"title":"Mariah Carey Gives Epic Interview About That NYE Debacle","url":"http://www.huffingtonpost.com/entry/mariah-carey-rolling-stone-interview_us_58ade9cbe4b01406012f3e29?54mzaw852t77xecdi&ncid=inblnkushpmg00000009"},{"title":"Chris Brown\u2019s Ex Claims He Threatened To Kill Her","url":"http://www.huffingtonpost.com/entry/chris-browns-ex-claims-he-threatened-to-kill-her_us_58adbafde4b0d0a6ef46d9ef?fc93pjd7om0qbk2o6r&ncid=inblnkushpmg00000009"},{"title":"Oh Hell Yeah, Louis C.K. Is Putting Out A Netflix Special In April","url":"http://www.huffingtonpost.com/entry/louis-ck-netflix_us_58ade81ce4b05ca474a02dad?wxvaundnwj89mb1emi&ncid=inblnkushpmg00000009"},{"title":"The Most Politically Influential Film Of The Oscars Will End Up Being \u2018Zootopia\u2019","url":"http://www.huffingtonpost.com/entry/zootopia-oscars_us_58ac6503e4b02a1e7dac1255?5vrkutst926hl4n29&ncid=inblnkushpmg00000009"},{"title":"Pay Young Black Folks To Do The Work They Want To Do","url":"http://www.huffingtonpost.com/entry/pay-young-black-folks-do-work_us_58adad31e4b04a0b274e97c1?mepo0ba7qjtyrmgqfr&ncid=inblnkushpmg00000009"},{"title":"7 Standout Black Figures In Sports And Entertainment Who Empower Us","url":"http://www.huffingtonpost.com/entry/7-black-figures-incredible-advancements-sports-and-entertainment_us_58ab44e9e4b0a855d1d8b845?ncid=inblnkushpmg00000009"},{"title":"Seth Meyers Roasts Trump In Recut Press 
Conference","url":"http://www.huffingtonpost.com/entry/seth-meyers-roasts-donald-trump-in-recut-press-conference_us_58adb78de4b04a0b274eb3a3?jpnhottiy3qnf80k9&ncid=inblnkushpmg00000009"},{"title":"Musician Accused Of Getting Onstage Enema During Cancer Benefit","url":"http://www.huffingtonpost.com/entry/michael-clemmons-onstage-enema_us_58adc353e4b03d80af7147a2?ncid=inblnkushpmg00000009"},{"title":"\u2018Walking Dead\u2019 T-Shirt Pulled For Being \u2018Racist\u2019 And \u2018Offensive\u2019","url":"http://www.huffingtonpost.com/entry/walking-dead-t-shirt-pulled-for-being-racist-and-offensive_us_58ad8e31e4b03d80af70dcfd?5zkiq509qwnbwewmi&ncid=inblnkushpmg00000009"},{"title":"Dakota Access Protesters Arrested As Deadline Passes To Depart Camp","url":"http://www.huffingtonpost.com/entry/time-runs-out-at-main-dakota-access-pipeline-protest-camp_us_58acc6a9e4b04a0b274df548?hn4p7o9bk4so979zfr&ncid=inblnkushpmg00000009"},{"title":"Trump Administration Rescinds Protections For Transgender Students","url":"http://www.huffingtonpost.com/entry/donald-trump-transgender_us_58ac4fe8e4b0a855d1d9d278?ncid=inblnkushpmg00000009"},{"title":"Scientists Discover \u2018Treasure Trove\u2019 Of Earth-Size Planets Orbiting Nearby Dwarf Star","url":"http://www.huffingtonpost.com/entry/earth-size-planets-discovery_us_58ad949fe4b03d80af70e309?ncid=inblnkushpmg00000009"},{"title":"GOP Congressman: Getting Rich Will Solve That Whole Environment Thing","url":"http://www.huffingtonpost.com/entry/dave-brat-town-hall_us_58adaf35e4b03d80af7118ea?v46ckay79wznf80k9&ncid=inblnkushpmg00000009"},{"title":"One Republican Would Subpoena Trump\u2019s Taxes If Russia Probe Goes There","url":"http://www.huffingtonpost.com/entry/susan-collins-republican-subpoena-trump-taxes-michael-flynn-probe_us_58addfb1e4b04a0b274f3d67?hajs0juj0g22o6r&ncid=inblnkushpmg00000009"},{"title":"Pennsylvania Governor: Dems Can Combat Trump By Getting Things 
Done","url":"http://www.huffingtonpost.com/entry/tom-wolf-democrats-donald-trump_us_58ae05c2e4b057efdce8b65e?h2jct8jsxxbt9&ncid=inblnkushpmg00000009"},{"title":"Facebook Donates Some $100,000 To CPAC, Reminding Users Again That It\u2019s Not Liberal","url":"http://www.huffingtonpost.com/entry/facebook-sponsor-cpac-2017_us_58adca13e4b0d0a6ef4705d9?eaqtic4u34v8zd7vi&ncid=inblnkushpmg00000009"},{"title":"Former Aides Explain How They Shielded Trump From Twitter Destruction","url":"http://www.huffingtonpost.com/entry/donald-trump-twitter_us_58ade36be4b03d80af71bdef?96lkhzc1h6fbyy14i&ncid=inblnkushpmg00000009"},{"title":"Check Out Mitch McConnell\u2019s Face As An Angry Voter Lets Him Have It","url":"http://www.huffingtonpost.com/entry/mitch-mcconnell-town-hall-obamacare_us_58acf3c9e4b0d0a6ef463755?47ny901zcspa7zaor&ncid=inblnkushpmg00000009"},{"title":"This GOP Congresswoman Sure Got An Easy First Question At Her Tele-Town Hall","url":"http://www.huffingtonpost.com/entry/barbara-comstock-townhall-question_us_58adec9ae4b057efdce88931?ncid=inblnkushpmg00000009"},{"title":"Trump Promised To Build Infrastructure But Delays Bay Area Railway Project","url":"http://www.huffingtonpost.com/entry/trump-infrastructure-delays-caltrain_us_58adc320e4b03d80af714733?ncid=engmodushpmg00000004&ncid=inblnkushpmg00000009"},{"title":"White House Chief Digital Officer Ousted After Failing FBI Background Check","url":"http://www.politico.com/blogs/donald-trump-administration/2017/02/gerrit-lansing-trump-digital-chief-ousted-235268"},{"title":"Joe Scarborough Tells Stephen Colbert Why His Show Won\u2019t Book Kellyanne Conway","url":"http://www.huffingtonpost.com/entry/joe-scarborough-wont-book-kellyanne-conway_us_58ad3af7e4b03d80af7097b5?ncid=inblnkushpmg00000009"},{"title":"\u2018Have You No Ethics?\u2019: Anne Frank Center Chief Slams Trump 
Surrogate","url":"http://www.huffingtonpost.com/entry/anne-frank-center-cnn-kayleigh-mcenany_us_58ad2004e4b04a0b274e181a?ncid=inblnkushpmg00000009"},{"title":"Keith Ellison Supporters Warn Of Fallout If He Loses DNC Chair Race","url":"http://www.huffingtonpost.com/entry/keith-ellison-dnc_us_58acc9c5e4b03d80af7075b3?c2him2tqgwqht1emi&ncid=inblnkushpmg00000009"},{"title":"Anti-Defamation League\u2019s New York Headquarters Receives Bomb Threat","url":"http://www.huffingtonpost.com/entry/anti-defamation-league-new-york-bomb-threat_us_58adc712e4b0d0a6ef46f848?ncid=inblnkushpmg00000009"},{"title":"You No Longer Need A License To Carry A Concealed Weapon In New Hampshire","url":"http://www.huffingtonpost.com/entry/new-hampshire-concealed-carry_us_58add620e4b03d80af7193e3?ncid=inblnkushpmg00000009"},{"title":"Miller Admits Trump\u2019s New Travel Ban Will Look A Lot Like The Old One","url":"http://www.huffingtonpost.com/entry/stephen-miller-new-immigration-ban_us_58ad281ee4b0d0a6ef463e24?ncid=inblnkushpmg00000009&ncid=inblnkushpmg00000009"},{"title":"The Jewish Community Center Bomb Threats Affect You, Too","url":"http://www.huffingtonpost.com/entry/jewish-community-center-bomb-threats_us_58ad9b29e4b04a0b274e7221?s30uxtjz0c72mlsor&ncid=inblnkushpmg00000009"},{"title":"GOP Millennial Task Force Chair Ignores Her Own Advice","url":"http://www.huffingtonpost.com/entry/elise-stefanik-millennial-outreach_us_58adb8dce4b0d0a6ef46d6c6?q19lhvm3911gpynwmi&ncid=inblnkushpmg00000009"},{"title":"Leader Of Azerbaijan Goes Full \u2018House Of Cards,\u2019 Appoints Wife As VP","url":"http://www.huffingtonpost.com/entry/azerbaijan-house-of-cards_us_58adaf2ce4b0d0a6ef46bdfe?ncid=inblnkushpmg00000009"},{"title":"North Korea Calls For Immediate Release Of Airport Assassination Suspects","url":"http://www.huffingtonpost.com/entry/north-korea-airport-assassination-suspects_us_58ad56cae4b0d0a6ef464ad2?ncid=inblnkushpmg00000009"},{"title":"Nursing Home Worker Accused Of Giving Lap Dance To 
100-Year-Old Man","url":"http://www.huffingtonpost.com/entry/lap-dance-100-year-old-man_us_58adf84be4b05ca474a04977?v25r8uxr&ncid=inblnkushpmg00000009"},{"title":"Ivanka Trump And Daughter Pay The Supreme Court A Visit","url":"http://www.huffingtonpost.com/entry/ivanka-trump-supreme-court_us_58adb382e4b0d0a6ef46cc27?ncid=inblnkushpmg00000009"},{"title":"Howard Dean Endorses Pete Buttigieg For DNC Chairman","url":"http://www.huffingtonpost.com/entry/howard-dean-endorses-pete-buttigieg-for-dnc-chairman_us_58ad833fe4b03d80af70d8ce?0griqrzal8no4dkj4i&ncid=inblnkushpmg00000009"},{"title":"U.K. Oil Boss Torches Trump\u2019s Clean Energy Plan","url":"http://www.huffingtonpost.co.uk/entry/sinead-lynch-royal-dutch-shell-uk-donald-trump_uk_58ad971fe4b03d80af70e59f?ncid=inblnkushpmg00000009"}],"primary":true},{"type":"nav","items":[{"title":"POLITICS","url":"http://www.huffingtonpost.com/section/politics"},{"title":"ENTERTAINMENT","url":"http://www.huffingtonpost.com/dept/entertainment"},{"title":"IMPACT","url":"http://www.huffingtonpost.com/dept/impact"},{"title":"VIDEO","url":"http://www.huffingtonpost.com/section/video"}]}]}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require 'test_helper'
|
|
2
|
+
require 'uri'
|
|
3
|
+
|
|
4
|
+
describe Diffbot::Frontpage do
|
|
5
|
+
describe '.fetch' do
|
|
6
|
+
before do
|
|
7
|
+
path = URI.parse(Diffbot::Frontpage.endpoint).path
|
|
8
|
+
|
|
9
|
+
body = File.read('test/fixtures/frontpage.json')
|
|
10
|
+
|
|
11
|
+
Excon.stub(method: :get, path: path) do |params|
|
|
12
|
+
{ body: body, status: 200 }
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
it 'returns a new Frontpage' do
|
|
17
|
+
frontpage = Diffbot::Frontpage.fetch('http://www.huffingtonpost.com/')
|
|
18
|
+
|
|
19
|
+
frontpage.title.must_match(/Breaking News/)
|
|
20
|
+
frontpage.type.must_equal('frontpage')
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: diffbot
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.15
|
|
4
|
+
version: 0.1.16
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Nicolas Sanguinetti
|
|
@@ -130,14 +130,15 @@ files:
|
|
|
130
130
|
- lib/diffbot/article.rb
|
|
131
131
|
- lib/diffbot/coercible_hash.rb
|
|
132
132
|
- lib/diffbot/frontpage.rb
|
|
133
|
-
- lib/diffbot/frontpage/dml_parser.rb
|
|
134
133
|
- lib/diffbot/item.rb
|
|
135
134
|
- lib/diffbot/items/product.rb
|
|
136
135
|
- lib/diffbot/product.rb
|
|
137
136
|
- lib/diffbot/request.rb
|
|
138
137
|
- test/article_test.rb
|
|
139
138
|
- test/coercible_hash_test.rb
|
|
139
|
+
- test/fixtures/frontpage.json
|
|
140
140
|
- test/fixtures/product.json
|
|
141
|
+
- test/frontpage_test.rb
|
|
141
142
|
- test/product_test.rb
|
|
142
143
|
- test/request_test.rb
|
|
143
144
|
- test/test_helper.rb
|
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
# Parser that takes the XML generated from Diffbot's Frontpage API call and
|
|
2
|
-
# returns a hash suitable for Diffbot::Frontpage.
|
|
3
|
-
class Diffbot::Frontpage::DmlParser
|
|
4
|
-
# Take the string of DML and convert it into a nice little hash we can pass to
|
|
5
|
-
# Diffbot::Frontpage.
|
|
6
|
-
#
|
|
7
|
-
# dml - A string of DML.
|
|
8
|
-
#
|
|
9
|
-
# Returns a Hash.
|
|
10
|
-
def self.parse(dml)
|
|
11
|
-
node = Nokogiri(dml).root
|
|
12
|
-
parser = new(node)
|
|
13
|
-
parser.parse
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
# Initialize the parser with a DML node.
|
|
17
|
-
#
|
|
18
|
-
# dml - The root XML::Element
|
|
19
|
-
def initialize(node)
|
|
20
|
-
@dml = node
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
# The root element of the DML document.
|
|
24
|
-
attr_reader :dml
|
|
25
|
-
|
|
26
|
-
# Parses the Diffbot Markup Language and generates a Hash that we can pass to
|
|
27
|
-
# Frontpage.new.
|
|
28
|
-
#
|
|
29
|
-
# Returns a Hash.
|
|
30
|
-
def parse
|
|
31
|
-
attrs = {}
|
|
32
|
-
|
|
33
|
-
info = dml % "info"
|
|
34
|
-
attrs["title"] = extract_text(info, "title")
|
|
35
|
-
attrs["icon"] = extract_text(info, "icon")
|
|
36
|
-
attrs["sourceType"] = extract_text(info, "sourceType")
|
|
37
|
-
attrs["sourceURL"] = extract_text(info, "sourceURL")
|
|
38
|
-
|
|
39
|
-
items = dml / "item"
|
|
40
|
-
attrs["items"] = items.map do |item|
|
|
41
|
-
ItemParser.new(item).parse
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
attrs
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
def extract_text(node, selector)
|
|
48
|
-
tag = node % selector
|
|
49
|
-
tag && tag.text
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
# Parser that takes the XML from a particular item from the XML returned from
|
|
53
|
-
# the frontpage API.
|
|
54
|
-
class ItemParser
|
|
55
|
-
def self.parse(node)
|
|
56
|
-
node = Nokogiri(node)
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
# The root element of each item.
|
|
60
|
-
attr_reader :item
|
|
61
|
-
|
|
62
|
-
# Initialize the parser with an Item node.
|
|
63
|
-
#
|
|
64
|
-
# item_node - The root node of the item.
|
|
65
|
-
def initialize(item_node)
|
|
66
|
-
@item = item_node
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
# Parses the item's DML and generates a Hash that we can add to the DML
|
|
70
|
-
# parser's parser's "items" key together with the other items.
|
|
71
|
-
#
|
|
72
|
-
# Returns a Hash.
|
|
73
|
-
def parse
|
|
74
|
-
attrs = {}
|
|
75
|
-
|
|
76
|
-
%w(title link pubDate description textSummary).each do |attr|
|
|
77
|
-
node = item % attr
|
|
78
|
-
attrs[attr] = node && node.text
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
%w(type img id xroot cluster).each do |attr|
|
|
82
|
-
attrs[attr] = item[attr]
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
attrs["stats"] = %w(fresh sp sr).each_with_object({}) do |attr, hash|
|
|
86
|
-
hash[attr] = item[attr].to_f
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
attrs
|
|
90
|
-
end
|
|
91
|
-
end
|
|
92
|
-
end
|