diffbot 0.1.15 → 0.1.16
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/diffbot.gemspec +1 -1
- data/lib/diffbot/frontpage.rb +7 -3
- data/test/fixtures/frontpage.json +1 -0
- data/test/frontpage_test.rb +23 -0
- metadata +3 -2
- data/lib/diffbot/frontpage/dml_parser.rb +0 -92
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 62bd8b70e92af249cc9d7eed3fdd2273346b8957
|
4
|
+
data.tar.gz: 01b3e6ceae1c9c19910fa4509d14a193e5c68d29
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6466e8f042508d444b32822e446a084264ec55964d0368ebd442a623fdd4052e6f436fa817e4e57fd33e3ec27403bcf19a0b7c5742284431c7fb0a3a893f0c15
|
7
|
+
data.tar.gz: 1d6f64a55b526964130f0ea9e81a30a993f551504d3c56d03bed6f78f077302e8906393ce7de130b7abf224318547fd43445dfb0e23c184a8b63670209e14529
|
data/Gemfile.lock
CHANGED
data/diffbot.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "diffbot"
|
3
|
-
s.version = "0.1.15"
|
3
|
+
s.version = "0.1.16"
|
4
4
|
s.description = "Diffbot provides a concise API for analyzing and extracting semantic information from web pages using Diffbot (http://www.diffbot.com)."
|
5
5
|
s.summary = "Ruby interface to the Diffbot API "
|
6
6
|
s.authors = ["Nicolas Sanguinetti", "Roman Greshny"]
|
data/lib/diffbot/frontpage.rb
CHANGED
@@ -23,7 +23,7 @@ module Diffbot
|
|
23
23
|
# frontpage = Diffbot::Frontpage.fetch(url, api_token)
|
24
24
|
#
|
25
25
|
# Returns a Diffbot::Frontpage.
|
26
|
-
def self.fetch(url, token=Diffbot.token, parser=Diffbot::Frontpage::DmlParser.method(:parse))
|
26
|
+
def self.fetch(url, token=Diffbot.token, parser=Yajl::Parser.method(:parse))
|
27
27
|
request = Diffbot::Request.new(token)
|
28
28
|
response = request.perform(:get, endpoint) do |req|
|
29
29
|
req[:query][:url] = url
|
@@ -39,6 +39,12 @@ module Diffbot
|
|
39
39
|
"http://www.diffbot.com/api/frontpage"
|
40
40
|
end
|
41
41
|
|
42
|
+
property :type
|
43
|
+
|
44
|
+
property :url
|
45
|
+
|
46
|
+
property :sections
|
47
|
+
|
42
48
|
# Public: The title of the page.
|
43
49
|
property :title
|
44
50
|
|
@@ -57,5 +63,3 @@ module Diffbot
|
|
57
63
|
coerce_property :items, collection: Item
|
58
64
|
end
|
59
65
|
end
|
60
|
-
|
61
|
-
require "diffbot/frontpage/dml_parser"
|
@@ -0,0 +1 @@
|
|
1
|
+
{"icon":"http://s.m.huffpost.com/assets/favicon-4e39cb9f22f072465b467122478c9cb0db0ba45ba7f9ae415ac7f55194515fc6.ico","type":"frontpage","title":"Breaking News and Opinion on The Huffington Post","url":"http://www.huffingtonpost.com/","sections":[{"type":"nav","items":[{"title":"Advertise","url":"http://advertising.aol.com/properties"},{"title":"Careers","url":"https://careers.smartrecruiters.com/Aol/huffington-post/"},{"title":"User Agreement","url":"http://www.huffingtonpost.com/terms.html"},{"title":"Privacy","url":"http://privacy.aol.com"},{"title":"Comment Policy","url":"http://www.huffingtonpost.com/comment/policy/"},{"title":"About Us","url":"http://www.huffingtonpost.com/p/huffington-post.html"},{"title":"About Our Ads","url":"http://adinfo.aol.com/about-our-ads/"},{"title":"Contact Us","url":"http://www.huffingtonpost.com/contact/"},{"title":"Archive","url":"http://www.huffingtonpost.com/archive/"}]},{"type":"profile","items":[{"title":"9.05 M","url":"http://facebook.com/HuffingtonPost"},{"title":"9.15 M","url":"https://twitter.com/HuffingtonPost"},{"title":"470 K","url":"http://pinterest.com/huffingtonpost"},{"title":"Podcast","url":"https://itunes.apple.com/us/artist/the-huffington-post/id908797543?mt=2"}]},{"type":"location","items":[{"title":"\u2018Jeopardy!\u2019 Contestant Gets Away With Flipping Off America","url":"http://www.huffingtonpost.com/entry/jeopardy-contestant-gets-away-with-flipping-off-america_us_58ada4ede4b03d80af7101f4?a8vewl1bj2kpzj8aor&ncid=inblnkushpmg00000009"},{"title":"Jamie Foxx Target Of Racial Slur While Out Dining","url":"http://www.huffingtonpost.com/entry/jamie-foxx-racist-slur-croatia_us_58ad936ae4b04a0b274e6570?76j63hroe191qncdi&ncid=inblnkushpmg00000009"},{"title":"Horror Film About Racism Earns Coveted 100 Percent On Rotten Tomatoes","url":"http://www.huffingtonpost.com/entry/jordan-peele-get-out-rotten-tomatoes_us_58ae08dbe4b01406012f7905?37mtxl0nvaafxhia4i&ncid=inblnkushpmg00000009"},{"title":"The List Of Cities That 
Want The 2024 Olympics Is Down To Two","url":"http://www.huffingtonpost.com/entry/2024-olympics-two-cities_us_58adee80e4b05ca474a03bc0?qlv04nvoq5j5v4e7b9&ncid=inblnkushpmg00000009"},{"title":"An Up-Close Look At A Delaware-Sized Iceberg About To Break From Antarctica","url":"http://www.huffingtonpost.com/entry/larsen-c-video-iceberg-antartica_us_58ae03f4e4b057efdce8b3de?kbj2lib9c2384cxr&ncid=inblnkushpmg00000009"},{"title":"Andy Warhol\u2019s Death Might Not Have Been As \u2018Routine\u2019 As First Thought","url":"http://www.huffingtonpost.com/entry/andy-warhol-death_us_58adbb38e4b04a0b274eb96d?h6xp2k1zrcw61or§ion=us_arts&ncid=inblnkushpmg00000009"},{"title":"Met Director Fears Elimination Of NEA Marks \u2018New Assault\u2019 On Art","url":"http://www.huffingtonpost.com/entry/met-director-fears-elimination-of-nea-marks-new-assault-on-art_us_58ab6476e4b0f077b3ed1677?qbyf4qnc11akwvzpvi?clear&ncid=inblnkushpmg00000009"},{"title":"This Is What The Oscars Looked Like 30 Years Ago","url":"http://www.huffingtonpost.com/entry/the-oscars-1987_us_58ab3ad1e4b07028b702d678?ncid=inblnkushpmg00000009"},{"title":"Baseball Is Making One Of Its Biggest Rule Changes Ever","url":"http://www.huffingtonpost.com/entry/baseball-intentional-walk-change_us_58ad128ee4b04a0b274e172e?ncid=inblnkushpmg00000009"},{"title":"Accused Drunk Driver Does Cartwheels During Sobriety Test","url":"http://www.huffingtonpost.com/entry/cartwheels-sobriety-test_us_58ae0bc1e4b05ca474a06dc0?ropw4s4i&ncid=inblnkushpmg00000009"},{"title":"It\u2019s Getting Better, But Let\u2019s Not Forget Hollywood Still Has A Diversity Issue","url":"http://www.huffingtonpost.com/entry/2017-hollywood-diversity-report_us_58ac6c0ce4b06b61e61e2db5?ncid=inblnkushpmg00000009"},{"title":"Viral Photos Show Black Women Simply Unbothered By Police 
Intimidation","url":"http://www.huffingtonpost.com/entry/photos-shows-black-women-simply-unbothered-by-police-intimidation_us_58adbfe4e4b03d80af713f1d?ncid=inblnkushpmg00000009"},{"title":"Muslim Activists\u2019 Fundraiser For Vandalized Jewish Cemetery Soars Past Initial Goal","url":"http://www.huffingtonpost.com/entry/muslim-activists-fundraiser-for-desecrated-jewish-cemetery-soars-past-initial-goal_us_58adbb6ce4b04a0b274eb9c9?y29uugggmf1xajor&ncid=inblnkushpmg00000009"},{"title":"This Feminist Film Got Snubbed By The Oscars, But You Should Watch It Anyway","url":"http://www.huffingtonpost.com/entry/this-feminist-film-got-snubbed-by-the-oscars-you-should-watch-it-anyway_us_58ab6b2ce4b0f077b3ed2022?md72s5iwqy9pgy14i§ion=us_arts&ncid=inblnkushpmg00000009"},{"title":"An Open Letter To Attorney General Jeff Sessions, From A Fellow Alabamian","url":"http://www.huffingtonpost.com/entry/open-letter-to-jeff-sessions_us_58ae1920e4b01406012f93e6?ncid=inblnkushpmg00000009"},{"title":"Liberals Are Not \u2018Helping\u2019 Trump, And Supporting Him Isn\u2019t Like Being Gay","url":"http://www.huffingtonpost.com/entry/no-liberals-are-not-helping-trump-and-supporting_us_58acc68be4b0d818c4f0a340?ncid=inblnkushpmg00000009"},{"title":"Michigan Teen Believes Shirt Was Left On Her Car Windshield As A Trap","url":"http://www.huffingtonpost.com/entry/michigan-teen-shirt-windshiel_us_58adb942e4b0d0a6ef46d75a?aprwfuw6w4dvx6r&ncid=inblnkushpmg00000009"},{"title":"With Growth Of The Gig Economy, States Rethink How Workers Get Benefits","url":"http://www.huffingtonpost.com/entry/with-growth-of-the-gig-economy-states-rethink-how_us_58ada76be4b0d818c4f0a422?ncid=inblnkushpmg00000009"},{"title":"Whatever You Do, Don\u2019t Ask Anna Kendrick To Be A Bridesmaid","url":"http://www.huffingtonpost.com/entry/anna-kendrick-will-not-be-a-bridesmaid_us_58ade350e4b03d80af71bd99?76fl4eci333plow29&ncid=inblnkushpmg00000009"},{"title":"Having A Cat Does Not Cause Mental Illness, New Study 
Suggests","url":"http://www.huffingtonpost.com/entry/cats-mental-illness-toxoplasma_us_58adc06ae4b03d80af714072?ncid=inblnkushpmg00000009"},{"title":"James Corden Delights In Tearing Lifetime\u2019s Britney Biopic To Shreds","url":"http://www.huffingtonpost.com/entry/james-corden-delights-in-tearing-lifetimes-britney-spears-biopic-to-shreds_us_58add5c2e4b04a0b274f195b?v6j2nf2cmrpjsjor&ncid=inblnkushpmg00000009"},{"title":"Mariah Carey Gives Epic Interview About That NYE Debacle","url":"http://www.huffingtonpost.com/entry/mariah-carey-rolling-stone-interview_us_58ade9cbe4b01406012f3e29?54mzaw852t77xecdi&ncid=inblnkushpmg00000009"},{"title":"Chris Brown\u2019s Ex Claims He Threatened To Kill Her","url":"http://www.huffingtonpost.com/entry/chris-browns-ex-claims-he-threatened-to-kill-her_us_58adbafde4b0d0a6ef46d9ef?fc93pjd7om0qbk2o6r&ncid=inblnkushpmg00000009"},{"title":"Oh Hell Yeah, Louis C.K. Is Putting Out A Netflix Special In April","url":"http://www.huffingtonpost.com/entry/louis-ck-netflix_us_58ade81ce4b05ca474a02dad?wxvaundnwj89mb1emi&ncid=inblnkushpmg00000009"},{"title":"The Most Politically Influential Film Of The Oscars Will End Up Being \u2018Zootopia\u2019","url":"http://www.huffingtonpost.com/entry/zootopia-oscars_us_58ac6503e4b02a1e7dac1255?5vrkutst926hl4n29&ncid=inblnkushpmg00000009"},{"title":"Pay Young Black Folks To Do The Work They Want To Do","url":"http://www.huffingtonpost.com/entry/pay-young-black-folks-do-work_us_58adad31e4b04a0b274e97c1?mepo0ba7qjtyrmgqfr&ncid=inblnkushpmg00000009"},{"title":"7 Standout Black Figures In Sports And Entertainment Who Empower Us","url":"http://www.huffingtonpost.com/entry/7-black-figures-incredible-advancements-sports-and-entertainment_us_58ab44e9e4b0a855d1d8b845?ncid=inblnkushpmg00000009"},{"title":"Seth Meyers Roasts Trump In Recut Press 
Conference","url":"http://www.huffingtonpost.com/entry/seth-meyers-roasts-donald-trump-in-recut-press-conference_us_58adb78de4b04a0b274eb3a3?jpnhottiy3qnf80k9&ncid=inblnkushpmg00000009"},{"title":"Musician Accused Of Getting Onstage Enema During Cancer Benefit","url":"http://www.huffingtonpost.com/entry/michael-clemmons-onstage-enema_us_58adc353e4b03d80af7147a2?ncid=inblnkushpmg00000009"},{"title":"\u2018Walking Dead\u2019 T-Shirt Pulled For Being \u2018Racist\u2019 And \u2018Offensive\u2019","url":"http://www.huffingtonpost.com/entry/walking-dead-t-shirt-pulled-for-being-racist-and-offensive_us_58ad8e31e4b03d80af70dcfd?5zkiq509qwnbwewmi&ncid=inblnkushpmg00000009"},{"title":"Dakota Access Protesters Arrested As Deadline Passes To Depart Camp","url":"http://www.huffingtonpost.com/entry/time-runs-out-at-main-dakota-access-pipeline-protest-camp_us_58acc6a9e4b04a0b274df548?hn4p7o9bk4so979zfr&ncid=inblnkushpmg00000009"},{"title":"Trump Administration Rescinds Protections For Transgender Students","url":"http://www.huffingtonpost.com/entry/donald-trump-transgender_us_58ac4fe8e4b0a855d1d9d278?ncid=inblnkushpmg00000009"},{"title":"Scientists Discover \u2018Treasure Trove\u2019 Of Earth-Size Planets Orbiting Nearby Dwarf Star","url":"http://www.huffingtonpost.com/entry/earth-size-planets-discovery_us_58ad949fe4b03d80af70e309?ncid=inblnkushpmg00000009"},{"title":"GOP Congressman: Getting Rich Will Solve That Whole Environment Thing","url":"http://www.huffingtonpost.com/entry/dave-brat-town-hall_us_58adaf35e4b03d80af7118ea?v46ckay79wznf80k9&ncid=inblnkushpmg00000009"},{"title":"One Republican Would Subpoena Trump\u2019s Taxes If Russia Probe Goes There","url":"http://www.huffingtonpost.com/entry/susan-collins-republican-subpoena-trump-taxes-michael-flynn-probe_us_58addfb1e4b04a0b274f3d67?hajs0juj0g22o6r&ncid=inblnkushpmg00000009"},{"title":"Pennsylvania Governor: Dems Can Combat Trump By Getting Things 
Done","url":"http://www.huffingtonpost.com/entry/tom-wolf-democrats-donald-trump_us_58ae05c2e4b057efdce8b65e?h2jct8jsxxbt9&ncid=inblnkushpmg00000009"},{"title":"Facebook Donates Some $100,000 To CPAC, Reminding Users Again That It\u2019s Not Liberal","url":"http://www.huffingtonpost.com/entry/facebook-sponsor-cpac-2017_us_58adca13e4b0d0a6ef4705d9?eaqtic4u34v8zd7vi&ncid=inblnkushpmg00000009"},{"title":"Former Aides Explain How They Shielded Trump From Twitter Destruction","url":"http://www.huffingtonpost.com/entry/donald-trump-twitter_us_58ade36be4b03d80af71bdef?96lkhzc1h6fbyy14i&ncid=inblnkushpmg00000009"},{"title":"Check Out Mitch McConnell\u2019s Face As An Angry Voter Lets Him Have It","url":"http://www.huffingtonpost.com/entry/mitch-mcconnell-town-hall-obamacare_us_58acf3c9e4b0d0a6ef463755?47ny901zcspa7zaor&ncid=inblnkushpmg00000009"},{"title":"This GOP Congresswoman Sure Got An Easy First Question At Her Tele-Town Hall","url":"http://www.huffingtonpost.com/entry/barbara-comstock-townhall-question_us_58adec9ae4b057efdce88931?ncid=inblnkushpmg00000009"},{"title":"Trump Promised To Build Infrastructure But Delays Bay Area Railway Project","url":"http://www.huffingtonpost.com/entry/trump-infrastructure-delays-caltrain_us_58adc320e4b03d80af714733?ncid=engmodushpmg00000004&ncid=inblnkushpmg00000009"},{"title":"White House Chief Digital Officer Ousted After Failing FBI Background Check","url":"http://www.politico.com/blogs/donald-trump-administration/2017/02/gerrit-lansing-trump-digital-chief-ousted-235268"},{"title":"Joe Scarborough Tells Stephen Colbert Why His Show Won\u2019t Book Kellyanne Conway","url":"http://www.huffingtonpost.com/entry/joe-scarborough-wont-book-kellyanne-conway_us_58ad3af7e4b03d80af7097b5?ncid=inblnkushpmg00000009"},{"title":"\u2018Have You No Ethics?\u2019: Anne Frank Center Chief Slams Trump 
Surrogate","url":"http://www.huffingtonpost.com/entry/anne-frank-center-cnn-kayleigh-mcenany_us_58ad2004e4b04a0b274e181a?ncid=inblnkushpmg00000009"},{"title":"Keith Ellison Supporters Warn Of Fallout If He Loses DNC Chair Race","url":"http://www.huffingtonpost.com/entry/keith-ellison-dnc_us_58acc9c5e4b03d80af7075b3?c2him2tqgwqht1emi&ncid=inblnkushpmg00000009"},{"title":"Anti-Defamation League\u2019s New York Headquarters Receives Bomb Threat","url":"http://www.huffingtonpost.com/entry/anti-defamation-league-new-york-bomb-threat_us_58adc712e4b0d0a6ef46f848?ncid=inblnkushpmg00000009"},{"title":"You No Longer Need A License To Carry A Concealed Weapon In New Hampshire","url":"http://www.huffingtonpost.com/entry/new-hampshire-concealed-carry_us_58add620e4b03d80af7193e3?ncid=inblnkushpmg00000009"},{"title":"Miller Admits Trump\u2019s New Travel Ban Will Look A Lot Like The Old One","url":"http://www.huffingtonpost.com/entry/stephen-miller-new-immigration-ban_us_58ad281ee4b0d0a6ef463e24?ncid=inblnkushpmg00000009&ncid=inblnkushpmg00000009"},{"title":"The Jewish Community Center Bomb Threats Affect You, Too","url":"http://www.huffingtonpost.com/entry/jewish-community-center-bomb-threats_us_58ad9b29e4b04a0b274e7221?s30uxtjz0c72mlsor&ncid=inblnkushpmg00000009"},{"title":"GOP Millennial Task Force Chair Ignores Her Own Advice","url":"http://www.huffingtonpost.com/entry/elise-stefanik-millennial-outreach_us_58adb8dce4b0d0a6ef46d6c6?q19lhvm3911gpynwmi&ncid=inblnkushpmg00000009"},{"title":"Leader Of Azerbaijan Goes Full \u2018House Of Cards,\u2019 Appoints Wife As VP","url":"http://www.huffingtonpost.com/entry/azerbaijan-house-of-cards_us_58adaf2ce4b0d0a6ef46bdfe?ncid=inblnkushpmg00000009"},{"title":"North Korea Calls For Immediate Release Of Airport Assassination Suspects","url":"http://www.huffingtonpost.com/entry/north-korea-airport-assassination-suspects_us_58ad56cae4b0d0a6ef464ad2?ncid=inblnkushpmg00000009"},{"title":"Nursing Home Worker Accused Of Giving Lap Dance To 
100-Year-Old Man","url":"http://www.huffingtonpost.com/entry/lap-dance-100-year-old-man_us_58adf84be4b05ca474a04977?v25r8uxr&ncid=inblnkushpmg00000009"},{"title":"Ivanka Trump And Daughter Pay The Supreme Court A Visit","url":"http://www.huffingtonpost.com/entry/ivanka-trump-supreme-court_us_58adb382e4b0d0a6ef46cc27?ncid=inblnkushpmg00000009"},{"title":"Howard Dean Endorses Pete Buttigieg For DNC Chairman","url":"http://www.huffingtonpost.com/entry/howard-dean-endorses-pete-buttigieg-for-dnc-chairman_us_58ad833fe4b03d80af70d8ce?0griqrzal8no4dkj4i&ncid=inblnkushpmg00000009"},{"title":"U.K. Oil Boss Torches Trump\u2019s Clean Energy Plan","url":"http://www.huffingtonpost.co.uk/entry/sinead-lynch-royal-dutch-shell-uk-donald-trump_uk_58ad971fe4b03d80af70e59f?ncid=inblnkushpmg00000009"}],"primary":true},{"type":"nav","items":[{"title":"POLITICS","url":"http://www.huffingtonpost.com/section/politics"},{"title":"ENTERTAINMENT","url":"http://www.huffingtonpost.com/dept/entertainment"},{"title":"IMPACT","url":"http://www.huffingtonpost.com/dept/impact"},{"title":"VIDEO","url":"http://www.huffingtonpost.com/section/video"}]}]}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'uri'
|
3
|
+
|
4
|
+
describe Diffbot::Frontpage do
|
5
|
+
describe '.fetch' do
|
6
|
+
before do
|
7
|
+
path = URI.parse(Diffbot::Frontpage.endpoint).path
|
8
|
+
|
9
|
+
body = File.read('test/fixtures/frontpage.json')
|
10
|
+
|
11
|
+
Excon.stub(method: :get, path: path) do |params|
|
12
|
+
{ body: body, status: 200 }
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'returns a new Frontpage' do
|
17
|
+
frontpage = Diffbot::Frontpage.fetch('http://www.huffingtonpost.com/')
|
18
|
+
|
19
|
+
frontpage.title.must_match(/Breaking News/)
|
20
|
+
frontpage.type.must_equal('frontpage')
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: diffbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.15
|
4
|
+
version: 0.1.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nicolas Sanguinetti
|
@@ -130,14 +130,15 @@ files:
|
|
130
130
|
- lib/diffbot/article.rb
|
131
131
|
- lib/diffbot/coercible_hash.rb
|
132
132
|
- lib/diffbot/frontpage.rb
|
133
|
-
- lib/diffbot/frontpage/dml_parser.rb
|
134
133
|
- lib/diffbot/item.rb
|
135
134
|
- lib/diffbot/items/product.rb
|
136
135
|
- lib/diffbot/product.rb
|
137
136
|
- lib/diffbot/request.rb
|
138
137
|
- test/article_test.rb
|
139
138
|
- test/coercible_hash_test.rb
|
139
|
+
- test/fixtures/frontpage.json
|
140
140
|
- test/fixtures/product.json
|
141
|
+
- test/frontpage_test.rb
|
141
142
|
- test/product_test.rb
|
142
143
|
- test/request_test.rb
|
143
144
|
- test/test_helper.rb
|
@@ -1,92 +0,0 @@
|
|
1
|
-
# Parser that takes the XML generated from Diffbot's Frontpage API call and
|
2
|
-
# returns a hash suitable for Diffbot::Frontpage.
|
3
|
-
class Diffbot::Frontpage::DmlParser
|
4
|
-
# Take the string of DML and convert it into a nice little hash we can pass to
|
5
|
-
# Diffbot::Frontpage.
|
6
|
-
#
|
7
|
-
# dml - A string of DML.
|
8
|
-
#
|
9
|
-
# Returns a Hash.
|
10
|
-
def self.parse(dml)
|
11
|
-
node = Nokogiri(dml).root
|
12
|
-
parser = new(node)
|
13
|
-
parser.parse
|
14
|
-
end
|
15
|
-
|
16
|
-
# Initialize the parser with a DML node.
|
17
|
-
#
|
18
|
-
# dml - The root XML::Element
|
19
|
-
def initialize(node)
|
20
|
-
@dml = node
|
21
|
-
end
|
22
|
-
|
23
|
-
# The root element of the DML document.
|
24
|
-
attr_reader :dml
|
25
|
-
|
26
|
-
# Parses the Diffbot Markup Language and generates a Hash that we can pass to
|
27
|
-
# Frontpage.new.
|
28
|
-
#
|
29
|
-
# Returns a Hash.
|
30
|
-
def parse
|
31
|
-
attrs = {}
|
32
|
-
|
33
|
-
info = dml % "info"
|
34
|
-
attrs["title"] = extract_text(info, "title")
|
35
|
-
attrs["icon"] = extract_text(info, "icon")
|
36
|
-
attrs["sourceType"] = extract_text(info, "sourceType")
|
37
|
-
attrs["sourceURL"] = extract_text(info, "sourceURL")
|
38
|
-
|
39
|
-
items = dml / "item"
|
40
|
-
attrs["items"] = items.map do |item|
|
41
|
-
ItemParser.new(item).parse
|
42
|
-
end
|
43
|
-
|
44
|
-
attrs
|
45
|
-
end
|
46
|
-
|
47
|
-
def extract_text(node, selector)
|
48
|
-
tag = node % selector
|
49
|
-
tag && tag.text
|
50
|
-
end
|
51
|
-
|
52
|
-
# Parser that takes the XML from a particular item from the XML returned from
|
53
|
-
# the frontpage API.
|
54
|
-
class ItemParser
|
55
|
-
def self.parse(node)
|
56
|
-
node = Nokogiri(node)
|
57
|
-
end
|
58
|
-
|
59
|
-
# The root element of each item.
|
60
|
-
attr_reader :item
|
61
|
-
|
62
|
-
# Initialize the parser with an Item node.
|
63
|
-
#
|
64
|
-
# item_node - The root node of the item.
|
65
|
-
def initialize(item_node)
|
66
|
-
@item = item_node
|
67
|
-
end
|
68
|
-
|
69
|
-
# Parses the item's DML and generates a Hash that we can add to the DML
|
70
|
-
# parser's parser's "items" key together with the other items.
|
71
|
-
#
|
72
|
-
# Returns a Hash.
|
73
|
-
def parse
|
74
|
-
attrs = {}
|
75
|
-
|
76
|
-
%w(title link pubDate description textSummary).each do |attr|
|
77
|
-
node = item % attr
|
78
|
-
attrs[attr] = node && node.text
|
79
|
-
end
|
80
|
-
|
81
|
-
%w(type img id xroot cluster).each do |attr|
|
82
|
-
attrs[attr] = item[attr]
|
83
|
-
end
|
84
|
-
|
85
|
-
attrs["stats"] = %w(fresh sp sr).each_with_object({}) do |attr, hash|
|
86
|
-
hash[attr] = item[attr].to_f
|
87
|
-
end
|
88
|
-
|
89
|
-
attrs
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|