harrisj-nytimes-articles 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.yml +4 -0
- data/lib/nytimes_articles.rb +8 -0
- data/lib/nytimes_articles/article.rb +397 -0
- data/lib/nytimes_articles/base.rb +110 -0
- data/lib/nytimes_articles/exceptions.rb +24 -0
- data/lib/nytimes_articles/facet.rb +70 -0
- data/lib/nytimes_articles/result_set.rb +40 -0
- data/test/nytimes/articles/test_article.rb +527 -0
- data/test/nytimes/articles/test_base.rb +92 -0
- data/test/nytimes/articles/test_facet.rb +46 -0
- data/test/nytimes/articles/test_result_set.rb +62 -0
- data/test/test_helper.rb +31 -0
- metadata +78 -0
data/VERSION.yml
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
# should I be setting this?
|
2
|
+
$KCODE = 'UTF8'
|
3
|
+
|
4
|
+
require File.join(File.dirname(__FILE__), 'nytimes_articles', 'exceptions')
|
5
|
+
require File.join(File.dirname(__FILE__), 'nytimes_articles', 'base')
|
6
|
+
require File.join(File.dirname(__FILE__), 'nytimes_articles', 'facet')
|
7
|
+
require File.join(File.dirname(__FILE__), 'nytimes_articles', 'article')
|
8
|
+
require File.join(File.dirname(__FILE__), 'nytimes_articles', 'result_set')
|
@@ -0,0 +1,397 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
module Nytimes
|
4
|
+
module Articles
|
5
|
+
class Article < Base
|
6
|
+
# Names of the fields an article can carry, grouped by kind.
RAW_FIELDS = %w(url)
TEXT_FIELDS = %w(abstract author body byline lead_paragraph nytd_lead_paragraph nytd_title title)
NUMERIC_FIELDS = %w(word_count)
BOOLEAN_FIELDS = %w(fee small_image)
IMAGE_FIELDS = %w(small_image small_image_url small_image_height small_image_width)
MULTIMEDIA_FIELDS = %w(multimedia related_multimedia)

# Every field name, used for the readers below and for the <tt>:fields => :all</tt> search option.
# 'small_image' appears in both BOOLEAN_FIELDS and IMAGE_FIELDS, so the combined list is
# de-duplicated to avoid requesting the same field twice.
ALL_FIELDS = (TEXT_FIELDS + RAW_FIELDS + NUMERIC_FIELDS + BOOLEAN_FIELDS + IMAGE_FIELDS + MULTIMEDIA_FIELDS + Facet::ALL_FACETS).uniq

attr_reader *ALL_FIELDS

# Scalar facets
attr_reader :page, :column, :pub_month, :pub_year, :pub_day, :day_of_week, :desk, :date, :section_page, :source

# Facets that return multiple values
attr_reader :classifiers, :descriptions, :geo, :material_types, :organizations, :persons, :nytd_bylines, :nytd_descriptions, :nytd_geo, :nytd_organizations, :nytd_persons, :nytd_sections, :nytd_works_mentioned, :works_mentioned

# Friendlier names for the person facets.
alias :people :persons
alias :nytd_people :nytd_persons
|
24
|
+
|
25
|
+
##
|
26
|
+
# Create a new Article from hash arguments. You really don't need to call this as Article instances are automatically returned from the API
|
27
|
+
def initialize(params={})
  # Every key becomes an instance variable of the same name holding the paired value.
  params.each_pair { |name, value| instance_variable_set("@#{name}", value) }
end
|
32
|
+
|
33
|
+
##
|
34
|
+
# Is this article available for a fee?
|
35
|
+
alias :fee? :fee
|
36
|
+
|
37
|
+
##
|
38
|
+
# Is this article available for free?
|
39
|
+
def free?
  # Simply the negation of fee?
  !fee?
end
|
42
|
+
|
43
|
+
##
|
44
|
+
# Creates a new Article from a hash returned from the API. This is called on search results. You have no reason to call it.
|
45
|
+
def self.init_from_api(params)
  attributes = {}

  # Display fields, converted with the helper that matches each field.
  %w(abstract author body byline).each do |name|
    attributes[name.to_sym] = text_field(params[name])
  end
  attributes[:fee] = params['fee'] || false
  %w(lead_paragraph nytd_title nytd_lead_paragraph).each do |name|
    attributes[name.to_sym] = text_field(params[name])
  end
  attributes[:related_multimedia] = nil # FIXME
  attributes[:image] = nil # FIXME
  attributes[:title] = text_field(params['title'])
  attributes[:url] = params['url']
  attributes[:word_count] = integer_field(params['word_count'])

  # FACETS THAT RETURN SCALARS
  attributes[:page] = integer_field(params[Facet::PAGE])
  attributes[:column] = text_field(params[Facet::COLUMN])
  attributes[:pub_month] = integer_field(params[Facet::PUB_MONTH])
  attributes[:pub_year] = integer_field(params[Facet::PUB_YEAR])
  attributes[:pub_day] = integer_field(params[Facet::PUB_DAY])
  attributes[:day_of_week] = params[Facet::DAY_OF_WEEK]
  attributes[:desk] = text_field(params[Facet::DESK])
  attributes[:date] = date_field(params[Facet::DATE])
  attributes[:section_page] = params[Facet::SECTION_PAGE]
  attributes[:source] = text_field(params[Facet::SOURCE])

  # FIXME! MORE FACET PARAMS
  # FACETS THAT RETURN ARRAYS: each value is wrapped by facet_params.
  [
    [:classifiers, Facet::CLASSIFIERS],
    [:descriptions, Facet::DESCRIPTION],
    [:geo, Facet::GEO],
    [:material_types, Facet::MATERIAL_TYPE],
    [:organizations, Facet::ORGANIZATION],
    [:persons, Facet::PERSON],
    [:nytd_bylines, Facet::NYTD_BYLINE],
    [:nytd_descriptions, Facet::NYTD_DESCRIPTION],
    [:nytd_geo, Facet::NYTD_GEO],
    [:nytd_organizations, Facet::NYTD_ORGANIZATION],
    [:nytd_persons, Facet::NYTD_PERSON],
    [:nytd_sections, Facet::NYTD_SECTION],
    [:nytd_works_mentioned, Facet::NYTD_WORKS_MENTIONED],
    [:works_mentioned, Facet::WORKS_MENTIONED]
  ].each do |attribute, facet_name|
    attributes[attribute] = facet_params(params, facet_name)
  end

  Article.new(attributes)
end
|
93
|
+
|
94
|
+
##
|
95
|
+
# Executes a search against the Article Search API and returns a ResultSet of 10 articles. At its simplest form, can be invoked
|
96
|
+
# with just a string like so
|
97
|
+
#
|
98
|
+
# Article.search 'dog food'
|
99
|
+
#
|
100
|
+
# which will do a text search against several text fields in the article and return the most basic fields for each
|
101
|
+
# article, but it takes a large number of potential parameters. All of these fields and then some can be returned as display fields
|
102
|
+
# in the articles retrieved from search (see the <tt>:fields</tt> argument below)
|
103
|
+
#
|
104
|
+
# == TEXT FIELDS
|
105
|
+
#
|
106
|
+
# If passed a string as the first argument, the text will be used to search against the title, byline and body fields of articles. This text takes
|
107
|
+
# the following boolean syntax:
|
108
|
+
# * <tt>dog food</tt> - similar to doing a boolean AND search on both terms
|
109
|
+
# * <tt>"ice cream"</tt> - matches the words as a phrase in the text
|
110
|
+
# * <tt>ice -cream</tt> - to search text that doesn't contain a term, prefix with the minus sign.
|
111
|
+
#
|
112
|
+
# Should you wish to target text against specific text fields associated with the article, the following named parameters are supported:
|
113
|
+
# * <tt>:abstract</tt> - A summary of the article, written by Times indexers
|
114
|
+
# * <tt>:body</tt> - A portion of the beginning of the article. Note: Only a portion of the article body is included in responses. But when you search against the body field, you search the full text of the article.
|
115
|
+
# * <tt>:byline</tt> - The article byline, including the author's name
|
116
|
+
# * <tt>:lead_paragraph</tt> - The first paragraph of the article (as it appeared in the printed newspaper)
|
117
|
+
# * <tt>:nytd_byline</tt> - The article byline, formatted for NYTimes.com
|
118
|
+
# * <tt>:nytd_lead_paragraph</tt> - The first paragraph of the article (as it appears on NYTimes.com)
|
119
|
+
# * <tt>:nytd_title</tt> - The article title on NYTimes.com (this field may or may not match the title field; headlines may be shortened and edited for the Web)
|
120
|
+
# * <tt>:text</tt> - The text field consists of title + byline + body (combined in an OR search) and is the default field for keyword searches.
|
121
|
+
# * <tt>:title</tt> - The article title (headline); corresponds to the headline that appeared in the printed newspaper
|
122
|
+
# * <tt>:url</tt> - The URL of the article on NYTimes.com
|
123
|
+
#
|
124
|
+
# == FACET SEARCHING
|
125
|
+
#
|
126
|
+
# Beyond query searches, the NY Times API also allows you to search against controlled vocabulary metadata associated with the article. This is powerful, if you want precise matching against specific
|
127
|
+
# people, places, etc. (e.g., "I want stories about Ford the former president, not Ford the automotive company"). The following Facet constants are supported.
|
128
|
+
#
|
129
|
+
# * <tt>Facet::CLASSIFIERS</tt> - Taxonomic classifiers that reflect Times content categories, such as _Top/News/Sports_
|
130
|
+
# * <tt>Facet::COLUMN</tt> - A Times column title (if applicable), such as _Weddings_ or _Ideas & Trends_
|
131
|
+
# * <tt>Facet::DATE</tt> - The publication date in YYYYMMDD format
|
132
|
+
# * <tt>Facet::DAY_OF_WEEK</tt> - The day of the week (e.g., Monday, Tuesday) the article was published (compare <tt>PUB_DAY</tt>, which is the numeric date rather than the day of the week)
|
133
|
+
# * <tt>Facet::DESCRIPTION</tt> - Descriptive subject terms assigned by Times indexers (must be in UPPERCASE)
|
134
|
+
# * <tt>Facet::DESK</tt> - The Times desk that produced the story (e.g., _Business/Financial Desk_)
|
135
|
+
# * <tt>Facet::GEO</tt> - Standardized names of geographic locations, assigned by Times indexers (must be in UPPERCASE)
|
136
|
+
# * <tt>Facet::MATERIAL_TYPE</tt> - The general article type, such as Biography, Editorial or Review
|
137
|
+
# * <tt>Facet::ORGANIZATION</tt> - Standardized names of organizations, assigned by Times indexers (must be UPPERCASE)
|
138
|
+
# * <tt>Facet::PAGE</tt> - The page the article appeared on (in the printed paper)
|
139
|
+
# * <tt>Facet::PERSON</tt> - Standardized names of people, assigned by Times indexers. When used in a request, values must be UPPERCASE.
|
140
|
+
# * <tt>Facet::PUB_DAY</tt> - The day (DD) segment of date, separated for use as facets
|
141
|
+
# * <tt>Facet::PUB_MONTH</tt> - The month (MM) segment of date, separated for use as facets
|
142
|
+
# * <tt>Facet::PUB_YEAR</tt> - The year (YYYY) segment of date, separated for use as facets
|
143
|
+
# * <tt>Facet::SECTION_PAGE</tt> - The full page number of the printed article (e.g., _D00002_)
|
144
|
+
# * <tt>Facet::SOURCE</tt> - The originating body (e.g., _AP_, _Dow Jones_, _The New York Times_)
|
145
|
+
# * <tt>Facet::WORKS_MENTIONED</tt> - Literary works mentioned in the article
|
146
|
+
# * <tt>Facet::NYTD_BYLINE</tt> - The article byline, formatted for NYTimes.com
|
147
|
+
# * <tt>Facet::NYTD_DESCRIPTION</tt> - Descriptive subject terms, assigned for use on NYTimes.com (to get standardized terms, use the TimesTags API). When used in a request, values must be Mixed Case
|
148
|
+
# * <tt>Facet::NYTD_GEO</tt> - Standardized names of geographic locations, assigned for use on NYTimes.com (to get standardized terms, use the TimesTags API). When used in a request, values must be Mixed Case
|
149
|
+
# * <tt>Facet::NYTD_ORGANIZATION</tt> - Standardized names of organizations, assigned for use on NYTimes.com (to get standardized terms, use the TimesTags API). When used in a request, values must be Mixed Case
|
150
|
+
# * <tt>Facet::NYTD_PERSON</tt> - Standardized names of people, assigned for use on NYTimes.com (to get standardized terms, use the TimesTags API). When used in a request, values must be Mixed Case.
|
151
|
+
# * <tt>Facet::NYTD_SECTION</tt> - The section the article appears in (on NYTimes.com)
|
152
|
+
# * <tt>Facet::NYTD_WORKS_MENTIONED</tt> - Literary works mentioned (titles formatted for use on NYTimes.com)
|
153
|
+
#
|
154
|
+
# The following two search fields are used for facet searching:
|
155
|
+
# * <tt>:search_facets</tt> - the facets to search. Facets can be specified as a Hash of facet name to value(s) (like <tt>{Facet::GEO => 'CALIFORNIA'}</tt>), or Facet instances returned from a previous search can be passed directly, either singly or in an array. A single string can be passed as well if you have a hand-crafted facet string.
|
156
|
+
# * <tt>:exclude_facets</tt> - similar to <tt>:search_facets</tt> but is used to specify a list of facets to exclude.
|
157
|
+
#
|
158
|
+
# == OTHER SEARCH FIELDS
|
159
|
+
# * <tt>:fee</tt> - if true, adds <tt>fee:Y</tt> to the query; if false, adds <tt>-fee:Y</tt>; ignored when nil or absent
|
160
|
+
# * <tt>:begin_date</tt>, <tt>:end_date</tt> - the parameters are used to specify a start and end date for search results. BOTH of these must be provided or the API will return an error. Accepts either a Time/Date argument or a string of the format YYYYMMDD. For convenience the following alternative methods are provided
|
161
|
+
# * <tt>:before</tt> - an alternative to :end_date. Automatically adds a :before_date of sometime in 1980 if no :since argument is also provided; to be implemented
|
162
|
+
# * <tt>:since</tt> - An alternative to :begin_date. Automatically adds an :end_date of Time.now if no :before argument is provided; to be implemented.
|
163
|
+
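# * <tt>:has_thumbnail</tt> - if true, adds <tt>small_image:Y</tt> to the query; if false, adds <tt>-small_image:Y</tt>; ignored when nil or absent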
|
164
|
+
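# * <tt>:has_multimedia</tt> - if true, adds <tt>related_multimedia:Y</tt> to the query; if false, adds <tt>-related_multimedia:Y</tt>; ignored when nil or absent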
|
165
|
+
#
|
166
|
+
# == FACET SUMMARIES
|
167
|
+
#
|
168
|
+
# The <tt>:facets</tt> argument can be used to specify up to 5 facet fields to be returned alongside the search that provide overall counts
|
169
|
+
# of how much each facet term appears in the search results. FIXME provide list of available facets as well as description of :nytd parameter.
|
170
|
+
#
|
171
|
+
# == ARTICLE FIELDS
|
172
|
+
#
|
173
|
+
# The <tt>:fields</tt> parameter is used to indicate what fields are returned with each article from the search results. If not specified, only
|
174
|
+
# the following fields are returned for each article: body, byline, date, title, and url. To return specific fields, any of the search fields
|
175
|
+
# from above can be explicitly specified in a comma-delimited list, as well as the additional display-only (not searchable) fields below (these
|
176
|
+
# are strings or symbols):
|
177
|
+
#
|
178
|
+
# * <tt>:all</tt> - return all fields for the article
|
179
|
+
# * <tt>:none</tt> - display only the facet breakdown and no article results
|
180
|
+
# * <tt>:multimedia</tt> - return any related multimedia links for the article
|
181
|
+
# * <tt>:thumbnail</tt> - return information for a related thumbnail image (if the article has one)
|
182
|
+
# * <tt>:word_count</tt> - the word_count of the article.
|
183
|
+
def self.search(query, params={})
  # Work on a copy so the caller's hash is never modified.
  options = params.dup

  # A String is the free-text query; a Hash is merged in as additional options.
  if query.is_a?(String)
    options[:query] = query
  elsif query.is_a?(Hash)
    options.merge!(query)
  end

  # Order matters: the facet and boolean builders append to the 'query' value
  # that add_query_params writes first.
  request = {}
  [
    :add_query_params,
    :add_search_facets_param,
    :add_boolean_params,
    :add_fields_param,
    :add_facets_param,
    :add_rank_params,
    :add_date_params,
    :add_offset_params
  ].each { |builder| send(builder, request, options) }

  parse_reply(invoke(request))
end
|
207
|
+
|
208
|
+
private
|
209
|
+
# Converts a date option into the string sent to the API.
# Strings pass through untouched; anything responding to strftime is formatted as YYYYMMDD.
# Raises ArgumentError (naming +field_name+) for any other kind of value.
def self.date_argument(field_name, arg)
  if arg.is_a?(String)
    arg
  elsif arg.respond_to?(:strftime)
    arg.strftime("%Y%m%d")
  else
    raise ArgumentError, "Only a string or Date/Time object is allowed as a parameter to the #{field_name} input"
  end
end
|
214
|
+
|
215
|
+
# Wraps each value stored under +facet_name+ in +params+ in a Facet (third argument nil).
# Returns nil when +params+ has no value for that facet.
def self.facet_params(params, facet_name)
  raw_terms = params[facet_name]
  raw_terms.nil? ? nil : raw_terms.map { |term| Facet.new(facet_name, term, nil) }
end
|
220
|
+
|
221
|
+
# Turns free text into a field-qualified query fragment.
#
# +argument+ is split into terms: a double-quoted phrase counts as one term, as does any
# other run of non-whitespace characters. Each term is prefixed with "field:". A term that
# starts with a minus sign, including a negated quoted phrase such as -"ice cream", is
# emitted as "-field:term". The terms are joined with single spaces.
#
# +argument+ is not modified.
def self.text_argument(field, argument)
  # The optional leading '-' keeps a negated phrase together as a single term.
  terms = argument.scan(/-?"[^"]+"|\S+/)

  qualified = terms.map do |term|
    if term.start_with?('-')
      "-#{field}:#{term[1..-1]}"
    else
      "#{field}:#{term}"
    end
  end

  qualified.join(' ')
end
|
234
|
+
|
235
|
+
|
236
|
+
# Hands the reply returned by invoke to ResultSet.init_from_api and returns whatever that produces.
def self.parse_reply(reply)
|
237
|
+
ResultSet.init_from_api(reply)
|
238
|
+
end
|
239
|
+
|
240
|
+
# Copies the <tt>:facets</tt> option into the outgoing 'facets' parameter as a comma-separated list.
# Accepts either a single facet name or an array of them; does nothing when the option is absent.
def self.add_facets_param(out_params, in_params)
  facets = in_params[:facets]
  return unless facets

  # Array() wraps a lone value and leaves arrays alone. String#to_a no longer exists on
  # Ruby 1.9 and later, so calling to_a on a single facet name would raise.
  out_params['facets'] = Array(facets).join(',')
end
|
245
|
+
|
246
|
+
# Translates the <tt>:fields</tt> option into the outgoing 'fields' parameter.
# nil leaves the parameter unset, :all expands to every name in ALL_FIELDS, a single
# String/Symbol is sent as-is, and an Array is joined with commas.
# Anything else raises ArgumentError.
def self.add_fields_param(out_params, in_params)
  requested = in_params[:fields]
  return if requested.nil?

  field_list =
    if :all == requested
      ALL_FIELDS.join(',')
    elsif requested.is_a?(String) || requested.is_a?(Symbol)
      requested.to_s
    elsif requested.is_a?(Array)
      requested.map { |field| field.to_s }.join(',')
    else
      raise ArgumentError, "Fields must either be :all, a single field name, or an array of field names (either strings or symbols)"
    end

  out_params['fields'] = field_list
end
|
260
|
+
|
261
|
+
# Builds the outgoing 'query' parameter from the free-text <tt>:query</tt> option plus any
# per-field text options named in TEXT_FIELDS. The parameter is set to nil when nothing was given.
def self.add_query_params(out_params, in_params)
  # One qualified fragment per text field that was supplied (nil for those that were not).
  field_terms = TEXT_FIELDS.map do |name|
    text = in_params[name.to_sym]
    text_argument(name, text) if text
  end

  combined = ([in_params[:query]] + field_terms).compact.join(' ')
  out_params['query'] = combined.empty? ? nil : combined
  nil
end
|
276
|
+
|
277
|
+
# Formats one facet restriction as "name:[v1,v2]", prefixed with '-' when +exclude+ is truthy.
# +value+ may be a single term or an array of terms.
def self.facet_argument(name, value, exclude = false)
  terms = value.is_a?(Array) ? value : [value]
  prefix = exclude ? '-' : ''
  "#{prefix}#{name}:[#{terms.join(',')}]"
end
|
284
|
+
|
285
|
+
# Normalises a facet option into an array of query fragments.
#
# * String - used verbatim (+exclude+ is not applied to it)
# * Facet - a single fragment built from its type and term
# * Array - must contain only Facet instances; terms are grouped by facet type
# * Hash - facet name => value(s), one fragment per pair
#
# Anything else, including nil, yields an empty array.
def self.parse_facet_params(facets, exclude = false)
  case facets
  when String
    [facets]
  when Facet
    [facet_argument(facets.facet_type, facets.term, exclude)]
  when Array
    unless facets.all? { |facet| facet.is_a?(Facet) }
      raise ArgumentError, "Only Facet instances can be passed in as an array; use Hash for Facet::Name => values input"
    end

    # Collect the terms for each facet type so each type produces a single fragment.
    terms_by_type = {}
    facets.each { |facet| (terms_by_type[facet.facet_type] ||= []) << facet.term }
    terms_by_type.map { |type, terms| facet_argument(type, terms, exclude) }
  when Hash
    facets.map { |type, terms| facet_argument(type, terms, exclude) }
  else
    []
  end
end
|
320
|
+
|
321
|
+
# Appends the <tt>:search_facets</tt> and <tt>:exclude_facets</tt> restrictions to the outgoing
# 'query' parameter. Leaves 'query' untouched when neither option yields a fragment.
def self.add_search_facets_param(out_params, in_params)
  included = parse_facet_params(in_params[:search_facets])
  excluded = parse_facet_params(in_params[:exclude_facets], true)

  facet_terms = included + excluded
  return if facet_terms.empty?

  # Any existing query text comes first; a nil query is dropped by compact.
  out_params['query'] = [out_params['query'], *facet_terms].compact.join(' ')
end
|
331
|
+
|
332
|
+
# Appends yes/no restrictions to the outgoing 'query' parameter.
# For each of <tt>:fee</tt>, <tt>:has_multimedia</tt> and <tt>:has_thumbnail</tt> that is not nil,
# a truthy value adds "field:Y" and a falsy value adds "-field:Y".
def self.add_boolean_params(out_params, in_params)
  flag_terms = []

  [
    [:fee, 'fee'],
    [:has_multimedia, 'related_multimedia'],
    [:has_thumbnail, 'small_image']
  ].each do |option, api_field|
    setting = in_params[option]
    next if setting.nil?

    flag_terms << (setting ? "#{api_field}:Y" : "-#{api_field}:Y")
  end

  return if flag_terms.empty?

  out_params['query'] = ([out_params['query']] + flag_terms).compact.join(' ')
end
|
352
|
+
|
353
|
+
# Copies the <tt>:rank</tt> option into the outgoing 'rank' parameter as a string.
# Only :newest, :oldest and :closest are accepted; any other truthy value raises ArgumentError.
def self.add_rank_params(out_params, in_params)
  rank = in_params[:rank]
  return unless rank

  allowed = [:newest, :oldest, :closest]
  raise ArgumentError, "Rank should only be :newest | :oldest | :closest" unless allowed.include?(rank)

  out_params['rank'] = rank.to_s
end
|
362
|
+
|
363
|
+
# Copies the <tt>:begin_date</tt> and <tt>:end_date</tt> options, when present, into the
# outgoing parameters after converting each with date_argument.
def self.add_date_params(out_params, in_params)
  starts_on = in_params[:begin_date]
  ends_on = in_params[:end_date]

  out_params['begin_date'] = date_argument(:begin_date, starts_on) if starts_on
  out_params['end_date'] = date_argument(:end_date, ends_on) if ends_on
end
|
372
|
+
|
373
|
+
# Sets the outgoing 'offset' parameter from either <tt>:page</tt> (counted from 1) or
# <tt>:offset</tt> (passed through unchanged). When both are given, <tt>:offset</tt> wins
# because it is applied last. Non-integer values, or a page below 1, raise ArgumentError.
def self.add_offset_params(out_params, in_params)
  page = in_params[:page]
  if page
    raise ArgumentError, "Page must be an integer" unless page.is_a?(Integer)
    raise ArgumentError, "Page must count up from 1" if page < 1

    # Page counts from 1, offset counts from 0
    out_params['offset'] = page - 1
  end

  offset = in_params[:offset]
  if offset
    raise ArgumentError, "Offset must be an integer" unless offset.is_a?(Integer)

    out_params['offset'] = offset
  end
end
|
395
|
+
end
|
396
|
+
end
|
397
|
+
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'json'
|
3
|
+
require 'htmlentities'
|
4
|
+
|
5
|
+
module Nytimes
|
6
|
+
module Articles
|
7
|
+
##
# Shared plumbing for the Article Search API: stores the API key, builds request URLs,
# performs the HTTP call and converts raw reply values.
#
# The key, copyright and debug flag are class variables, so they are shared by Base and
# every subclass.
class Base
  API_SERVER = 'api.nytimes.com'
  API_VERSION = 'v1'
  API_NAME = 'article'
  API_BASE = "/svc/search/#{API_VERSION}/#{API_NAME}"

  @@api_key = nil
  @@copyright = nil
  @@debug = false

  ##
  # The copyright footer to be placed at the bottom of any data from the New York Times.
  # Note this is only set after an API call.
  def self.copyright
    @@copyright
  end

  ##
  # Set the API key used for operations. This needs to be called before any requests against the API.
  # To obtain an API key, go to http://developer.nytimes.com/
  def self.api_key=(key)
    @@api_key = key
  end

  ##
  # When set to a truthy value, every request URL is printed to standard output before it is fetched.
  def self.debug=(flag)
    @@debug = flag
  end

  ##
  # Returns the current value of the API Key
  def self.api_key
    @@api_key
  end

  ##
  # Builds a request URI to call the API server.
  #
  # Keys and values are converted to strings and percent-encoded, so Integer values (such as
  # an offset) are accepted and reserved characters like '&' or '+' cannot corrupt the query
  # string. Parameters whose value is nil are left out.
  def self.build_request_url(params)
    pairs = params.reject { |_key, value| value.nil? }.map do |key, value|
      "#{escape_query_component(key)}=#{escape_query_component(value)}"
    end

    URI::HTTP.build :host => API_SERVER,
                    :path => API_BASE,
                    :query => pairs.join('&')
  end

  ##
  # Decodes HTML entities in +value+; nil is passed through.
  def self.text_field(value)
    return nil if value.nil?
    coder = HTMLEntities.new
    coder.decode(value)
  end

  ##
  # Converts +value+ with to_i; nil is passed through.
  def self.integer_field(value)
    return nil if value.nil?
    value.to_i
  end

  ##
  # Parses a YYYYMMDD string into a Date. Returns nil for anything that is not a String of
  # exactly eight digits.
  def self.date_field(value)
    return nil unless value.is_a?(String) && value =~ /\A\d{8}\z/
    Date.strptime(value, "%Y%m%d")
  end

  ##
  # Performs a request against the API and returns the parsed JSON reply.
  #
  # Records the reply's 'copyright' value as a side effect. Returns nil when the server
  # answers 404.
  #
  # Raises AuthenticationError when no API key has been set or the server answers 403,
  # BadRequestError on 400, ServerError on 500, ConnectionError on any other HTTP error,
  # and BadResponseError when the reply is empty or is not valid JSON.
  def self.invoke(params={})
    if @@api_key.nil?
      raise AuthenticationError, "You must initialize the API key before you run any API queries"
    end

    full_params = params.merge 'api-key' => @@api_key
    uri = build_request_url(full_params)

    puts "REQUEST: #{uri}" if @@debug

    reply = uri.read
    parsed_reply = JSON.parse reply

    raise BadResponseError, "Empty reply returned from API" if parsed_reply.nil?

    #case parsed_reply['status']
    # FIXME
    #end

    @@copyright = parsed_reply['copyright']

    parsed_reply
  rescue OpenURI::HTTPError => e
    # FIXME: Return message from body?
    # The branches below match on the status code at the start of the error message.
    case e.message
    when /\A400/
      raise BadRequestError, e.message
    when /\A403/
      raise AuthenticationError, e.message
    when /\A404/
      nil
    when /\A500/
      raise ServerError, e.message
    else
      raise ConnectionError, e.message
    end
  rescue JSON::ParserError
    raise BadResponseError, "Invalid JSON returned from API:\n#{reply}"
  end

  # Percent-encodes one query-string key or value. Spaces are emitted as %20, as the
  # previous URI.escape-based implementation did.
  def self.escape_query_component(value)
    URI.encode_www_form_component(value.to_s).gsub('+', '%20')
  end
  private_class_method :escape_query_component
end
|
109
|
+
end
|
110
|
+
end
|