nytimes-articles 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/HISTORY +9 -0
- data/LICENSE +24 -0
- data/README +32 -0
- data/Rakefile +54 -0
- data/VERSION.yml +4 -0
- data/features/nytimes_articles.feature +9 -0
- data/features/steps/nytimes_articles_steps.rb +0 -0
- data/features/support/env.rb +13 -0
- data/lib/nytimes_articles.rb +6 -0
- data/lib/nytimes_articles/article.rb +466 -0
- data/lib/nytimes_articles/base.rb +124 -0
- data/lib/nytimes_articles/exceptions.rb +38 -0
- data/lib/nytimes_articles/facet.rb +128 -0
- data/lib/nytimes_articles/facet_hash.rb +26 -0
- data/lib/nytimes_articles/query.rb +28 -0
- data/lib/nytimes_articles/result_set.rb +66 -0
- data/lib/nytimes_articles/thumbnail.rb +30 -0
- data/nytimes-articles.gemspec +73 -0
- data/script/console +10 -0
- data/test/nytimes/articles/test_article.rb +584 -0
- data/test/nytimes/articles/test_base.rb +120 -0
- data/test/nytimes/articles/test_facet.rb +109 -0
- data/test/nytimes/articles/test_query.rb +89 -0
- data/test/nytimes/articles/test_result_set.rb +62 -0
- data/test/nytimes/articles/test_thumbnail.rb +47 -0
- data/test/test_helper.rb +31 -0
- metadata +98 -0
@@ -0,0 +1,124 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'json'
|
3
|
+
require 'htmlentities'
|
4
|
+
|
5
|
+
module Nytimes
  module Articles
    ##
    # Shared plumbing for the Article Search API client: global settings (API key, debug flag, HTML entity decoding),
    # request URL construction, field coercion helpers and the HTTP call itself. Settings are kept in class variables,
    # so they are shared by Base and every class that inherits from it.
    class Base
      API_SERVER = 'api.nytimes.com'
      API_VERSION = 'v1'
      API_NAME = 'article'
      API_BASE = "/svc/search/#{API_VERSION}/#{API_NAME}"

      @@api_key = nil
      @@debug = false
      @@decode_html_entities = true

      ##
      # Set the API key used for operations. This needs to be called before any requests against the API. To obtain an API key, go to http://developer.nytimes.com/
      def self.api_key=(key)
        @@api_key = key
      end

      ##
      # Set whether or not each request URL is printed to standard output before it is fetched.
      def self.debug=(flag)
        @@debug = flag
      end

      ##
      # Set whether or not to decode HTML entities when returning text fields.
      def self.decode_html_entities=(flag)
        @@decode_html_entities = flag
      end

      ##
      # Returns the current value of the API Key
      def self.api_key
        @@api_key
      end

      ##
      # Builds a request URI to call the API server. +params+ is a hash of query parameters; keys and values are
      # converted with +to_s+ and percent-encoded, so characters such as <tt>&</tt>, <tt>=</tt> and <tt>+</tt> inside a
      # value can no longer be mistaken for query-string syntax. Returns a URI::HTTP.
      def self.build_request_url(params)
        query = params.map { |k, v| "#{escape_component(k)}=#{escape_component(v)}" }.join('&')

        URI::HTTP.build :host => API_SERVER,
                        :path => API_BASE,
                        :query => query
      end

      ##
      # Returns +value+ as a text field: nil stays nil, otherwise HTML entities are decoded when that setting is on.
      def self.text_field(value)
        return nil if value.nil?
        @@decode_html_entities ? HTMLEntities.new.decode(value) : value
      end

      ##
      # Returns +value+ converted with +to_i+, or nil when +value+ is nil.
      def self.integer_field(value)
        return nil if value.nil?
        value.to_i
      end

      ##
      # Parses an eight-digit <tt>YYYYMMDD</tt> string into a Date. Anything else (nil, non-strings, strings with
      # extra characters or extra lines) yields nil.
      def self.date_field(value)
        # \A and \z anchor the whole string; ^ and $ would accept a matching line inside a multi-line value.
        return nil unless value.is_a?(String) && value =~ /\A\d{8}\z/
        Date.strptime(value, "%Y%m%d")
      end

      ##
      # Returns true only for +true+ and the string <tt>'Y'</tt>; every other value (including nil and <tt>'N'</tt>) is false.
      def self.boolean_field(value)
        TrueClass === value || 'Y' === value
      end

      ##
      # Performs a request against the API with +params+ plus the configured API key and returns the parsed JSON reply.
      #
      # Returns nil when the server answers 404. Raises AuthenticationError when no API key is set or the server answers
      # 403, BadRequestError on 400, ServerError on 500, ConnectionError on any other HTTP error, and BadResponseError
      # when the reply is empty or is not valid JSON.
      def self.invoke(params={})
        if @@api_key.nil?
          raise AuthenticationError, "You must initialize the API key before you run any API queries"
        end

        uri = build_request_url(params.merge('api-key' => @@api_key))

        puts "REQUEST: #{uri}" if @@debug

        reply = uri.read
        parsed_reply = JSON.parse reply

        raise BadResponseError, "Empty reply returned from API" if parsed_reply.nil?

        #case parsed_reply['status']
        # FIXME
        #end

        parsed_reply
      rescue OpenURI::HTTPError => e
        # FIXME: Return message from body?
        case e.message
        when /\A400/
          raise BadRequestError
        when /\A403/
          raise AuthenticationError
        when /\A404/
          return nil
        when /\A500/
          raise ServerError
        else
          # This message used to sit on an unreachable raise after the case statement.
          raise ConnectionError, "Error connecting to URL #{uri} #{e}"
        end
      rescue JSON::ParserError
        raise BadResponseError, "Invalid JSON returned from API:\n#{reply}"
      end

      ##
      # Percent-encodes one query key or value. Spaces are written as %20 rather than '+'.
      def self.escape_component(value)
        URI.encode_www_form_component(value.to_s).gsub('+', '%20')
      end
      private_class_method :escape_component
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Nytimes
  module Articles
    ##
    # Root of the library's exception hierarchy; every other error below derives from it.
    class Error < ::RuntimeError; end

    ##
    # Raised when there are problems authenticating your API key.
    AuthenticationError = Class.new(Error)

    ##
    # Raised when the request was not parsable by the API server.
    BadRequestError = Class.new(Error)

    ##
    # Raised when the response from the API server is not parsable.
    BadResponseError = Class.new(Error)

    ##
    # Raised when there is an error connecting to the API server.
    ServerError = Class.new(Error)

    ##
    # Raised when there is a timeout connecting to the server (to be implemented).
    TimeoutError = Class.new(Error)

    ##
    # Raised for general connection errors to the API server.
    ConnectionError = Class.new(Error)
  end
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
module Nytimes
  module Articles

    ##
    # This class represents a Facet used in the ArticleSearch API. Facets can be used to both search for matching articles (see Article#search) and
    # are also returned as article and search metadata. Facets are made up of 3 parts:
    # * <tt>facet_type</tt> - a string; see Article#search for a list of facet types
    # * <tt>term</tt> - a string as well
    # * <tt>count</tt> - Facets returned as search metadata (via the <tt>:facets</tt> parameter to Article#search) also include a non-nil count of matching articles for that facet
    class Facet
      ##
      # The term for the facet
      attr_reader :term

      ##
      # The number of times this facet has appeared in the search results (note: this only applies for facets returned in the facets header on an Article#search)
      attr_reader :count

      ##
      # The facet type
      attr_reader :facet_type

      # Facet name constants
      CLASSIFIERS = 'classifiers_facet'
      COLUMN = 'column_facet'
      DATE = 'date'
      DAY_OF_WEEK = 'day_of_week_facet'
      DESCRIPTION = 'des_facet'
      DESK = 'desk_facet'
      GEO = 'geo_facet'
      MATERIAL_TYPE = 'material_type_facet'
      ORGANIZATION = 'org_facet'
      PAGE = 'page_facet'
      PERSON = 'per_facet'
      PUB_DAY = 'publication_day'
      PUB_MONTH = 'publication_month'
      PUB_YEAR = 'publication_year'
      SECTION_PAGE = 'section_page_facet'
      SOURCE = 'source_facet'
      WORKS_MENTIONED = 'works_mentioned_facet'

      # Facets of content formatted for nytimes.com
      NYTD_BYLINE = 'nytd_byline'
      NYTD_DESCRIPTION = 'nytd_des_facet'
      NYTD_GEO = 'nytd_geo_facet'
      NYTD_ORGANIZATION = 'nytd_org_facet'
      NYTD_PERSON = 'nytd_per_facet'
      NYTD_SECTION = 'nytd_section_facet'
      NYTD_WORKS_MENTIONED = 'nytd_works_mentioned_facet'

      # The default 5 facets to return
      DEFAULT_RETURN_FACETS = [DESCRIPTION, GEO, ORGANIZATION, PERSON, DESK]

      ALL_FACETS = [CLASSIFIERS, COLUMN, DATE, DAY_OF_WEEK, DESCRIPTION, DESK, GEO, MATERIAL_TYPE, ORGANIZATION, PAGE, PERSON, PUB_DAY,
                    PUB_MONTH, PUB_YEAR, SECTION_PAGE, SOURCE, WORKS_MENTIONED, NYTD_BYLINE, NYTD_DESCRIPTION, NYTD_GEO,
                    NYTD_ORGANIZATION, NYTD_PERSON, NYTD_SECTION, NYTD_WORKS_MENTIONED]

      ##
      # Initializes the facet. There is seldom a reason for you to call this.
      def initialize(facet_type, term, count)
        @facet_type = facet_type
        @term = term
        @count = count
      end

      ##
      # Converts +facet+ into the facet name string used by the API.
      # * a String is returned unchanged
      # * a Facet yields its +facet_type+
      # * a Symbol is matched against a few irregular aliases (<tt>:geography</tt>, <tt>:org</tt>/<tt>:orgs</tt>, <tt>:people</tt> and their
      #   <tt>nytd_</tt> variants), then against the facet name constants by upper-cased name, then against the same name with a trailing S removed
      #
      # Raises ArgumentError for any other argument type and for symbols that match no facet.
      def self.symbol_name(facet)
        case facet
        when String
          return facet
        when Facet
          return facet.facet_type
        when Symbol
          # resolved below
        else
          raise ArgumentError, "Unsupported type to Facet.symbol_name"
        end

        case facet
        when :geography
          GEO
        when :org, :orgs
          ORGANIZATION
        when :people
          PERSON
        when :nytd_geography
          NYTD_GEO
        when :nytd_org, :nytd_orgs
          NYTD_ORGANIZATION
        when :nytd_people
          NYTD_PERSON
        else
          name = facet.to_s.upcase
          singular = name.sub(/S\z/, '')

          # Try the name as given first, then its singular form (e.g. :desks -> DESK).
          resolved = facet_constant(name)
          resolved ||= facet_constant(singular) unless singular == name

          resolved or raise ArgumentError, "Unable to find a matching facet key for symbol :#{facet}"
        end
      end

      ##
      # Initializes a selection of Facet objects returned from the API. Used for marshaling Facets in articles and metadata from search results
      # (Note: some facets are returned as scalar values)
      #
      # +api_hash+ maps a facet type to a list of hashes with <tt>'term'</tt> and <tt>'count'</tt> keys. Returns a Hash from facet type to an Array
      # of Facet objects, or nil when +api_hash+ is nil or empty. Raises ArgumentError when +api_hash+ is not a Hash.
      def self.init_from_api(api_hash)
        return nil if api_hash.nil?
        raise ArgumentError, "expecting a Hash only" unless api_hash.is_a? Hash
        return nil if api_hash.empty?

        out = {}

        api_hash.each_pair do |k,v|
          out[k] = v.map {|f| Facet.new(k, f['term'], f['count'])}
        end

        out
      end

      ##
      # Returns the value of the facet name constant called +name+, or nil when there is none. Only constants defined
      # directly on Facet whose value is listed in ALL_FACETS qualify, so top-level constants (ENV, RUBY_VERSION, ...)
      # and the array constants of this class are never returned.
      def self.facet_constant(name)
        # Guard against strings that are not valid constant names; const_defined? raises NameError for those.
        return nil unless name =~ /\A[A-Z][A-Z0-9_]*\z/
        return nil unless const_defined?(name, false)

        value = const_get(name, false)
        ALL_FACETS.include?(value) ? value : nil
      end
      private_class_method :facet_constant
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Nytimes
  module Articles
    ##
    # Read-only lookup over the facets attached to a result set. Entries can be fetched by the facet name string or by
    # any symbol understood by Facet.symbol_name.
    class FacetHash
      ##
      # +hash+ maps facet name strings to facet data. nil is accepted and treated as an empty table, so lookups on a
      # result set that carried no facets return nil instead of failing on a nil receiver.
      def initialize(hash)
        @facets = hash || {}
      end

      ##
      # Returns the entry stored for +key+, or nil when there is none. Symbols are translated to facet names through
      # Facet.symbol_name; strings are used as given. Raises ArgumentError for any other key type.
      def [](key)
        case key
        when Symbol
          key = Facet.symbol_name(key)
        when String
          # do nothing
        else
          raise ArgumentError, "Argument to facets hash must be a symbol or string name"
        end

        @facets[key]
      end

      ##
      # Builds a FacetHash around +hash+.
      def self.init_from_api(hash)
        new(hash)
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'digest'
|
2
|
+
|
3
|
+
module Nytimes
  module Articles
    ##
    # A single query against the Article Search API. Every name listed in FIELDS is exposed as a read/write
    # accessor; FIELDS is a fixed list of search options followed by Article::TEXT_FIELDS converted to symbols.
    #
    class Query
      FIELDS = [
        :only_facets, :except_facets, :begin_date, :end_date, :since, :before,
        :fee, :has_thumbnail, :facets, :fields, :query, :offset
      ] + Article::TEXT_FIELDS.map(&:to_sym)

      attr_accessor(*FIELDS)

      # Produce a hash which uniquely identifies this query: the SHA256 hex digest of every field name paired
      # with the inspected value of that field.
      # NOTE(review): this overrides Object#hash with a String result — confirm no caller uses a Query as a Hash key.
      def hash
        fingerprint = FIELDS.map { |field| "#{field}:#{send(field).inspect}" }.join(' ')
        Digest::SHA256.hexdigest(fingerprint)
      end

      # Perform this query. Fields whose value is nil are left out; returns result of Article.search
      def perform
        params = {}
        FIELDS.each do |field|
          value = send(field)
          params[field] = value unless value.nil?
        end
        Article.search(params)
      end
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'forwardable'
|
3
|
+
|
4
|
+
module Nytimes
  module Articles
    ##
    # The ResultSet is returned by Article#search and contains an array of up to 10 results out of the total matches. For your convenience, this
    # object provides a selection of array methods on the underlying collection of articles.
    class ResultSet < Base
      extend Forwardable

      ##
      # The offset of the result_set. Note that this is essentially the ordinal position of the batch among all results. First 10 results are offset
      # 0, the next 10 are offset 1, etc.
      attr_reader :offset

      ##
      # The total results that matched the query.
      attr_reader :total_results

      ##
      # The results array of articles returned. Note that if you call Articles#find with :fields => :none, this will return nil even if
      # there are matching results.
      attr_reader :results

      ##
      # If you have specified a list of <tt>:facets</tt> for Article#search, they will be returned in a hash keyed by the facet name here.
      attr_reader :facets

      BATCH_SIZE = 10

      # NOTE(review): Array#nitems was removed in Ruby 1.9; its delegator is kept so the set of defined methods is unchanged.
      def_delegators :@results, :&, :*, :+, :-, :[], :at, :collect, :compact, :each, :each_index, :empty?, :fetch, :first, :include?, :index, :last, :length, :map, :nitems, :reject, :reverse, :reverse_each, :rindex, :select, :size, :slice

      ##
      # Builds a result set from a hash with the keys <tt>:offset</tt>, <tt>:total_results</tt>, <tt>:results</tt> and <tt>:facets</tt>.
      # The facets value is wrapped in a FacetHash.
      def initialize(params)
        @offset = params[:offset]
        @total_results = params[:total_results]
        @results = params[:results]
        @facets = FacetHash.init_from_api(params[:facets])
      end

      ##
      # For your convenience, the page_number method is an alternate version of #offset that counts up from 1.
      # Returns 0 when there are no results at all.
      def page_number
        return 0 if @total_results == 0
        @offset + 1
      end

      ##
      # Calculates the total number of pages in the results based on the standard batch size and total results.
      def total_pages
        return 0 if @total_results == 0
        (@total_results.to_f / BATCH_SIZE).ceil
      end

      ##
      # Used to initialize a new result_set from Article#search. +api_hash+ is the parsed API reply; its
      # <tt>'offset'</tt>, <tt>'total'</tt>, <tt>'results'</tt> and <tt>'facets'</tt> entries are read.
      def self.init_from_api(api_hash)
        # A reply without a 'results' entry leaves #results nil, as described on the results attribute,
        # instead of raising NoMethodError on nil.
        raw_results = api_hash['results']
        articles = raw_results && raw_results.map {|r| Article.init_from_api(r)}

        self.new(:offset => integer_field(api_hash['offset']),
                 :total_results => integer_field(api_hash['total']),
                 :results => articles,
                 :facets => Facet.init_from_api(api_hash['facets'])
                )
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Nytimes
  module Articles
    ##
    # Groups the thumbnail fields of an article (image URL, width and height) into one object. Articles only carry
    # these fields when they were requested through <tt>:fields</tt> in a search and a thumbnail exists.
    class Thumbnail
      attr_reader :url, :width, :height

      ##
      # Stores the three values exactly as given.
      def initialize(url, width, height)
        @url, @width, @height = url, width, height
      end

      ##
      # Builds a Thumbnail from the <tt>small_image_url</tt>, <tt>small_image_width</tt> and <tt>small_image_height</tt>
      # entries of +api_hash+. Returns nil when +api_hash+ is nil or has no image URL. Width and height are converted
      # with +to_i+ when present and left nil otherwise.
      def self.init_from_api(api_hash)
        return nil if api_hash.nil? || !api_hash['small_image_url']

        raw_width = api_hash['small_image_width']
        raw_height = api_hash['small_image_height']

        new(api_hash['small_image_url'],
            raw_width.nil? ? nil : raw_width.to_i,
            raw_height.nil? ? nil : raw_height.to_i)
      end
    end
  end
end
|