nytimes-articles 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,124 @@
1
+ require 'open-uri'
2
+ require 'json'
3
+ require 'htmlentities'
4
+
5
+ module Nytimes
6
+ module Articles
7
##
# Shared plumbing for the Article Search API classes: holds the API key and
# global options, builds request URLs, coerces raw API field values, and
# performs the HTTP call.
#
# Settings are stored in class variables on purpose so that a value set via
# Base is visible from every subclass.
class Base
  API_SERVER = 'api.nytimes.com'
  API_VERSION = 'v1'
  API_NAME = 'article'
  API_BASE = "/svc/search/#{API_VERSION}/#{API_NAME}"

  @@api_key = nil
  @@debug = false
  @@decode_html_entities = true

  ##
  # Set the API key used for operations. This needs to be called before any requests against the API. To obtain an API key, go to http://developer.nytimes.com/
  def self.api_key=(key)
    @@api_key = key
  end

  ##
  # When set to a true value, every request URL is printed to stdout before it is fetched.
  # Note that the printed URL includes the API key.
  def self.debug=(flag)
    @@debug = flag
  end

  ##
  # Set whether or not to decode HTML entities when returning text fields.
  def self.decode_html_entities=(flag)
    @@decode_html_entities = flag
  end

  ##
  # Returns the current value of the API Key
  def self.api_key
    @@api_key
  end

  ##
  # Builds a request URI to call the API server.
  #
  # +params+ is a Hash of query parameters; keys and values are converted with +to_s+ and
  # percent-encoded, so reserved characters such as <tt>&</tt> and <tt>=</tt> inside a value
  # cannot corrupt the query string. Returns a URI::HTTP instance.
  def self.build_request_url(params)
    query = params.map { |k, v| "#{escape_component(k)}=#{escape_component(v)}" }.join('&')

    URI::HTTP.build :host => API_SERVER,
                    :path => API_BASE,
                    :query => query
  end

  ##
  # Percent-encodes a single query key or value. URI.encode_www_form_component emits
  # '+' for a space; it is rewritten to '%20' so spaces are encoded the way the previous
  # URI.escape-based implementation encoded them.
  def self.escape_component(value)
    URI.encode_www_form_component(value.to_s).gsub('+', '%20')
  end
  private_class_method :escape_component

  ##
  # Returns +value+ with HTML entities decoded when that option is enabled (the default),
  # or +value+ untouched otherwise. Returns nil for nil.
  def self.text_field(value)
    return nil if value.nil?
    @@decode_html_entities ? HTMLEntities.new.decode(value) : value
  end

  ##
  # Converts +value+ with +to_i+. Returns nil for nil.
  def self.integer_field(value)
    return nil if value.nil?
    value.to_i
  end

  ##
  # Parses a String of exactly eight digits (YYYYMMDD) into a Date. Returns nil for anything
  # else, including nil and non-String values. Eight digits that do not form a real calendar
  # date still raise from Date.strptime.
  def self.date_field(value)
    # \A and \z anchor the whole string; ^ and $ would accept a match on any single line.
    return nil unless value.is_a?(String) && value =~ /\A\d{8}\z/
    Date.strptime(value, "%Y%m%d")
  end

  ##
  # Maps an API flag to a boolean: +true+ and 'Y' are true; everything else
  # (nil, false, 'N', or any unrecognized value) is false.
  def self.boolean_field(value)
    case value
    when true, 'Y'
      true
    else
      false
    end
  end

  ##
  # Calls the API with +params+ (the API key is merged in automatically) and returns the
  # parsed JSON reply. Returns nil when the server answers 404.
  #
  # Raises AuthenticationError if no API key is set or the server answers 403,
  # BadRequestError on 400, ServerError on 500, ConnectionError on any other HTTP error,
  # and BadResponseError if the reply is empty or is not valid JSON.
  def self.invoke(params={})
    if @@api_key.nil?
      raise AuthenticationError, "You must initialize the API key before you run any API queries"
    end

    full_params = params.merge 'api-key' => @@api_key
    uri = build_request_url(full_params)

    puts "REQUEST: #{uri}" if @@debug

    reply = uri.read
    parsed_reply = JSON.parse reply

    if parsed_reply.nil?
      raise BadResponseError, "Empty reply returned from API"
    end

    #case parsed_reply['status']
    # FIXME
    #end

    parsed_reply
  rescue OpenURI::HTTPError => e
    # FIXME: Return message from body?
    # The exception message starts with the HTTP status code; it is passed along so the
    # caller can see which status triggered the error.
    case e.message
    when /^400/
      raise BadRequestError, e.message
    when /^403/
      raise AuthenticationError, e.message
    when /^404/
      nil
    when /^500/
      raise ServerError, e.message
    else
      raise ConnectionError, e.message
    end
  rescue JSON::ParserError
    raise BadResponseError, "Invalid JSON returned from API:\n#{reply}"
  end
end
123
+ end
124
+ end
@@ -0,0 +1,38 @@
1
module Nytimes
  module Articles
    ##
    # Root of the library's exception hierarchy; rescue this to catch any error raised by the library.
    class Error < ::RuntimeError; end

    ##
    # Raised when the API key is missing or is rejected by the API server.
    class AuthenticationError < Error; end

    ##
    # Raised when the API server could not parse the request.
    class BadRequestError < Error; end

    ##
    # Raised when the reply from the API server is empty or cannot be parsed.
    class BadResponseError < Error; end

    ##
    # Raised when the API server reports an internal error (HTTP 500).
    class ServerError < Error; end

    ##
    # Reserved for timeouts while connecting to the server (to be implemented).
    class TimeoutError < Error; end

    ##
    # Raised for general connection errors to the API server.
    class ConnectionError < Error; end
  end
end
@@ -0,0 +1,128 @@
1
+ module Nytimes
2
+ module Articles
3
+
4
+ ##
5
+ # This class represents a Facet used in the ArticleSearch API. Facets can be used to both search for matching articles (see Article#search) and
6
+ # are also returned as article and search metadata. Facets are made up of 3 parts:
7
+ # * <tt>facet_type</tt> - a string; see Article#search for a list of facet types
8
+ # * <tt>term</tt> - a string as well
9
+ # * <tt>count</tt> - Facets returned as search metadata (via the <tt>:facets</tt> parameter to Article#search) also include a non-nil count of matching articles for that facet
10
class Facet
  ##
  # The term for the facet
  attr_reader :term

  ##
  # The number of times this facet has appeared in the search results (note: this only applies for facets returned in the facets header on an Article#search)
  attr_reader :count

  ##
  # The facet type
  attr_reader :facet_type

  # Facet name constants
  CLASSIFIERS = 'classifiers_facet'
  COLUMN = 'column_facet'
  DATE = 'date'
  DAY_OF_WEEK = 'day_of_week_facet'
  DESCRIPTION = 'des_facet'
  DESK = 'desk_facet'
  GEO = 'geo_facet'
  MATERIAL_TYPE = 'material_type_facet'
  ORGANIZATION = 'org_facet'
  PAGE = 'page_facet'
  PERSON = 'per_facet'
  PUB_DAY = 'publication_day'
  PUB_MONTH = 'publication_month'
  PUB_YEAR = 'publication_year'
  SECTION_PAGE = 'section_page_facet'
  SOURCE = 'source_facet'
  WORKS_MENTIONED = 'works_mentioned_facet'

  # Facets of content formatted for nytimes.com
  NYTD_BYLINE = 'nytd_byline'
  NYTD_DESCRIPTION = 'nytd_des_facet'
  NYTD_GEO = 'nytd_geo_facet'
  NYTD_ORGANIZATION = 'nytd_org_facet'
  NYTD_PERSON = 'nytd_per_facet'
  NYTD_SECTION = 'nytd_section_facet'
  NYTD_WORKS_MENTIONED = 'nytd_works_mentioned_facet'

  # The default 5 facets to return
  DEFAULT_RETURN_FACETS = [DESCRIPTION, GEO, ORGANIZATION, PERSON, DESK]

  ALL_FACETS = [CLASSIFIERS, COLUMN, DATE, DAY_OF_WEEK, DESCRIPTION, DESK, GEO, MATERIAL_TYPE, ORGANIZATION, PAGE, PERSON, PUB_DAY,
                PUB_MONTH, PUB_YEAR, SECTION_PAGE, SOURCE, WORKS_MENTIONED, NYTD_BYLINE, NYTD_DESCRIPTION, NYTD_GEO,
                NYTD_ORGANIZATION, NYTD_PERSON, NYTD_SECTION, NYTD_WORKS_MENTIONED]

  ##
  # Initializes the facet. There is seldom a reason for you to call this.
  def initialize(facet_type, term, count)
    @facet_type = facet_type
    @term = term
    @count = count
  end

  ##
  # Resolves a facet reference to the value of the matching constant on this class.
  #
  # * a String is returned unchanged
  # * a Facet yields its +facet_type+
  # * a Symbol is matched against a few fixed aliases (<tt>:geography</tt>, <tt>:org</tt>/<tt>:orgs</tt>,
  #   <tt>:people</tt> and their <tt>nytd_</tt> variants), then against the constants defined on this
  #   class by upper-cased name, and finally against the same name with one trailing "S" removed
  #   (so <tt>:persons</tt> resolves to PERSON)
  #
  # Raises ArgumentError for any other argument type or for a symbol with no matching constant.
  def self.symbol_name(facet)
    return facet if facet.is_a?(String)
    return facet.facet_type if facet.is_a?(Facet)

    unless facet.is_a?(Symbol)
      raise ArgumentError, "Unsupported type to Facet.symbol_name"
    end

    case facet
    when :geography
      GEO
    when :org, :orgs
      ORGANIZATION
    when :people
      PERSON
    when :nytd_geography
      NYTD_GEO
    when :nytd_org, :nytd_orgs
      NYTD_ORGANIZATION
    when :nytd_people
      NYTD_PERSON
    else
      name = facet.to_s.upcase

      resolved = own_constant(name)
      # Accept a simple plural by retrying without the trailing "S".
      resolved = own_constant(name.sub(/S\z/, '')) if resolved.nil? && name =~ /S\z/

      if resolved.nil?
        raise ArgumentError, "Unable to find a matching facet key for symbol :#{facet}"
      end

      resolved
    end
  end

  ##
  # Returns the value of the constant +name+ when it is defined directly on this class, else nil.
  # Lookup deliberately skips inherited constants so that symbols such as <tt>:env</tt> cannot
  # resolve to unrelated top-level constants, and names that are not valid constant identifiers
  # return nil rather than raising NameError.
  def self.own_constant(name)
    return nil unless name =~ /\A[A-Z][A-Z0-9_]*\z/
    const_defined?(name, false) ? const_get(name, false) : nil
  end
  private_class_method :own_constant

  ##
  # Initializes a selection of Facet objects returned from the API. Used for marshaling Facets in articles and metadata from search results
  # (Note: some facets are returned as scalar values)
  #
  # +api_hash+ maps a facet type to a collection of entries, each read via its 'term' and 'count' keys.
  # Returns a Hash mapping each facet type to an Array of Facet objects, or nil when +api_hash+ is
  # nil or empty. Raises ArgumentError when +api_hash+ is not a Hash.
  def self.init_from_api(api_hash)
    return nil if api_hash.nil?
    raise ArgumentError, "expecting a Hash only" unless api_hash.is_a? Hash
    return nil if api_hash.empty?

    out = {}

    api_hash.each_pair do |k, v|
      out[k] = v.map { |f| Facet.new(k, f['term'], f['count']) }
    end

    out
  end
end
127
+ end
128
+ end
@@ -0,0 +1,26 @@
1
+ module Nytimes
2
+ module Articles
3
##
# Read-only lookup wrapper around a hash of facets keyed by facet name. Lookups accept either
# the string name or a symbol that Facet.symbol_name can resolve.
class FacetHash
  ##
  # +hash+ is the underlying facet hash. nil is accepted and treated as an empty hash so that
  # lookups return nil instead of raising NoMethodError.
  def initialize(hash)
    @facets = hash || {}
  end

  ##
  # Returns the entry stored under +key+, or nil when there is none. A Symbol key is first
  # converted with Facet.symbol_name; a String key is used as given.
  # Raises ArgumentError for any other key type.
  def [](key)
    case key
    when Symbol
      key = Facet.symbol_name(key)
    when String
      # already an API facet name
    else
      raise ArgumentError, "Argument to facets hash must be a symbol or string name"
    end

    @facets[key]
  end

  ##
  # Builds a FacetHash from +hash+; equivalent to +new+.
  def self.init_from_api(hash)
    new(hash)
  end
end
25
+ end
26
+ end
@@ -0,0 +1,28 @@
1
+ require 'digest'
2
+
3
+ module Nytimes
4
+ module Articles
5
+ ##
6
+ # The Query class represents a single query to the Article Search API. Supports
7
+ # all of the named parameters to Article.search as accessor methods.
8
+ #
9
class Query
  FIELDS = [:only_facets, :except_facets, :begin_date, :end_date, :since,
            :before, :fee, :has_thumbnail, :facets, :fields, :query, :offset] + Article::TEXT_FIELDS.map(&:to_sym)

  # One reader/writer pair per supported search parameter.
  attr_accessor(*FIELDS)

  # Produce a hash which uniquely identifies this query: the SHA-256 hex digest of every
  # field name paired with the +inspect+ of its current value.
  # NOTE(review): this overrides Object#hash and returns a String rather than an Integer --
  # confirm Query instances are never used as Hash keys.
  def hash
    description = FIELDS.map { |field| "#{field}:#{send(field).inspect}" }.join(' ')
    Digest::SHA256.hexdigest(description)
  end

  # Perform this query. Returns result of Article.search, called with only the fields
  # whose value is not nil.
  def perform
    params = {}

    FIELDS.each do |field|
      value = send(field)
      params[field] = value unless value.nil?
    end

    Article.search(params)
  end
end
27
+ end
28
+ end
@@ -0,0 +1,66 @@
1
+ require 'rubygems'
2
+ require 'forwardable'
3
+
4
+ module Nytimes
5
+ module Articles
6
+ ##
7
+ # The ResultSet is returned by Article#search and contains an array of up to 10 results out of the total matches. For your convenience, this
8
+ # object provides a selection of array methods on the underlying collection of articles.
9
class ResultSet < Base
  extend Forwardable

  ##
  # The offset of the result_set. Note that this is essentially the ordinal position of the batch among all results. First 10 results are offset
  # 0, the next 10 are offset 1, etc.
  attr_reader :offset

  ##
  # The total results that matched the query.
  attr_reader :total_results

  ##
  # The results array of articles returned. Note that if you call Articles#find with :fields => :none, this will return nil even if
  # there are matching results.
  attr_reader :results

  ##
  # If you have specified a list of <tt>:facets</tt> for Article#search, they will be returned in a hash keyed by the facet name here.
  attr_reader :facets

  BATCH_SIZE = 10

  # Array-style convenience methods forwarded to the underlying results collection.
  def_delegators :@results, :&, :*, :+, :-, :[], :at, :collect, :compact, :each, :each_index, :empty?, :fetch, :first, :include?, :index, :last, :length, :map, :reject, :reverse, :reverse_each, :rindex, :select, :size, :slice

  ##
  # +params+ is a Hash with the keys <tt>:offset</tt>, <tt>:total_results</tt>, <tt>:results</tt> and
  # <tt>:facets</tt>. The facets value is wrapped in a FacetHash.
  def initialize(params)
    @offset = params[:offset]
    @total_results = params[:total_results]
    @results = params[:results]
    @facets = FacetHash.init_from_api(params[:facets])
  end

  ##
  # The number of non-nil entries in the results. Implemented here rather than delegated
  # because Array#nitems no longer exists in Ruby 1.9 and later.
  def nitems
    @results.count { |result| !result.nil? }
  end

  ##
  # For your convenience, the page_number method is an alternate version of #offset that counts up from 1.
  # Returns 0 when there are no results at all.
  def page_number
    return 0 if @total_results == 0
    @offset + 1
  end

  ##
  # Calculates the total number of pages in the results based on the standard batch size and total results.
  def total_pages
    return 0 if @total_results == 0
    (@total_results.to_f / BATCH_SIZE).ceil
  end

  ##
  # Used to initialize a new result_set from Article#search.
  #
  # +api_hash+ is the parsed API reply; its 'offset', 'total', 'results' and 'facets' entries are read.
  # A reply without a 'results' entry produces a ResultSet whose +results+ is nil instead of raising.
  def self.init_from_api(api_hash)
    raw_results = api_hash['results']
    articles = raw_results.nil? ? nil : raw_results.map { |r| Article.init_from_api(r) }

    self.new(:offset => integer_field(api_hash['offset']),
             :total_results => integer_field(api_hash['total']),
             :results => articles,
             :facets => Facet.init_from_api(api_hash['facets'])
            )
  end
end
65
+ end
66
+ end
@@ -0,0 +1,30 @@
1
+ module Nytimes
2
+ module Articles
3
+ ##
4
+ # If requested in <tt>:fields</tt> for an article search, some articles are returned with a matching thumbnail image. The several thumbnail
5
+ # fields are collected together into a single Thumbnail instance for your convenience.
6
class Thumbnail
  attr_reader :url, :width, :height

  ##
  # Stores the image location and its dimensions exactly as given.
  def initialize(url, width, height)
    @url, @width, @height = url, width, height
  end

  ##
  # Builds a Thumbnail from the 'small_image_url', 'small_image_width' and 'small_image_height'
  # entries of +api_hash+. Returns nil when +api_hash+ is nil or has no image URL. A dimension
  # that is present is converted with +to_i+; a missing one stays nil.
  def self.init_from_api(api_hash)
    return nil if api_hash.nil? || !api_hash['small_image_url']

    raw_width = api_hash['small_image_width']
    raw_height = api_hash['small_image_height']

    new(api_hash['small_image_url'],
        raw_width.nil? ? nil : raw_width.to_i,
        raw_height.nil? ? nil : raw_height.to_i)
  end
end
29
+ end
30
+ end