nytimes-articles 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/HISTORY +9 -0
- data/LICENSE +24 -0
- data/README +32 -0
- data/Rakefile +54 -0
- data/VERSION.yml +4 -0
- data/features/nytimes_articles.feature +9 -0
- data/features/steps/nytimes_articles_steps.rb +0 -0
- data/features/support/env.rb +13 -0
- data/lib/nytimes_articles.rb +6 -0
- data/lib/nytimes_articles/article.rb +466 -0
- data/lib/nytimes_articles/base.rb +124 -0
- data/lib/nytimes_articles/exceptions.rb +38 -0
- data/lib/nytimes_articles/facet.rb +128 -0
- data/lib/nytimes_articles/facet_hash.rb +26 -0
- data/lib/nytimes_articles/query.rb +28 -0
- data/lib/nytimes_articles/result_set.rb +66 -0
- data/lib/nytimes_articles/thumbnail.rb +30 -0
- data/nytimes-articles.gemspec +73 -0
- data/script/console +10 -0
- data/test/nytimes/articles/test_article.rb +584 -0
- data/test/nytimes/articles/test_base.rb +120 -0
- data/test/nytimes/articles/test_facet.rb +109 -0
- data/test/nytimes/articles/test_query.rb +89 -0
- data/test/nytimes/articles/test_result_set.rb +62 -0
- data/test/nytimes/articles/test_thumbnail.rb +47 -0
- data/test/test_helper.rb +31 -0
- metadata +98 -0
data/.gitignore
ADDED
data/HISTORY
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
Copyright (c) 2008 Jacob Harris
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
22
|
+
THIS LICENSE ALSO APPLIES ONLY TO THE RUBY GEM FOR ACCESSING THE NYTIMES
|
23
|
+
ARTICLE SEARCH API AND DOES NOT SUPPLEMENT OR ABROGATE ANY TERMS OF USE
|
24
|
+
OF THE API AT THE NEW YORK TIMES.
|
data/README
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
= nytimes-articles
|
2
|
+
|
3
|
+
A simple GEM for interacting with the New York Times' Article Search API (http://developer.nytimes.com/docs/article_search_api)
|
4
|
+
|
5
|
+
== CREDITS
|
6
|
+
|
7
|
+
* Jacob Harris (http://open.blogs.nytimes.com/)
|
8
|
+
* Taylor Barstow (http://www.nytexplorer.com/)
|
9
|
+
|
10
|
+
== USAGE
|
11
|
+
|
12
|
+
require 'rubygems'
|
13
|
+
require 'nytimes-articles'
|
14
|
+
|
15
|
+
include Nytimes::Articles
|
16
|
+
Base.api_key = 'YOUR API KEY'
|
17
|
+
Article.search 'ice cream'
|
18
|
+
Article.search :title => '"ice cream"', :since => 3.weeks.ago, :fields => :basic
|
19
|
+
Article.search :author => 'Sewell Chan', :facets => [:geo, :person]
|
20
|
+
|
21
|
+
See the RDOC for Article#search for better instructions on usage.
|
22
|
+
|
23
|
+
== TODO
|
24
|
+
|
25
|
+
The following functionality is still to be implemented:
|
26
|
+
|
27
|
+
* Parsing multimedia and related_multimedia fields
|
28
|
+
* Coercion of some facet results into more suitable Ruby types (mostly Dates / Integer fields)
|
29
|
+
* Next / previous result set pagination (with memoization?)
|
30
|
+
|
31
|
+
== COPYRIGHT
|
32
|
+
Copyright (c) 2008 Jacob Harris. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'rake'

# Gem packaging tasks. Jeweler is optional: when it is missing we only print
# an installation hint so the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name        = 'nytimes-articles'
    gem.summary     = 'A gem for accessing the NYTimes Article Search API'
    gem.email       = 'jharris@nytimes.com'
    gem.homepage    = 'http://github.com/harrisj/nytimes-articles'
    gem.description = 'A gem for accessing the New York Times Article Search API'
    gem.authors     = ['Jacob Harris']
    gem.requirements << 'Unicode'
    gem.requirements << 'The htmlentities gem'
    gem.add_dependency('htmlentities')
  end
rescue LoadError
  puts 'Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com'
end

# RDoc generation for the README and everything under lib/.
require 'rake/rdoctask'
Rake::RDocTask.new do |doc|
  doc.rdoc_dir = 'rdoc'
  doc.title    = 'nytimes-articles'
  doc.options << '--line-numbers' << '--inline-source'
  doc.rdoc_files.include('README*')
  doc.rdoc_files.include('lib/**/*.rb')
end

# Unit tests: every test/**/test_*.rb file.
require 'rake/testtask'
Rake::TestTask.new(:test) do |tests|
  tests.libs << 'lib' << 'test'
  tests.pattern = 'test/**/test_*.rb'
  tests.verbose = false
end

# Coverage task; only defined when rcov is installed.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |coverage|
    coverage.libs << 'test'
    coverage.test_files = FileList['test/**/test_*.rb']
    coverage.verbose = true
  end
rescue LoadError
  puts 'RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov'
end

# Cucumber features; only defined when cucumber is installed.
begin
  require 'cucumber/rake/task'
  Cucumber::Rake::Task.new(:features)
rescue LoadError
  puts 'Cucumber is not available. In order to run features, you must: sudo gem install cucumber'
end

task :default => :test
|
data/VERSION.yml
ADDED
File without changes
|
@@ -0,0 +1,466 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
module Nytimes
|
4
|
+
module Articles
|
5
|
+
##
|
6
|
+
# The Article class represents a single article returned from the New York Times Article Search API. Note that an article can have many attributes
|
7
|
+
# but these are not necessarily populated unless you explicitly request them in the reply from the server via the <tt>:fields</tt> parameter to
|
8
|
+
# search (or use <tt>:fields => :all</tt>).
|
9
|
+
class Article < Base
|
10
|
+
# Field-name groups understood by Article. The grouping mirrors how
# init_from_api coerces each value (raw, text, integer, boolean).
RAW_FIELDS = %w(url)
TEXT_FIELDS = %w(abstract author body byline lead_paragraph nytd_lead_paragraph nytd_title title)
NUMERIC_FIELDS = %w(word_count)
BOOLEAN_FIELDS = %w(fee small_image)
IMAGE_FIELDS = %w(small_image small_image_url small_image_height small_image_width)
MULTIMEDIA_FIELDS = %w(multimedia related_multimedia)

# Every field name that can be requested from the API. 'small_image' is listed in
# both BOOLEAN_FIELDS and IMAGE_FIELDS, so the combined list is de-duplicated to
# avoid sending the same field twice in the 'fields' request parameter.
ALL_FIELDS = (TEXT_FIELDS + RAW_FIELDS + NUMERIC_FIELDS + BOOLEAN_FIELDS + MULTIMEDIA_FIELDS + Facet::ALL_FACETS + IMAGE_FIELDS).uniq

# begin_date used when a search supplies :before without any begin date (YYYYMMDD).
EARLIEST_BEGIN_DATE = '19810101'
|
20
|
+
|
21
|
+
attr_reader *ALL_FIELDS
|
22
|
+
|
23
|
+
# special additional objects
|
24
|
+
attr_reader :thumbnail
|
25
|
+
|
26
|
+
# Scalar facets
|
27
|
+
attr_reader :page, :column, :pub_month, :pub_year, :pub_day, :day_of_week, :desk, :date, :section_page, :source
|
28
|
+
|
29
|
+
# Facets that return multiple values
|
30
|
+
attr_reader :classifiers, :descriptions, :geo, :material_types, :organizations, :persons, :nytd_bylines, :nytd_descriptions, :nytd_geo, :nytd_organizations, :nytd_persons, :nytd_sections, :nytd_works_mentioned, :works_mentioned
|
31
|
+
alias :people :persons
|
32
|
+
alias :nytd_people :nytd_persons
|
33
|
+
alias :places :geo
|
34
|
+
alias :nytd_places :nytd_geo
|
35
|
+
alias :subjects :descriptions
|
36
|
+
alias :nytd_subjects :nytd_descriptions
|
37
|
+
|
38
|
+
##
|
39
|
+
# Create a new Article from hash arguments. You really don't need to call this as Article instances are automatically returned from the API
|
40
|
+
def initialize(params={})
  # Each key becomes an instance variable of the same name holding its value.
  params.each_pair { |name, value| instance_variable_set("@#{name}", value) }
end
|
45
|
+
|
46
|
+
##
|
47
|
+
# Is this article available for a fee?
|
48
|
+
alias :fee? :fee
|
49
|
+
|
50
|
+
##
|
51
|
+
# Is this article available for free?
|
52
|
+
def free?
  # Simply the negation of fee?; a nil (unknown) fee therefore counts as free.
  !fee?
end
|
55
|
+
|
56
|
+
##
|
57
|
+
# Creates a new Article from a hash returned from the API. This is called on search results. You have no reason to call it.
|
58
|
+
def self.init_from_api(params)
  # params is the string-keyed hash for one article in an API reply; each value
  # is passed through the matching coercion helper before being stored.
  Article.new(
    # Core article fields
    :abstract             => text_field(params['abstract']),
    :author               => text_field(params['author']),
    :body                 => text_field(params['body']),
    :byline               => text_field(params['byline']),
    :fee                  => boolean_field(params['fee']),
    :lead_paragraph       => text_field(params['lead_paragraph']),
    :nytd_title           => text_field(params['nytd_title']),
    :nytd_lead_paragraph  => text_field(params['nytd_lead_paragraph']),
    :related_multimedia   => nil, # FIXME
    :thumbnail            => Thumbnail.init_from_api(params),
    :title                => text_field(params['title']),
    :url                  => params['url'],
    :word_count           => integer_field(params['word_count']),

    # Facets that return scalars
    :page                 => integer_field(params[Facet::PAGE]),
    :column               => text_field(params[Facet::COLUMN]),
    :pub_month            => integer_field(params[Facet::PUB_MONTH]),
    :pub_year             => integer_field(params[Facet::PUB_YEAR]),
    :pub_day              => integer_field(params[Facet::PUB_DAY]),
    :day_of_week          => params[Facet::DAY_OF_WEEK],
    :desk                 => text_field(params[Facet::DESK]),
    :date                 => date_field(params[Facet::DATE]),
    :section_page         => params[Facet::SECTION_PAGE],
    :source               => text_field(params[Facet::SOURCE]),

    # FIXME! MORE FACET PARAMS
    # Facets that return arrays
    :classifiers          => facet_params(params, Facet::CLASSIFIERS),
    :descriptions         => facet_params(params, Facet::DESCRIPTION),
    :geo                  => facet_params(params, Facet::GEO),
    :material_types       => facet_params(params, Facet::MATERIAL_TYPE),
    :organizations        => facet_params(params, Facet::ORGANIZATION),
    :persons              => facet_params(params, Facet::PERSON),
    :nytd_bylines         => facet_params(params, Facet::NYTD_BYLINE),
    :nytd_descriptions    => facet_params(params, Facet::NYTD_DESCRIPTION),
    :nytd_geo             => facet_params(params, Facet::NYTD_GEO),
    :nytd_organizations   => facet_params(params, Facet::NYTD_ORGANIZATION),
    :nytd_persons         => facet_params(params, Facet::NYTD_PERSON),
    :nytd_sections        => facet_params(params, Facet::NYTD_SECTION),
    :nytd_works_mentioned => facet_params(params, Facet::NYTD_WORKS_MENTIONED),
    :works_mentioned      => facet_params(params, Facet::WORKS_MENTIONED)
  )
end
|
106
|
+
|
107
|
+
##
|
108
|
+
# Executes a search against the Article Search API and returns a ResultSet of 10 articles. At its simplest form, can be invoked
|
109
|
+
# with just a string like so
|
110
|
+
#
|
111
|
+
# Article.search 'dog food'
|
112
|
+
#
|
113
|
+
# which will do a text search against several text fields in the article and return the most basic fields for each
|
114
|
+
# article, but it takes a large number of potential parameters. All of these fields and then some can be returned as display fields
|
115
|
+
# in the articles retrieved from search (see the <tt>:fields</tt> argument below)
|
116
|
+
#
|
117
|
+
# == TEXT FIELDS
|
118
|
+
#
|
119
|
+
# If passed a string as the first argument, the text will be used to search against the title, byline and body fields of articles. This text takes
|
120
|
+
# the following boolean syntax:
|
121
|
+
# * <tt>dog food</tt> - similar to doing a boolean AND search on both terms
|
122
|
+
# * <tt>"ice cream"</tt> - matches the words as a phrase in the text
|
123
|
+
# * <tt>ice -cream</tt> - to search text that doesn't contain a term, prefix with the minus sign.
|
124
|
+
#
|
125
|
+
# Should you wish to target text against specific text fields associated with the article, the following named parameters are supported:
|
126
|
+
# * <tt>:abstract</tt> - A summary of the article, written by Times indexers
|
127
|
+
# * <tt>:body</tt> - A portion of the beginning of the article. Note: Only a portion of the article body is included in responses. But when you search against the body field, you search the full text of the article.
|
128
|
+
# * <tt>:byline</tt> - The article byline, including the author's name
|
129
|
+
# * <tt>:lead_paragraph</tt> - The first paragraph of the article (as it appeared in the printed newspaper)
|
130
|
+
# * <tt>:nytd_byline</tt> - The article byline, formatted for NYTimes.com
|
131
|
+
# * <tt>:nytd_lead_paragraph</tt> - The first paragraph of the article (as it appears on NYTimes.com)
|
132
|
+
# * <tt>:nytd_title</tt> - The article title on NYTimes.com (this field may or may not match the title field; headlines may be shortened and edited for the Web)
|
133
|
+
# * <tt>:text</tt> - The text field consists of title + byline + body (combined in an OR search) and is the default field for keyword searches.
|
134
|
+
# * <tt>:title</tt> - The article title (headline); corresponds to the headline that appeared in the printed newspaper
|
135
|
+
# * <tt>:url</tt> - The URL of the article on NYTimes.com
|
136
|
+
#
|
137
|
+
# == FACET SEARCHING
|
138
|
+
#
|
139
|
+
# Beyond query searches, the NY Times API also allows you to search against controlled vocabulary metadata associated with the article. This is powerful, if you want precise matching against specific
|
140
|
+
# people, places, etc (eg, "I want stories about Ford the former president, not Ford the automotive company"). The following Facet constants are supported.
|
141
|
+
#
|
142
|
+
# * <tt>Facet::CLASSIFIERS</tt> - Taxonomic classifiers that reflect Times content categories, such as _Top/News/Sports_
|
143
|
+
# * <tt>Facet::COLUMN</tt> - A Times column title (if applicable), such as _Weddings_ or _Ideas & Trends_
|
144
|
+
# * <tt>Facet::DATE</tt> - The publication date in YYYYMMDD format
|
145
|
+
# * <tt>Facet::DAY_OF_WEEK</tt> - The day of the week (e.g., Monday, Tuesday) the article was published (compare <tt>PUB_DAY</tt>, which is the numeric date rather than the day of the week)
|
146
|
+
# * <tt>Facet::DESCRIPTION</tt> - Descriptive subject terms assigned by Times indexers (must be in UPPERCASE)
|
147
|
+
# * <tt>Facet::DESK</tt> - The Times desk that produced the story (e.g., _Business/Financial Desk_)
|
148
|
+
# * <tt>Facet::GEO</tt> - Standardized names of geographic locations, assigned by Times indexers (must be in UPPERCASE)
|
149
|
+
# * <tt>Facet::MATERIAL_TYPE</tt> - The general article type, such as Biography, Editorial or Review
|
150
|
+
# * <tt>Facet::ORGANIZATION</tt> - Standardized names of organizations, assigned by Times indexers (must be UPPERCASE)
|
151
|
+
# * <tt>Facet::PAGE</tt> - The page the article appeared on (in the printed paper)
|
152
|
+
# * <tt>Facet::PERSON</tt> - Standardized names of people, assigned by Times indexers. When used in a request, values must be UPPERCASE.
|
153
|
+
# * <tt>Facet::PUB_DAY</tt> - The day (DD) segment of date, separated for use as facets
|
154
|
+
# * <tt>Facet::PUB_MONTH</tt> - The month (MM) segment of date, separated for use as facets
|
155
|
+
# * <tt>Facet::PUB_YEAR</tt> - The year (YYYY) segment of date, separated for use as facets
|
156
|
+
# * <tt>Facet::SECTION_PAGE</tt> - The full page number of the printed article (e.g., _D00002_)
|
157
|
+
# * <tt>Facet::SOURCE</tt> - The originating body (e.g., _AP_, _Dow Jones_, _The New York Times_)
|
158
|
+
# * <tt>Facet::WORKS_MENTIONED</tt> - Literary works mentioned in the article
|
159
|
+
# * <tt>Facet::NYTD_BYLINE</tt> - The article byline, formatted for NYTimes.com
|
160
|
+
# * <tt>Facet::NYTD_DESCRIPTION</tt> - Descriptive subject terms, assigned for use on NYTimes.com (to get standardized terms, use the TimesTags API). When used in a request, values must be Mixed Case
|
161
|
+
# * <tt>Facet::NYTD_GEO</tt> - Standardized names of geographic locations, assigned for use on NYTimes.com (to get standardized terms, use the TimesTags API). When used in a request, values must be Mixed Case
|
162
|
+
# * <tt>Facet::NYTD_ORGANIZATION</tt> - Standardized names of organizations, assigned for use on NYTimes.com (to get standardized terms, use the TimesTags API). When used in a request, values must be Mixed Case
|
163
|
+
# * <tt>Facet::NYTD_PERSON</tt> - Standardized names of people, assigned for use on NYTimes.com (to get standardized terms, use the TimesTags API). When used in a request, values must be Mixed Case.
|
164
|
+
# * <tt>Facet::NYTD_SECTION</tt> - The section the article appears in (on NYTimes.com)
|
165
|
+
# * <tt>Facet::NYTD_WORKS_MENTIONED</tt> - Literary works mentioned (titles formatted for use on NYTimes.com)
|
166
|
+
#
|
167
|
+
# Note that for your convenience you can also search with symbol versions of the constants (<tt>:geo => ['MANHATTAN']</tt>). Even pluralization is supported. To get the string API version of the facet use Facet#symbol_name
|
168
|
+
#
|
169
|
+
# The following two search fields are used for facet searching:
|
170
|
+
# * <tt>:only_facets</tt> - takes a single value or array of facets to search. Facets can either be specified as a Hash of facet name => value(s) (like <tt>{Facet::GEO => 'CALIFORNIA'}</tt>) or facets returned from a previous search can be passed directly. A single string can be passed as well if you have a hand-crafted string.
|
171
|
+
# * <tt>:except_facets</tt> - similar to <tt>:only_facets</tt> but is used to specify a list of facets to exclude.
|
172
|
+
#
|
173
|
+
# == TIME SEARCHES
|
174
|
+
# * <tt>:begin_date</tt>, <tt>:end_date</tt> - the parameters are used to specify a start and end date for search results. BOTH of these must be provided or the API will return an error. Accepts either a Time/Date argument or a string of the format YYYYMMDD. For convenience the following alternative methods are provided
|
175
|
+
# * <tt>:before</tt> - an alternative to :end_date. Automatically adds a begin date of January 1, 1981 (EARLIEST_BEGIN_DATE) if no :since or :begin_date argument is also provided.
|
176
|
+
# * <tt>:since</tt> - An alternative to :begin_date. Automatically adds an end date of tomorrow (Date.today + 1) if no :before or :end_date argument is provided.
|
177
|
+
#
|
178
|
+
# == OTHER SEARCH FIELDS
|
179
|
+
# * <tt>:fee</tt> - if set to true, only returns articles that must be purchased. If false, returns only free articles. If not specified, returns all articles
|
180
|
+
# * <tt>:has_thumbnail</tt> - returns only articles that have thumbnail images associated. Note that to see the thumbnails, you must specify either <tt>:thumbnail</tt> or <tt>:all</tt> in the <tt>:fields</tt> argument.
|
181
|
+
# * <tt>:has_multimedia</tt> - to be implemented
|
182
|
+
#
|
183
|
+
# == FACET SUMMARIES
|
184
|
+
#
|
185
|
+
# The <tt>:facets</tt> argument can be used to specify up to 5 facet fields to be returned alongside the search that provide overall counts
|
186
|
+
# of how much each facet term appears in the search results. FIXME provide list of available facets as well as description of :nytd parameter.
|
187
|
+
#
|
188
|
+
# == ARTICLE FIELDS
|
189
|
+
#
|
190
|
+
# The <tt>:fields</tt> parameter is used to indicate what fields are returned with each article from the search results. If not specified, all
|
191
|
+
# fields are returned. To return specific fields, any of the search fields
|
192
|
+
# from above can be explicitly specified in a comma-delimited list, as well as the additional display-only (not searchable) fields below (these
|
193
|
+
# are strings or symbols):
|
194
|
+
#
|
195
|
+
# * <tt>:all</tt> - return all fields for the article
|
196
|
+
# * <tt>:none</tt> - display only the facet breakdown and no article results
|
197
|
+
# * <tt>:basic</tt> - return only the body, byline, date, title, and url
|
198
|
+
# * <tt>:multimedia</tt> - return any related multimedia links for the article
|
199
|
+
# * <tt>:thumbnail</tt> - return information for a related thumbnail image (if the article has one)
|
200
|
+
# * <tt>:word_count</tt> - the word_count of the article.
|
201
|
+
def self.search(query, params={})
  # Work on a copy so the caller's hash is never modified.
  options = params.dup

  # A String is the free-text query; a Hash is folded into the options.
  # Any other type of first argument is ignored.
  if query.is_a?(String)
    options[:query] = query
  elsif query.is_a?(Hash)
    options.merge!(query)
  end

  # Each builder translates part of the options into API request parameters.
  # Order matters: the facet and boolean builders append to the 'query'
  # value produced by add_query_params.
  request = {}
  add_query_params(request, options)
  add_facet_conditions_params(request, options)
  add_boolean_params(request, options)
  add_facets_param(request, options)
  add_fields_param(request, options)
  add_rank_params(request, options)
  add_date_params(request, options)
  add_offset_params(request, options)

  parse_reply(invoke(request))
end
|
225
|
+
|
226
|
+
private
|
227
|
+
# Normalizes a date-like search argument to the string sent to the API.
# Strings are passed through untouched; anything responding to strftime
# (Date, Time, ...) is formatted as YYYYMMDD. field_name is only used in
# the error message. Raises ArgumentError for any other input.
def self.date_argument(field_name, arg)
  if arg.is_a?(String)
    arg
  elsif arg.respond_to?(:strftime)
    arg.strftime("%Y%m%d")
  else
    raise ArgumentError, "Only a string or Date/Time object is allowed as a parameter to the #{field_name} input"
  end
end
|
232
|
+
|
233
|
+
# Looks up the value stored under facet_name in an API article hash.
# The value is returned exactly as found (nil when the key is absent);
# it is not wrapped in Facet objects.
def self.facet_params(params, facet_name)
  params[facet_name]
end
|
237
|
+
|
238
|
+
# Builds the field-scoped query fragment for a text search.
#
# argument is split into terms: a double-quoted phrase (optionally preceded
# by a minus sign) is one term, otherwise terms are whitespace-delimited.
# Every term is prefixed with "field:"; a leading minus is moved in front of
# the field name so the term is excluded. argument itself is not modified.
#
#   text_argument('title', 'ice -cream')   # => 'title:ice -title:cream'
#   text_argument('title', '-"ice cream"') # => '-title:"ice cream"'
def self.text_argument(field, argument)
  # The quoted alternative comes first so that a negated phrase such as
  # -"ice cream" stays a single term instead of being split at the space.
  terms = argument.scan(/-?"[^"]+"|\S+/)

  clauses = terms.map do |term|
    # Check the first character only; a regexp using ^ would also match
    # after a newline inside a quoted phrase.
    if term[0, 1] == '-'
      "-#{field}:#{term[1..-1]}"
    else
      "#{field}:#{term}"
    end
  end

  clauses.join(' ')
end
|
251
|
+
|
252
|
+
|
253
|
+
# Converts the reply obtained from the API into a ResultSet by delegating
# to ResultSet.init_from_api.
def self.parse_reply(reply)
  ResultSet.init_from_api(reply)
end
|
256
|
+
|
257
|
+
# Copies the :facets search option into out_params['facets'] as a
# comma-separated list of API facet names. Accepts a single facet or an
# array of facets; does nothing when :facets is absent.
def self.add_facets_param(out_params, in_params)
  requested = in_params[:facets]
  return unless requested

  requested = [requested] unless requested.is_a?(Array)
  out_params['facets'] = requested.map { |facet| Facet.symbol_name(facet) }.join(',')
end
|
268
|
+
|
269
|
+
# Maps one requested field (string or symbol) to the name(s) sent to the API.
# 'thumbnail' expands to all of IMAGE_FIELDS, comma-separated; any other
# name is returned as a string unchanged.
def self.field_param(name)
  field = name.to_s
  field == 'thumbnail' ? IMAGE_FIELDS.join(',') : field
end
|
277
|
+
|
278
|
+
# Translates the :fields search option into out_params['fields'].
#
# * nil or :all    - request every name in ALL_FIELDS
# * :basic         - send no 'fields' parameter at all
# * :none          - send a blank field list and, unless facets were already
#                    requested, ask for Facet::DEFAULT_RETURN_FACETS
# * String/Symbol  - a single field name
# * Array          - several field names
#
# Raises ArgumentError for any other value.
def self.add_fields_param(out_params, in_params)
  fields = in_params[:fields]

  case fields
  when nil, :all
    out_params['fields'] = ALL_FIELDS.join(',')
  when :basic
    # Intentionally empty: without a 'fields' parameter the API is left to
    # return its own default set of fields.
  when :none
    out_params['fields'] = ' '
    out_params['facets'] = Facet::DEFAULT_RETURN_FACETS.join(',') unless out_params['facets']
  when String, Symbol
    out_params['fields'] = field_param(fields)
  when Array
    out_params['fields'] = fields.map { |name| field_param(name) }.join(',')
  else
    raise ArgumentError, "Fields must either be :all, a single field name, or an array of field names (either strings or symbols)"
  end
end
|
297
|
+
|
298
|
+
# Builds out_params['query'] from the free-text :query option plus any
# per-field text options (one per entry in TEXT_FIELDS), joined by spaces.
# The value is nil when nothing was supplied.
def self.add_query_params(out_params, in_params)
  clauses = [in_params[:query]]

  # Field-specific searches, e.g. :title => 'ice cream'
  TEXT_FIELDS.each do |field|
    value = in_params[field.to_sym]
    clauses << text_argument(field, value) if value
  end

  joined = clauses.compact.join(' ')
  out_params['query'] = joined
  out_params['query'] = nil if joined.empty?
end
|
313
|
+
|
314
|
+
# Formats one facet condition for the query string as "name:[value]",
# prefixed with "-" when exclude is true. A Symbol name is first converted
# with Facet.symbol_name; any other name is used as given.
def self.facet_argument(name, value, exclude = false)
  facet_name = name.is_a?(Symbol) ? Facet.symbol_name(name) : name
  prefix = exclude ? '-' : ''
  "#{prefix}#{facet_name}:[#{value}]"
end
|
321
|
+
|
322
|
+
# Converts the :only_facets / :except_facets search option into an array of
# query-string conditions (see facet_argument).
#
# facets may be:
# * nil     - no conditions
# * String  - a hand-written condition, used verbatim
# * Facet   - a single facet instance
# * Array   - Facet instances only (ArgumentError otherwise); terms are
#             grouped by facet type
# * Hash    - facet name => a single value or an array of values
#
# Any other type yields no conditions. exclude marks every generated
# condition as a negative match.
def self.parse_facet_params(facets, exclude = false)
  case facets
  when nil
    return []
  when String
    return [facets]
  when Facet
    return [facet_argument(facets.facet_type, facets.term, exclude)]
  when Array
    unless facets.all? {|f| f.is_a? Facet }
      raise ArgumentError, "Only Facet instances can be passed in as an array; use Hash for Facet::Name => values input"
    end

    # Collect the terms of each facet type together.
    terms_by_name = {}
    facets.each do |f|
      terms_by_name[f.facet_type] ||= []
      terms_by_name[f.facet_type] << f.term
    end
  when Hash
    terms_by_name = facets
  else
    return []
  end

  # Shared expansion: one condition per (name, term) pair. A scalar value
  # is treated like a one-element list.
  facet_args = []
  terms_by_name.each_pair do |name, terms|
    terms = [terms] unless terms.is_a? Array
    facet_args += terms.map {|term| facet_argument(name, term, exclude)}
  end

  facet_args
end
|
365
|
+
|
366
|
+
# Appends the :only_facets and :except_facets conditions to
# out_params['query'], after whatever query text is already there.
# Leaves out_params untouched when neither option yields a condition.
def self.add_facet_conditions_params(out_params, in_params)
  included = parse_facet_params(in_params[:only_facets])
  excluded = parse_facet_params(in_params[:except_facets], true)
  return if included.empty? && excluded.empty?

  # compact drops a nil existing query so no leading space is produced.
  out_params['query'] = ([out_params['query']] + included + excluded).compact.join(' ')
end
|
376
|
+
|
377
|
+
# Appends yes/no filters to out_params['query'] for the :fee,
# :has_multimedia and :has_thumbnail options. A true-ish option adds
# "<field>:Y", a false one adds "-<field>:Y", and a nil (unspecified)
# option adds nothing.
def self.add_boolean_params(out_params, in_params)
  # search option => API field it filters on
  flags = [
    [:fee,            'fee'],
    [:has_multimedia, 'related_multimedia'],
    [:has_thumbnail,  'small_image']
  ]

  clauses = []
  flags.each do |option, api_field|
    setting = in_params[option]
    next if setting.nil?
    clauses << "#{'-' unless setting}#{api_field}:Y"
  end
  return if clauses.empty?

  out_params['query'] = ([out_params['query']] + clauses).compact.join(' ')
end
|
397
|
+
|
398
|
+
# Copies the :rank option into out_params['rank'] as a string.
# Only :newest, :oldest and :closest are accepted; anything else raises
# ArgumentError. Does nothing when :rank is absent.
def self.add_rank_params(out_params, in_params)
  rank = in_params[:rank]
  return unless rank

  allowed = [:newest, :oldest, :closest]
  raise ArgumentError, "Rank should only be :newest | :oldest | :closest" unless allowed.include?(rank)

  out_params['rank'] = rank.to_s
end
|
407
|
+
|
408
|
+
# Fills out_params['begin_date'] / ['end_date'] from the date options.
#
# :begin_date / :end_date are used directly; :since / :before are
# alternatives to them, and combining an option with its alternative
# raises ArgumentError. When only one end of the range is given through
# :before or :since, the other end is defaulted: EARLIEST_BEGIN_DATE for
# the beginning, tomorrow (Date.today + 1) for the end.
def self.add_date_params(out_params, in_params)
  begin_date = in_params[:begin_date]
  end_date   = in_params[:end_date]
  since      = in_params[:since]
  before     = in_params[:before]

  out_params['begin_date'] = date_argument(:begin_date, begin_date) if begin_date
  out_params['end_date'] = date_argument(:end_date, end_date) if end_date

  if since
    raise ArgumentError, "You can't specify both :begin_date and :since as arguments" if begin_date
    out_params['begin_date'] = date_argument(:since, since)
  end

  if before
    raise ArgumentError, "You can't specify both :end_date and :before as arguments" if end_date
    out_params['end_date'] = date_argument(:before, before)
  end

  # Default whichever end of the range is still missing.
  out_params['begin_date'] = EARLIEST_BEGIN_DATE if before && out_params['begin_date'].nil?
  out_params['end_date'] = date_argument(:end_date, Date.today + 1) if since && out_params['end_date'].nil?
end
|
441
|
+
|
442
|
+
# Sets out_params['offset'] from the :page or :offset option.
# :page counts from 1 and is converted to the zero-based offset;
# :offset is passed through as given. When both are supplied, :offset wins
# because it is applied last. Raises ArgumentError for non-Integer values
# or a page below 1.
def self.add_offset_params(out_params, in_params)
  page = in_params[:page]
  if page
    raise ArgumentError, "Page must be an integer" unless page.is_a?(Integer)
    raise ArgumentError, "Page must count up from 1" if page < 1

    # Page counts from 1, offset counts from 0
    out_params['offset'] = page - 1
  end

  offset = in_params[:offset]
  if offset
    raise ArgumentError, "Offset must be an integer" unless offset.is_a?(Integer)

    out_params['offset'] = offset
  end
end
|
464
|
+
end
|
465
|
+
end
|
466
|
+
end
|