lucid_works 0.2.0 → 0.3.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,29 +2,35 @@ module LucidWorks
2
2
 
3
3
  # LucidWorks::Base is our REST ORM foundation.
4
4
  #
5
- # The motivaiton for developing it was:
6
- # * ActiveResource makes a lot of assumtions about how REST APIs should work that we had to keep patching it for
7
- # (e.g. parameters must always be nested inside :resouce => {}).
5
+ # class Collection < LucidWorks::Base
6
+ # end
8
7
  #
9
- # * ActiveResource was missing some features we needed:
10
- # - send new id with POST
11
- # - associations
12
- # - multipart POST
8
+ # When creating or retrieving objects, a parent LucidWorks::Base or LucidWorks::Server object must be supplied, e.g.
13
9
  #
14
- # * The straw that broke the camel's back was the need to talk to the same type of REST API on multiple servers
15
- # simultaneously, which was so orthogonal to the design of ActiveResource that patching it to support this would
16
- # require a major re-write.
10
+ # server = LucidWorks::Server.new('http://localhost:8888')
17
11
  #
18
- # So, I decided to marry ActiveModel and RestClient.
12
+ # collection = Collection.find('collection1', :parent => server)
19
13
  #
20
- # Much of the internal structure here (method names etc) will resemble ActiveResource as I have spent much time
21
- # wandering through that code.
14
+ # datasource = Datasource.find(1, :parent => collection)
15
+ #
16
+ # This mechanism is used to build up the URI that will be used to manipulate the object in the REST API.
17
+ # For the above example:
18
+ #
19
+ # datasource.uri -> http://localhost:8888/api/collections/collection1/datasources/1
20
+ #
21
+ # If you are accessing models using associations, the parent argument is taken care of for you:
22
+ #
23
+ # datasource = server.collection('collection1').datasource(1)
22
24
 
23
25
  class Base
26
+ # Much of the internal structure here (method names etc) will resemble ActiveResource as I have spent much time
27
+ # wandering through that code.
28
+
24
29
  include ActiveModel::Validations
25
30
  include ActiveModel::Conversion
26
- extend ActiveModel::Naming
27
-
31
+ extend ActiveModel::Naming
32
+ extend ActiveModel::Translation
33
+ extend ActiveModel::Callbacks
28
34
  include Associations
29
35
 
30
36
  attr_accessor :parent # :nodoc:
@@ -33,17 +39,50 @@ module LucidWorks
33
39
  attr_accessor :raw_response # :nodoc:
34
40
  attr_accessor :response_data # :nodoc:
35
41
 
42
+ define_model_callbacks :save, :only => [:before, :after]
43
+
36
44
  class << self
45
+ include ActionView::Helpers::NumberHelper rescue nil
46
+
37
47
  attr_accessor_with_default :primary_key, :id
38
- attr_accessor :collection_name
48
+ attr_accessor :collection_name # :nodoc:
39
49
  attr_accessor_with_default :singleton, false
50
+ attr_accessor_with_default :has_schema, false
51
+
52
+ # The attributes for a model are ascertained in one of two ways.
53
+ # Without a schema, the attributes list is automatically generated when the object is retrieved from the server.
54
+ # Alternatively, you may define a schema for your object.
55
+ # Objects with attributes defined by schemas may have validations run against those attributes during creation.
56
+ #
57
+ # Schema should be passed a block:
58
+ #
59
+ # class Collection < LucidWorks::Base
60
+ # schema do
61
+ # attribute :name
62
+ # end
63
+ #
64
+ # validates_presence_of :name
65
+ # end
66
+
67
+ def schema(&block)
68
+ @schema ||= Schema.new
69
+ if block_given?
70
+ @schema.instance_eval(&block)
71
+ @schema.create_accessors_for_attributes(self)
72
+ self.has_schema = true
73
+ end
74
+ @schema
75
+ end
76
+
77
+ # Create a new model.
78
+ #
79
+ # MyObject.create(attr => value, ..., :parent => server_or_object)
40
80
 
41
81
  def create(*arguments)
42
82
  new(*arguments).tap { |model| model.save }
43
83
  end
44
84
 
45
85
  # Retrieve one or more models from the server.
46
- #
47
86
  # Find may be called in the following ways:
48
87
  #
49
88
  # Retrieve an entire collection:
@@ -57,10 +96,10 @@ module LucidWorks
57
96
  # find(options)
58
97
  # find(:singleton, options)
59
98
  #
60
- # == Options
99
+ # === Options
61
100
  #
62
101
  # :parent - mandatory, another LucidWorks::Base instance or a LucidWorks::Server instance.
63
- #
102
+
64
103
  def find(*arguments)
65
104
  unless arguments.first.is_a?(Symbol)
66
105
  # We weren't called with a symbol, figure out what kind of find this is and re-call
@@ -85,6 +124,7 @@ module LucidWorks
85
124
 
86
125
  parent = extract_parent_from_options(options)
87
126
  includes = options.delete(:include)
127
+ order = options.delete(:order)
88
128
 
89
129
  url = case kind_of_find
90
130
  when :all; collection_url(parent)
@@ -92,7 +132,12 @@ module LucidWorks
92
132
  when :singleton; "#{parent.uri}/#{singleton_name}"
93
133
  end
94
134
 
95
- raw_response = RestClient.get(url)
135
+ raw_response = ActiveSupport::Notifications.instrument("lucid_works.request") do |payload|
136
+ payload[:method] = :get
137
+ payload[:uri] = url
138
+ payload[:response] = RestClient.get(url)
139
+ end
140
+
96
141
  data = JSON.parse(raw_response)
97
142
 
98
143
  results =
@@ -119,9 +164,12 @@ module LucidWorks
119
164
  end
120
165
  end
121
166
 
167
+ results.sort! { |a,b| a.send(order) <=> b.send(order) } if order
122
168
  results
123
169
  end
124
170
 
171
+ # Shortcut for find(:all, options)
172
+
125
173
  def all(options)
126
174
  find(:all, options)
127
175
  end
@@ -134,6 +182,43 @@ module LucidWorks
134
182
  find(:all, options).last
135
183
  end
136
184
 
185
+ # Convert the attribute value to a string. If a schema has been defined for the model and a type has
186
+ # been defined for the attribute, it will have formatting applied as follows:
187
+ #
188
+ # - <tt>boolean</tt> will be converted to 'yes' or 'no'
189
+ # - <tt>integer</tt> will be passed to number_with_delimter
190
+ #
191
+ # If the attribute is listed in the schema as having :values => [...], it will be translated.
192
+
193
+ def human_attribute_value(attribute, value)
194
+ if schema[attribute][:values]
195
+ l10n_scope = %w{activemodel models} + self.name.underscore.split('/') + [attribute]
196
+ return I18n.t(value, :scope => l10n_scope, :default => value)
197
+ end
198
+
199
+ case schema[attribute][:type]
200
+ when :boolean
201
+ value.to_yesno
202
+ when :integer
203
+ number_with_delimiter(value)
204
+ else
205
+ value.to_s
206
+ end
207
+ rescue
208
+ value.to_s
209
+ end
210
+
211
+ # For attributes listed in the schema as having :values, this will create an array-of-arrays
212
+ # suitable for use as options_for_select. Each entry is [human_attribute_value(attribute, value), value].
213
+
214
+ def to_select(attribute)
215
+ raise "Can't to_select for #{attribute} as it has no values" unless schema[attribute][:values]
216
+ l10n_scope = %w{activemodel models} + self.name.underscore.split('/') + [attribute]
217
+ schema[attribute][:values].map do |value|
218
+ [human_attribute_value(attribute, value), value]
219
+ end
220
+ end
221
+
137
222
  def collection_name # :nodoc:
138
223
  @collection_name || name.underscore.gsub(/^.*\//, '').pluralize
139
224
  end
@@ -166,29 +251,49 @@ module LucidWorks
166
251
  raise ArgumentError.new("new requires a Hash") unless options.is_a?(Hash)
167
252
  @parent = self.class.extract_parent_from_options(options)
168
253
  @persisted = options.delete(:persisted) || singleton? || false
169
- @attributes = options.with_indifferent_access
254
+ @attributes = {}.with_indifferent_access
255
+ load_attributes(options)
170
256
  end
171
257
 
172
258
  def save
173
- if valid?
174
- begin
175
- if persisted?
176
- response = RestClient.put(member_url, encode, :content_type => :json)
177
- else
178
- response = RestClient.post(collection_url, encode, :content_type => :json)
179
- @persisted = true
259
+ _run_save_callbacks do
260
+ if valid?
261
+ ActiveSupport::Notifications.instrument("lucid_works.request") do |payload|
262
+ method, uri = persisted? ? [:put, member_url] : [:post, collection_url]
263
+ data = encode
264
+ payload[:method] = method
265
+ payload[:uri] = uri
266
+ payload[:data] = data
267
+ begin
268
+ response = RestClient.send(method, uri, data, :content_type => :json)
269
+ payload[:response] = response
270
+ @persisted = true
271
+ load_attributes_from_json_string(response)
272
+ true
273
+ rescue RestClient::UnprocessableEntity, RestClient::Conflict => e
274
+ payload[:exception] = e
275
+ attach_errors_to_model(e.response)
276
+ false
277
+ end
180
278
  end
181
- load_attributes_from_json_string(response)
182
- true
183
- rescue RestClient::UnprocessableEntity, RestClient::Conflict => e
184
- attach_errors_to_model(e.response)
185
- false
186
279
  end
187
280
  end
188
281
  end
189
-
282
+
283
+ def update_attributes(attrs_and_values)
284
+ attrs_and_values.each do |attr,value|
285
+ self.send("#{attr}=", value)
286
+ end
287
+ save
288
+ end
289
+
190
290
  def destroy(options={})
191
- RestClient.delete(member_url, options)
291
+ ActiveSupport::Notifications.instrument("lucid_works.request") do |payload|
292
+ payload[:method] = :delete
293
+ payload[:uri] = member_url
294
+ payload[:options] = options
295
+ payload[:repsonse] = RestClient.delete(member_url, options)
296
+ end
192
297
  end
193
298
 
194
299
  def id # :nodoc:
@@ -203,21 +308,6 @@ module LucidWorks
203
308
  @persisted
204
309
  end
205
310
 
206
- def method_missing(method_sym, *arguments) # :nodoc:
207
- return super if method_sym == :to_ary
208
- if method_sym.to_s =~ /^(\w+)=$/
209
- return @attributes[$1] = arguments.first
210
- elsif method_sym.to_s =~ /^(\w+)\?$/
211
- attr = $1
212
- predicate = true
213
- else
214
- attr = method_sym
215
- predicate = false
216
- end
217
- raise "Unknown attribute: '#{attr}'" unless @attributes.has_key?(attr)
218
- predicate ? !!@attributes[attr] : @attributes[attr]
219
- end
220
-
221
311
  def read_attribute_for_validation(key) # :nodoc:
222
312
  @attributes[key]
223
313
  end
@@ -236,10 +326,17 @@ module LucidWorks
236
326
 
237
327
  alias :uri :member_url
238
328
 
239
- def inspect
329
+ def inspect # :nodoc:
240
330
  "<#{self.class.name} " + @attributes.map { |k,v| "#{k}=#{v.inspect}" }.join(" ") + ">"
241
331
  end
242
332
 
333
+ # Convert the attribute value to a string.
334
+ # See LucidWorks::Base.human_attribute_value for details.
335
+
336
+ def human_attribute_value(attribute)
337
+ self.class.human_attribute_value(attribute, send(attribute))
338
+ end
339
+
243
340
  private
244
341
 
245
342
  def singleton? # :nodoc:
@@ -254,12 +351,30 @@ module LucidWorks
254
351
  @attributes.reject { |k,v| k.to_s == 'id'}.to_json
255
352
  end
256
353
 
257
- def load_attributes_from_json_string(response) # :nodoc:
258
- data = JSON.parse(response) rescue {}
259
- data.each do |k,v|
260
- @attributes[k] = v
354
+ def load_attributes(attributes_and_values) # :nodoc:
355
+ attributes_and_values.each do |attr, value|
356
+ # Special cases - don't overwrite built-in accessors
357
+ if attr.to_sym == :id
358
+ self.id = value
359
+ next
360
+ elsif self.class.respond_to?(:belongs_to_association_name) && attr.to_sym == self.class.belongs_to_association_name
361
+ next # Dont overwrite our connection to our parent
362
+ end
363
+ unless self.class.schema.has_attribute?(attr)
364
+ if self.class.has_schema
365
+ raise "unknown attribute: \"#{attr}\""
366
+ else
367
+ self.class.schema.add_attribute(self.class, attr, :string)
368
+ end
369
+ end
370
+ @attributes[attr] = value
261
371
  end
262
372
  end
373
+
374
+ def load_attributes_from_json_string(response) # :nodoc:
375
+ attribute_data = JSON.parse(response) rescue {}
376
+ load_attributes(attribute_data)
377
+ end
263
378
 
264
379
  def attach_errors_to_model(response) # :nodoc:
265
380
  data = JSON.parse(response) rescue nil
@@ -4,6 +4,19 @@ module LucidWorks
4
4
  class Info < Base
5
5
  self.singleton = true
6
6
  belongs_to :collection
7
+
8
+ schema do
9
+ attributes :free_disk_space, :total_disk_space, :index_size,
10
+ :index_last_modified, :collection_name,
11
+ :data_dir, :root_dir, :instance_dir,
12
+ :type => :string
13
+ attribute :index_directory # a hash
14
+ attributes :index_size_bytes, :free_disk_bytes, :total_disk_bytes,
15
+ :index_max_doc, :index_num_docs, :index_version,
16
+ :type => :integer
17
+ attributes :index_has_deletions, :index_is_optimized, :index_is_current,
18
+ :type => :boolean
19
+ end
7
20
  end
8
21
  end
9
22
  end
@@ -4,6 +4,20 @@ module LucidWorks
4
4
  class Settings < Base
5
5
  self.singleton = true
6
6
  belongs_to :collection
7
+
8
+ DEDUP_OPTIONS = %w{ off overwrite tag }
9
+
10
+ schema do
11
+ attributes :unsupervised_feedback_emphasis, :unknown_type_handling,
12
+ :click_boost_field, :click_boost_data, :query_parser, :default_sort,
13
+ :type => :string
14
+ attribute :de_duplication, :string, :values => DEDUP_OPTIONS
15
+ attributes :spellcheck, :display_facets, :ssl, :unsupervised_feedback, :query_time_stopwords,
16
+ :auto_complete, :boost_recent, :click_enabled, :show_similar, :query_time_synonyms,
17
+ :index_time_stopwords, :type => :boolean
18
+ attributes :search_server_list, :update_server_list, :stopword_list, :boosts, :synonym_list # Arrays
19
+ attribute :elevations # Hash
20
+ end
7
21
  end
8
22
  end
9
23
  end
@@ -4,11 +4,19 @@ module LucidWorks
4
4
 
5
5
  self.primary_key = :name
6
6
 
7
- has_many :datasources
8
- has_one :info, :settings, :index
7
+ has_many :datasources, :fields
8
+ has_one :info, :settings
9
+ has_one :index, :has_content => false
9
10
 
11
+ schema do
12
+ attribute :name
13
+ attribute :instance_dir
14
+ end
15
+
16
+ validates_presence_of :name
17
+
10
18
  def empty!
11
- build_index.destroy(:params => {:key => 'iaccepttherisk'})
19
+ index.destroy(:params => {:key => 'iaccepttherisk'})
12
20
  end
13
21
  end
14
22
  end
@@ -0,0 +1,8 @@
1
+ module LucidWorks
2
+
3
+ class Crawler < Base
4
+
5
+ self.primary_key = :name
6
+
7
+ end
8
+ end
@@ -0,0 +1,9 @@
1
+ module LucidWorks
2
+ class Datasource
3
+
4
+ class Crawldata < Base
5
+ self.singleton = true
6
+ belongs_to :datasource
7
+ end
8
+ end
9
+ end
@@ -9,8 +9,16 @@ module LucidWorks
9
9
  numUpdated + numNew + numUnchanged
10
10
  end
11
11
 
12
- def elapsed_time
13
- crawlStopped.to_datetime - crawlStarted.to_datetime
12
+ def crawl_stopped
13
+ Time.iso8601 crawlStopped
14
+ end
15
+
16
+ def crawl_started
17
+ Time.iso8601 crawlStarted
18
+ end
19
+
20
+ def duration
21
+ crawl_stopped - crawl_started
14
22
  end
15
23
  end
16
24
  end
@@ -4,6 +4,13 @@ module LucidWorks
4
4
  class Schedule < Base
5
5
  self.singleton = true
6
6
  belongs_to :datasource
7
+
8
+ schema do
9
+ attribute :period
10
+ attribute :start_time
11
+ attribute :active
12
+ attribute :type
13
+ end
7
14
  end
8
15
  end
9
16
  end
@@ -4,6 +4,51 @@ module LucidWorks
4
4
  class Status < Base
5
5
  self.singleton = true
6
6
  belongs_to :datasource
7
+
8
+ schema do
9
+ attributes :crawlStarted, :crawlState, :crawlStopped, :jobId
10
+ attributes :numUnchanged, :numUpdated, :numNew, :numFailed, :numDeleted, :type => :integer
11
+ end
12
+
13
+ STOPPED_STATES = %w{ IDLE STOPPED ABORTED EXCEPTION FINISHED }
14
+ POST_PROCESSING_STATES = %w{ STOPPING ABORTING }
15
+ CRAWLSTATES = STOPPED_STATES + [ 'RUNNING' ] + POST_PROCESSING_STATES
16
+
17
+ # Create predicate methods for all the crawl states
18
+ CRAWLSTATES.each do |state|
19
+ method_name = state.downcase + "?"
20
+ class_eval <<-EOF
21
+ def #{method_name}
22
+ self.crawlState == "#{state}"
23
+ end
24
+ EOF
25
+ end
26
+
27
+ def stopped?
28
+ STOPPED_STATES.include?(crawlState)
29
+ end
30
+
31
+ def post_processing?
32
+ POST_PROCESSING_STATES.include?(crawlState)
33
+ end
34
+
35
+ def doc_count
36
+ numUpdated + numNew + numUnchanged
37
+ end
38
+
39
+ def t_crawl_state
40
+ I18n.translate(crawlState.downcase,
41
+ :scope => 'activemodel.models.lucid_works.datasource.status.crawl_state',
42
+ :default => crawlState)
43
+ end
44
+
45
+ def crawl_started
46
+ Time.iso8601 crawlStarted
47
+ end
48
+
49
+ def elapsed_time
50
+ Time.now - crawl_started
51
+ end
7
52
  end
8
53
  end
9
54
  end
@@ -3,38 +3,57 @@ module LucidWorks
3
3
  class Datasource < Base
4
4
  belongs_to :collection
5
5
  has_many :histories, :class_name => :history
6
- has_one :status, :history, :schedule, :index
7
-
8
- TYPES = {
9
- :FileSystemDataSource => {
10
- :name_l10n_key => 'data_source.short_type.filesystem',
11
- :crawler => 'lucid.aperture',
12
- :type => 'file'
13
- },
14
- :WebDataSource => {
15
- :name_l10n_key => 'data_source.short_type.web_site',
16
- :crawler => 'lucid.aperture',
17
- :type => 'web'
18
- },
19
- :SolrXmlDataSource => {
20
- :name_l10n_key => 'data_source.short_type.solr',
21
- :crawler => 'lucid.solrxml',
22
- :type => 'solrxml'
23
- },
24
- :JDBCDataSource => {
25
- :name_l10n_key => 'data_source.short_type.database',
26
- :crawler => 'lucid.jdbc',
27
- :type => 'jdbc'
28
- },
29
- :SharePointDataSource => {
30
- :name_l10n_key => 'data_source.short_type.sharepoint',
31
- :crawler => 'lucid.gcm',
32
- :type => 'sharepoint'
33
- }
6
+ has_one :status, :schedule, :crawldata
7
+ has_one :index, :has_content => false
8
+
9
+ schema do
10
+ # common
11
+ attributes :name, :type, :crawler
12
+ attributes :crawl_depth, :max_bytes, :type => :integer
13
+ attribute :include_paths
14
+ attribute :exclude_paths
15
+ attribute :mapping # Hash
16
+ attribute :bounds
17
+ # web
18
+ attributes :url, :category
19
+ attribute :collect_links, :boolean
20
+ # file
21
+ attribute :path
22
+ attribute :follow_links, :boolean
23
+ end
24
+
25
+ validates_presence_of :type, :crawler, :name, :crawl_depth
26
+ validates_numericality_of :max_bytes, :allow_blank => true
27
+ validates_presence_of :url, :if => lambda { |d| d.type == 'web' }
28
+
29
+ before_save :remove_blank_max_bytes
30
+
31
+ TYPES = %w{ file web solrxml jdbc sharepoint }
32
+ BOUNDS = %w{ tree none }
33
+ CRAWLERS = {
34
+ # Later we may change these to be arrays if we decide to support more than one choice
35
+ # e.g. :web => ['lucid.aperture', 'nutch']
36
+ :file => 'lucid.aperture',
37
+ :web => 'lucid.aperture',
38
+ :solrxml => 'lucid.solrxml',
39
+ :jdbc => 'lucid.jdbc',
40
+ :sharepoint => 'lucid.gcm'
34
41
  }.with_indifferent_access
35
42
 
43
+
36
44
  def empty!
37
- build_index.destroy
45
+ index.destroy
46
+ end
47
+
48
+ def t_type
49
+ I18n.t(type, :scope => 'activemodel.models.lucid_works.datasource.type')
50
+ end
51
+
52
+ private
53
+
54
+ def remove_blank_max_bytes # :nodoc:
55
+ # API can't handle a blank max_bytes. Send nothing to select 'unlimited'
56
+ @attributes.delete :max_bytes if self.max_bytes.blank?
38
57
  end
39
58
  end
40
59
  end
@@ -0,0 +1,53 @@
1
+ module LucidWorks
2
+
3
+ class Field < Base
4
+ belongs_to :collection
5
+
6
+ TYPES = [
7
+ 'string',
8
+ 'boolean',
9
+ 'binary',
10
+ 'int',
11
+ 'float',
12
+ 'long',
13
+ 'double',
14
+ 'tint',
15
+ 'tfloat',
16
+ 'tlong',
17
+ 'tdouble',
18
+ 'uri',
19
+ 'date',
20
+ 'tdate',
21
+ 'text_ws',
22
+ 'text_en',
23
+ 'text_porter_en',
24
+ 'textTight',
25
+ 'text_cjk',
26
+ 'text_da',
27
+ 'text_de',
28
+ 'text_es',
29
+ 'text_fr',
30
+ 'text_it',
31
+ 'text_nl',
32
+ 'text_pt',
33
+ 'text_ru',
34
+ 'text_se',
35
+ 'text_fi',
36
+ 'random',
37
+ 'comma-separated',
38
+ 'textSpell',
39
+ 'payloads',
40
+ 'point',
41
+ 'location',
42
+ 'geohash'
43
+ ]
44
+
45
+ def t_field_type
46
+ self.class.t_field_type(self.field_type)
47
+ end
48
+
49
+ def self.t_field_type(type)
50
+ I18n.translate(type, :scope => 'activemodel.models.lucid_works.collection.field.field_type')
51
+ end
52
+ end
53
+ end
@@ -2,7 +2,6 @@ module LucidWorks
2
2
  class Logs < LucidWorks::Base
3
3
  self.singleton = true
4
4
 
5
- has_one :query
6
- has_one :index
5
+ has_one :query, :index, :has_content => false
7
6
  end
8
7
  end