lucid_works 0.2.0 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,29 +2,35 @@ module LucidWorks
2
2
 
3
3
  # LucidWorks::Base is our REST ORM foundation.
4
4
  #
5
- # The motivaiton for developing it was:
6
- # * ActiveResource makes a lot of assumtions about how REST APIs should work that we had to keep patching it for
7
- # (e.g. parameters must always be nested inside :resouce => {}).
5
+ # class Collection < LucidWorks::Base
6
+ # end
8
7
  #
9
- # * ActiveResource was missing some features we needed:
10
- # - send new id with POST
11
- # - associations
12
- # - multipart POST
8
+ # When creating or retrieving objects, a parent LucidWorks::Base or LucidWorks::Server object must be supplied, e.g.
13
9
  #
14
- # * The straw that broke the camel's back was the need to talk to the same type of REST API on multiple servers
15
- # simultaneously, which was so orthogonal to the design of ActiveResource that patching it to support this would
16
- # require a major re-write.
10
+ # server = LucidWorks::Server.new('http://localhost:8888')
17
11
  #
18
- # So, I decided to marry ActiveModel and RestClient.
12
+ # collection = Collection.find('collection1', :parent => server)
19
13
  #
20
- # Much of the internal structure here (method names etc) will resemble ActiveResource as I have spent much time
21
- # wandering through that code.
14
+ # datasource = Datasource.find(1, :parent => collection)
15
+ #
16
+ # This mechanism is used to build up the URI that will be used to manipulate the object in the REST API.
17
+ # For the above example:
18
+ #
19
+ # datasource.uri -> http://localhost:8888/api/collections/collection1/datasources/1
20
+ #
21
+ # If you are accessing models using associations, the parent argument is taken care of for you:
22
+ #
23
+ # datasource = server.collection('collection1').datasource(1)
22
24
 
23
25
  class Base
26
+ # Much of the internal structure here (method names etc) will resemble ActiveResource as I have spent much time
27
+ # wandering through that code.
28
+
24
29
  include ActiveModel::Validations
25
30
  include ActiveModel::Conversion
26
- extend ActiveModel::Naming
27
-
31
+ extend ActiveModel::Naming
32
+ extend ActiveModel::Translation
33
+ extend ActiveModel::Callbacks
28
34
  include Associations
29
35
 
30
36
  attr_accessor :parent # :nodoc:
@@ -33,17 +39,50 @@ module LucidWorks
33
39
  attr_accessor :raw_response # :nodoc:
34
40
  attr_accessor :response_data # :nodoc:
35
41
 
42
+ define_model_callbacks :save, :only => [:before, :after]
43
+
36
44
  class << self
45
+ include ActionView::Helpers::NumberHelper rescue nil
46
+
37
47
  attr_accessor_with_default :primary_key, :id
38
- attr_accessor :collection_name
48
+ attr_accessor :collection_name # :nodoc:
39
49
  attr_accessor_with_default :singleton, false
50
+ attr_accessor_with_default :has_schema, false
51
+
52
+ # The attributes for a model are ascertained in one of two ways.
53
+ # Without a schema, the attributes list is automatically generated when the object is retrieved from the server.
54
+ # Alternatively, you may define a schema for your object.
55
+ # Objects with attributes defined by schemas may have validations run against those attributes during creation.
56
+ #
57
+ # Schema should be passed a block:
58
+ #
59
+ # class Collection < LucidWorks::Base
60
+ # schema do
61
+ # attribute :name
62
+ # end
63
+ #
64
+ # validates_presence_of :name
65
+ # end
66
+
67
+ def schema(&block)
68
+ @schema ||= Schema.new
69
+ if block_given?
70
+ @schema.instance_eval(&block)
71
+ @schema.create_accessors_for_attributes(self)
72
+ self.has_schema = true
73
+ end
74
+ @schema
75
+ end
76
+
77
+ # Create a new model.
78
+ #
79
+ # MyObject.create(attr => value, ..., :parent => server_or_object)
40
80
 
41
81
  def create(*arguments)
42
82
  new(*arguments).tap { |model| model.save }
43
83
  end
44
84
 
45
85
  # Retrieve one or more models from the server.
46
- #
47
86
  # Find may be called in the following ways:
48
87
  #
49
88
  # Retrieve an entire collection:
@@ -57,10 +96,10 @@ module LucidWorks
57
96
  # find(options)
58
97
  # find(:singleton, options)
59
98
  #
60
- # == Options
99
+ # === Options
61
100
  #
62
101
  # :parent - mandatory, another LucidWorks::Base instance or a LucidWorks::Server instance.
63
- #
102
+
64
103
  def find(*arguments)
65
104
  unless arguments.first.is_a?(Symbol)
66
105
  # We weren't called with a symbol, figure out what kind of find this is and re-call
@@ -85,6 +124,7 @@ module LucidWorks
85
124
 
86
125
  parent = extract_parent_from_options(options)
87
126
  includes = options.delete(:include)
127
+ order = options.delete(:order)
88
128
 
89
129
  url = case kind_of_find
90
130
  when :all; collection_url(parent)
@@ -92,7 +132,12 @@ module LucidWorks
92
132
  when :singleton; "#{parent.uri}/#{singleton_name}"
93
133
  end
94
134
 
95
- raw_response = RestClient.get(url)
135
+ raw_response = ActiveSupport::Notifications.instrument("lucid_works.request") do |payload|
136
+ payload[:method] = :get
137
+ payload[:uri] = url
138
+ payload[:response] = RestClient.get(url)
139
+ end
140
+
96
141
  data = JSON.parse(raw_response)
97
142
 
98
143
  results =
@@ -119,9 +164,12 @@ module LucidWorks
119
164
  end
120
165
  end
121
166
 
167
+ results.sort! { |a,b| a.send(order) <=> b.send(order) } if order
122
168
  results
123
169
  end
124
170
 
171
+ # Shortcut for find(:all, options)
172
+
125
173
  def all(options)
126
174
  find(:all, options)
127
175
  end
@@ -134,6 +182,43 @@ module LucidWorks
134
182
  find(:all, options).last
135
183
  end
136
184
 
185
+ # Convert the attribute value to a string. If a schema has been defined for the model and a type has
186
+ # been defined for the attribute, it will have formatting applied as follows:
187
+ #
188
+ # - <tt>boolean</tt> will be converted to 'yes' or 'no'
189
+ # - <tt>integer</tt> will be passed to number_with_delimiter
190
+ #
191
+ # If the attribute is listed in the schema as having :values, it will be translated.
192
+
193
+ def human_attribute_value(attribute, value)
194
+ if schema[attribute][:values]
195
+ l10n_scope = %w{activemodel models} + self.name.underscore.split('/') + [attribute]
196
+ return I18n.t(value, :scope => l10n_scope, :default => value)
197
+ end
198
+
199
+ case schema[attribute][:type]
200
+ when :boolean
201
+ value.to_yesno
202
+ when :integer
203
+ number_with_delimiter(value)
204
+ else
205
+ value.to_s
206
+ end
207
+ rescue
208
+ value.to_s
209
+ end
210
+
211
+ # For attributes listed in the schema as having :values, this will create an array-of-arrays
212
+ # suitable for use as options_for_select. Each entry is [human-readable value, raw value].
213
+
214
+ def to_select(attribute)
215
+ raise "Can't to_select for #{attribute} as it has no values" unless schema[attribute][:values]
216
+ l10n_scope = %w{activemodel models} + self.name.underscore.split('/') + [attribute]
217
+ schema[attribute][:values].map do |value|
218
+ [human_attribute_value(attribute, value), value]
219
+ end
220
+ end
221
+
137
222
  def collection_name # :nodoc:
138
223
  @collection_name || name.underscore.gsub(/^.*\//, '').pluralize
139
224
  end
@@ -166,29 +251,49 @@ module LucidWorks
166
251
  raise ArgumentError.new("new requires a Hash") unless options.is_a?(Hash)
167
252
  @parent = self.class.extract_parent_from_options(options)
168
253
  @persisted = options.delete(:persisted) || singleton? || false
169
- @attributes = options.with_indifferent_access
254
+ @attributes = {}.with_indifferent_access
255
+ load_attributes(options)
170
256
  end
171
257
 
172
258
  def save
173
- if valid?
174
- begin
175
- if persisted?
176
- response = RestClient.put(member_url, encode, :content_type => :json)
177
- else
178
- response = RestClient.post(collection_url, encode, :content_type => :json)
179
- @persisted = true
259
+ _run_save_callbacks do
260
+ if valid?
261
+ ActiveSupport::Notifications.instrument("lucid_works.request") do |payload|
262
+ method, uri = persisted? ? [:put, member_url] : [:post, collection_url]
263
+ data = encode
264
+ payload[:method] = method
265
+ payload[:uri] = uri
266
+ payload[:data] = data
267
+ begin
268
+ response = RestClient.send(method, uri, data, :content_type => :json)
269
+ payload[:response] = response
270
+ @persisted = true
271
+ load_attributes_from_json_string(response)
272
+ true
273
+ rescue RestClient::UnprocessableEntity, RestClient::Conflict => e
274
+ payload[:exception] = e
275
+ attach_errors_to_model(e.response)
276
+ false
277
+ end
180
278
  end
181
- load_attributes_from_json_string(response)
182
- true
183
- rescue RestClient::UnprocessableEntity, RestClient::Conflict => e
184
- attach_errors_to_model(e.response)
185
- false
186
279
  end
187
280
  end
188
281
  end
189
-
282
+
283
+ def update_attributes(attrs_and_values)
284
+ attrs_and_values.each do |attr,value|
285
+ self.send("#{attr}=", value)
286
+ end
287
+ save
288
+ end
289
+
190
290
  def destroy(options={})
191
- RestClient.delete(member_url, options)
291
+ ActiveSupport::Notifications.instrument("lucid_works.request") do |payload|
292
+ payload[:method] = :delete
293
+ payload[:uri] = member_url
294
+ payload[:options] = options
295
+ payload[:repsonse] = RestClient.delete(member_url, options)
296
+ end
192
297
  end
193
298
 
194
299
  def id # :nodoc:
@@ -203,21 +308,6 @@ module LucidWorks
203
308
  @persisted
204
309
  end
205
310
 
206
- def method_missing(method_sym, *arguments) # :nodoc:
207
- return super if method_sym == :to_ary
208
- if method_sym.to_s =~ /^(\w+)=$/
209
- return @attributes[$1] = arguments.first
210
- elsif method_sym.to_s =~ /^(\w+)\?$/
211
- attr = $1
212
- predicate = true
213
- else
214
- attr = method_sym
215
- predicate = false
216
- end
217
- raise "Unknown attribute: '#{attr}'" unless @attributes.has_key?(attr)
218
- predicate ? !!@attributes[attr] : @attributes[attr]
219
- end
220
-
221
311
  def read_attribute_for_validation(key) # :nodoc:
222
312
  @attributes[key]
223
313
  end
@@ -236,10 +326,17 @@ module LucidWorks
236
326
 
237
327
  alias :uri :member_url
238
328
 
239
- def inspect
329
+ def inspect # :nodoc:
240
330
  "<#{self.class.name} " + @attributes.map { |k,v| "#{k}=#{v.inspect}" }.join(" ") + ">"
241
331
  end
242
332
 
333
+ # Convert the attribute value to a string.
334
+ # See LucidWorks::Base.human_attribute_value for details.
335
+
336
+ def human_attribute_value(attribute)
337
+ self.class.human_attribute_value(attribute, send(attribute))
338
+ end
339
+
243
340
  private
244
341
 
245
342
  def singleton? # :nodoc:
@@ -254,12 +351,30 @@ module LucidWorks
254
351
  @attributes.reject { |k,v| k.to_s == 'id'}.to_json
255
352
  end
256
353
 
257
- def load_attributes_from_json_string(response) # :nodoc:
258
- data = JSON.parse(response) rescue {}
259
- data.each do |k,v|
260
- @attributes[k] = v
354
+ def load_attributes(attributes_and_values) # :nodoc:
355
+ attributes_and_values.each do |attr, value|
356
+ # Special cases - don't overwrite built-in accessors
357
+ if attr.to_sym == :id
358
+ self.id = value
359
+ next
360
+ elsif self.class.respond_to?(:belongs_to_association_name) && attr.to_sym == self.class.belongs_to_association_name
361
+ next # Don't overwrite our connection to our parent
362
+ end
363
+ unless self.class.schema.has_attribute?(attr)
364
+ if self.class.has_schema
365
+ raise "unknown attribute: \"#{attr}\""
366
+ else
367
+ self.class.schema.add_attribute(self.class, attr, :string)
368
+ end
369
+ end
370
+ @attributes[attr] = value
261
371
  end
262
372
  end
373
+
374
+ def load_attributes_from_json_string(response) # :nodoc:
375
+ attribute_data = JSON.parse(response) rescue {}
376
+ load_attributes(attribute_data)
377
+ end
263
378
 
264
379
  def attach_errors_to_model(response) # :nodoc:
265
380
  data = JSON.parse(response) rescue nil
@@ -4,6 +4,19 @@ module LucidWorks
4
4
  class Info < Base
5
5
  self.singleton = true
6
6
  belongs_to :collection
7
+
8
+ schema do
9
+ attributes :free_disk_space, :total_disk_space, :index_size,
10
+ :index_last_modified, :collection_name,
11
+ :data_dir, :root_dir, :instance_dir,
12
+ :type => :string
13
+ attribute :index_directory # a hash
14
+ attributes :index_size_bytes, :free_disk_bytes, :total_disk_bytes,
15
+ :index_max_doc, :index_num_docs, :index_version,
16
+ :type => :integer
17
+ attributes :index_has_deletions, :index_is_optimized, :index_is_current,
18
+ :type => :boolean
19
+ end
7
20
  end
8
21
  end
9
22
  end
@@ -4,6 +4,20 @@ module LucidWorks
4
4
  class Settings < Base
5
5
  self.singleton = true
6
6
  belongs_to :collection
7
+
8
+ DEDUP_OPTIONS = %w{ off overwrite tag }
9
+
10
+ schema do
11
+ attributes :unsupervised_feedback_emphasis, :unknown_type_handling,
12
+ :click_boost_field, :click_boost_data, :query_parser, :default_sort,
13
+ :type => :string
14
+ attribute :de_duplication, :string, :values => DEDUP_OPTIONS
15
+ attributes :spellcheck, :display_facets, :ssl, :unsupervised_feedback, :query_time_stopwords,
16
+ :auto_complete, :boost_recent, :click_enabled, :show_similar, :query_time_synonyms,
17
+ :index_time_stopwords, :type => :boolean
18
+ attributes :search_server_list, :update_server_list, :stopword_list, :boosts, :synonym_list # Arrays
19
+ attribute :elevations # Hash
20
+ end
7
21
  end
8
22
  end
9
23
  end
@@ -4,11 +4,19 @@ module LucidWorks
4
4
 
5
5
  self.primary_key = :name
6
6
 
7
- has_many :datasources
8
- has_one :info, :settings, :index
7
+ has_many :datasources, :fields
8
+ has_one :info, :settings
9
+ has_one :index, :has_content => false
9
10
 
11
+ schema do
12
+ attribute :name
13
+ attribute :instance_dir
14
+ end
15
+
16
+ validates_presence_of :name
17
+
10
18
  def empty!
11
- build_index.destroy(:params => {:key => 'iaccepttherisk'})
19
+ index.destroy(:params => {:key => 'iaccepttherisk'})
12
20
  end
13
21
  end
14
22
  end
@@ -0,0 +1,8 @@
1
+ module LucidWorks
2
+
3
+ class Crawler < Base
4
+
5
+ self.primary_key = :name
6
+
7
+ end
8
+ end
@@ -0,0 +1,9 @@
1
+ module LucidWorks
2
+ class Datasource
3
+
4
+ class Crawldata < Base
5
+ self.singleton = true
6
+ belongs_to :datasource
7
+ end
8
+ end
9
+ end
@@ -9,8 +9,16 @@ module LucidWorks
9
9
  numUpdated + numNew + numUnchanged
10
10
  end
11
11
 
12
- def elapsed_time
13
- crawlStopped.to_datetime - crawlStarted.to_datetime
12
+ def crawl_stopped
13
+ Time.iso8601 crawlStopped
14
+ end
15
+
16
+ def crawl_started
17
+ Time.iso8601 crawlStarted
18
+ end
19
+
20
+ def duration
21
+ crawl_stopped - crawl_started
14
22
  end
15
23
  end
16
24
  end
@@ -4,6 +4,13 @@ module LucidWorks
4
4
  class Schedule < Base
5
5
  self.singleton = true
6
6
  belongs_to :datasource
7
+
8
+ schema do
9
+ attribute :period
10
+ attribute :start_time
11
+ attribute :active
12
+ attribute :type
13
+ end
7
14
  end
8
15
  end
9
16
  end
@@ -4,6 +4,51 @@ module LucidWorks
4
4
  class Status < Base
5
5
  self.singleton = true
6
6
  belongs_to :datasource
7
+
8
+ schema do
9
+ attributes :crawlStarted, :crawlState, :crawlStopped, :jobId
10
+ attributes :numUnchanged, :numUpdated, :numNew, :numFailed, :numDeleted, :type => :integer
11
+ end
12
+
13
+ STOPPED_STATES = %w{ IDLE STOPPED ABORTED EXCEPTION FINISHED }
14
+ POST_PROCESSING_STATES = %w{ STOPPING ABORTING }
15
+ CRAWLSTATES = STOPPED_STATES + [ 'RUNNING' ] + POST_PROCESSING_STATES
16
+
17
+ # Create predicate methods for all the crawl states
18
+ CRAWLSTATES.each do |state|
19
+ method_name = state.downcase + "?"
20
+ class_eval <<-EOF
21
+ def #{method_name}
22
+ self.crawlState == "#{state}"
23
+ end
24
+ EOF
25
+ end
26
+
27
+ def stopped?
28
+ STOPPED_STATES.include?(crawlState)
29
+ end
30
+
31
+ def post_processing?
32
+ POST_PROCESSING_STATES.include?(crawlState)
33
+ end
34
+
35
+ def doc_count
36
+ numUpdated + numNew + numUnchanged
37
+ end
38
+
39
+ def t_crawl_state
40
+ I18n.translate(crawlState.downcase,
41
+ :scope => 'activemodel.models.lucid_works.datasource.status.crawl_state',
42
+ :default => crawlState)
43
+ end
44
+
45
+ def crawl_started
46
+ Time.iso8601 crawlStarted
47
+ end
48
+
49
+ def elapsed_time
50
+ Time.now - crawl_started
51
+ end
7
52
  end
8
53
  end
9
54
  end
@@ -3,38 +3,57 @@ module LucidWorks
3
3
  class Datasource < Base
4
4
  belongs_to :collection
5
5
  has_many :histories, :class_name => :history
6
- has_one :status, :history, :schedule, :index
7
-
8
- TYPES = {
9
- :FileSystemDataSource => {
10
- :name_l10n_key => 'data_source.short_type.filesystem',
11
- :crawler => 'lucid.aperture',
12
- :type => 'file'
13
- },
14
- :WebDataSource => {
15
- :name_l10n_key => 'data_source.short_type.web_site',
16
- :crawler => 'lucid.aperture',
17
- :type => 'web'
18
- },
19
- :SolrXmlDataSource => {
20
- :name_l10n_key => 'data_source.short_type.solr',
21
- :crawler => 'lucid.solrxml',
22
- :type => 'solrxml'
23
- },
24
- :JDBCDataSource => {
25
- :name_l10n_key => 'data_source.short_type.database',
26
- :crawler => 'lucid.jdbc',
27
- :type => 'jdbc'
28
- },
29
- :SharePointDataSource => {
30
- :name_l10n_key => 'data_source.short_type.sharepoint',
31
- :crawler => 'lucid.gcm',
32
- :type => 'sharepoint'
33
- }
6
+ has_one :status, :schedule, :crawldata
7
+ has_one :index, :has_content => false
8
+
9
+ schema do
10
+ # common
11
+ attributes :name, :type, :crawler
12
+ attributes :crawl_depth, :max_bytes, :type => :integer
13
+ attribute :include_paths
14
+ attribute :exclude_paths
15
+ attribute :mapping # Hash
16
+ attribute :bounds
17
+ # web
18
+ attributes :url, :category
19
+ attribute :collect_links, :boolean
20
+ # file
21
+ attribute :path
22
+ attribute :follow_links, :boolean
23
+ end
24
+
25
+ validates_presence_of :type, :crawler, :name, :crawl_depth
26
+ validates_numericality_of :max_bytes, :allow_blank => true
27
+ validates_presence_of :url, :if => lambda { |d| d.type == 'web' }
28
+
29
+ before_save :remove_blank_max_bytes
30
+
31
+ TYPES = %w{ file web solrxml jdbc sharepoint }
32
+ BOUNDS = %w{ tree none }
33
+ CRAWLERS = {
34
+ # Later we may change these to be arrays if we decide to support more than one choice
35
+ # e.g. :web => ['lucid.aperture', 'nutch']
36
+ :file => 'lucid.aperture',
37
+ :web => 'lucid.aperture',
38
+ :solrxml => 'lucid.solrxml',
39
+ :jdbc => 'lucid.jdbc',
40
+ :sharepoint => 'lucid.gcm'
34
41
  }.with_indifferent_access
35
42
 
43
+
36
44
  def empty!
37
- build_index.destroy
45
+ index.destroy
46
+ end
47
+
48
+ def t_type
49
+ I18n.t(type, :scope => 'activemodel.models.lucid_works.datasource.type')
50
+ end
51
+
52
+ private
53
+
54
+ def remove_blank_max_bytes # :nodoc:
55
+ # API can't handle a blank max_bytes. Send nothing to select 'unlimited'
56
+ @attributes.delete :max_bytes if self.max_bytes.blank?
38
57
  end
39
58
  end
40
59
  end
@@ -0,0 +1,53 @@
1
+ module LucidWorks
2
+
3
+ class Field < Base
4
+ belongs_to :collection
5
+
6
+ TYPES = [
7
+ 'string',
8
+ 'boolean',
9
+ 'binary',
10
+ 'int',
11
+ 'float',
12
+ 'long',
13
+ 'double',
14
+ 'tint',
15
+ 'tfloat',
16
+ 'tlong',
17
+ 'tdouble',
18
+ 'uri',
19
+ 'date',
20
+ 'tdate',
21
+ 'text_ws',
22
+ 'text_en',
23
+ 'text_porter_en',
24
+ 'textTight',
25
+ 'text_cjk',
26
+ 'text_da',
27
+ 'text_de',
28
+ 'text_es',
29
+ 'text_fr',
30
+ 'text_it',
31
+ 'text_nl',
32
+ 'text_pt',
33
+ 'text_ru',
34
+ 'text_se',
35
+ 'text_fi',
36
+ 'random',
37
+ 'comma-separated',
38
+ 'textSpell',
39
+ 'payloads',
40
+ 'point',
41
+ 'location',
42
+ 'geohash'
43
+ ]
44
+
45
+ def t_field_type
46
+ self.class.t_field_type(self.field_type)
47
+ end
48
+
49
+ def self.t_field_type(type)
50
+ I18n.translate(type, :scope => 'activemodel.models.lucid_works.collection.field.field_type')
51
+ end
52
+ end
53
+ end
@@ -2,7 +2,6 @@ module LucidWorks
2
2
  class Logs < LucidWorks::Base
3
3
  self.singleton = true
4
4
 
5
- has_one :query
6
- has_one :index
5
+ has_one :query, :index, :has_content => false
7
6
  end
8
7
  end