lucid_works 0.3.9 → 0.4.9
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +33 -3
- data/config/locales/en.yml +21 -12
- data/lib/lucid_works.rb +10 -0
- data/lib/lucid_works/associations.rb +14 -12
- data/lib/lucid_works/base.rb +13 -13
- data/lib/lucid_works/collection.rb +65 -5
- data/lib/lucid_works/collection/activity.rb +33 -0
- data/lib/lucid_works/collection/activity/history.rb +20 -0
- data/lib/lucid_works/collection/activity/status.rb +14 -0
- data/lib/lucid_works/collection/settings.rb +28 -8
- data/lib/lucid_works/crawler.rb +3 -3
- data/lib/lucid_works/datasource.rb +29 -3
- data/lib/lucid_works/datasource/job.rb +9 -0
- data/lib/lucid_works/datasource/status.rb +6 -11
- data/lib/lucid_works/patch_time.rb +13 -0
- data/lib/lucid_works/schema.rb +36 -8
- data/lib/lucid_works/utils.rb +22 -0
- data/lib/lucid_works/version.rb +1 -1
- data/lucid_works.gemspec +2 -0
- data/spec/lib/lucid_works/associations_spec.rb +12 -1
- data/spec/lib/lucid_works/base_spec.rb +26 -10
- data/spec/lib/lucid_works/collection/activity/history_spec.rb +33 -0
- data/spec/lib/lucid_works/collection/activity/status_spec.rb +20 -0
- data/spec/lib/lucid_works/collection/activity_spec.rb +88 -0
- data/spec/lib/lucid_works/collection/prime_activities_spec.rb +86 -0
- data/spec/lib/lucid_works/collection_spec.rb +140 -1
- data/spec/lib/lucid_works/datasource/history_spec.rb +11 -7
- data/spec/lib/lucid_works/datasource/status_spec.rb +64 -32
- data/spec/lib/lucid_works/datasource_spec.rb +48 -13
- data/spec/lib/lucid_works/schema_spec.rb +56 -4
- data/spec/lib/lucid_works/utils_spec.rb +62 -0
- data/spec/spec_helper.rb +17 -14
- metadata +41 -3
data/README.rdoc
CHANGED
@@ -41,8 +41,7 @@ This single statement (note the periods) will connect to a LucidWorks server run
|
|
41
41
|
create_datasource(:name => 'cnn',
|
42
42
|
:crawler => 'lucid.aperture', :type => 'web',
|
43
43
|
:url => 'http://cnn.com', :crawl_depth => '1').
|
44
|
-
|
45
|
-
save
|
44
|
+
start_crawl
|
46
45
|
|
47
46
|
Now, how does it work:
|
48
47
|
|
@@ -56,10 +55,13 @@ The LucidWorks object model looks something like this:
|
|
56
55
|
| | +- Schedule
|
57
56
|
| | +- Index
|
58
57
|
| | +- Crawldata
|
58
|
+
| | +- Job
|
59
59
|
| +- Field
|
60
60
|
| +- Index
|
61
61
|
| +- Info
|
62
62
|
| +- Settings
|
63
|
+
| +- Activity -+- Status
|
64
|
+
| +- History
|
63
65
|
|
|
64
66
|
+- Logs -+- Index -+- Summary
|
65
67
|
| +- Query -+- Summary
|
@@ -150,6 +152,14 @@ Collection has_many :datasources. Datasources are modeled using the LucidWorks:
|
|
150
152
|
|
151
153
|
Note that the latter does not start a crawl of the datasource.
|
152
154
|
|
155
|
+
To start crawling a datasource:
|
156
|
+
|
157
|
+
datasource.start_crawl!
|
158
|
+
|
159
|
+
To stop a datasource crawl:
|
160
|
+
|
161
|
+
datasource.stop_crawl!
|
162
|
+
|
153
163
|
To delete all the data crawled from a data-source:
|
154
164
|
|
155
165
|
datasource.empty!
|
@@ -247,7 +257,27 @@ Then:
|
|
247
257
|
|
248
258
|
whatnot.thing -> A Thing
|
249
259
|
|
250
|
-
===
|
260
|
+
=== Schema
|
261
|
+
|
262
|
+
A class may have a schema defined as follows:
|
263
|
+
|
264
|
+
class ThingWithSchema < LucidWorks::Base
|
265
|
+
schema do
|
266
|
+
attribute :string1, :string
|
267
|
+
attribute :bool1, :boolean
|
268
|
+
attribute :integer1, :integer
|
269
|
+
attributes :string2, :string3, :string4
|
270
|
+
attributes :bool2, :bool3, :type => :boolean
|
271
|
+
attributes :int2, :int3, :type => :integer
|
272
|
+
attribute :string_with_values, :values => ['one', 'two']
|
273
|
+
attribute :dontsendme, :omit_during_update => true
|
274
|
+
end
|
275
|
+
end
|
276
|
+
|
277
|
+
Classes with a schema may have validations applied to their attributes.
|
278
|
+
The default attribute type is :string.
|
279
|
+
|
280
|
+
== Rationale
|
251
281
|
|
252
282
|
Originally this library started out as a set of ActiveResource classes. This required a lot of hacking of ActiveResource as ActiveResource makes a lot of assumptions about the way a REST API should work - it's basically just designed to talk to Rails applications - and many REST APIs, including this one, don't conform to those rules. Among the changes required to ActiveResource were:
|
253
283
|
|
data/config/locales/en.yml
CHANGED
@@ -4,6 +4,12 @@ en:
|
|
4
4
|
models:
|
5
5
|
lucid_works:
|
6
6
|
collection:
|
7
|
+
activity:
|
8
|
+
type:
|
9
|
+
optimize: Optimize
|
10
|
+
spelling: Spelling
|
11
|
+
click: Click
|
12
|
+
autocomplete: Autocomplete
|
7
13
|
one: Collection
|
8
14
|
other: Collections
|
9
15
|
settings:
|
@@ -14,21 +20,23 @@ en:
|
|
14
20
|
overwrite: Overwrite
|
15
21
|
tag: Tag
|
16
22
|
datasource:
|
17
|
-
one: Data
|
18
|
-
other: Data
|
23
|
+
one: Data source
|
24
|
+
other: Data sources
|
19
25
|
status:
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
crawlState:
|
27
|
+
ABORTED: Aborted
|
28
|
+
ABORTING: Aborting
|
29
|
+
EXCEPTION: Exception
|
30
|
+
FINISHED: Finished
|
31
|
+
IDLE: Idle
|
32
|
+
RUNNING: Running
|
33
|
+
STOPPED: Stopped
|
34
|
+
STOPPING: Stopping
|
29
35
|
type:
|
36
|
+
external: External
|
30
37
|
file: Local Filesystem
|
31
38
|
jdbc: Database
|
39
|
+
lwelogs: LucidWorks Solr Logs
|
32
40
|
sharepoint: Sharepoint
|
33
41
|
solrxml: Solr XML
|
34
42
|
web: Web Site
|
@@ -66,7 +74,7 @@ en:
|
|
66
74
|
de_duplication: De-duplication
|
67
75
|
display_facets: Display facets
|
68
76
|
elevations: elevations
|
69
|
-
index_time_stopwords:
|
77
|
+
index_time_stopwords: Exclude stop words from index
|
70
78
|
query_parser: Query parser
|
71
79
|
query_time_stopwords: Include stop words in searches
|
72
80
|
query_time_synonyms: Use synomyms
|
@@ -104,3 +112,4 @@ en:
|
|
104
112
|
numDeleted: Deleted docs
|
105
113
|
numUnchanged: Unchanged docs
|
106
114
|
numFailed: Failed docs
|
115
|
+
numTotal: Total docs
|
data/lib/lucid_works.rb
CHANGED
@@ -5,6 +5,7 @@ end
|
|
5
5
|
|
6
6
|
require 'active_model'
|
7
7
|
require 'active_support/core_ext/module/attr_accessor_with_default'
|
8
|
+
require 'active_support/core_ext/module/aliasing'
|
8
9
|
require 'active_support/core_ext/hash/indifferent_access'
|
9
10
|
require 'active_support/inflector'
|
10
11
|
begin
|
@@ -13,8 +14,12 @@ rescue LoadError
|
|
13
14
|
end
|
14
15
|
require 'restclient'
|
15
16
|
require 'json'
|
17
|
+
require 'rsolr'
|
18
|
+
require 'nokogiri'
|
16
19
|
|
20
|
+
require 'lucid_works/utils'
|
17
21
|
require 'lucid_works/patch_restclient'
|
22
|
+
require 'lucid_works/patch_time'
|
18
23
|
|
19
24
|
require 'lucid_works/exceptions'
|
20
25
|
require 'lucid_works/associations'
|
@@ -23,6 +28,10 @@ require 'lucid_works/base'
|
|
23
28
|
require 'lucid_works/schema'
|
24
29
|
|
25
30
|
require 'lucid_works/collection'
|
31
|
+
require 'lucid_works/collection/activity'
|
32
|
+
require 'lucid_works/collection/activity/status'
|
33
|
+
require 'lucid_works/collection/activity/history'
|
34
|
+
#require 'lucid_works/collection/activity/schedule'
|
26
35
|
require 'lucid_works/collection/info'
|
27
36
|
require 'lucid_works/collection/index'
|
28
37
|
require 'lucid_works/collection/settings'
|
@@ -33,6 +42,7 @@ require 'lucid_works/datasource/status'
|
|
33
42
|
require 'lucid_works/datasource/history'
|
34
43
|
require 'lucid_works/datasource/schedule'
|
35
44
|
require 'lucid_works/datasource/crawldata'
|
45
|
+
require 'lucid_works/datasource/job'
|
36
46
|
require 'lucid_works/field'
|
37
47
|
require 'lucid_works/logs'
|
38
48
|
require 'lucid_works/logs/query'
|
@@ -89,26 +89,28 @@ module LucidWorks
|
|
89
89
|
def define_has_one(resource, options={})
|
90
90
|
resource_class_name = (options[:class_name] || resource).to_s.camelize
|
91
91
|
|
92
|
+
class_eval <<-EOF, __FILE__, __LINE__ + 1
|
93
|
+
def #{resource} # def child
|
94
|
+
@#{resource} || #{resource}! # @child || child!
|
95
|
+
end # end
|
96
|
+
EOF
|
97
|
+
|
92
98
|
if options[:has_content] == false
|
93
|
-
class_eval <<-
|
94
|
-
def #{resource}
|
95
|
-
#{resource_class_name}.new(:parent => self)
|
99
|
+
class_eval <<-EOF, __FILE__, __LINE__ + 1
|
100
|
+
def #{resource}! # def child!
|
101
|
+
@#{resource} = #{resource_class_name}.new(:parent => self) # @child = Child.new(options.merge :parent => self)
|
96
102
|
end # end
|
97
|
-
|
103
|
+
EOF
|
98
104
|
else
|
99
|
-
class_eval <<-
|
100
|
-
def #{resource}
|
101
|
-
@#{resource}
|
102
|
-
end # end
|
103
|
-
|
104
|
-
def #{resource}! # def resource!
|
105
|
-
@#{resource} = #{resource_class_name}.find(:parent => self) # @resource = Resource.find(:parent => self)
|
105
|
+
class_eval <<-EOF, __FILE__, __LINE__ + 1
|
106
|
+
def #{resource}! # def child!
|
107
|
+
@#{resource} = #{resource_class_name}.find(:parent => self) # @child = Child.find(:parent => self)
|
106
108
|
end # end
|
107
109
|
|
108
110
|
def build_#{resource}(options = {})
|
109
111
|
#{resource_class_name}.new(options.merge :parent => self)
|
110
112
|
end
|
111
|
-
|
113
|
+
EOF
|
112
114
|
end
|
113
115
|
end
|
114
116
|
|
data/lib/lucid_works/base.rb
CHANGED
@@ -32,6 +32,7 @@ module LucidWorks
|
|
32
32
|
extend ActiveModel::Translation
|
33
33
|
extend ActiveModel::Callbacks
|
34
34
|
include Associations
|
35
|
+
include Utils::BoolConverter
|
35
36
|
|
36
37
|
attr_accessor :parent # :nodoc:
|
37
38
|
attr_writer :id # :nodoc:
|
@@ -44,10 +45,8 @@ module LucidWorks
|
|
44
45
|
class << self
|
45
46
|
include ActionView::Helpers::NumberHelper rescue nil
|
46
47
|
|
47
|
-
attr_accessor_with_default :primary_key, :id
|
48
48
|
attr_accessor :collection_name # :nodoc:
|
49
49
|
attr_accessor_with_default :singleton, false
|
50
|
-
attr_accessor_with_default :has_schema, false
|
51
50
|
|
52
51
|
# The attributes for a model are ascertained in one of two ways.
|
53
52
|
# Without a schema, the attributes list is automatically generated when the object is retrieved from the server.
|
@@ -69,7 +68,6 @@ module LucidWorks
|
|
69
68
|
if block_given?
|
70
69
|
@schema.instance_eval(&block)
|
71
70
|
@schema.create_accessors_for_attributes(self)
|
72
|
-
self.has_schema = true
|
73
71
|
end
|
74
72
|
@schema
|
75
73
|
end
|
@@ -182,8 +180,8 @@ module LucidWorks
|
|
182
180
|
find(:all, options).last
|
183
181
|
end
|
184
182
|
|
185
|
-
# Convert the attribute value to a string. If a schema has been defined for the
|
186
|
-
# been defined for the attribute, it will have formatting applied as follows:
|
183
|
+
# Convert the attribute value to a string. If a schema has been defined for the model
|
184
|
+
# and a type has been defined for the attribute, it will have formatting applied as follows:
|
187
185
|
#
|
188
186
|
# - <tt>boolean</tt> will be converted to 'yes' or 'no'
|
189
187
|
# - <tt>integer</tt> will be passed to number_with_delimiter
|
@@ -238,7 +236,7 @@ module LucidWorks
|
|
238
236
|
else
|
239
237
|
parent = options.delete(:parent)
|
240
238
|
end
|
241
|
-
raise ArgumentError.new("parent is a required option") unless parent
|
239
|
+
raise ArgumentError.new("parent is a required option (options were #{options.inspect}") unless parent
|
242
240
|
unless parent.is_a?(Base) || parent.is_a?(Server)
|
243
241
|
raise ArgumentError.new("parent must be a LucidWorks::Server or LucidWorks::Base")
|
244
242
|
end
|
@@ -297,11 +295,11 @@ module LucidWorks
|
|
297
295
|
end
|
298
296
|
|
299
297
|
def id # :nodoc:
|
300
|
-
@attributes[self.class.primary_key]
|
298
|
+
@attributes[self.class.schema.primary_key]
|
301
299
|
end
|
302
300
|
|
303
301
|
def id=(value) # :nodoc:
|
304
|
-
@attributes[self.class.primary_key] = value
|
302
|
+
@attributes[self.class.schema.primary_key] = value
|
305
303
|
end
|
306
304
|
|
307
305
|
def persisted?
|
@@ -348,7 +346,9 @@ module LucidWorks
|
|
348
346
|
end
|
349
347
|
|
350
348
|
def encode # :nodoc:
|
351
|
-
|
349
|
+
omit_attrs = [ 'id' ]
|
350
|
+
omit_attrs += self.class.schema.attrs_to_omit_during_update if persisted?
|
351
|
+
@attributes.reject { |k,v| omit_attrs.include?(k.to_s) }.to_json
|
352
352
|
end
|
353
353
|
|
354
354
|
def load_attributes(attributes_and_values) # :nodoc:
|
@@ -361,13 +361,13 @@ module LucidWorks
|
|
361
361
|
next # Dont overwrite our connection to our parent
|
362
362
|
end
|
363
363
|
unless self.class.schema.has_attribute?(attr)
|
364
|
-
if self.class.
|
365
|
-
raise "unknown attribute: \"#{attr}\""
|
366
|
-
else
|
364
|
+
if self.class.schema.dynamic_attributes?
|
367
365
|
self.class.schema.add_attribute(self.class, attr, :string)
|
366
|
+
else
|
367
|
+
raise "unknown attribute: \"#{attr}\""
|
368
368
|
end
|
369
369
|
end
|
370
|
-
|
370
|
+
send "#{self.class.schema.sanitize_identifier(attr)}=", value
|
371
371
|
end
|
372
372
|
end
|
373
373
|
|
@@ -2,21 +2,81 @@ module LucidWorks
|
|
2
2
|
|
3
3
|
class Collection < Base
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
has_many :datasources, :fields
|
5
|
+
has_many :datasources, :fields, :activities
|
8
6
|
has_one :info, :settings
|
9
7
|
has_one :index, :has_content => false
|
10
8
|
|
11
9
|
schema do
|
12
|
-
attribute :name
|
10
|
+
attribute :name, :string, :primary_key => true
|
13
11
|
attribute :instance_dir
|
14
12
|
end
|
15
13
|
|
16
14
|
validates_presence_of :name
|
17
|
-
|
15
|
+
|
16
|
+
def destroyable?
|
17
|
+
# Don't let user destroy 'lwelogs'
|
18
|
+
instance_dir != 'lwelogs'
|
19
|
+
end
|
20
|
+
|
18
21
|
def empty!
|
19
22
|
index.destroy(:params => {:key => 'iaccepttherisk'})
|
20
23
|
end
|
24
|
+
|
25
|
+
# Set up the Collection with an RSolr object that it can use to search.
|
26
|
+
# Must be provided with the URL of a Solr instance (excluding the /solr/... path)
|
27
|
+
def rsolr_connect(solr_url, default_search_params={})
|
28
|
+
@default_search_params = default_search_params
|
29
|
+
@path_prefix = URI.parse(solr_url).path
|
30
|
+
@rsolr ||= RSolr.connect :url => solr_url.dup
|
31
|
+
end
|
32
|
+
|
33
|
+
# Perform a Solr search using RSolr
|
34
|
+
def search(search_params={}, options={})
|
35
|
+
params = @default_search_params.merge(search_params)
|
36
|
+
page = options[:page] ||= 1
|
37
|
+
per_page = options[:per_page] ||= 10
|
38
|
+
resp = @rsolr.paginate page, per_page, "#{@path_prefix}/solr/#{name}/select", :params => params
|
39
|
+
if params[:wt] == :xml
|
40
|
+
data = Nokogiri.XML(resp)
|
41
|
+
raise "search received bad XML" unless data.root
|
42
|
+
else
|
43
|
+
data = resp
|
44
|
+
end
|
45
|
+
data
|
46
|
+
end
|
47
|
+
|
48
|
+
def prime_activities
|
49
|
+
self.activities!
|
50
|
+
num_created = 0
|
51
|
+
activities_to_return = %w(optimize spelling click autocomplete).map do |type|
|
52
|
+
if act = self.activities.detect{|act| act.type == type}
|
53
|
+
act
|
54
|
+
else
|
55
|
+
num_created += 1
|
56
|
+
self.create_activity(:type => type, :active => true, 'start_time' => 3600*num_created)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
self.activities! if num_created > 0
|
60
|
+
activities_to_return
|
61
|
+
end
|
62
|
+
|
63
|
+
# return the first for each kind of activity
|
64
|
+
# don't use these if you need more than one activity
|
65
|
+
# b/c each forces an API hit
|
66
|
+
def optimize_activity
|
67
|
+
prime_activities.detect{|act| act.type == 'optimize'}
|
68
|
+
end
|
69
|
+
|
70
|
+
def spelling_activity
|
71
|
+
prime_activities.detect{|act| act.type == 'spelling'}
|
72
|
+
end
|
73
|
+
|
74
|
+
def click_activity
|
75
|
+
prime_activities.detect{|act| act.type == 'click'}
|
76
|
+
end
|
77
|
+
|
78
|
+
def autocomplete_activity
|
79
|
+
prime_activities.detect{|act| act.type == 'autocomplete'}
|
80
|
+
end
|
21
81
|
end
|
22
82
|
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module LucidWorks
|
2
|
+
class Collection < Base
|
3
|
+
class Activity < Base
|
4
|
+
TYPES = %w{ optimize spelling click autocomplete}
|
5
|
+
|
6
|
+
belongs_to :collection
|
7
|
+
has_many :histories, :class_name => :history
|
8
|
+
has_one :status
|
9
|
+
|
10
|
+
schema do
|
11
|
+
attributes :start_time, :period
|
12
|
+
attribute :active, :boolean
|
13
|
+
attribute :type, :string, :values => TYPES, :omit_during_update => true
|
14
|
+
end
|
15
|
+
|
16
|
+
validates_presence_of :type, :start_time
|
17
|
+
validates_numericality_of :period, :allow_blank => true
|
18
|
+
|
19
|
+
def t_type
|
20
|
+
I18n.t(type, :scope => 'activemodel.models.lucid_works.datasource.type')
|
21
|
+
end
|
22
|
+
|
23
|
+
def start
|
24
|
+
self.start_time = 0
|
25
|
+
self.active = true
|
26
|
+
save
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module LucidWorks
|
2
|
+
class Collection::Activity
|
3
|
+
class History < Base
|
4
|
+
belongs_to :activity
|
5
|
+
self.collection_name = 'history' # i.e. not the plural 'histories'
|
6
|
+
|
7
|
+
def activity_finished
|
8
|
+
Time.iso8601 activityFinished
|
9
|
+
end
|
10
|
+
|
11
|
+
def activity_started
|
12
|
+
Time.iso8601 activityStarted
|
13
|
+
end
|
14
|
+
|
15
|
+
def duration
|
16
|
+
activity_finished - activity_started
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module LucidWorks
|
2
|
+
class Collection::Activity
|
3
|
+
class Status < Base
|
4
|
+
self.singleton = true
|
5
|
+
belongs_to :activity
|
6
|
+
|
7
|
+
schema do
|
8
|
+
attribute :running, :boolean
|
9
|
+
attribute :type, :string, :values => LucidWorks::Collection::Activity::TYPES
|
10
|
+
end
|
11
|
+
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|