lucid_works 0.3.9 → 0.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +33 -3
- data/config/locales/en.yml +21 -12
- data/lib/lucid_works.rb +10 -0
- data/lib/lucid_works/associations.rb +14 -12
- data/lib/lucid_works/base.rb +13 -13
- data/lib/lucid_works/collection.rb +65 -5
- data/lib/lucid_works/collection/activity.rb +33 -0
- data/lib/lucid_works/collection/activity/history.rb +20 -0
- data/lib/lucid_works/collection/activity/status.rb +14 -0
- data/lib/lucid_works/collection/settings.rb +28 -8
- data/lib/lucid_works/crawler.rb +3 -3
- data/lib/lucid_works/datasource.rb +29 -3
- data/lib/lucid_works/datasource/job.rb +9 -0
- data/lib/lucid_works/datasource/status.rb +6 -11
- data/lib/lucid_works/patch_time.rb +13 -0
- data/lib/lucid_works/schema.rb +36 -8
- data/lib/lucid_works/utils.rb +22 -0
- data/lib/lucid_works/version.rb +1 -1
- data/lucid_works.gemspec +2 -0
- data/spec/lib/lucid_works/associations_spec.rb +12 -1
- data/spec/lib/lucid_works/base_spec.rb +26 -10
- data/spec/lib/lucid_works/collection/activity/history_spec.rb +33 -0
- data/spec/lib/lucid_works/collection/activity/status_spec.rb +20 -0
- data/spec/lib/lucid_works/collection/activity_spec.rb +88 -0
- data/spec/lib/lucid_works/collection/prime_activities_spec.rb +86 -0
- data/spec/lib/lucid_works/collection_spec.rb +140 -1
- data/spec/lib/lucid_works/datasource/history_spec.rb +11 -7
- data/spec/lib/lucid_works/datasource/status_spec.rb +64 -32
- data/spec/lib/lucid_works/datasource_spec.rb +48 -13
- data/spec/lib/lucid_works/schema_spec.rb +56 -4
- data/spec/lib/lucid_works/utils_spec.rb +62 -0
- data/spec/spec_helper.rb +17 -14
- metadata +41 -3
data/README.rdoc
CHANGED
@@ -41,8 +41,7 @@ This single statement (note the periods) will connect to a LucidWorks server run
|
|
41
41
|
create_datasource(:name => 'cnn',
|
42
42
|
:crawler => 'lucid.aperture', :type => 'web',
|
43
43
|
:url => 'http://cnn.com', :crawl_depth => '1').
|
44
|
-
|
45
|
-
save
|
44
|
+
start_crawl
|
46
45
|
|
47
46
|
Now, how does it work:
|
48
47
|
|
@@ -56,10 +55,13 @@ The LucidWorks object model looks something like this:
|
|
56
55
|
| | +- Schedule
|
57
56
|
| | +- Index
|
58
57
|
| | +- Crawldata
|
58
|
+
| | +- Job
|
59
59
|
| +- Field
|
60
60
|
| +- Index
|
61
61
|
| +- Info
|
62
62
|
| +- Settings
|
63
|
+
| +- Activity -+- Status
|
64
|
+
| +- History
|
63
65
|
|
|
64
66
|
+- Logs -+- Index -+- Summary
|
65
67
|
| +- Query -+- Summary
|
@@ -150,6 +152,14 @@ Collection has_many :datasources. Datasources are modeled using the LucidWorks:
|
|
150
152
|
|
151
153
|
Note that the latter does not start a crawl of the datasource.
|
152
154
|
|
155
|
+
To start a datasource crawling:
|
156
|
+
|
157
|
+
datasource.start_crawl!
|
158
|
+
|
159
|
+
To stop a datasource crawl:
|
160
|
+
|
161
|
+
datasource.stop_crawl!
|
162
|
+
|
153
163
|
To delete all the data crawled from a data-source:
|
154
164
|
|
155
165
|
datasource.empty!
|
@@ -247,7 +257,27 @@ Then:
|
|
247
257
|
|
248
258
|
whatnot.thing -> A Thing
|
249
259
|
|
250
|
-
===
|
260
|
+
=== Schema
|
261
|
+
|
262
|
+
A class may have a schema defined as follows:
|
263
|
+
|
264
|
+
class ThingWithSchema < LucidWorks::Base
|
265
|
+
schema do
|
266
|
+
attribute :string1, :string
|
267
|
+
attribute :bool1, :boolean
|
268
|
+
attribute :integer1, :integer
|
269
|
+
attributes :string2, :string3, :string4
|
270
|
+
attributes :bool2, :bool3, :type => :boolean
|
271
|
+
attributes :int2, :int3, :type => :integer
|
272
|
+
attribute :string_with_values, :values => ['one', 'two']
|
273
|
+
attribute :dontsendme, :omit_during_update => true
|
274
|
+
end
|
275
|
+
end
|
276
|
+
|
277
|
+
Classes with a schema may have validations applied to their attributes.
|
278
|
+
The default attribute type is :string.
|
279
|
+
|
280
|
+
== Rationale
|
251
281
|
|
252
282
|
Originally this library started out as a set of ActiveResource classes. This required a lot of hacking of ActiveResource as ActiveResource makes a lot of assumptions about the way a REST API should work - it's basically just designed to talk to Rails applications - and many REST APIs, including this one, don't conform to those rules. Among the changes required to ActiveResource were:
|
253
283
|
|
data/config/locales/en.yml
CHANGED
@@ -4,6 +4,12 @@ en:
|
|
4
4
|
models:
|
5
5
|
lucid_works:
|
6
6
|
collection:
|
7
|
+
activity:
|
8
|
+
type:
|
9
|
+
optimize: Optimize
|
10
|
+
spelling: Spelling
|
11
|
+
click: Click
|
12
|
+
autocomplete: Autocomplete
|
7
13
|
one: Collection
|
8
14
|
other: Collections
|
9
15
|
settings:
|
@@ -14,21 +20,23 @@ en:
|
|
14
20
|
overwrite: Overwrite
|
15
21
|
tag: Tag
|
16
22
|
datasource:
|
17
|
-
one: Data
|
18
|
-
other: Data
|
23
|
+
one: Data source
|
24
|
+
other: Data sources
|
19
25
|
status:
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
crawlState:
|
27
|
+
ABORTED: Aborted
|
28
|
+
ABORTING: Aborting
|
29
|
+
EXCEPTION: Exception
|
30
|
+
FINISHED: Finished
|
31
|
+
IDLE: Idle
|
32
|
+
RUNNING: Running
|
33
|
+
STOPPED: Stopped
|
34
|
+
STOPPING: Stopping
|
29
35
|
type:
|
36
|
+
external: External
|
30
37
|
file: Local Filesystem
|
31
38
|
jdbc: Database
|
39
|
+
lwelogs: LucidWorks Solr Logs
|
32
40
|
sharepoint: Sharepoint
|
33
41
|
solrxml: Solr XML
|
34
42
|
web: Web Site
|
@@ -66,7 +74,7 @@ en:
|
|
66
74
|
de_duplication: De-duplication
|
67
75
|
display_facets: Display facets
|
68
76
|
elevations: elevations
|
69
|
-
index_time_stopwords:
|
77
|
+
index_time_stopwords: Exclude stop words from index
|
70
78
|
query_parser: Query parser
|
71
79
|
query_time_stopwords: Include stop words in searches
|
72
80
|
query_time_synonyms: Use synonyms
|
@@ -104,3 +112,4 @@ en:
|
|
104
112
|
numDeleted: Deleted docs
|
105
113
|
numUnchanged: Unchanged docs
|
106
114
|
numFailed: Failed docs
|
115
|
+
numTotal: Total docs
|
data/lib/lucid_works.rb
CHANGED
@@ -5,6 +5,7 @@ end
|
|
5
5
|
|
6
6
|
require 'active_model'
|
7
7
|
require 'active_support/core_ext/module/attr_accessor_with_default'
|
8
|
+
require 'active_support/core_ext/module/aliasing'
|
8
9
|
require 'active_support/core_ext/hash/indifferent_access'
|
9
10
|
require 'active_support/inflector'
|
10
11
|
begin
|
@@ -13,8 +14,12 @@ rescue LoadError
|
|
13
14
|
end
|
14
15
|
require 'restclient'
|
15
16
|
require 'json'
|
17
|
+
require 'rsolr'
|
18
|
+
require 'nokogiri'
|
16
19
|
|
20
|
+
require 'lucid_works/utils'
|
17
21
|
require 'lucid_works/patch_restclient'
|
22
|
+
require 'lucid_works/patch_time'
|
18
23
|
|
19
24
|
require 'lucid_works/exceptions'
|
20
25
|
require 'lucid_works/associations'
|
@@ -23,6 +28,10 @@ require 'lucid_works/base'
|
|
23
28
|
require 'lucid_works/schema'
|
24
29
|
|
25
30
|
require 'lucid_works/collection'
|
31
|
+
require 'lucid_works/collection/activity'
|
32
|
+
require 'lucid_works/collection/activity/status'
|
33
|
+
require 'lucid_works/collection/activity/history'
|
34
|
+
#require 'lucid_works/collection/activity/schedule'
|
26
35
|
require 'lucid_works/collection/info'
|
27
36
|
require 'lucid_works/collection/index'
|
28
37
|
require 'lucid_works/collection/settings'
|
@@ -33,6 +42,7 @@ require 'lucid_works/datasource/status'
|
|
33
42
|
require 'lucid_works/datasource/history'
|
34
43
|
require 'lucid_works/datasource/schedule'
|
35
44
|
require 'lucid_works/datasource/crawldata'
|
45
|
+
require 'lucid_works/datasource/job'
|
36
46
|
require 'lucid_works/field'
|
37
47
|
require 'lucid_works/logs'
|
38
48
|
require 'lucid_works/logs/query'
|
@@ -89,26 +89,28 @@ module LucidWorks
|
|
89
89
|
def define_has_one(resource, options={})
|
90
90
|
resource_class_name = (options[:class_name] || resource).to_s.camelize
|
91
91
|
|
92
|
+
class_eval <<-EOF, __FILE__, __LINE__ + 1
|
93
|
+
def #{resource} # def child
|
94
|
+
@#{resource} || #{resource}! # @child || child!
|
95
|
+
end # end
|
96
|
+
EOF
|
97
|
+
|
92
98
|
if options[:has_content] == false
|
93
|
-
class_eval <<-
|
94
|
-
def #{resource}
|
95
|
-
#{resource_class_name}.new(:parent => self)
|
99
|
+
class_eval <<-EOF, __FILE__, __LINE__ + 1
|
100
|
+
def #{resource}! # def child!
|
101
|
+
@#{resource} = #{resource_class_name}.new(:parent => self) # @child = Child.new(options.merge :parent => self)
|
96
102
|
end # end
|
97
|
-
|
103
|
+
EOF
|
98
104
|
else
|
99
|
-
class_eval <<-
|
100
|
-
def #{resource}
|
101
|
-
@#{resource}
|
102
|
-
end # end
|
103
|
-
|
104
|
-
def #{resource}! # def resource!
|
105
|
-
@#{resource} = #{resource_class_name}.find(:parent => self) # @resource = Resource.find(:parent => self)
|
105
|
+
class_eval <<-EOF, __FILE__, __LINE__ + 1
|
106
|
+
def #{resource}! # def child!
|
107
|
+
@#{resource} = #{resource_class_name}.find(:parent => self) # @child = Child.find(:parent => self)
|
106
108
|
end # end
|
107
109
|
|
108
110
|
def build_#{resource}(options = {})
|
109
111
|
#{resource_class_name}.new(options.merge :parent => self)
|
110
112
|
end
|
111
|
-
|
113
|
+
EOF
|
112
114
|
end
|
113
115
|
end
|
114
116
|
|
data/lib/lucid_works/base.rb
CHANGED
@@ -32,6 +32,7 @@ module LucidWorks
|
|
32
32
|
extend ActiveModel::Translation
|
33
33
|
extend ActiveModel::Callbacks
|
34
34
|
include Associations
|
35
|
+
include Utils::BoolConverter
|
35
36
|
|
36
37
|
attr_accessor :parent # :nodoc:
|
37
38
|
attr_writer :id # :nodoc:
|
@@ -44,10 +45,8 @@ module LucidWorks
|
|
44
45
|
class << self
|
45
46
|
include ActionView::Helpers::NumberHelper rescue nil
|
46
47
|
|
47
|
-
attr_accessor_with_default :primary_key, :id
|
48
48
|
attr_accessor :collection_name # :nodoc:
|
49
49
|
attr_accessor_with_default :singleton, false
|
50
|
-
attr_accessor_with_default :has_schema, false
|
51
50
|
|
52
51
|
# The attributes for a model are ascertained in one of two ways.
|
53
52
|
# Without a schema, the attributes list is automatically generated when the object is retrieved from the server.
|
@@ -69,7 +68,6 @@ module LucidWorks
|
|
69
68
|
if block_given?
|
70
69
|
@schema.instance_eval(&block)
|
71
70
|
@schema.create_accessors_for_attributes(self)
|
72
|
-
self.has_schema = true
|
73
71
|
end
|
74
72
|
@schema
|
75
73
|
end
|
@@ -182,8 +180,8 @@ module LucidWorks
|
|
182
180
|
find(:all, options).last
|
183
181
|
end
|
184
182
|
|
185
|
-
# Convert the attribute value to a string. If a schema has been defined for the
|
186
|
-
# been defined for the attribute, it will have formatting applied as follows:
|
183
|
+
# Convert the attribute value to a string. If a schema has been defined for the model
|
184
|
+
# and a type has been defined for the attribute, it will have formatting applied as follows:
|
187
185
|
#
|
188
186
|
# - <tt>boolean</tt> will be converted to 'yes' or 'no'
|
189
187
|
# - <tt>integer</tt> will be passed to number_with_delimiter
|
@@ -238,7 +236,7 @@ module LucidWorks
|
|
238
236
|
else
|
239
237
|
parent = options.delete(:parent)
|
240
238
|
end
|
241
|
-
raise ArgumentError.new("parent is a required option") unless parent
|
239
|
+
raise ArgumentError.new("parent is a required option (options were #{options.inspect}") unless parent
|
242
240
|
unless parent.is_a?(Base) || parent.is_a?(Server)
|
243
241
|
raise ArgumentError.new("parent must be a LucidWorks::Server or LucidWorks::Base")
|
244
242
|
end
|
@@ -297,11 +295,11 @@ module LucidWorks
|
|
297
295
|
end
|
298
296
|
|
299
297
|
def id # :nodoc:
|
300
|
-
@attributes[self.class.primary_key]
|
298
|
+
@attributes[self.class.schema.primary_key]
|
301
299
|
end
|
302
300
|
|
303
301
|
def id=(value) # :nodoc:
|
304
|
-
@attributes[self.class.primary_key] = value
|
302
|
+
@attributes[self.class.schema.primary_key] = value
|
305
303
|
end
|
306
304
|
|
307
305
|
def persisted?
|
@@ -348,7 +346,9 @@ module LucidWorks
|
|
348
346
|
end
|
349
347
|
|
350
348
|
def encode # :nodoc:
|
351
|
-
|
349
|
+
omit_attrs = [ 'id' ]
|
350
|
+
omit_attrs += self.class.schema.attrs_to_omit_during_update if persisted?
|
351
|
+
@attributes.reject { |k,v| omit_attrs.include?(k.to_s) }.to_json
|
352
352
|
end
|
353
353
|
|
354
354
|
def load_attributes(attributes_and_values) # :nodoc:
|
@@ -361,13 +361,13 @@ module LucidWorks
|
|
361
361
|
next # Dont overwrite our connection to our parent
|
362
362
|
end
|
363
363
|
unless self.class.schema.has_attribute?(attr)
|
364
|
-
if self.class.
|
365
|
-
raise "unknown attribute: \"#{attr}\""
|
366
|
-
else
|
364
|
+
if self.class.schema.dynamic_attributes?
|
367
365
|
self.class.schema.add_attribute(self.class, attr, :string)
|
366
|
+
else
|
367
|
+
raise "unknown attribute: \"#{attr}\""
|
368
368
|
end
|
369
369
|
end
|
370
|
-
|
370
|
+
send "#{self.class.schema.sanitize_identifier(attr)}=", value
|
371
371
|
end
|
372
372
|
end
|
373
373
|
|
@@ -2,21 +2,81 @@ module LucidWorks
|
|
2
2
|
|
3
3
|
class Collection < Base
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
has_many :datasources, :fields
|
5
|
+
has_many :datasources, :fields, :activities
|
8
6
|
has_one :info, :settings
|
9
7
|
has_one :index, :has_content => false
|
10
8
|
|
11
9
|
schema do
|
12
|
-
attribute :name
|
10
|
+
attribute :name, :string, :primary_key => true
|
13
11
|
attribute :instance_dir
|
14
12
|
end
|
15
13
|
|
16
14
|
validates_presence_of :name
|
17
|
-
|
15
|
+
|
16
|
+
def destroyable?
|
17
|
+
# Don't let user destroy 'lwelogs'
|
18
|
+
instance_dir != 'lwelogs'
|
19
|
+
end
|
20
|
+
|
18
21
|
def empty!
|
19
22
|
index.destroy(:params => {:key => 'iaccepttherisk'})
|
20
23
|
end
|
24
|
+
|
25
|
+
# Setup the Collection with an RSolr object that it can use to search.
|
26
|
+
# Must be provided with the URL of a Solr instance (excluding the /solr/... path)
|
27
|
+
def rsolr_connect(solr_url, default_search_params={})
|
28
|
+
@default_search_params = default_search_params
|
29
|
+
@path_prefix = URI.parse(solr_url).path
|
30
|
+
@rsolr ||= RSolr.connect :url => solr_url.dup
|
31
|
+
end
|
32
|
+
|
33
|
+
# Perform a Solr search using RSolr
|
34
|
+
def search(search_params={}, options={})
|
35
|
+
params = @default_search_params.merge(search_params)
|
36
|
+
page = options[:page] ||= 1
|
37
|
+
per_page = options[:per_page] ||= 10
|
38
|
+
resp = @rsolr.paginate page, per_page, "#{@path_prefix}/solr/#{name}/select", :params => params
|
39
|
+
if params[:wt] == :xml
|
40
|
+
data = Nokogiri.XML(resp)
|
41
|
+
raise "search received bad XML" unless data.root
|
42
|
+
else
|
43
|
+
data = resp
|
44
|
+
end
|
45
|
+
data
|
46
|
+
end
|
47
|
+
|
48
|
+
def prime_activities
|
49
|
+
self.activities!
|
50
|
+
num_created = 0
|
51
|
+
activities_to_return = %w(optimize spelling click autocomplete).map do |type|
|
52
|
+
if act = self.activities.detect{|act| act.type == type}
|
53
|
+
act
|
54
|
+
else
|
55
|
+
num_created += 1
|
56
|
+
self.create_activity(:type => type, :active => true, 'start_time' => 3600*num_created)
|
57
|
+
end
|
58
|
+
end
|
59
|
+
self.activities! if num_created > 0
|
60
|
+
activities_to_return
|
61
|
+
end
|
62
|
+
|
63
|
+
# return the first for each kind of activity
|
64
|
+
# don't use these if you need more than one activity
|
65
|
+
# b/c each forces an API hit
|
66
|
+
def optimize_activity
|
67
|
+
prime_activities.detect{|act| act.type == 'optimize'}
|
68
|
+
end
|
69
|
+
|
70
|
+
def spelling_activity
|
71
|
+
prime_activities.detect{|act| act.type == 'spelling'}
|
72
|
+
end
|
73
|
+
|
74
|
+
def click_activity
|
75
|
+
prime_activities.detect{|act| act.type == 'click'}
|
76
|
+
end
|
77
|
+
|
78
|
+
def autocomplete_activity
|
79
|
+
prime_activities.detect{|act| act.type == 'autocomplete'}
|
80
|
+
end
|
21
81
|
end
|
22
82
|
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module LucidWorks
|
2
|
+
class Collection < Base
|
3
|
+
class Activity < Base
|
4
|
+
TYPES = %w{ optimize spelling click autocomplete}
|
5
|
+
|
6
|
+
belongs_to :collection
|
7
|
+
has_many :histories, :class_name => :history
|
8
|
+
has_one :status
|
9
|
+
|
10
|
+
schema do
|
11
|
+
attributes :start_time, :period
|
12
|
+
attribute :active, :boolean
|
13
|
+
attribute :type, :string, :values => TYPES, :omit_during_update => true
|
14
|
+
end
|
15
|
+
|
16
|
+
validates_presence_of :type, :start_time
|
17
|
+
validates_numericality_of :period, :allow_blank => true
|
18
|
+
|
19
|
+
def t_type
|
20
|
+
I18n.t(type, :scope => 'activemodel.models.lucid_works.collection.activity.type') # activity type labels live under collection.activity.type in en.yml, not datasource.type
|
21
|
+
end
|
22
|
+
|
23
|
+
def start
|
24
|
+
self.start_time = 0
|
25
|
+
self.active = true
|
26
|
+
save
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module LucidWorks
|
2
|
+
class Collection::Activity
|
3
|
+
class History < Base
|
4
|
+
belongs_to :activity
|
5
|
+
self.collection_name = 'history' # i.e. not the plural 'histories'
|
6
|
+
|
7
|
+
def activity_finished
|
8
|
+
Time.iso8601 activityFinished
|
9
|
+
end
|
10
|
+
|
11
|
+
def activity_started
|
12
|
+
Time.iso8601 activityStarted
|
13
|
+
end
|
14
|
+
|
15
|
+
def duration
|
16
|
+
activity_finished - activity_started
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module LucidWorks
|
2
|
+
class Collection::Activity
|
3
|
+
class Status < Base
|
4
|
+
self.singleton = true
|
5
|
+
belongs_to :activity
|
6
|
+
|
7
|
+
schema do
|
8
|
+
attribute :running, :boolean
|
9
|
+
attribute :type, :string, :values => LucidWorks::Collection::Activity::TYPES
|
10
|
+
end
|
11
|
+
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|