lucid_works 0.7.18 → 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.rvmrc +2 -3
- data/Gemfile +2 -8
- data/Gemfile.lock +45 -53
- data/README.rdoc +2 -6
- data/Rakefile +1 -1
- data/config/locales/en.yml +221 -239
- data/lib/lucid_works/activity.rb +8 -5
- data/lib/lucid_works/base.rb +27 -16
- data/lib/lucid_works/cache.rb +13 -0
- data/lib/lucid_works/cluster.rb +84 -0
- data/lib/lucid_works/collection/settings.rb +15 -6
- data/lib/lucid_works/collection.rb +62 -92
- data/lib/lucid_works/datasource/history.rb +2 -1
- data/lib/lucid_works/datasource/mapping.rb +12 -0
- data/lib/lucid_works/datasource/schedule.rb +5 -2
- data/lib/lucid_works/datasource/status.rb +3 -2
- data/lib/lucid_works/datasource.rb +31 -48
- data/lib/lucid_works/datasource_property.rb +2 -1
- data/lib/lucid_works/datasource_type.rb +14 -0
- data/lib/lucid_works/dynamicfield.rb +12 -0
- data/lib/lucid_works/elevation.rb +93 -0
- data/lib/lucid_works/exceptions.rb +0 -4
- data/lib/lucid_works/field.rb +31 -111
- data/lib/lucid_works/field_commons.rb +133 -0
- data/lib/lucid_works/gem_version.rb +1 -1
- data/lib/lucid_works/inflections.rb +3 -0
- data/lib/lucid_works/patch_time.rb +4 -0
- data/lib/lucid_works/request_handler.rb +16 -0
- data/lib/lucid_works/role.rb +23 -8
- data/lib/lucid_works/schema/attribute.rb +1 -1
- data/lib/lucid_works/schema/boolean_attribute.rb +1 -1
- data/lib/lucid_works/schema/integer_attribute.rb +3 -4
- data/lib/lucid_works/server/crawlers_status.rb +15 -0
- data/lib/lucid_works/server.rb +35 -14
- data/lib/lucid_works/simple_naming.rb +1 -7
- data/lib/lucid_works/synonym.rb +1 -1
- data/lib/lucid_works/version.rb +1 -0
- data/lib/lucid_works.rb +8 -1
- data/lucid_works.gemspec +8 -9
- data/spec/fixtures/zookeeper/clusterstate.json +30 -0
- data/spec/fixtures/zookeeper/clusterstate_broken_shard.json +29 -0
- data/spec/fixtures/zookeeper/live_nodes.json +28 -0
- data/spec/fixtures/zookeeper/live_nodes_no_children.json +26 -0
- data/spec/fixtures/zookeeper/live_nodes_one_child.json +36 -0
- data/spec/lib/lucid_works/base_spec.rb +33 -24
- data/spec/lib/lucid_works/cache_spec.rb +44 -0
- data/spec/lib/lucid_works/cluster_spec.rb +109 -0
- data/spec/lib/lucid_works/collection/activity_spec.rb +29 -0
- data/spec/lib/lucid_works/collection/prime_activities_spec.rb +1 -1
- data/spec/lib/lucid_works/collection/settings_spec.rb +31 -0
- data/spec/lib/lucid_works/collection_spec.rb +166 -107
- data/spec/lib/lucid_works/datasource/schedule_spec.rb +75 -46
- data/spec/lib/lucid_works/datasource/status_spec.rb +5 -5
- data/spec/lib/lucid_works/datasource_property_spec.rb +41 -0
- data/spec/lib/lucid_works/datasource_spec.rb +40 -12
- data/spec/lib/lucid_works/datasource_type_spec.rb +31 -0
- data/spec/lib/lucid_works/dynamicfield_spec.rb +214 -0
- data/spec/lib/lucid_works/elevation_spec.rb +175 -0
- data/spec/lib/lucid_works/field_spec.rb +52 -21
- data/spec/lib/lucid_works/fieldtype_spec.rb +0 -1
- data/spec/lib/lucid_works/request_handler_spec.rb +11 -0
- data/spec/lib/lucid_works/role_spec.rb +77 -0
- data/spec/lib/lucid_works/server/crawlers_status_spec.rb +21 -0
- data/spec/lib/lucid_works/server_spec.rb +123 -22
- data/spec/lib/lucid_works/{collection/synonym_spec.rb → synonym_spec.rb} +23 -22
- data/spec/lib/lucid_works/version_spec.rb +6 -0
- metadata +132 -64
- data/spec/lib/lucid_works/collection/acl_config_spec.rb +0 -212
@@ -0,0 +1,13 @@
|
|
1
|
+
module LucidWorks
  # A named cache resource that belongs to a collection.
  #
  # Only :name is declared explicitly in the schema; it is the primary key
  # and is flagged to be omitted during updates. Dynamic attributes are
  # switched on in the schema block, and :size / :initial_size are validated
  # below without being declared there.
  class Cache < Base
    belongs_to :collection

    schema do
      dynamic_attributes true
      attribute :name, :string,
                :primary_key => true,
                :omit_during_update => true
    end

    # :size is mandatory; both sizes, when given, must be non-negative integers.
    validates_presence_of :size
    validates_numericality_of :initial_size, :size,
                              :only_integer => true,
                              :greater_than_or_equal_to => 0,
                              :allow_blank => true,
                              :message => 'is not a valid value'
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module LucidWorks
  # Snapshot of a cluster, assembled from the ZooKeeper data that Solr serves
  # under "<solr_uri>/zookeeper". One Node is created per distinct 'base_url'
  # found in the cluster state, and each Node carries the Shards it hosts.
  class Cluster
    # A single Solr instance taking part in the cluster.
    class Node
      include ActiveModel::Serializers::JSON
      self.include_root_in_json = false

      attr_reader :id, :cluster, :host, :port, :state, :solr_url, :api_url
      attr_accessor :shards

      # cluster  - the Cluster this node was discovered in
      # solr_url - the node's 'base_url' as reported in the cluster state
      # state    - state string (see Cluster#determine_node_state)
      #
      # Raises whatever URI() raises when solr_url cannot be parsed.
      def initialize(cluster, solr_url, state)
        @cluster, @solr_url, @state = cluster, solr_url, state
        uri = URI(solr_url)
        @host, @port = uri.host, uri.port
        @id = "#{host}_#{port}"
        @api_url = api_url_for(uri)
        @shards = []
      end

      # Plain-hash representation used for JSON serialization.
      def as_json(options = {})
        {'id' => id, 'host' => host, 'port' => port, 'state' => state, 'solr_url' => solr_url, 'api_url' => api_url, 'shards' => shards}
      end

      private

      # Returns the node's URL with its path replaced by '/api'.
      #
      # The path is swapped on the parsed URI rather than by textual
      # substitution on the raw string: a substitution hits the first
      # occurrence of the path text, which may sit inside the host
      # ("http://solr.example.com/solr"), and misbehaves when the path is empty.
      def api_url_for(uri)
        api_uri = uri.dup
        api_uri.path = '/api'
        api_uri.to_s
      end
    end

    # One replica of a shard of a collection, hosted on a Node.
    class Shard
      include ActiveModel::Serializers::JSON
      self.include_root_in_json = false

      attr_reader :id, :node, :name, :collection, :leader
      alias :leader? :leader

      # node       - the Node hosting this replica
      # name       - shard name
      # collection - collection name
      # leader     - true when this replica is the shard leader
      def initialize(node, name, collection, leader)
        @id = "#{collection}_#{name}"
        @node = node
        @name = name
        @collection = collection
        @leader = leader
      end

      # Plain-hash representation used for JSON serialization.
      def as_json(options = {})
        {'name' => name, 'collection' => collection, 'leader' => leader}
      end

      # Two shards are equal when they sit on the same node and share name
      # and collection. Anything that is not a Shard is simply unequal
      # instead of raising NoMethodError.
      def ==(other)
        other.is_a?(Shard) && node == other.node && name == other.name && collection == other.collection
      end
    end

    attr_reader :nodes

    # Reads the cluster state and the list of live nodes from
    # "<solr_uri>/zookeeper" and builds the Node/Shard graph.
    #
    # Errors while fetching or parsing the cluster state propagate to the
    # caller; errors while fetching the live nodes are treated as "no live
    # nodes" (see #fetch_live_nodes).
    def initialize(solr_uri)
      zk_uri = "#{solr_uri}/zookeeper?detail=true&path="
      @zk_clusterstate = JSON.parse(JSON.parse(RestClient.get(zk_uri + "/clusterstate.json"))['znode']['data'])
      @zk_live_nodes = fetch_live_nodes(zk_uri)

      @nodes = []
      @zk_clusterstate.each do |slice|
        slice[1]['shards'].each do |_shard_name, shard_description|
          # A shard's description can come back as empty when the shard is in trouble
          next if shard_description.empty?

          shard_description['replicas'].values.each do |node_json|
            node = nodes.find { |known| known.solr_url == node_json['base_url'] }
            unless node
              node = Node.new(self, node_json['base_url'], determine_node_state(node_json))
              nodes << node
            end
            new_shard = Shard.new(node, node_json['shard'], node_json['collection'], node_json['leader'] == 'true')
            node.shards << new_shard unless node.shards.include?(new_shard)
          end
        end
      end
    end

    private

    # Returns the titles of the children of the first entry in the
    # /live_nodes/ tree. Best effort by design: any failure (request error,
    # unexpected JSON shape, missing 'children') yields an empty list, so
    # every node is then reported as 'down'.
    def fetch_live_nodes(zk_uri)
      tree = JSON.parse(RestClient.get(zk_uri + "/live_nodes/"))['tree']
      tree.first['children'].map { |child| child['data']['title'] }
    rescue StandardError
      []
    end

    # State can be "live", "down", "recovering", "recovery_failed"
    #
    # A node missing from the live-nodes list is 'down' regardless of what the
    # cluster state says; otherwise 'active' is reported as 'live' and any
    # other state is passed through unchanged.
    def determine_node_state(node_json)
      return 'down' unless @zk_live_nodes.include?(node_json['node_name'])

      node_json['state'] == 'active' ? 'live' : node_json['state']
    end
  end
end
|
@@ -14,16 +14,17 @@ module LucidWorks
|
|
14
14
|
|
15
15
|
schema do
|
16
16
|
# Indexing Settings
|
17
|
-
attribute
|
18
|
-
attribute
|
19
|
-
|
17
|
+
attribute :unknown_type_handling, :string, :nil_when_blank => true
|
18
|
+
attribute :de_duplication, :string, :values => DEDUP_OPTIONS
|
19
|
+
attributes :update_handler_autocommit_max_docs, :update_handler_autocommit_max_time,
|
20
|
+
:update_handler_autosoftcommit_max_docs, :update_handler_autosoftcommit_max_time
|
21
|
+
|
20
22
|
# Querying Settings
|
21
23
|
attribute :unsupervised_feedback_emphasis, :string, :values => FEEDBACK_EMPHASIS
|
22
24
|
attribute :default_sort, :string, :values => DEFAULT_SORTS
|
23
25
|
attribute :query_parser, :string, :values => QUERY_PARSERS
|
24
26
|
attributes :spellcheck, :display_facets, :unsupervised_feedback, :query_time_stopwords,
|
25
|
-
|
26
|
-
:type => :boolean
|
27
|
+
:auto_complete, :boost_recent, :show_similar, :query_time_synonyms, :type => :boolean
|
27
28
|
attributes :stopword_list, :boosts, :synonym_list # Arrays
|
28
29
|
|
29
30
|
# Click Settings
|
@@ -33,10 +34,18 @@ module LucidWorks
|
|
33
34
|
# Other Settings
|
34
35
|
attribute :ssl, :boolean
|
35
36
|
attribute :elevations # Hash
|
37
|
+
attributes :main_index_ram_buffer_size_mb, :main_index_merge_factor
|
36
38
|
|
37
|
-
#
|
39
|
+
# Distributed Search Settings
|
38
40
|
attributes :search_server_list, :update_server_list # Arrays
|
39
41
|
end
|
42
|
+
|
43
|
+
validates_numericality_of :main_index_ram_buffer_size_mb, :main_index_merge_factor,
|
44
|
+
:allow_blank => true, :message => 'is not a valid value'
|
45
|
+
validates_numericality_of :update_handler_autocommit_max_docs, :update_handler_autocommit_max_time,
|
46
|
+
:update_handler_autosoftcommit_max_docs, :update_handler_autosoftcommit_max_time,
|
47
|
+
:only_integer => true, :greater_than => 0, :allow_blank => true,
|
48
|
+
:message => 'is not a valid value'
|
40
49
|
end
|
41
50
|
end
|
42
51
|
end
|
@@ -3,27 +3,30 @@ module LucidWorks
|
|
3
3
|
class Collection < Base
|
4
4
|
|
5
5
|
belongs_to :server
|
6
|
-
has_many :datasources, :fields, :fieldtypes, :activities, :roles
|
6
|
+
has_many :datasources, :fields, :fieldtypes, :activities, :roles, :caches, :dynamicfields
|
7
7
|
has_one :info, :settings, :click
|
8
8
|
has_one :index, :has_content => false
|
9
|
-
|
9
|
+
|
10
10
|
schema do
|
11
11
|
attribute :name, :string, :primary_key => true
|
12
12
|
attribute :instance_dir
|
13
13
|
attribute :template
|
14
|
+
attribute :num_shards, :integer, :omit_when_blank => true
|
14
15
|
end
|
15
16
|
|
16
17
|
LOGS_COLLECTION_NAME = 'LucidWorksLogs'
|
17
|
-
AD_FILTERING = 'adfiltering'
|
18
|
-
ROLE_FILTERING = 'filterbyrole'
|
19
|
-
STATIC_ACL_CONFIG = {
|
20
|
-
"filterer.class" => "com.lucid.security.WindowsACLQueryFilterer",
|
21
|
-
"provider.class" => "com.lucid.security.ad.ADACLTagProvider",
|
22
|
-
}
|
23
|
-
MAGIC_ACL_ONLY_FILTER_SETTING = {'should_clause' => '*:* -data_source_type:smb'}
|
24
18
|
|
25
19
|
validates_presence_of :name
|
26
|
-
|
20
|
+
validates_presence_of :num_shards, :if => lambda {|c| c.server.clustered? }
|
21
|
+
validates_numericality_of :num_shards, :only_integer => true, :greater_than => 0, :allow_blank => true, :message => 'is not a valid value'
|
22
|
+
|
23
|
+
# Assigns new attributes to this collection's caches and saves them.
#
# attributes - hash keyed by cache name (string or symbol keys); caches whose
#              name is not present are left unchanged.
#
# Nothing is saved unless every cache is valid. Returns false when any cache
# is invalid, otherwise the result of saving the caches in order (saving
# stops at the first cache whose save returns a falsy value).
def update_caches_attributes(attributes)
  by_name = attributes.with_indifferent_access
  caches.each do |cache|
    next unless by_name.include?(cache.name)
    cache.attributes = by_name[cache.name]
  end
  caches.all?(&:valid?) && caches.all?(&:save)
end
|
29
|
+
|
27
30
|
def destroyable?
|
28
31
|
true
|
29
32
|
end
|
@@ -36,43 +39,35 @@ module LucidWorks
|
|
36
39
|
def empty!
|
37
40
|
build_index.destroy(:params => {:key => 'iaccepttherisk'})
|
38
41
|
end
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
unless @rsolr
|
43
|
-
server_uri = self.server.host
|
44
|
-
@path_prefix = URI.parse(server_uri).path # The API key
|
45
|
-
@rsolr = RSolr.connect :url => server_uri.dup
|
46
|
-
end
|
47
|
-
@rsolr
|
42
|
+
|
43
|
+
def commit
|
44
|
+
solr.commit
|
48
45
|
end
|
49
|
-
|
50
|
-
def
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
46
|
+
|
47
|
+
def search(solr_params, options = {})
|
48
|
+
ActiveSupport::Notifications.instrument('solr.lucid_works') do |payload|
|
49
|
+
path = options[:path] || 'select'
|
50
|
+
begin
|
51
|
+
result = if options[:page] && options[:per_page]
|
52
|
+
# Constructing the request here instead of using result.request[:uri] to avoid losing it on RSolr::Error::Http errors
|
53
|
+
payload[:request] = solr.build_paginated_request(options[:page], options[:per_page], path, :params => solr_params)[:uri].to_s
|
54
|
+
solr.paginate(options[:page], options[:per_page], path, :params => solr_params)
|
55
|
+
else
|
56
|
+
# Constructing the request here instead of using result.request[:uri] to avoid losing it on RSolr::Error::Http errors
|
57
|
+
payload[:request] = solr.build_request(path, :params => solr_params)[:uri].to_s
|
58
|
+
solr.get(path, :params => solr_params)
|
59
|
+
end
|
60
|
+
payload[:status] = result.response[:status]
|
61
|
+
result
|
62
|
+
rescue RSolr::Error::Http => error
|
63
|
+
payload[:status] = error.response[:status]
|
64
|
+
raise
|
65
|
+
end
|
55
66
|
end
|
56
|
-
@rsolr_ext
|
57
67
|
end
|
58
68
|
|
59
|
-
|
60
|
-
|
61
|
-
rsolr.get "#{@path_prefix}/solr/#{name}/#{path_suffix}", :params => search_params
|
62
|
-
end
|
63
|
-
|
64
|
-
# Perform a Solr search using RSolr
|
65
|
-
def search(search_params={})
|
66
|
-
search_params[:page] ||= 1
|
67
|
-
search_params[:per_page] ||= 10
|
68
|
-
resp = rsolr_ext.find "#{@path_prefix}/solr/#{name}/select", search_params
|
69
|
-
if search_params[:wt] == :xml
|
70
|
-
data = Nokogiri.XML(resp)
|
71
|
-
raise "search received bad XML" unless data.root
|
72
|
-
else
|
73
|
-
data = resp
|
74
|
-
end
|
75
|
-
data
|
69
|
+
def document_count
|
70
|
+
solr.get('select', :params => {:wt => :ruby, :q => "*:*", :rows => 0})['response']['numFound']
|
76
71
|
end
|
77
72
|
|
78
73
|
def synonyms
|
@@ -85,6 +80,22 @@ module LucidWorks
|
|
85
80
|
Synonym.new(attributes.merge(:collection => self))
|
86
81
|
end
|
87
82
|
|
83
|
+
# Builds one persisted Elevation per entry in settings.elevations, which is
# iterated as query => list of entries. Each entry supplies the document id
# under 'doc' and the exclusion flag under 'exclude'.
#
# Returns an Array of Elevation objects (empty when there are no entries).
def elevations
  settings.elevations.inject([]) do |collected, (query, elevation_entries)|
    elevation_entries.each do |entry|
      collected << Elevation.new(
        :doc_id => entry['doc'], :collection => self, :query => query, :excluded => entry['exclude'], :persisted => true
      )
    end
    collected
  end
end
|
94
|
+
|
95
|
+
def build_elevation(attributes = {})
|
96
|
+
Elevation.new(attributes.merge(:collection => self))
|
97
|
+
end
|
98
|
+
|
88
99
|
def prime_activities
|
89
100
|
self.activities!.sort!{|a,b|a.id <=> b.id}
|
90
101
|
num_created = 0
|
@@ -115,7 +126,7 @@ module LucidWorks
|
|
115
126
|
|
116
127
|
# URL of Solr's built-in admin page
|
117
128
|
def admin_url
|
118
|
-
"#{server.
|
129
|
+
"#{server.server_uri}/solr/#{name}/admin/"
|
119
130
|
end
|
120
131
|
|
121
132
|
# We have to handle the jdbcdrivers model in a custom way, as a GET on .../jdbcdrivers returns a list of strings,
|
@@ -136,59 +147,18 @@ module LucidWorks
|
|
136
147
|
JSON.parse(RestClient.get(uri + "/components/all.json?handlerName=%2Flucid"))
|
137
148
|
end
|
138
149
|
|
139
|
-
def
|
140
|
-
|
141
|
-
assert_components_include_ad_xor_role
|
142
|
-
return self.components.include?(AD_FILTERING)
|
143
|
-
end
|
144
|
-
|
145
|
-
def acl_only?
|
146
|
-
filterer_config = self.filtering_settings['filterer.config']
|
147
|
-
return false if filterer_config.nil?
|
148
|
-
return self.filtering_settings['filterer.config'] != MAGIC_ACL_ONLY_FILTER_SETTING #rescue false
|
150
|
+
def available_templates
|
151
|
+
JSON.parse(RestClient.get(server.uri + "/collectiontemplates"))
|
149
152
|
end
|
150
153
|
|
151
|
-
def
|
152
|
-
|
153
|
-
raise "conflicting filtering components" if self.components.include?(AD_FILTERING) && self.components.include?(ROLE_FILTERING)
|
154
|
-
raise "missing filtering components" if ! self.components.include?(AD_FILTERING) && ! self.components.include?(ROLE_FILTERING)
|
154
|
+
def request_handler(name)
|
155
|
+
RequestHandler.new(self, name)
|
155
156
|
end
|
156
157
|
|
157
|
-
|
158
|
-
JSON.parse(RestClient.get(uri + "/filtering"))['adfiltering'] || {}
|
159
|
-
end
|
158
|
+
protected
|
160
159
|
|
161
|
-
|
162
|
-
|
163
|
-
new_component_set.delete(ROLE_FILTERING)
|
164
|
-
new_component_set.delete(AD_FILTERING)
|
165
|
-
new_component_set.unshift acl_filtering_enabled == 'true' ? AD_FILTERING : ROLE_FILTERING
|
166
|
-
end
|
167
|
-
|
168
|
-
def configure_filtering(opts)
|
169
|
-
if ! opts[:config]['java.naming.provider.url'].blank? && opts[:config]['java.naming.provider.url'] !~ %r(://)
|
170
|
-
opts[:config]['java.naming.provider.url'] = "ldap://#{opts[:config]['java.naming.provider.url']}"
|
160
|
+
def solr
|
161
|
+
@solr ||= RSolr.connect(:url => "#{server.server_uri}/solr/#{name}")
|
171
162
|
end
|
172
|
-
filtering_settings = STATIC_ACL_CONFIG.merge('provider.config' => opts[:config])
|
173
|
-
filtering_settings["filterer.config"] = opts[:acl_only] ? {} : MAGIC_ACL_ONLY_FILTER_SETTING
|
174
|
-
|
175
|
-
errors = {}
|
176
|
-
method = RestClient.send(:get, uri+'/filtering')['adfiltering'] ? :put : :post
|
177
|
-
begin
|
178
|
-
response = RestClient.send(method, uri+'/filtering/adfiltering', filtering_settings.to_json, :content_type => :json)
|
179
|
-
rescue => exception
|
180
|
-
JSON.parse(exception.response)['errors'].each {|e| errors[e['code']] = e['message']}
|
181
|
-
end
|
182
|
-
|
183
|
-
new_component_set = compute_component_set(opts[:enabled])
|
184
|
-
if new_component_set.sort != self.components.sort
|
185
|
-
begin
|
186
|
-
response = RestClient.send(:put, uri+'/components/components?handlerName=/lucid', new_component_set.to_json, :content_type => :json)
|
187
|
-
rescue => exception
|
188
|
-
JSON.parse(exception.response)['errors'].each {|e| errors[e['code']] = e['message']}
|
189
|
-
end
|
190
|
-
end
|
191
|
-
raise LucidWorks::AclConfigInvalid.new(errors) unless errors.empty?
|
192
|
-
end
|
193
163
|
end
|
194
164
|
end
|
@@ -6,7 +6,8 @@ module LucidWorks
|
|
6
6
|
self.collection_name = 'history' # i.e. not the plural 'histories'
|
7
7
|
schema do
|
8
8
|
attributes :crawl_started, :crawl_stopped, :type => :iso8601
|
9
|
-
attributes :num_updated, :num_new, :num_unchanged, :
|
9
|
+
attributes :num_updated, :num_new, :num_unchanged, :num_not_found,
|
10
|
+
:num_filter_denied, :num_access_denied, :num_robots_denied, :type => :integer
|
10
11
|
end
|
11
12
|
|
12
13
|
def doc_count
|
@@ -31,6 +31,7 @@ module LucidWorks
|
|
31
31
|
#
|
32
32
|
def frequency
|
33
33
|
case period
|
34
|
+
when 1.minute.seconds..59.minutes.seconds then 'every'
|
34
35
|
when 1.weeks.seconds then 'weekly'
|
35
36
|
when 1.days.seconds then 'daily'
|
36
37
|
when 1.hours.seconds then 'hourly'
|
@@ -44,6 +45,7 @@ module LucidWorks
|
|
44
45
|
#
|
45
46
|
def frequency=(frequency)
|
46
47
|
self.period = case frequency
|
48
|
+
when 'every' then period
|
47
49
|
when 'hourly' then 1.hours.seconds.to_i
|
48
50
|
when 'daily' then 1.days.seconds.to_i
|
49
51
|
when 'weekly' then 1.weeks.seconds.to_i
|
@@ -57,7 +59,6 @@ module LucidWorks
|
|
57
59
|
#
|
58
60
|
def next_start
|
59
61
|
return start_time if (now = Time.now) <= start_time
|
60
|
-
# require 'ruby-debug'; debugger
|
61
62
|
time_since_start = now - start_time
|
62
63
|
last_interval_num = (time_since_start / period).to_i
|
63
64
|
next_interval_num = if (time_since_start % period) == 0
|
@@ -111,8 +112,10 @@ module LucidWorks
|
|
111
112
|
self.frequency = all_attributes['frequency']
|
112
113
|
self.start_time =
|
113
114
|
case all_attributes['frequency']
|
115
|
+
when 'every'
|
116
|
+
self.period = all_attributes['period'].to_i
|
117
|
+
now.ceil(period)
|
114
118
|
when 'weekly'
|
115
|
-
# require 'ruby-debug'; debugger
|
116
119
|
start = now.beginning_of_week.advance(all_attributes['start'])
|
117
120
|
start < now ? start.advance(:weeks => 1) : start
|
118
121
|
when 'daily'
|
@@ -5,7 +5,7 @@ module LucidWorks
|
|
5
5
|
self.singleton = true
|
6
6
|
belongs_to :datasource
|
7
7
|
|
8
|
-
STOPPED_STATES = %w{ IDLE STOPPED ABORTED EXCEPTION FINISHED }
|
8
|
+
STOPPED_STATES = %w{ IDLE STOPPED ABORTED EXCEPTION FINISHED UNKNOWN }
|
9
9
|
POST_PROCESSING_STATES = %w{ STOPPING ABORTING }
|
10
10
|
CRAWLSTATES = STOPPED_STATES + [ 'RUNNING' ] + POST_PROCESSING_STATES
|
11
11
|
|
@@ -13,7 +13,8 @@ module LucidWorks
|
|
13
13
|
attribute :crawl_state, :string, :values => CRAWLSTATES
|
14
14
|
attribute :jobId
|
15
15
|
attributes :crawl_started, :crawl_stopped, :type => :iso8601
|
16
|
-
attributes :num_unchanged, :num_updated, :num_new, :num_failed, :num_deleted, :num_total,
|
16
|
+
attributes :num_unchanged, :num_updated, :num_new, :num_failed, :num_deleted, :num_total,
|
17
|
+
:num_not_found, :num_filter_denied, :num_access_denied, :num_robots_denied, :type => :integer
|
17
18
|
end
|
18
19
|
|
19
20
|
# Create predicate methods for all the crawl states
|
@@ -4,51 +4,33 @@ module LucidWorks
|
|
4
4
|
belongs_to :collection
|
5
5
|
has_many :histories, :class_name => :history, :retrieveable_en_masse => true
|
6
6
|
has_one :status, :retrieveable_en_masse => true
|
7
|
-
has_one :schedule, :crawldata
|
7
|
+
has_one :schedule, :crawldata, :mapping
|
8
8
|
has_one :index, :job, :has_content => false
|
9
9
|
|
10
|
-
TYPES = %w{ external file ftp hdfs kfs lucidworkslogs web s3 s3n smb solrxml jdbc sharepoint }
|
11
|
-
SYSTEM_TYPES = %w{ lucidworkslogs }
|
12
10
|
BOUNDS = %w{ tree none }
|
13
|
-
|
14
|
-
# Later we may change these to be arrays if we decide to support more than one choice
|
15
|
-
# e.g. :web => ['lucid.aperture', 'nutch'], :file => ['lucid.aperture', 'lucid.fs']
|
16
|
-
:file => 'lucid.aperture',
|
17
|
-
:lucidworkslogs => 'lucid.logs',
|
18
|
-
:external => 'lucid.external',
|
19
|
-
:web => 'lucid.aperture',
|
20
|
-
:solrxml => 'lucid.solrxml',
|
21
|
-
:jdbc => 'lucid.jdbc',
|
22
|
-
:sharepoint => 'lucid.gcm',
|
23
|
-
:ftp => 'lucid.fs',
|
24
|
-
:hdfs => 'lucid.fs',
|
25
|
-
:kfs => 'lucid.fs',
|
26
|
-
:smb => 'lucid.fs',
|
27
|
-
:s3n => 'lucid.fs',
|
28
|
-
:s3 => 'lucid.fs'
|
29
|
-
}.with_indifferent_access
|
30
|
-
|
11
|
+
|
31
12
|
schema do
|
32
13
|
# common
|
33
14
|
attributes :name, :crawler
|
34
|
-
attribute :type, :string
|
15
|
+
attribute :type, :string
|
35
16
|
attribute :crawl_depth, :integer, :nil_when_blank => true
|
36
17
|
attribute :max_docs, :integer
|
37
|
-
attributes :max_bytes,
|
38
|
-
attribute :
|
18
|
+
attributes :max_bytes, :commit_within, :type => :integer, :omit_when_blank => true
|
19
|
+
attribute :commit_within_sec, :custom
|
39
20
|
attribute :commit_on_finish, :boolean
|
40
|
-
attributes :include_paths, :exclude_paths, :
|
41
|
-
|
21
|
+
attributes :include_paths, :exclude_paths, :filter_follow, :filter_track, :filter_locations,
|
22
|
+
:type => :list, :separator => "\n"
|
42
23
|
attribute :bounds, :string, :values => BOUNDS
|
43
24
|
# web
|
44
25
|
attributes :url, :category
|
45
26
|
attribute :collect_links, :boolean
|
46
27
|
attribute :auth, :string, :omit_when_blank => true # Hash
|
47
|
-
|
28
|
+
attribute :proxy_host, :string, :omit_when_blank => true
|
48
29
|
attribute :proxy_port, :string, :omit_when_blank => true
|
49
30
|
attribute :proxy_username, :string, :omit_when_blank => true
|
50
31
|
attribute :proxy_password, :string, :omit_when_blank => true
|
51
32
|
attribute :ignore_robots, :boolean
|
33
|
+
attribute :add_failed_docs, :boolean
|
52
34
|
# file
|
53
35
|
attribute :path
|
54
36
|
attribute :follow_links, :boolean
|
@@ -67,11 +49,24 @@ module LucidWorks
|
|
67
49
|
attribute :password
|
68
50
|
attribute :domain
|
69
51
|
attribute :my_site_base_url, :string, :nil_when_blank => true
|
70
|
-
|
71
|
-
attribute :excluded_urls
|
52
|
+
attributes :included_urls, :excluded_urls, :type => :list, :separator => "\n"
|
72
53
|
attribute :kdcserver
|
73
54
|
attribute :use_sp_search_visibility, :boolean
|
74
55
|
attribute :aliases
|
56
|
+
attribute :feed_unpublished_documents, :boolean
|
57
|
+
attribute :push_acls, :boolean
|
58
|
+
attribute :enable_security_trimming, :boolean
|
59
|
+
attribute :username_format_in_ace
|
60
|
+
attribute :groupname_format_in_ace
|
61
|
+
attribute :ldap_server_host_address
|
62
|
+
attribute :ldap_server_port_number
|
63
|
+
attribute :ldap_server_use_ssl, :boolean
|
64
|
+
attribute :ldap_auth_type
|
65
|
+
attribute :ldap_search_base
|
66
|
+
attribute :ldap_read_ad_groups_type
|
67
|
+
attribute :ldap_cache_groups_membership, :boolean
|
68
|
+
attribute :ldap_cache_size
|
69
|
+
attribute :ldap_cache_refresh_interval
|
75
70
|
# external
|
76
71
|
attribute :source
|
77
72
|
attribute :source_type
|
@@ -97,25 +92,22 @@ module LucidWorks
|
|
97
92
|
validates_numericality_of :max_bytes, :allow_blank => true
|
98
93
|
validates_presence_of :url, :if => lambda { |d| d.type == 'web' }
|
99
94
|
|
100
|
-
#
|
101
|
-
def
|
102
|
-
commit_within.blank? ? nil : commit_within / 1.second.milliseconds
|
95
|
+
# Virtual attributes to ease UI implementation
|
96
|
+
def commit_within_sec
|
97
|
+
commit_within.blank? ? nil : commit_within / 1.second.milliseconds
|
103
98
|
end
|
104
|
-
def
|
105
|
-
self.commit_within =
|
99
|
+
def commit_within_sec=(seconds)
|
100
|
+
self.commit_within = seconds.blank? ? nil : seconds.to_i.seconds.milliseconds
|
106
101
|
end
|
107
102
|
def delete_after_days
|
108
|
-
delete_after.blank? ? nil :
|
103
|
+
delete_after.blank? ? nil : delete_after / 1.second.milliseconds / 1.day.seconds
|
109
104
|
end
|
110
105
|
def delete_after_days=(days)
|
111
106
|
self.delete_after = days.blank? ? nil : days.to_i.days.milliseconds
|
112
107
|
end
|
113
108
|
|
114
109
|
def document_count
|
115
|
-
collection.
|
116
|
-
:wt => :ruby,
|
117
|
-
:q => (type == 'lucidworkslogs' ? "*:*" : "data_source:#{id}"),
|
118
|
-
:rows => 0)['response']['numFound']
|
110
|
+
collection.search(:wt => :ruby, :q => (type == 'lucidworkslogs' ? "*:*" : "data_source:#{id}"), :rows => 0)['response']['numFound']
|
119
111
|
end
|
120
112
|
|
121
113
|
def empty!
|
@@ -132,11 +124,6 @@ module LucidWorks
|
|
132
124
|
!collection.system?
|
133
125
|
end
|
134
126
|
|
135
|
-
def crawlable?
|
136
|
-
# Don't let user schedule crawl of external datasources
|
137
|
-
!%w{ external }.include?(type)
|
138
|
-
end
|
139
|
-
|
140
127
|
def start_crawl!
|
141
128
|
build_job.save
|
142
129
|
end
|
@@ -145,10 +132,6 @@ module LucidWorks
|
|
145
132
|
build_job.destroy
|
146
133
|
end
|
147
134
|
|
148
|
-
def t_type
|
149
|
-
I18n.t(type, :scope => 'activemodel.models.lucid_works.datasource.type')
|
150
|
-
end
|
151
|
-
|
152
135
|
def progress
|
153
136
|
return nil if status.stopped?
|
154
137
|
return nil unless histories.size > 0
|
@@ -5,6 +5,7 @@ module LucidWorks
|
|
5
5
|
attr_reader :description, :name, :allowed_values, :type, :default_value, :required, :read_only, :advanced
|
6
6
|
alias :read_only? :read_only
|
7
7
|
alias :advanced? :advanced
|
8
|
+
alias :required? :required
|
8
9
|
|
9
10
|
def initialize(attributes = {})
|
10
11
|
@description = attributes['description']
|
@@ -14,7 +15,7 @@ module LucidWorks
|
|
14
15
|
@default_value = attributes['default_value']
|
15
16
|
@required = attributes['required']
|
16
17
|
@read_only = attributes['read_only']
|
17
|
-
@advanced = attributes['advanced'
|
18
|
+
@advanced = attributes['hints'].include? 'advanced' rescue false
|
18
19
|
end
|
19
20
|
end
|
20
21
|
end
|
@@ -8,6 +8,8 @@ module LucidWorks
|
|
8
8
|
"---" # Separator UI hint
|
9
9
|
]
|
10
10
|
|
11
|
+
SYSTEM_TYPES = %w{ lucidworkslogs }
|
12
|
+
|
11
13
|
attr_reader :crawler, :category, :type, :props
|
12
14
|
|
13
15
|
def initialize(crawler, attributes = {})
|
@@ -17,9 +19,21 @@ module LucidWorks
|
|
17
19
|
@props = attributes['props']
|
18
20
|
end
|
19
21
|
|
22
|
+
def system?
|
23
|
+
SYSTEM_TYPES.include? type
|
24
|
+
end
|
25
|
+
|
20
26
|
def properties
|
21
27
|
@properties ||= @props.map do |prop|
|
22
28
|
DatasourceProperty.new(prop)
|
29
|
+
end.tap do |properties|
|
30
|
+
def properties.regular
|
31
|
+
select {|p| !p.advanced? }
|
32
|
+
end
|
33
|
+
|
34
|
+
def properties.advanced
|
35
|
+
select &:advanced?
|
36
|
+
end
|
23
37
|
end
|
24
38
|
end
|
25
39
|
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'lucid_works/field_commons'
|
2
|
+
|
3
|
+
module LucidWorks
  # Dynamic field model; its behaviour comes from LucidWorks::FieldCommons.
  class Dynamicfield < Base
    include LucidWorks::FieldCommons

    # A record that is not yet persisted may not reuse the name of a dynamic
    # field already present in its collection. Blank names are skipped here.
    validates_each :name, :unless => :persisted?, :allow_blank => true do |record, attribute, candidate|
      name_taken = record.collection.dynamicfields.any? { |existing| existing.name == candidate }
      record.errors.add(attribute, 'must be unique') if name_taken
    end
  end
end
|