lucid_works 0.7.18 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rvmrc +2 -3
- data/Gemfile +2 -8
- data/Gemfile.lock +45 -53
- data/README.rdoc +2 -6
- data/Rakefile +1 -1
- data/config/locales/en.yml +221 -239
- data/lib/lucid_works/activity.rb +8 -5
- data/lib/lucid_works/base.rb +27 -16
- data/lib/lucid_works/cache.rb +13 -0
- data/lib/lucid_works/cluster.rb +84 -0
- data/lib/lucid_works/collection/settings.rb +15 -6
- data/lib/lucid_works/collection.rb +62 -92
- data/lib/lucid_works/datasource/history.rb +2 -1
- data/lib/lucid_works/datasource/mapping.rb +12 -0
- data/lib/lucid_works/datasource/schedule.rb +5 -2
- data/lib/lucid_works/datasource/status.rb +3 -2
- data/lib/lucid_works/datasource.rb +31 -48
- data/lib/lucid_works/datasource_property.rb +2 -1
- data/lib/lucid_works/datasource_type.rb +14 -0
- data/lib/lucid_works/dynamicfield.rb +12 -0
- data/lib/lucid_works/elevation.rb +93 -0
- data/lib/lucid_works/exceptions.rb +0 -4
- data/lib/lucid_works/field.rb +31 -111
- data/lib/lucid_works/field_commons.rb +133 -0
- data/lib/lucid_works/gem_version.rb +1 -1
- data/lib/lucid_works/inflections.rb +3 -0
- data/lib/lucid_works/patch_time.rb +4 -0
- data/lib/lucid_works/request_handler.rb +16 -0
- data/lib/lucid_works/role.rb +23 -8
- data/lib/lucid_works/schema/attribute.rb +1 -1
- data/lib/lucid_works/schema/boolean_attribute.rb +1 -1
- data/lib/lucid_works/schema/integer_attribute.rb +3 -4
- data/lib/lucid_works/server/crawlers_status.rb +15 -0
- data/lib/lucid_works/server.rb +35 -14
- data/lib/lucid_works/simple_naming.rb +1 -7
- data/lib/lucid_works/synonym.rb +1 -1
- data/lib/lucid_works/version.rb +1 -0
- data/lib/lucid_works.rb +8 -1
- data/lucid_works.gemspec +8 -9
- data/spec/fixtures/zookeeper/clusterstate.json +30 -0
- data/spec/fixtures/zookeeper/clusterstate_broken_shard.json +29 -0
- data/spec/fixtures/zookeeper/live_nodes.json +28 -0
- data/spec/fixtures/zookeeper/live_nodes_no_children.json +26 -0
- data/spec/fixtures/zookeeper/live_nodes_one_child.json +36 -0
- data/spec/lib/lucid_works/base_spec.rb +33 -24
- data/spec/lib/lucid_works/cache_spec.rb +44 -0
- data/spec/lib/lucid_works/cluster_spec.rb +109 -0
- data/spec/lib/lucid_works/collection/activity_spec.rb +29 -0
- data/spec/lib/lucid_works/collection/prime_activities_spec.rb +1 -1
- data/spec/lib/lucid_works/collection/settings_spec.rb +31 -0
- data/spec/lib/lucid_works/collection_spec.rb +166 -107
- data/spec/lib/lucid_works/datasource/schedule_spec.rb +75 -46
- data/spec/lib/lucid_works/datasource/status_spec.rb +5 -5
- data/spec/lib/lucid_works/datasource_property_spec.rb +41 -0
- data/spec/lib/lucid_works/datasource_spec.rb +40 -12
- data/spec/lib/lucid_works/datasource_type_spec.rb +31 -0
- data/spec/lib/lucid_works/dynamicfield_spec.rb +214 -0
- data/spec/lib/lucid_works/elevation_spec.rb +175 -0
- data/spec/lib/lucid_works/field_spec.rb +52 -21
- data/spec/lib/lucid_works/fieldtype_spec.rb +0 -1
- data/spec/lib/lucid_works/request_handler_spec.rb +11 -0
- data/spec/lib/lucid_works/role_spec.rb +77 -0
- data/spec/lib/lucid_works/server/crawlers_status_spec.rb +21 -0
- data/spec/lib/lucid_works/server_spec.rb +123 -22
- data/spec/lib/lucid_works/{collection/synonym_spec.rb → synonym_spec.rb} +23 -22
- data/spec/lib/lucid_works/version_spec.rb +6 -0
- metadata +132 -64
- data/spec/lib/lucid_works/collection/acl_config_spec.rb +0 -212
@@ -0,0 +1,13 @@
|
|
1
|
+
module LucidWorks
|
2
|
+
class Cache < Base
|
3
|
+
belongs_to :collection
|
4
|
+
|
5
|
+
schema do
|
6
|
+
dynamic_attributes true
|
7
|
+
attribute :name, :string, :primary_key => true, :omit_during_update => true
|
8
|
+
end
|
9
|
+
|
10
|
+
validates_presence_of :size
|
11
|
+
validates_numericality_of :initial_size, :size, :only_integer => true, :greater_than_or_equal_to => 0, :allow_blank => true, :message => 'is not a valid value'
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module LucidWorks
|
4
|
+
class Cluster
|
5
|
+
class Node
|
6
|
+
include ActiveModel::Serializers::JSON
|
7
|
+
self.include_root_in_json = false
|
8
|
+
|
9
|
+
attr_reader :id, :cluster, :host, :port, :state, :solr_url, :api_url
|
10
|
+
attr_accessor :shards
|
11
|
+
|
12
|
+
def initialize(cluster, solr_url, state)
|
13
|
+
@cluster, @solr_url, @state = cluster, solr_url, state
|
14
|
+
uri = URI(solr_url)
|
15
|
+
@host, @port = uri.host, uri.port
|
16
|
+
@id = "#{host}_#{port}"
|
17
|
+
@api_url = solr_url.sub uri.path, '/api'
|
18
|
+
@shards = []
|
19
|
+
end
|
20
|
+
|
21
|
+
def as_json(options = {})
|
22
|
+
{'id' => id, 'host' => host, 'port' => port, 'state' => state, 'solr_url' => solr_url, 'api_url' => api_url, 'shards' => shards}
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
class Shard
|
27
|
+
include ActiveModel::Serializers::JSON
|
28
|
+
self.include_root_in_json = false
|
29
|
+
|
30
|
+
attr_reader :id, :node, :name, :collection, :leader
|
31
|
+
alias :leader? :leader
|
32
|
+
|
33
|
+
def initialize(node, name, collection, leader)
|
34
|
+
@id = "#{collection}_#{name}"
|
35
|
+
@node = node
|
36
|
+
@name = name
|
37
|
+
@collection = collection
|
38
|
+
@leader = leader
|
39
|
+
end
|
40
|
+
|
41
|
+
def as_json(options = {})
|
42
|
+
{'name' => name, 'collection' => collection, 'leader' => leader}
|
43
|
+
end
|
44
|
+
|
45
|
+
def ==(other)
|
46
|
+
node == other.node && name == other.name && collection == other.collection
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
attr_reader :nodes
|
51
|
+
|
52
|
+
def initialize(solr_uri)
|
53
|
+
zk_uri = "#{solr_uri}/zookeeper?detail=true&path="
|
54
|
+
@zk_clusterstate = JSON.parse(JSON.parse(RestClient.get(zk_uri + "/clusterstate.json"))['znode']['data'])
|
55
|
+
@zk_live_nodes = JSON.parse(RestClient.get(zk_uri + "/live_nodes/"))['tree'].first['children'].
|
56
|
+
map {|child| child['data']['title'] } rescue []
|
57
|
+
|
58
|
+
@nodes = []
|
59
|
+
@zk_clusterstate.each do |slice|
|
60
|
+
slice[1]['shards'].each do |shard_name, shard_description|
|
61
|
+
unless shard_description.empty? # A shard's description can come back as empty when the shard is in trouble
|
62
|
+
shard_description['replicas'].values.each do |node_json|
|
63
|
+
node = nodes.find {|node| node.solr_url == node_json['base_url'] }
|
64
|
+
nodes << (node = Node.new(self, node_json['base_url'], determine_node_state(node_json))) unless node
|
65
|
+
new_shard = Shard.new(node, node_json['shard'], node_json['collection'], node_json['leader'] == 'true')
|
66
|
+
node.shards << new_shard unless node.shards.include?(new_shard)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
private
|
74
|
+
|
75
|
+
# State can be "live", "down", "recovering", "recovery_failed"
|
76
|
+
def determine_node_state(node_json)
|
77
|
+
if @zk_live_nodes.include?(node_json['node_name'])
|
78
|
+
node_json['state'] == 'active' ? 'live' : node_json['state']
|
79
|
+
else
|
80
|
+
'down'
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
@@ -14,16 +14,17 @@ module LucidWorks
|
|
14
14
|
|
15
15
|
schema do
|
16
16
|
# Indexing Settings
|
17
|
-
attribute
|
18
|
-
attribute
|
19
|
-
|
17
|
+
attribute :unknown_type_handling, :string, :nil_when_blank => true
|
18
|
+
attribute :de_duplication, :string, :values => DEDUP_OPTIONS
|
19
|
+
attributes :update_handler_autocommit_max_docs, :update_handler_autocommit_max_time,
|
20
|
+
:update_handler_autosoftcommit_max_docs, :update_handler_autosoftcommit_max_time
|
21
|
+
|
20
22
|
# Querying Settings
|
21
23
|
attribute :unsupervised_feedback_emphasis, :string, :values => FEEDBACK_EMPHASIS
|
22
24
|
attribute :default_sort, :string, :values => DEFAULT_SORTS
|
23
25
|
attribute :query_parser, :string, :values => QUERY_PARSERS
|
24
26
|
attributes :spellcheck, :display_facets, :unsupervised_feedback, :query_time_stopwords,
|
25
|
-
|
26
|
-
:type => :boolean
|
27
|
+
:auto_complete, :boost_recent, :show_similar, :query_time_synonyms, :type => :boolean
|
27
28
|
attributes :stopword_list, :boosts, :synonym_list # Arrays
|
28
29
|
|
29
30
|
# Click Settings
|
@@ -33,10 +34,18 @@ module LucidWorks
|
|
33
34
|
# Other Settings
|
34
35
|
attribute :ssl, :boolean
|
35
36
|
attribute :elevations # Hash
|
37
|
+
attributes :main_index_ram_buffer_size_mb, :main_index_merge_factor
|
36
38
|
|
37
|
-
#
|
39
|
+
# Distributed Search Settings
|
38
40
|
attributes :search_server_list, :update_server_list # Arrays
|
39
41
|
end
|
42
|
+
|
43
|
+
validates_numericality_of :main_index_ram_buffer_size_mb, :main_index_merge_factor,
|
44
|
+
:allow_blank => true, :message => 'is not a valid value'
|
45
|
+
validates_numericality_of :update_handler_autocommit_max_docs, :update_handler_autocommit_max_time,
|
46
|
+
:update_handler_autosoftcommit_max_docs, :update_handler_autosoftcommit_max_time,
|
47
|
+
:only_integer => true, :greater_than => 0, :allow_blank => true,
|
48
|
+
:message => 'is not a valid value'
|
40
49
|
end
|
41
50
|
end
|
42
51
|
end
|
@@ -3,27 +3,30 @@ module LucidWorks
|
|
3
3
|
class Collection < Base
|
4
4
|
|
5
5
|
belongs_to :server
|
6
|
-
has_many :datasources, :fields, :fieldtypes, :activities, :roles
|
6
|
+
has_many :datasources, :fields, :fieldtypes, :activities, :roles, :caches, :dynamicfields
|
7
7
|
has_one :info, :settings, :click
|
8
8
|
has_one :index, :has_content => false
|
9
|
-
|
9
|
+
|
10
10
|
schema do
|
11
11
|
attribute :name, :string, :primary_key => true
|
12
12
|
attribute :instance_dir
|
13
13
|
attribute :template
|
14
|
+
attribute :num_shards, :integer, :omit_when_blank => true
|
14
15
|
end
|
15
16
|
|
16
17
|
LOGS_COLLECTION_NAME = 'LucidWorksLogs'
|
17
|
-
AD_FILTERING = 'adfiltering'
|
18
|
-
ROLE_FILTERING = 'filterbyrole'
|
19
|
-
STATIC_ACL_CONFIG = {
|
20
|
-
"filterer.class" => "com.lucid.security.WindowsACLQueryFilterer",
|
21
|
-
"provider.class" => "com.lucid.security.ad.ADACLTagProvider",
|
22
|
-
}
|
23
|
-
MAGIC_ACL_ONLY_FILTER_SETTING = {'should_clause' => '*:* -data_source_type:smb'}
|
24
18
|
|
25
19
|
validates_presence_of :name
|
26
|
-
|
20
|
+
validates_presence_of :num_shards, :if => lambda {|c| c.server.clustered? }
|
21
|
+
validates_numericality_of :num_shards, :only_integer => true, :greater_than => 0, :allow_blank => true, :message => 'is not a valid value'
|
22
|
+
|
23
|
+
def update_caches_attributes(attributes)
|
24
|
+
attributes = attributes.with_indifferent_access
|
25
|
+
caches.each {|cache| cache.attributes = attributes[cache.name] if attributes.include?(cache.name) }
|
26
|
+
return false unless caches.all? &:valid?
|
27
|
+
caches.all? &:save
|
28
|
+
end
|
29
|
+
|
27
30
|
def destroyable?
|
28
31
|
true
|
29
32
|
end
|
@@ -36,43 +39,35 @@ module LucidWorks
|
|
36
39
|
def empty!
|
37
40
|
build_index.destroy(:params => {:key => 'iaccepttherisk'})
|
38
41
|
end
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
unless @rsolr
|
43
|
-
server_uri = self.server.host
|
44
|
-
@path_prefix = URI.parse(server_uri).path # The API key
|
45
|
-
@rsolr = RSolr.connect :url => server_uri.dup
|
46
|
-
end
|
47
|
-
@rsolr
|
42
|
+
|
43
|
+
def commit
|
44
|
+
solr.commit
|
48
45
|
end
|
49
|
-
|
50
|
-
def
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
46
|
+
|
47
|
+
def search(solr_params, options = {})
|
48
|
+
ActiveSupport::Notifications.instrument('solr.lucid_works') do |payload|
|
49
|
+
path = options[:path] || 'select'
|
50
|
+
begin
|
51
|
+
result = if options[:page] && options[:per_page]
|
52
|
+
# Constructing the request here instead of using result.request[:uri] to avoid losing it on RSolr::Error::Http errors
|
53
|
+
payload[:request] = solr.build_paginated_request(options[:page], options[:per_page], path, :params => solr_params)[:uri].to_s
|
54
|
+
solr.paginate(options[:page], options[:per_page], path, :params => solr_params)
|
55
|
+
else
|
56
|
+
# Constructing the request here instead of using result.request[:uri] to avoid losing it on RSolr::Error::Http errors
|
57
|
+
payload[:request] = solr.build_request(path, :params => solr_params)[:uri].to_s
|
58
|
+
solr.get(path, :params => solr_params)
|
59
|
+
end
|
60
|
+
payload[:status] = result.response[:status]
|
61
|
+
result
|
62
|
+
rescue RSolr::Error::Http => error
|
63
|
+
payload[:status] = error.response[:status]
|
64
|
+
raise
|
65
|
+
end
|
55
66
|
end
|
56
|
-
@rsolr_ext
|
57
67
|
end
|
58
68
|
|
59
|
-
|
60
|
-
|
61
|
-
rsolr.get "#{@path_prefix}/solr/#{name}/#{path_suffix}", :params => search_params
|
62
|
-
end
|
63
|
-
|
64
|
-
# Perform a Solr search using RSolr
|
65
|
-
def search(search_params={})
|
66
|
-
search_params[:page] ||= 1
|
67
|
-
search_params[:per_page] ||= 10
|
68
|
-
resp = rsolr_ext.find "#{@path_prefix}/solr/#{name}/select", search_params
|
69
|
-
if search_params[:wt] == :xml
|
70
|
-
data = Nokogiri.XML(resp)
|
71
|
-
raise "search received bad XML" unless data.root
|
72
|
-
else
|
73
|
-
data = resp
|
74
|
-
end
|
75
|
-
data
|
69
|
+
def document_count
|
70
|
+
solr.get('select', :params => {:wt => :ruby, :q => "*:*", :rows => 0})['response']['numFound']
|
76
71
|
end
|
77
72
|
|
78
73
|
def synonyms
|
@@ -85,6 +80,22 @@ module LucidWorks
|
|
85
80
|
Synonym.new(attributes.merge(:collection => self))
|
86
81
|
end
|
87
82
|
|
83
|
+
def elevations
|
84
|
+
elevations = []
|
85
|
+
settings.elevations.each do |query, elevation_entries|
|
86
|
+
elevation_entries.each do |entry|
|
87
|
+
elevations << Elevation.new(
|
88
|
+
:doc_id => entry['doc'], :collection => self, :query => query, :excluded => entry['exclude'], :persisted => true
|
89
|
+
)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
elevations
|
93
|
+
end
|
94
|
+
|
95
|
+
def build_elevation(attributes = {})
|
96
|
+
Elevation.new(attributes.merge(:collection => self))
|
97
|
+
end
|
98
|
+
|
88
99
|
def prime_activities
|
89
100
|
self.activities!.sort!{|a,b|a.id <=> b.id}
|
90
101
|
num_created = 0
|
@@ -115,7 +126,7 @@ module LucidWorks
|
|
115
126
|
|
116
127
|
# URL of Solr's built-in admin page
|
117
128
|
def admin_url
|
118
|
-
"#{server.
|
129
|
+
"#{server.server_uri}/solr/#{name}/admin/"
|
119
130
|
end
|
120
131
|
|
121
132
|
# We have to handle the jdbcdrivers model in a custom way, as a GET on .../jdbcdrivers returns a list of strings,
|
@@ -136,59 +147,18 @@ module LucidWorks
|
|
136
147
|
JSON.parse(RestClient.get(uri + "/components/all.json?handlerName=%2Flucid"))
|
137
148
|
end
|
138
149
|
|
139
|
-
def
|
140
|
-
|
141
|
-
assert_components_include_ad_xor_role
|
142
|
-
return self.components.include?(AD_FILTERING)
|
143
|
-
end
|
144
|
-
|
145
|
-
def acl_only?
|
146
|
-
filterer_config = self.filtering_settings['filterer.config']
|
147
|
-
return false if filterer_config.nil?
|
148
|
-
return self.filtering_settings['filterer.config'] != MAGIC_ACL_ONLY_FILTER_SETTING #rescue false
|
150
|
+
def available_templates
|
151
|
+
JSON.parse(RestClient.get(server.uri + "/collectiontemplates"))
|
149
152
|
end
|
150
153
|
|
151
|
-
def
|
152
|
-
|
153
|
-
raise "conflicting filtering components" if self.components.include?(AD_FILTERING) && self.components.include?(ROLE_FILTERING)
|
154
|
-
raise "missing filtering components" if ! self.components.include?(AD_FILTERING) && ! self.components.include?(ROLE_FILTERING)
|
154
|
+
def request_handler(name)
|
155
|
+
RequestHandler.new(self, name)
|
155
156
|
end
|
156
157
|
|
157
|
-
|
158
|
-
JSON.parse(RestClient.get(uri + "/filtering"))['adfiltering'] || {}
|
159
|
-
end
|
158
|
+
protected
|
160
159
|
|
161
|
-
|
162
|
-
|
163
|
-
new_component_set.delete(ROLE_FILTERING)
|
164
|
-
new_component_set.delete(AD_FILTERING)
|
165
|
-
new_component_set.unshift acl_filtering_enabled == 'true' ? AD_FILTERING : ROLE_FILTERING
|
166
|
-
end
|
167
|
-
|
168
|
-
def configure_filtering(opts)
|
169
|
-
if ! opts[:config]['java.naming.provider.url'].blank? && opts[:config]['java.naming.provider.url'] !~ %r(://)
|
170
|
-
opts[:config]['java.naming.provider.url'] = "ldap://#{opts[:config]['java.naming.provider.url']}"
|
160
|
+
def solr
|
161
|
+
@solr ||= RSolr.connect(:url => "#{server.server_uri}/solr/#{name}")
|
171
162
|
end
|
172
|
-
filtering_settings = STATIC_ACL_CONFIG.merge('provider.config' => opts[:config])
|
173
|
-
filtering_settings["filterer.config"] = opts[:acl_only] ? {} : MAGIC_ACL_ONLY_FILTER_SETTING
|
174
|
-
|
175
|
-
errors = {}
|
176
|
-
method = RestClient.send(:get, uri+'/filtering')['adfiltering'] ? :put : :post
|
177
|
-
begin
|
178
|
-
response = RestClient.send(method, uri+'/filtering/adfiltering', filtering_settings.to_json, :content_type => :json)
|
179
|
-
rescue => exception
|
180
|
-
JSON.parse(exception.response)['errors'].each {|e| errors[e['code']] = e['message']}
|
181
|
-
end
|
182
|
-
|
183
|
-
new_component_set = compute_component_set(opts[:enabled])
|
184
|
-
if new_component_set.sort != self.components.sort
|
185
|
-
begin
|
186
|
-
response = RestClient.send(:put, uri+'/components/components?handlerName=/lucid', new_component_set.to_json, :content_type => :json)
|
187
|
-
rescue => exception
|
188
|
-
JSON.parse(exception.response)['errors'].each {|e| errors[e['code']] = e['message']}
|
189
|
-
end
|
190
|
-
end
|
191
|
-
raise LucidWorks::AclConfigInvalid.new(errors) unless errors.empty?
|
192
|
-
end
|
193
163
|
end
|
194
164
|
end
|
@@ -6,7 +6,8 @@ module LucidWorks
|
|
6
6
|
self.collection_name = 'history' # i.e. not the plural 'histories'
|
7
7
|
schema do
|
8
8
|
attributes :crawl_started, :crawl_stopped, :type => :iso8601
|
9
|
-
attributes :num_updated, :num_new, :num_unchanged, :
|
9
|
+
attributes :num_updated, :num_new, :num_unchanged, :num_not_found,
|
10
|
+
:num_filter_denied, :num_access_denied, :num_robots_denied, :type => :integer
|
10
11
|
end
|
11
12
|
|
12
13
|
def doc_count
|
@@ -31,6 +31,7 @@ module LucidWorks
|
|
31
31
|
#
|
32
32
|
def frequency
|
33
33
|
case period
|
34
|
+
when 1.minute.seconds..59.minutes.seconds then 'every'
|
34
35
|
when 1.weeks.seconds then 'weekly'
|
35
36
|
when 1.days.seconds then 'daily'
|
36
37
|
when 1.hours.seconds then 'hourly'
|
@@ -44,6 +45,7 @@ module LucidWorks
|
|
44
45
|
#
|
45
46
|
def frequency=(frequency)
|
46
47
|
self.period = case frequency
|
48
|
+
when 'every' then period
|
47
49
|
when 'hourly' then 1.hours.seconds.to_i
|
48
50
|
when 'daily' then 1.days.seconds.to_i
|
49
51
|
when 'weekly' then 1.weeks.seconds.to_i
|
@@ -57,7 +59,6 @@ module LucidWorks
|
|
57
59
|
#
|
58
60
|
def next_start
|
59
61
|
return start_time if (now = Time.now) <= start_time
|
60
|
-
# require 'ruby-debug'; debugger
|
61
62
|
time_since_start = now - start_time
|
62
63
|
last_interval_num = (time_since_start / period).to_i
|
63
64
|
next_interval_num = if (time_since_start % period) == 0
|
@@ -111,8 +112,10 @@ module LucidWorks
|
|
111
112
|
self.frequency = all_attributes['frequency']
|
112
113
|
self.start_time =
|
113
114
|
case all_attributes['frequency']
|
115
|
+
when 'every'
|
116
|
+
self.period = all_attributes['period'].to_i
|
117
|
+
now.ceil(period)
|
114
118
|
when 'weekly'
|
115
|
-
# require 'ruby-debug'; debugger
|
116
119
|
start = now.beginning_of_week.advance(all_attributes['start'])
|
117
120
|
start < now ? start.advance(:weeks => 1) : start
|
118
121
|
when 'daily'
|
@@ -5,7 +5,7 @@ module LucidWorks
|
|
5
5
|
self.singleton = true
|
6
6
|
belongs_to :datasource
|
7
7
|
|
8
|
-
STOPPED_STATES = %w{ IDLE STOPPED ABORTED EXCEPTION FINISHED }
|
8
|
+
STOPPED_STATES = %w{ IDLE STOPPED ABORTED EXCEPTION FINISHED UNKNOWN }
|
9
9
|
POST_PROCESSING_STATES = %w{ STOPPING ABORTING }
|
10
10
|
CRAWLSTATES = STOPPED_STATES + [ 'RUNNING' ] + POST_PROCESSING_STATES
|
11
11
|
|
@@ -13,7 +13,8 @@ module LucidWorks
|
|
13
13
|
attribute :crawl_state, :string, :values => CRAWLSTATES
|
14
14
|
attribute :jobId
|
15
15
|
attributes :crawl_started, :crawl_stopped, :type => :iso8601
|
16
|
-
attributes :num_unchanged, :num_updated, :num_new, :num_failed, :num_deleted, :num_total,
|
16
|
+
attributes :num_unchanged, :num_updated, :num_new, :num_failed, :num_deleted, :num_total,
|
17
|
+
:num_not_found, :num_filter_denied, :num_access_denied, :num_robots_denied, :type => :integer
|
17
18
|
end
|
18
19
|
|
19
20
|
# Create predicate methods for all the crawl states
|
@@ -4,51 +4,33 @@ module LucidWorks
|
|
4
4
|
belongs_to :collection
|
5
5
|
has_many :histories, :class_name => :history, :retrieveable_en_masse => true
|
6
6
|
has_one :status, :retrieveable_en_masse => true
|
7
|
-
has_one :schedule, :crawldata
|
7
|
+
has_one :schedule, :crawldata, :mapping
|
8
8
|
has_one :index, :job, :has_content => false
|
9
9
|
|
10
|
-
TYPES = %w{ external file ftp hdfs kfs lucidworkslogs web s3 s3n smb solrxml jdbc sharepoint }
|
11
|
-
SYSTEM_TYPES = %w{ lucidworkslogs }
|
12
10
|
BOUNDS = %w{ tree none }
|
13
|
-
|
14
|
-
# Later we may change these to be arrays if we decide to support more than one choice
|
15
|
-
# e.g. :web => ['lucid.aperture', 'nutch'], :file => ['lucid.aperture', 'lucid.fs']
|
16
|
-
:file => 'lucid.aperture',
|
17
|
-
:lucidworkslogs => 'lucid.logs',
|
18
|
-
:external => 'lucid.external',
|
19
|
-
:web => 'lucid.aperture',
|
20
|
-
:solrxml => 'lucid.solrxml',
|
21
|
-
:jdbc => 'lucid.jdbc',
|
22
|
-
:sharepoint => 'lucid.gcm',
|
23
|
-
:ftp => 'lucid.fs',
|
24
|
-
:hdfs => 'lucid.fs',
|
25
|
-
:kfs => 'lucid.fs',
|
26
|
-
:smb => 'lucid.fs',
|
27
|
-
:s3n => 'lucid.fs',
|
28
|
-
:s3 => 'lucid.fs'
|
29
|
-
}.with_indifferent_access
|
30
|
-
|
11
|
+
|
31
12
|
schema do
|
32
13
|
# common
|
33
14
|
attributes :name, :crawler
|
34
|
-
attribute :type, :string
|
15
|
+
attribute :type, :string
|
35
16
|
attribute :crawl_depth, :integer, :nil_when_blank => true
|
36
17
|
attribute :max_docs, :integer
|
37
|
-
attributes :max_bytes,
|
38
|
-
attribute :
|
18
|
+
attributes :max_bytes, :commit_within, :type => :integer, :omit_when_blank => true
|
19
|
+
attribute :commit_within_sec, :custom
|
39
20
|
attribute :commit_on_finish, :boolean
|
40
|
-
attributes :include_paths, :exclude_paths, :
|
41
|
-
|
21
|
+
attributes :include_paths, :exclude_paths, :filter_follow, :filter_track, :filter_locations,
|
22
|
+
:type => :list, :separator => "\n"
|
42
23
|
attribute :bounds, :string, :values => BOUNDS
|
43
24
|
# web
|
44
25
|
attributes :url, :category
|
45
26
|
attribute :collect_links, :boolean
|
46
27
|
attribute :auth, :string, :omit_when_blank => true # Hash
|
47
|
-
|
28
|
+
attribute :proxy_host, :string, :omit_when_blank => true
|
48
29
|
attribute :proxy_port, :string, :omit_when_blank => true
|
49
30
|
attribute :proxy_username, :string, :omit_when_blank => true
|
50
31
|
attribute :proxy_password, :string, :omit_when_blank => true
|
51
32
|
attribute :ignore_robots, :boolean
|
33
|
+
attribute :add_failed_docs, :boolean
|
52
34
|
# file
|
53
35
|
attribute :path
|
54
36
|
attribute :follow_links, :boolean
|
@@ -67,11 +49,24 @@ module LucidWorks
|
|
67
49
|
attribute :password
|
68
50
|
attribute :domain
|
69
51
|
attribute :my_site_base_url, :string, :nil_when_blank => true
|
70
|
-
|
71
|
-
attribute :excluded_urls
|
52
|
+
attributes :included_urls, :excluded_urls, :type => :list, :separator => "\n"
|
72
53
|
attribute :kdcserver
|
73
54
|
attribute :use_sp_search_visibility, :boolean
|
74
55
|
attribute :aliases
|
56
|
+
attribute :feed_unpublished_documents, :boolean
|
57
|
+
attribute :push_acls, :boolean
|
58
|
+
attribute :enable_security_trimming, :boolean
|
59
|
+
attribute :username_format_in_ace
|
60
|
+
attribute :groupname_format_in_ace
|
61
|
+
attribute :ldap_server_host_address
|
62
|
+
attribute :ldap_server_port_number
|
63
|
+
attribute :ldap_server_use_ssl, :boolean
|
64
|
+
attribute :ldap_auth_type
|
65
|
+
attribute :ldap_search_base
|
66
|
+
attribute :ldap_read_ad_groups_type
|
67
|
+
attribute :ldap_cache_groups_membership, :boolean
|
68
|
+
attribute :ldap_cache_size
|
69
|
+
attribute :ldap_cache_refresh_interval
|
75
70
|
# external
|
76
71
|
attribute :source
|
77
72
|
attribute :source_type
|
@@ -97,25 +92,22 @@ module LucidWorks
|
|
97
92
|
validates_numericality_of :max_bytes, :allow_blank => true
|
98
93
|
validates_presence_of :url, :if => lambda { |d| d.type == 'web' }
|
99
94
|
|
100
|
-
#
|
101
|
-
def
|
102
|
-
commit_within.blank? ? nil : commit_within / 1.second.milliseconds
|
95
|
+
# Virtual attributes to ease UI implementation
|
96
|
+
def commit_within_sec
|
97
|
+
commit_within.blank? ? nil : commit_within / 1.second.milliseconds
|
103
98
|
end
|
104
|
-
def
|
105
|
-
self.commit_within =
|
99
|
+
def commit_within_sec=(seconds)
|
100
|
+
self.commit_within = seconds.blank? ? nil : seconds.to_i.seconds.milliseconds
|
106
101
|
end
|
107
102
|
def delete_after_days
|
108
|
-
delete_after.blank? ? nil :
|
103
|
+
delete_after.blank? ? nil : delete_after / 1.second.milliseconds / 1.day.seconds
|
109
104
|
end
|
110
105
|
def delete_after_days=(days)
|
111
106
|
self.delete_after = days.blank? ? nil : days.to_i.days.milliseconds
|
112
107
|
end
|
113
108
|
|
114
109
|
def document_count
|
115
|
-
collection.
|
116
|
-
:wt => :ruby,
|
117
|
-
:q => (type == 'lucidworkslogs' ? "*:*" : "data_source:#{id}"),
|
118
|
-
:rows => 0)['response']['numFound']
|
110
|
+
collection.search(:wt => :ruby, :q => (type == 'lucidworkslogs' ? "*:*" : "data_source:#{id}"), :rows => 0)['response']['numFound']
|
119
111
|
end
|
120
112
|
|
121
113
|
def empty!
|
@@ -132,11 +124,6 @@ module LucidWorks
|
|
132
124
|
!collection.system?
|
133
125
|
end
|
134
126
|
|
135
|
-
def crawlable?
|
136
|
-
# Don't let user schedule crawl of external datasources
|
137
|
-
!%w{ external }.include?(type)
|
138
|
-
end
|
139
|
-
|
140
127
|
def start_crawl!
|
141
128
|
build_job.save
|
142
129
|
end
|
@@ -145,10 +132,6 @@ module LucidWorks
|
|
145
132
|
build_job.destroy
|
146
133
|
end
|
147
134
|
|
148
|
-
def t_type
|
149
|
-
I18n.t(type, :scope => 'activemodel.models.lucid_works.datasource.type')
|
150
|
-
end
|
151
|
-
|
152
135
|
def progress
|
153
136
|
return nil if status.stopped?
|
154
137
|
return nil unless histories.size > 0
|
@@ -5,6 +5,7 @@ module LucidWorks
|
|
5
5
|
attr_reader :description, :name, :allowed_values, :type, :default_value, :required, :read_only, :advanced
|
6
6
|
alias :read_only? :read_only
|
7
7
|
alias :advanced? :advanced
|
8
|
+
alias :required? :required
|
8
9
|
|
9
10
|
def initialize(attributes = {})
|
10
11
|
@description = attributes['description']
|
@@ -14,7 +15,7 @@ module LucidWorks
|
|
14
15
|
@default_value = attributes['default_value']
|
15
16
|
@required = attributes['required']
|
16
17
|
@read_only = attributes['read_only']
|
17
|
-
@advanced = attributes['advanced'
|
18
|
+
@advanced = attributes['hints'].include? 'advanced' rescue false
|
18
19
|
end
|
19
20
|
end
|
20
21
|
end
|
@@ -8,6 +8,8 @@ module LucidWorks
|
|
8
8
|
"---" # Separator UI hint
|
9
9
|
]
|
10
10
|
|
11
|
+
SYSTEM_TYPES = %w{ lucidworkslogs }
|
12
|
+
|
11
13
|
attr_reader :crawler, :category, :type, :props
|
12
14
|
|
13
15
|
def initialize(crawler, attributes = {})
|
@@ -17,9 +19,21 @@ module LucidWorks
|
|
17
19
|
@props = attributes['props']
|
18
20
|
end
|
19
21
|
|
22
|
+
def system?
|
23
|
+
SYSTEM_TYPES.include? type
|
24
|
+
end
|
25
|
+
|
20
26
|
def properties
|
21
27
|
@properties ||= @props.map do |prop|
|
22
28
|
DatasourceProperty.new(prop)
|
29
|
+
end.tap do |properties|
|
30
|
+
def properties.regular
|
31
|
+
select {|p| !p.advanced? }
|
32
|
+
end
|
33
|
+
|
34
|
+
def properties.advanced
|
35
|
+
select &:advanced?
|
36
|
+
end
|
23
37
|
end
|
24
38
|
end
|
25
39
|
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'lucid_works/field_commons'
|
2
|
+
|
3
|
+
module LucidWorks
|
4
|
+
|
5
|
+
class Dynamicfield < Base
|
6
|
+
include LucidWorks::FieldCommons
|
7
|
+
|
8
|
+
validates_each :name, :unless => :persisted?, :allow_blank => true do |model, attr, value|
|
9
|
+
model.errors.add(attr, 'must be unique') if model.collection.dynamicfields.any? {|f| f.name == value }
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|