lucid_works 0.7.18 → 0.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. data/.rvmrc +2 -3
  2. data/Gemfile +2 -8
  3. data/Gemfile.lock +45 -53
  4. data/README.rdoc +2 -6
  5. data/Rakefile +1 -1
  6. data/config/locales/en.yml +221 -239
  7. data/lib/lucid_works/activity.rb +8 -5
  8. data/lib/lucid_works/base.rb +27 -16
  9. data/lib/lucid_works/cache.rb +13 -0
  10. data/lib/lucid_works/cluster.rb +84 -0
  11. data/lib/lucid_works/collection/settings.rb +15 -6
  12. data/lib/lucid_works/collection.rb +62 -92
  13. data/lib/lucid_works/datasource/history.rb +2 -1
  14. data/lib/lucid_works/datasource/mapping.rb +12 -0
  15. data/lib/lucid_works/datasource/schedule.rb +5 -2
  16. data/lib/lucid_works/datasource/status.rb +3 -2
  17. data/lib/lucid_works/datasource.rb +31 -48
  18. data/lib/lucid_works/datasource_property.rb +2 -1
  19. data/lib/lucid_works/datasource_type.rb +14 -0
  20. data/lib/lucid_works/dynamicfield.rb +12 -0
  21. data/lib/lucid_works/elevation.rb +93 -0
  22. data/lib/lucid_works/exceptions.rb +0 -4
  23. data/lib/lucid_works/field.rb +31 -111
  24. data/lib/lucid_works/field_commons.rb +133 -0
  25. data/lib/lucid_works/gem_version.rb +1 -1
  26. data/lib/lucid_works/inflections.rb +3 -0
  27. data/lib/lucid_works/patch_time.rb +4 -0
  28. data/lib/lucid_works/request_handler.rb +16 -0
  29. data/lib/lucid_works/role.rb +23 -8
  30. data/lib/lucid_works/schema/attribute.rb +1 -1
  31. data/lib/lucid_works/schema/boolean_attribute.rb +1 -1
  32. data/lib/lucid_works/schema/integer_attribute.rb +3 -4
  33. data/lib/lucid_works/server/crawlers_status.rb +15 -0
  34. data/lib/lucid_works/server.rb +35 -14
  35. data/lib/lucid_works/simple_naming.rb +1 -7
  36. data/lib/lucid_works/synonym.rb +1 -1
  37. data/lib/lucid_works/version.rb +1 -0
  38. data/lib/lucid_works.rb +8 -1
  39. data/lucid_works.gemspec +8 -9
  40. data/spec/fixtures/zookeeper/clusterstate.json +30 -0
  41. data/spec/fixtures/zookeeper/clusterstate_broken_shard.json +29 -0
  42. data/spec/fixtures/zookeeper/live_nodes.json +28 -0
  43. data/spec/fixtures/zookeeper/live_nodes_no_children.json +26 -0
  44. data/spec/fixtures/zookeeper/live_nodes_one_child.json +36 -0
  45. data/spec/lib/lucid_works/base_spec.rb +33 -24
  46. data/spec/lib/lucid_works/cache_spec.rb +44 -0
  47. data/spec/lib/lucid_works/cluster_spec.rb +109 -0
  48. data/spec/lib/lucid_works/collection/activity_spec.rb +29 -0
  49. data/spec/lib/lucid_works/collection/prime_activities_spec.rb +1 -1
  50. data/spec/lib/lucid_works/collection/settings_spec.rb +31 -0
  51. data/spec/lib/lucid_works/collection_spec.rb +166 -107
  52. data/spec/lib/lucid_works/datasource/schedule_spec.rb +75 -46
  53. data/spec/lib/lucid_works/datasource/status_spec.rb +5 -5
  54. data/spec/lib/lucid_works/datasource_property_spec.rb +41 -0
  55. data/spec/lib/lucid_works/datasource_spec.rb +40 -12
  56. data/spec/lib/lucid_works/datasource_type_spec.rb +31 -0
  57. data/spec/lib/lucid_works/dynamicfield_spec.rb +214 -0
  58. data/spec/lib/lucid_works/elevation_spec.rb +175 -0
  59. data/spec/lib/lucid_works/field_spec.rb +52 -21
  60. data/spec/lib/lucid_works/fieldtype_spec.rb +0 -1
  61. data/spec/lib/lucid_works/request_handler_spec.rb +11 -0
  62. data/spec/lib/lucid_works/role_spec.rb +77 -0
  63. data/spec/lib/lucid_works/server/crawlers_status_spec.rb +21 -0
  64. data/spec/lib/lucid_works/server_spec.rb +123 -22
  65. data/spec/lib/lucid_works/{collection/synonym_spec.rb → synonym_spec.rb} +23 -22
  66. data/spec/lib/lucid_works/version_spec.rb +6 -0
  67. metadata +132 -64
  68. data/spec/lib/lucid_works/collection/acl_config_spec.rb +0 -212
@@ -0,0 +1,13 @@
1
+ module LucidWorks
2
+ class Cache < Base
3
+ belongs_to :collection
4
+
5
+ schema do
6
+ dynamic_attributes true
7
+ attribute :name, :string, :primary_key => true, :omit_during_update => true
8
+ end
9
+
10
+ validates_presence_of :size
11
+ validates_numericality_of :initial_size, :size, :only_integer => true, :greater_than_or_equal_to => 0, :allow_blank => true, :message => 'is not a valid value'
12
+ end
13
+ end
@@ -0,0 +1,84 @@
1
+ require 'uri'
2
+
3
+ module LucidWorks
4
+ class Cluster
5
+ class Node
6
+ include ActiveModel::Serializers::JSON
7
+ self.include_root_in_json = false
8
+
9
+ attr_reader :id, :cluster, :host, :port, :state, :solr_url, :api_url
10
+ attr_accessor :shards
11
+
12
+ def initialize(cluster, solr_url, state)
13
+ @cluster, @solr_url, @state = cluster, solr_url, state
14
+ uri = URI(solr_url)
15
+ @host, @port = uri.host, uri.port
16
+ @id = "#{host}_#{port}"
17
+ @api_url = solr_url.sub uri.path, '/api'
18
+ @shards = []
19
+ end
20
+
21
+ def as_json(options = {})
22
+ {'id' => id, 'host' => host, 'port' => port, 'state' => state, 'solr_url' => solr_url, 'api_url' => api_url, 'shards' => shards}
23
+ end
24
+ end
25
+
26
+ class Shard
27
+ include ActiveModel::Serializers::JSON
28
+ self.include_root_in_json = false
29
+
30
+ attr_reader :id, :node, :name, :collection, :leader
31
+ alias :leader? :leader
32
+
33
+ def initialize(node, name, collection, leader)
34
+ @id = "#{collection}_#{name}"
35
+ @node = node
36
+ @name = name
37
+ @collection = collection
38
+ @leader = leader
39
+ end
40
+
41
+ def as_json(options = {})
42
+ {'name' => name, 'collection' => collection, 'leader' => leader}
43
+ end
44
+
45
+ def ==(other)
46
+ node == other.node && name == other.name && collection == other.collection
47
+ end
48
+ end
49
+
50
+ attr_reader :nodes
51
+
52
+ def initialize(solr_uri)
53
+ zk_uri = "#{solr_uri}/zookeeper?detail=true&path="
54
+ @zk_clusterstate = JSON.parse(JSON.parse(RestClient.get(zk_uri + "/clusterstate.json"))['znode']['data'])
55
+ @zk_live_nodes = JSON.parse(RestClient.get(zk_uri + "/live_nodes/"))['tree'].first['children'].
56
+ map {|child| child['data']['title'] } rescue []
57
+
58
+ @nodes = []
59
+ @zk_clusterstate.each do |slice|
60
+ slice[1]['shards'].each do |shard_name, shard_description|
61
+ unless shard_description.empty? # A shard's description can come back as empty when the shard is in trouble
62
+ shard_description['replicas'].values.each do |node_json|
63
+ node = nodes.find {|node| node.solr_url == node_json['base_url'] }
64
+ nodes << (node = Node.new(self, node_json['base_url'], determine_node_state(node_json))) unless node
65
+ new_shard = Shard.new(node, node_json['shard'], node_json['collection'], node_json['leader'] == 'true')
66
+ node.shards << new_shard unless node.shards.include?(new_shard)
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end
72
+
73
+ private
74
+
75
+ # State can be "live", "down", "recovering", "recovery_failed"
76
+ def determine_node_state(node_json)
77
+ if @zk_live_nodes.include?(node_json['node_name'])
78
+ node_json['state'] == 'active' ? 'live' : node_json['state']
79
+ else
80
+ 'down'
81
+ end
82
+ end
83
+ end
84
+ end
@@ -14,16 +14,17 @@ module LucidWorks
14
14
 
15
15
  schema do
16
16
  # Indexing Settings
17
- attribute :unknown_type_handling, :string
18
- attribute :de_duplication, :string, :values => DEDUP_OPTIONS
19
-
17
+ attribute :unknown_type_handling, :string, :nil_when_blank => true
18
+ attribute :de_duplication, :string, :values => DEDUP_OPTIONS
19
+ attributes :update_handler_autocommit_max_docs, :update_handler_autocommit_max_time,
20
+ :update_handler_autosoftcommit_max_docs, :update_handler_autosoftcommit_max_time
21
+
20
22
  # Querying Settings
21
23
  attribute :unsupervised_feedback_emphasis, :string, :values => FEEDBACK_EMPHASIS
22
24
  attribute :default_sort, :string, :values => DEFAULT_SORTS
23
25
  attribute :query_parser, :string, :values => QUERY_PARSERS
24
26
  attributes :spellcheck, :display_facets, :unsupervised_feedback, :query_time_stopwords,
25
- :auto_complete, :boost_recent, :show_similar, :query_time_synonyms,
26
- :type => :boolean
27
+ :auto_complete, :boost_recent, :show_similar, :query_time_synonyms, :type => :boolean
27
28
  attributes :stopword_list, :boosts, :synonym_list # Arrays
28
29
 
29
30
  # Click Settings
@@ -33,10 +34,18 @@ module LucidWorks
33
34
  # Other Settings
34
35
  attribute :ssl, :boolean
35
36
  attribute :elevations # Hash
37
+ attributes :main_index_ram_buffer_size_mb, :main_index_merge_factor
36
38
 
37
- # Distrubuted Search Settings
39
+ # Distributed Search Settings
38
40
  attributes :search_server_list, :update_server_list # Arrays
39
41
  end
42
+
43
+ validates_numericality_of :main_index_ram_buffer_size_mb, :main_index_merge_factor,
44
+ :allow_blank => true, :message => 'is not a valid value'
45
+ validates_numericality_of :update_handler_autocommit_max_docs, :update_handler_autocommit_max_time,
46
+ :update_handler_autosoftcommit_max_docs, :update_handler_autosoftcommit_max_time,
47
+ :only_integer => true, :greater_than => 0, :allow_blank => true,
48
+ :message => 'is not a valid value'
40
49
  end
41
50
  end
42
51
  end
@@ -3,27 +3,30 @@ module LucidWorks
3
3
  class Collection < Base
4
4
 
5
5
  belongs_to :server
6
- has_many :datasources, :fields, :fieldtypes, :activities, :roles
6
+ has_many :datasources, :fields, :fieldtypes, :activities, :roles, :caches, :dynamicfields
7
7
  has_one :info, :settings, :click
8
8
  has_one :index, :has_content => false
9
-
9
+
10
10
  schema do
11
11
  attribute :name, :string, :primary_key => true
12
12
  attribute :instance_dir
13
13
  attribute :template
14
+ attribute :num_shards, :integer, :omit_when_blank => true
14
15
  end
15
16
 
16
17
  LOGS_COLLECTION_NAME = 'LucidWorksLogs'
17
- AD_FILTERING = 'adfiltering'
18
- ROLE_FILTERING = 'filterbyrole'
19
- STATIC_ACL_CONFIG = {
20
- "filterer.class" => "com.lucid.security.WindowsACLQueryFilterer",
21
- "provider.class" => "com.lucid.security.ad.ADACLTagProvider",
22
- }
23
- MAGIC_ACL_ONLY_FILTER_SETTING = {'should_clause' => '*:* -data_source_type:smb'}
24
18
 
25
19
  validates_presence_of :name
26
-
20
+ validates_presence_of :num_shards, :if => lambda {|c| c.server.clustered? }
21
+ validates_numericality_of :num_shards, :only_integer => true, :greater_than => 0, :allow_blank => true, :message => 'is not a valid value'
22
+
23
+ def update_caches_attributes(attributes)
24
+ attributes = attributes.with_indifferent_access
25
+ caches.each {|cache| cache.attributes = attributes[cache.name] if attributes.include?(cache.name) }
26
+ return false unless caches.all? &:valid?
27
+ caches.all? &:save
28
+ end
29
+
27
30
  def destroyable?
28
31
  true
29
32
  end
@@ -36,43 +39,35 @@ module LucidWorks
36
39
  def empty!
37
40
  build_index.destroy(:params => {:key => 'iaccepttherisk'})
38
41
  end
39
-
40
- # Sometimes we want to use the raw Rsolr class, instead of Rsolr::Ext
41
- def rsolr
42
- unless @rsolr
43
- server_uri = self.server.host
44
- @path_prefix = URI.parse(server_uri).path # The API key
45
- @rsolr = RSolr.connect :url => server_uri.dup
46
- end
47
- @rsolr
42
+
43
+ def commit
44
+ solr.commit
48
45
  end
49
-
50
- def rsolr_ext
51
- unless @rsolr_ext
52
- server_uri = self.server.host
53
- @path_prefix = URI.parse(server_uri).path # The API key
54
- @rsolr_ext = RSolr::Ext.connect :url => server_uri.dup
46
+
47
+ def search(solr_params, options = {})
48
+ ActiveSupport::Notifications.instrument('solr.lucid_works') do |payload|
49
+ path = options[:path] || 'select'
50
+ begin
51
+ result = if options[:page] && options[:per_page]
52
+ # Constructing the request here instead of using result.request[:uri] to avoid losing it on RSolr::Error::Http errors
53
+ payload[:request] = solr.build_paginated_request(options[:page], options[:per_page], path, :params => solr_params)[:uri].to_s
54
+ solr.paginate(options[:page], options[:per_page], path, :params => solr_params)
55
+ else
56
+ # Constructing the request here instead of using result.request[:uri] to avoid losing it on RSolr::Error::Http errors
57
+ payload[:request] = solr.build_request(path, :params => solr_params)[:uri].to_s
58
+ solr.get(path, :params => solr_params)
59
+ end
60
+ payload[:status] = result.response[:status]
61
+ result
62
+ rescue RSolr::Error::Http => error
63
+ payload[:status] = error.response[:status]
64
+ raise
65
+ end
55
66
  end
56
- @rsolr_ext
57
67
  end
58
68
 
59
- # Lower level access to rsolr, for things like autocomplete queries
60
- def rsolr_get(path_suffix, search_params)
61
- rsolr.get "#{@path_prefix}/solr/#{name}/#{path_suffix}", :params => search_params
62
- end
63
-
64
- # Perform a Solr search using RSolr
65
- def search(search_params={})
66
- search_params[:page] ||= 1
67
- search_params[:per_page] ||= 10
68
- resp = rsolr_ext.find "#{@path_prefix}/solr/#{name}/select", search_params
69
- if search_params[:wt] == :xml
70
- data = Nokogiri.XML(resp)
71
- raise "search received bad XML" unless data.root
72
- else
73
- data = resp
74
- end
75
- data
69
+ def document_count
70
+ solr.get('select', :params => {:wt => :ruby, :q => "*:*", :rows => 0})['response']['numFound']
76
71
  end
77
72
 
78
73
  def synonyms
@@ -85,6 +80,22 @@ module LucidWorks
85
80
  Synonym.new(attributes.merge(:collection => self))
86
81
  end
87
82
 
83
+ def elevations
84
+ elevations = []
85
+ settings.elevations.each do |query, elevation_entries|
86
+ elevation_entries.each do |entry|
87
+ elevations << Elevation.new(
88
+ :doc_id => entry['doc'], :collection => self, :query => query, :excluded => entry['exclude'], :persisted => true
89
+ )
90
+ end
91
+ end
92
+ elevations
93
+ end
94
+
95
+ def build_elevation(attributes = {})
96
+ Elevation.new(attributes.merge(:collection => self))
97
+ end
98
+
88
99
  def prime_activities
89
100
  self.activities!.sort!{|a,b|a.id <=> b.id}
90
101
  num_created = 0
@@ -115,7 +126,7 @@ module LucidWorks
115
126
 
116
127
  # URL of Solr's build-in admin page
117
128
  def admin_url
118
- "#{server.host}/solr/#{name}/admin/"
129
+ "#{server.server_uri}/solr/#{name}/admin/"
119
130
  end
120
131
 
121
132
  # We have to handle the jdbcdrivers model in a custom way, as a GET on .../jdbcdrivers returns a list of strings,
@@ -136,59 +147,18 @@ module LucidWorks
136
147
  JSON.parse(RestClient.get(uri + "/components/all.json?handlerName=%2Flucid"))
137
148
  end
138
149
 
139
- def filtering_enabled?
140
- # current core implementation requires exactly one of filterbyrole or adfiltering
141
- assert_components_include_ad_xor_role
142
- return self.components.include?(AD_FILTERING)
143
- end
144
-
145
- def acl_only?
146
- filterer_config = self.filtering_settings['filterer.config']
147
- return false if filterer_config.nil?
148
- return self.filtering_settings['filterer.config'] != MAGIC_ACL_ONLY_FILTER_SETTING #rescue false
150
+ def available_templates
151
+ JSON.parse(RestClient.get(server.uri + "/collectiontemplates"))
149
152
  end
150
153
 
151
- def assert_components_include_ad_xor_role
152
- # require 'ruby-debug'; debugger
153
- raise "conflicting filtering components" if self.components.include?(AD_FILTERING) && self.components.include?(ROLE_FILTERING)
154
- raise "missing filtering components" if ! self.components.include?(AD_FILTERING) && ! self.components.include?(ROLE_FILTERING)
154
+ def request_handler(name)
155
+ RequestHandler.new(self, name)
155
156
  end
156
157
 
157
- def filtering_settings
158
- JSON.parse(RestClient.get(uri + "/filtering"))['adfiltering'] || {}
159
- end
158
+ protected
160
159
 
161
- def compute_component_set(acl_filtering_enabled)
162
- new_component_set = self.components.clone
163
- new_component_set.delete(ROLE_FILTERING)
164
- new_component_set.delete(AD_FILTERING)
165
- new_component_set.unshift acl_filtering_enabled == 'true' ? AD_FILTERING : ROLE_FILTERING
166
- end
167
-
168
- def configure_filtering(opts)
169
- if ! opts[:config]['java.naming.provider.url'].blank? && opts[:config]['java.naming.provider.url'] !~ %r(://)
170
- opts[:config]['java.naming.provider.url'] = "ldap://#{opts[:config]['java.naming.provider.url']}"
160
+ def solr
161
+ @solr ||= RSolr.connect(:url => "#{server.server_uri}/solr/#{name}")
171
162
  end
172
- filtering_settings = STATIC_ACL_CONFIG.merge('provider.config' => opts[:config])
173
- filtering_settings["filterer.config"] = opts[:acl_only] ? {} : MAGIC_ACL_ONLY_FILTER_SETTING
174
-
175
- errors = {}
176
- method = RestClient.send(:get, uri+'/filtering')['adfiltering'] ? :put : :post
177
- begin
178
- response = RestClient.send(method, uri+'/filtering/adfiltering', filtering_settings.to_json, :content_type => :json)
179
- rescue => exception
180
- JSON.parse(exception.response)['errors'].each {|e| errors[e['code']] = e['message']}
181
- end
182
-
183
- new_component_set = compute_component_set(opts[:enabled])
184
- if new_component_set.sort != self.components.sort
185
- begin
186
- response = RestClient.send(:put, uri+'/components/components?handlerName=/lucid', new_component_set.to_json, :content_type => :json)
187
- rescue => exception
188
- JSON.parse(exception.response)['errors'].each {|e| errors[e['code']] = e['message']}
189
- end
190
- end
191
- raise LucidWorks::AclConfigInvalid.new(errors) unless errors.empty?
192
- end
193
163
  end
194
164
  end
@@ -6,7 +6,8 @@ module LucidWorks
6
6
  self.collection_name = 'history' # i.e. not the plural 'histories'
7
7
  schema do
8
8
  attributes :crawl_started, :crawl_stopped, :type => :iso8601
9
- attributes :num_updated, :num_new, :num_unchanged, :type => :integer
9
+ attributes :num_updated, :num_new, :num_unchanged, :num_not_found,
10
+ :num_filter_denied, :num_access_denied, :num_robots_denied, :type => :integer
10
11
  end
11
12
 
12
13
  def doc_count
@@ -0,0 +1,12 @@
1
+ module LucidWorks
2
+ class Datasource
3
+ class Mapping < Base
4
+ self.singleton = true
5
+ belongs_to :datasource
6
+
7
+ schema do
8
+ attribute :datasource_field, :string, :omit_when_blank => true
9
+ end
10
+ end
11
+ end
12
+ end
@@ -31,6 +31,7 @@ module LucidWorks
31
31
  #
32
32
  def frequency
33
33
  case period
34
+ when 1.minute.seconds..59.minutes.seconds then 'every'
34
35
  when 1.weeks.seconds then 'weekly'
35
36
  when 1.days.seconds then 'daily'
36
37
  when 1.hours.seconds then 'hourly'
@@ -44,6 +45,7 @@ module LucidWorks
44
45
  #
45
46
  def frequency=(frequency)
46
47
  self.period = case frequency
48
+ when 'every' then period
47
49
  when 'hourly' then 1.hours.seconds.to_i
48
50
  when 'daily' then 1.days.seconds.to_i
49
51
  when 'weekly' then 1.weeks.seconds.to_i
@@ -57,7 +59,6 @@ module LucidWorks
57
59
  #
58
60
  def next_start
59
61
  return start_time if (now = Time.now) <= start_time
60
- # require 'ruby-debug'; debugger
61
62
  time_since_start = now - start_time
62
63
  last_interval_num = (time_since_start / period).to_i
63
64
  next_interval_num = if (time_since_start % period) == 0
@@ -111,8 +112,10 @@ module LucidWorks
111
112
  self.frequency = all_attributes['frequency']
112
113
  self.start_time =
113
114
  case all_attributes['frequency']
115
+ when 'every'
116
+ self.period = all_attributes['period'].to_i
117
+ now.ceil(period)
114
118
  when 'weekly'
115
- # require 'ruby-debug'; debugger
116
119
  start = now.beginning_of_week.advance(all_attributes['start'])
117
120
  start < now ? start.advance(:weeks => 1) : start
118
121
  when 'daily'
@@ -5,7 +5,7 @@ module LucidWorks
5
5
  self.singleton = true
6
6
  belongs_to :datasource
7
7
 
8
- STOPPED_STATES = %w{ IDLE STOPPED ABORTED EXCEPTION FINISHED }
8
+ STOPPED_STATES = %w{ IDLE STOPPED ABORTED EXCEPTION FINISHED UNKNOWN }
9
9
  POST_PROCESSING_STATES = %w{ STOPPING ABORTING }
10
10
  CRAWLSTATES = STOPPED_STATES + [ 'RUNNING' ] + POST_PROCESSING_STATES
11
11
 
@@ -13,7 +13,8 @@ module LucidWorks
13
13
  attribute :crawl_state, :string, :values => CRAWLSTATES
14
14
  attribute :jobId
15
15
  attributes :crawl_started, :crawl_stopped, :type => :iso8601
16
- attributes :num_unchanged, :num_updated, :num_new, :num_failed, :num_deleted, :num_total, :type => :integer
16
+ attributes :num_unchanged, :num_updated, :num_new, :num_failed, :num_deleted, :num_total,
17
+ :num_not_found, :num_filter_denied, :num_access_denied, :num_robots_denied, :type => :integer
17
18
  end
18
19
 
19
20
  # Create predicate methods for all the crawl states
@@ -4,51 +4,33 @@ module LucidWorks
4
4
  belongs_to :collection
5
5
  has_many :histories, :class_name => :history, :retrieveable_en_masse => true
6
6
  has_one :status, :retrieveable_en_masse => true
7
- has_one :schedule, :crawldata
7
+ has_one :schedule, :crawldata, :mapping
8
8
  has_one :index, :job, :has_content => false
9
9
 
10
- TYPES = %w{ external file ftp hdfs kfs lucidworkslogs web s3 s3n smb solrxml jdbc sharepoint }
11
- SYSTEM_TYPES = %w{ lucidworkslogs }
12
10
  BOUNDS = %w{ tree none }
13
- CRAWLERS = {
14
- # Later we may change these to be arrays if we decide to support more than one choice
15
- # e.g. :web => ['lucid.aperture', 'nutch'], :file => ['lucid.aperture', 'lucid.fs']
16
- :file => 'lucid.aperture',
17
- :lucidworkslogs => 'lucid.logs',
18
- :external => 'lucid.external',
19
- :web => 'lucid.aperture',
20
- :solrxml => 'lucid.solrxml',
21
- :jdbc => 'lucid.jdbc',
22
- :sharepoint => 'lucid.gcm',
23
- :ftp => 'lucid.fs',
24
- :hdfs => 'lucid.fs',
25
- :kfs => 'lucid.fs',
26
- :smb => 'lucid.fs',
27
- :s3n => 'lucid.fs',
28
- :s3 => 'lucid.fs'
29
- }.with_indifferent_access
30
-
11
+
31
12
  schema do
32
13
  # common
33
14
  attributes :name, :crawler
34
- attribute :type, :string, :values => TYPES
15
+ attribute :type, :string
35
16
  attribute :crawl_depth, :integer, :nil_when_blank => true
36
17
  attribute :max_docs, :integer
37
- attributes :max_bytes, :commit_within, :type => :integer, :omit_when_blank => true
38
- attribute :commit_within_min, :custom
18
+ attributes :max_bytes, :commit_within, :type => :integer, :omit_when_blank => true
19
+ attribute :commit_within_sec, :custom
39
20
  attribute :commit_on_finish, :boolean
40
- attributes :include_paths, :exclude_paths, :type => :list, :separator => "\n"
41
- attribute :mapping, :string, :omit_when_blank => true # Hash
21
+ attributes :include_paths, :exclude_paths, :filter_follow, :filter_track, :filter_locations,
22
+ :type => :list, :separator => "\n"
42
23
  attribute :bounds, :string, :values => BOUNDS
43
24
  # web
44
25
  attributes :url, :category
45
26
  attribute :collect_links, :boolean
46
27
  attribute :auth, :string, :omit_when_blank => true # Hash
47
- attributes :proxy_host, :string, :omit_when_blank => true
28
+ attribute :proxy_host, :string, :omit_when_blank => true
48
29
  attribute :proxy_port, :string, :omit_when_blank => true
49
30
  attribute :proxy_username, :string, :omit_when_blank => true
50
31
  attribute :proxy_password, :string, :omit_when_blank => true
51
32
  attribute :ignore_robots, :boolean
33
+ attribute :add_failed_docs, :boolean
52
34
  # file
53
35
  attribute :path
54
36
  attribute :follow_links, :boolean
@@ -67,11 +49,24 @@ module LucidWorks
67
49
  attribute :password
68
50
  attribute :domain
69
51
  attribute :my_site_base_url, :string, :nil_when_blank => true
70
- attribute :included_urls
71
- attribute :excluded_urls
52
+ attributes :included_urls, :excluded_urls, :type => :list, :separator => "\n"
72
53
  attribute :kdcserver
73
54
  attribute :use_sp_search_visibility, :boolean
74
55
  attribute :aliases
56
+ attribute :feed_unpublished_documents, :boolean
57
+ attribute :push_acls, :boolean
58
+ attribute :enable_security_trimming, :boolean
59
+ attribute :username_format_in_ace
60
+ attribute :groupname_format_in_ace
61
+ attribute :ldap_server_host_address
62
+ attribute :ldap_server_port_number
63
+ attribute :ldap_server_use_ssl, :boolean
64
+ attribute :ldap_auth_type
65
+ attribute :ldap_search_base
66
+ attribute :ldap_read_ad_groups_type
67
+ attribute :ldap_cache_groups_membership, :boolean
68
+ attribute :ldap_cache_size
69
+ attribute :ldap_cache_refresh_interval
75
70
  # external
76
71
  attribute :source
77
72
  attribute :source_type
@@ -97,25 +92,22 @@ module LucidWorks
97
92
  validates_numericality_of :max_bytes, :allow_blank => true
98
93
  validates_presence_of :url, :if => lambda { |d| d.type == 'web' }
99
94
 
100
- # Fake attributes to ease UI implementation
101
- def commit_within_min
102
- commit_within.blank? ? nil : commit_within / 1.second.milliseconds / 1.minute.seconds
95
+ # Virtual attributes to ease UI implementation
96
+ def commit_within_sec
97
+ commit_within.blank? ? nil : commit_within / 1.second.milliseconds
103
98
  end
104
- def commit_within_min=(mins)
105
- self.commit_within = mins.blank? ? nil : mins.to_i.minutes.milliseconds
99
+ def commit_within_sec=(seconds)
100
+ self.commit_within = seconds.blank? ? nil : seconds.to_i.seconds.milliseconds
106
101
  end
107
102
  def delete_after_days
108
- delete_after.blank? ? nil : deleteAfter / 1.second.milliseconds / 1.day.seconds
103
+ delete_after.blank? ? nil : delete_after / 1.second.milliseconds / 1.day.seconds
109
104
  end
110
105
  def delete_after_days=(days)
111
106
  self.delete_after = days.blank? ? nil : days.to_i.days.milliseconds
112
107
  end
113
108
 
114
109
  def document_count
115
- collection.rsolr_get(:select,
116
- :wt => :ruby,
117
- :q => (type == 'lucidworkslogs' ? "*:*" : "data_source:#{id}"),
118
- :rows => 0)['response']['numFound']
110
+ collection.search(:wt => :ruby, :q => (type == 'lucidworkslogs' ? "*:*" : "data_source:#{id}"), :rows => 0)['response']['numFound']
119
111
  end
120
112
 
121
113
  def empty!
@@ -132,11 +124,6 @@ module LucidWorks
132
124
  !collection.system?
133
125
  end
134
126
 
135
- def crawlable?
136
- # Don't let user schedule crawl of external datasources
137
- !%w{ external }.include?(type)
138
- end
139
-
140
127
  def start_crawl!
141
128
  build_job.save
142
129
  end
@@ -145,10 +132,6 @@ module LucidWorks
145
132
  build_job.destroy
146
133
  end
147
134
 
148
- def t_type
149
- I18n.t(type, :scope => 'activemodel.models.lucid_works.datasource.type')
150
- end
151
-
152
135
  def progress
153
136
  return nil if status.stopped?
154
137
  return nil unless histories.size > 0
@@ -5,6 +5,7 @@ module LucidWorks
5
5
  attr_reader :description, :name, :allowed_values, :type, :default_value, :required, :read_only, :advanced
6
6
  alias :read_only? :read_only
7
7
  alias :advanced? :advanced
8
+ alias :required? :required
8
9
 
9
10
  def initialize(attributes = {})
10
11
  @description = attributes['description']
@@ -14,7 +15,7 @@ module LucidWorks
14
15
  @default_value = attributes['default_value']
15
16
  @required = attributes['required']
16
17
  @read_only = attributes['read_only']
17
- @advanced = attributes['advanced']
18
+ @advanced = attributes['hints'].include? 'advanced' rescue false
18
19
  end
19
20
  end
20
21
  end
@@ -8,6 +8,8 @@ module LucidWorks
8
8
  "---" # Separator UI hint
9
9
  ]
10
10
 
11
+ SYSTEM_TYPES = %w{ lucidworkslogs }
12
+
11
13
  attr_reader :crawler, :category, :type, :props
12
14
 
13
15
  def initialize(crawler, attributes = {})
@@ -17,9 +19,21 @@ module LucidWorks
17
19
  @props = attributes['props']
18
20
  end
19
21
 
22
+ def system?
23
+ SYSTEM_TYPES.include? type
24
+ end
25
+
20
26
  def properties
21
27
  @properties ||= @props.map do |prop|
22
28
  DatasourceProperty.new(prop)
29
+ end.tap do |properties|
30
+ def properties.regular
31
+ select {|p| !p.advanced? }
32
+ end
33
+
34
+ def properties.advanced
35
+ select &:advanced?
36
+ end
23
37
  end
24
38
  end
25
39
 
@@ -0,0 +1,12 @@
1
+ require 'lucid_works/field_commons'
2
+
3
+ module LucidWorks
4
+
5
+ class Dynamicfield < Base
6
+ include LucidWorks::FieldCommons
7
+
8
+ validates_each :name, :unless => :persisted?, :allow_blank => true do |model, attr, value|
9
+ model.errors.add(attr, 'must be unique') if model.collection.dynamicfields.any? {|f| f.name == value }
10
+ end
11
+ end
12
+ end