lucid_works 0.7.18 → 0.9.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68)
  1. data/.rvmrc +2 -3
  2. data/Gemfile +2 -8
  3. data/Gemfile.lock +45 -53
  4. data/README.rdoc +2 -6
  5. data/Rakefile +1 -1
  6. data/config/locales/en.yml +221 -239
  7. data/lib/lucid_works/activity.rb +8 -5
  8. data/lib/lucid_works/base.rb +27 -16
  9. data/lib/lucid_works/cache.rb +13 -0
  10. data/lib/lucid_works/cluster.rb +84 -0
  11. data/lib/lucid_works/collection/settings.rb +15 -6
  12. data/lib/lucid_works/collection.rb +62 -92
  13. data/lib/lucid_works/datasource/history.rb +2 -1
  14. data/lib/lucid_works/datasource/mapping.rb +12 -0
  15. data/lib/lucid_works/datasource/schedule.rb +5 -2
  16. data/lib/lucid_works/datasource/status.rb +3 -2
  17. data/lib/lucid_works/datasource.rb +31 -48
  18. data/lib/lucid_works/datasource_property.rb +2 -1
  19. data/lib/lucid_works/datasource_type.rb +14 -0
  20. data/lib/lucid_works/dynamicfield.rb +12 -0
  21. data/lib/lucid_works/elevation.rb +93 -0
  22. data/lib/lucid_works/exceptions.rb +0 -4
  23. data/lib/lucid_works/field.rb +31 -111
  24. data/lib/lucid_works/field_commons.rb +133 -0
  25. data/lib/lucid_works/gem_version.rb +1 -1
  26. data/lib/lucid_works/inflections.rb +3 -0
  27. data/lib/lucid_works/patch_time.rb +4 -0
  28. data/lib/lucid_works/request_handler.rb +16 -0
  29. data/lib/lucid_works/role.rb +23 -8
  30. data/lib/lucid_works/schema/attribute.rb +1 -1
  31. data/lib/lucid_works/schema/boolean_attribute.rb +1 -1
  32. data/lib/lucid_works/schema/integer_attribute.rb +3 -4
  33. data/lib/lucid_works/server/crawlers_status.rb +15 -0
  34. data/lib/lucid_works/server.rb +35 -14
  35. data/lib/lucid_works/simple_naming.rb +1 -7
  36. data/lib/lucid_works/synonym.rb +1 -1
  37. data/lib/lucid_works/version.rb +1 -0
  38. data/lib/lucid_works.rb +8 -1
  39. data/lucid_works.gemspec +8 -9
  40. data/spec/fixtures/zookeeper/clusterstate.json +30 -0
  41. data/spec/fixtures/zookeeper/clusterstate_broken_shard.json +29 -0
  42. data/spec/fixtures/zookeeper/live_nodes.json +28 -0
  43. data/spec/fixtures/zookeeper/live_nodes_no_children.json +26 -0
  44. data/spec/fixtures/zookeeper/live_nodes_one_child.json +36 -0
  45. data/spec/lib/lucid_works/base_spec.rb +33 -24
  46. data/spec/lib/lucid_works/cache_spec.rb +44 -0
  47. data/spec/lib/lucid_works/cluster_spec.rb +109 -0
  48. data/spec/lib/lucid_works/collection/activity_spec.rb +29 -0
  49. data/spec/lib/lucid_works/collection/prime_activities_spec.rb +1 -1
  50. data/spec/lib/lucid_works/collection/settings_spec.rb +31 -0
  51. data/spec/lib/lucid_works/collection_spec.rb +166 -107
  52. data/spec/lib/lucid_works/datasource/schedule_spec.rb +75 -46
  53. data/spec/lib/lucid_works/datasource/status_spec.rb +5 -5
  54. data/spec/lib/lucid_works/datasource_property_spec.rb +41 -0
  55. data/spec/lib/lucid_works/datasource_spec.rb +40 -12
  56. data/spec/lib/lucid_works/datasource_type_spec.rb +31 -0
  57. data/spec/lib/lucid_works/dynamicfield_spec.rb +214 -0
  58. data/spec/lib/lucid_works/elevation_spec.rb +175 -0
  59. data/spec/lib/lucid_works/field_spec.rb +52 -21
  60. data/spec/lib/lucid_works/fieldtype_spec.rb +0 -1
  61. data/spec/lib/lucid_works/request_handler_spec.rb +11 -0
  62. data/spec/lib/lucid_works/role_spec.rb +77 -0
  63. data/spec/lib/lucid_works/server/crawlers_status_spec.rb +21 -0
  64. data/spec/lib/lucid_works/server_spec.rb +123 -22
  65. data/spec/lib/lucid_works/{collection/synonym_spec.rb → synonym_spec.rb} +23 -22
  66. data/spec/lib/lucid_works/version_spec.rb +6 -0
  67. metadata +132 -64
  68. data/spec/lib/lucid_works/collection/acl_config_spec.rb +0 -212
@@ -0,0 +1,13 @@
1
+ module LucidWorks
2
+ class Cache < Base
3
+ belongs_to :collection
4
+
5
+ schema do
6
+ dynamic_attributes true
7
+ attribute :name, :string, :primary_key => true, :omit_during_update => true
8
+ end
9
+
10
+ validates_presence_of :size
11
+ validates_numericality_of :initial_size, :size, :only_integer => true, :greater_than_or_equal_to => 0, :allow_blank => true, :message => 'is not a valid value'
12
+ end
13
+ end
@@ -0,0 +1,84 @@
1
+ require 'uri'
2
+
3
+ module LucidWorks
4
+ class Cluster
5
+ class Node
6
+ include ActiveModel::Serializers::JSON
7
+ self.include_root_in_json = false
8
+
9
+ attr_reader :id, :cluster, :host, :port, :state, :solr_url, :api_url
10
+ attr_accessor :shards
11
+
12
+ def initialize(cluster, solr_url, state)
13
+ @cluster, @solr_url, @state = cluster, solr_url, state
14
+ uri = URI(solr_url)
15
+ @host, @port = uri.host, uri.port
16
+ @id = "#{host}_#{port}"
17
+ @api_url = solr_url.sub uri.path, '/api'
18
+ @shards = []
19
+ end
20
+
21
+ def as_json(options = {})
22
+ {'id' => id, 'host' => host, 'port' => port, 'state' => state, 'solr_url' => solr_url, 'api_url' => api_url, 'shards' => shards}
23
+ end
24
+ end
25
+
26
+ class Shard
27
+ include ActiveModel::Serializers::JSON
28
+ self.include_root_in_json = false
29
+
30
+ attr_reader :id, :node, :name, :collection, :leader
31
+ alias :leader? :leader
32
+
33
+ def initialize(node, name, collection, leader)
34
+ @id = "#{collection}_#{name}"
35
+ @node = node
36
+ @name = name
37
+ @collection = collection
38
+ @leader = leader
39
+ end
40
+
41
+ def as_json(options = {})
42
+ {'name' => name, 'collection' => collection, 'leader' => leader}
43
+ end
44
+
45
+ def ==(other)
46
+ node == other.node && name == other.name && collection == other.collection
47
+ end
48
+ end
49
+
50
+ attr_reader :nodes
51
+
52
+ def initialize(solr_uri)
53
+ zk_uri = "#{solr_uri}/zookeeper?detail=true&path="
54
+ @zk_clusterstate = JSON.parse(JSON.parse(RestClient.get(zk_uri + "/clusterstate.json"))['znode']['data'])
55
+ @zk_live_nodes = JSON.parse(RestClient.get(zk_uri + "/live_nodes/"))['tree'].first['children'].
56
+ map {|child| child['data']['title'] } rescue []
57
+
58
+ @nodes = []
59
+ @zk_clusterstate.each do |slice|
60
+ slice[1]['shards'].each do |shard_name, shard_description|
61
+ unless shard_description.empty? # A shard's description can come back as empty when the shard is in trouble
62
+ shard_description['replicas'].values.each do |node_json|
63
+ node = nodes.find {|node| node.solr_url == node_json['base_url'] }
64
+ nodes << (node = Node.new(self, node_json['base_url'], determine_node_state(node_json))) unless node
65
+ new_shard = Shard.new(node, node_json['shard'], node_json['collection'], node_json['leader'] == 'true')
66
+ node.shards << new_shard unless node.shards.include?(new_shard)
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end
72
+
73
+ private
74
+
75
+ # State can be "live", "down", "recovering", "recovery_failed"
76
+ def determine_node_state(node_json)
77
+ if @zk_live_nodes.include?(node_json['node_name'])
78
+ node_json['state'] == 'active' ? 'live' : node_json['state']
79
+ else
80
+ 'down'
81
+ end
82
+ end
83
+ end
84
+ end
@@ -14,16 +14,17 @@ module LucidWorks
14
14
 
15
15
  schema do
16
16
  # Indexing Settings
17
- attribute :unknown_type_handling, :string
18
- attribute :de_duplication, :string, :values => DEDUP_OPTIONS
19
-
17
+ attribute :unknown_type_handling, :string, :nil_when_blank => true
18
+ attribute :de_duplication, :string, :values => DEDUP_OPTIONS
19
+ attributes :update_handler_autocommit_max_docs, :update_handler_autocommit_max_time,
20
+ :update_handler_autosoftcommit_max_docs, :update_handler_autosoftcommit_max_time
21
+
20
22
  # Querying Settings
21
23
  attribute :unsupervised_feedback_emphasis, :string, :values => FEEDBACK_EMPHASIS
22
24
  attribute :default_sort, :string, :values => DEFAULT_SORTS
23
25
  attribute :query_parser, :string, :values => QUERY_PARSERS
24
26
  attributes :spellcheck, :display_facets, :unsupervised_feedback, :query_time_stopwords,
25
- :auto_complete, :boost_recent, :show_similar, :query_time_synonyms,
26
- :type => :boolean
27
+ :auto_complete, :boost_recent, :show_similar, :query_time_synonyms, :type => :boolean
27
28
  attributes :stopword_list, :boosts, :synonym_list # Arrays
28
29
 
29
30
  # Click Settings
@@ -33,10 +34,18 @@ module LucidWorks
33
34
  # Other Settings
34
35
  attribute :ssl, :boolean
35
36
  attribute :elevations # Hash
37
+ attributes :main_index_ram_buffer_size_mb, :main_index_merge_factor
36
38
 
37
- # Distrubuted Search Settings
39
+ # Distributed Search Settings
38
40
  attributes :search_server_list, :update_server_list # Arrays
39
41
  end
42
+
43
+ validates_numericality_of :main_index_ram_buffer_size_mb, :main_index_merge_factor,
44
+ :allow_blank => true, :message => 'is not a valid value'
45
+ validates_numericality_of :update_handler_autocommit_max_docs, :update_handler_autocommit_max_time,
46
+ :update_handler_autosoftcommit_max_docs, :update_handler_autosoftcommit_max_time,
47
+ :only_integer => true, :greater_than => 0, :allow_blank => true,
48
+ :message => 'is not a valid value'
40
49
  end
41
50
  end
42
51
  end
@@ -3,27 +3,30 @@ module LucidWorks
3
3
  class Collection < Base
4
4
 
5
5
  belongs_to :server
6
- has_many :datasources, :fields, :fieldtypes, :activities, :roles
6
+ has_many :datasources, :fields, :fieldtypes, :activities, :roles, :caches, :dynamicfields
7
7
  has_one :info, :settings, :click
8
8
  has_one :index, :has_content => false
9
-
9
+
10
10
  schema do
11
11
  attribute :name, :string, :primary_key => true
12
12
  attribute :instance_dir
13
13
  attribute :template
14
+ attribute :num_shards, :integer, :omit_when_blank => true
14
15
  end
15
16
 
16
17
  LOGS_COLLECTION_NAME = 'LucidWorksLogs'
17
- AD_FILTERING = 'adfiltering'
18
- ROLE_FILTERING = 'filterbyrole'
19
- STATIC_ACL_CONFIG = {
20
- "filterer.class" => "com.lucid.security.WindowsACLQueryFilterer",
21
- "provider.class" => "com.lucid.security.ad.ADACLTagProvider",
22
- }
23
- MAGIC_ACL_ONLY_FILTER_SETTING = {'should_clause' => '*:* -data_source_type:smb'}
24
18
 
25
19
  validates_presence_of :name
26
-
20
+ validates_presence_of :num_shards, :if => lambda {|c| c.server.clustered? }
21
+ validates_numericality_of :num_shards, :only_integer => true, :greater_than => 0, :allow_blank => true, :message => 'is not a valid value'
22
+
23
+ def update_caches_attributes(attributes)
24
+ attributes = attributes.with_indifferent_access
25
+ caches.each {|cache| cache.attributes = attributes[cache.name] if attributes.include?(cache.name) }
26
+ return false unless caches.all? &:valid?
27
+ caches.all? &:save
28
+ end
29
+
27
30
  def destroyable?
28
31
  true
29
32
  end
@@ -36,43 +39,35 @@ module LucidWorks
36
39
  def empty!
37
40
  build_index.destroy(:params => {:key => 'iaccepttherisk'})
38
41
  end
39
-
40
- # Sometimes we want to use the raw Rsolr class, instead of Rsolr::Ext
41
- def rsolr
42
- unless @rsolr
43
- server_uri = self.server.host
44
- @path_prefix = URI.parse(server_uri).path # The API key
45
- @rsolr = RSolr.connect :url => server_uri.dup
46
- end
47
- @rsolr
42
+
43
+ def commit
44
+ solr.commit
48
45
  end
49
-
50
- def rsolr_ext
51
- unless @rsolr_ext
52
- server_uri = self.server.host
53
- @path_prefix = URI.parse(server_uri).path # The API key
54
- @rsolr_ext = RSolr::Ext.connect :url => server_uri.dup
46
+
47
+ def search(solr_params, options = {})
48
+ ActiveSupport::Notifications.instrument('solr.lucid_works') do |payload|
49
+ path = options[:path] || 'select'
50
+ begin
51
+ result = if options[:page] && options[:per_page]
52
+ # Constructing the request here instead of using result.request[:uri] to avoid losing it on RSolr::Error::Http errors
53
+ payload[:request] = solr.build_paginated_request(options[:page], options[:per_page], path, :params => solr_params)[:uri].to_s
54
+ solr.paginate(options[:page], options[:per_page], path, :params => solr_params)
55
+ else
56
+ # Constructing the request here instead of using result.request[:uri] to avoid losing it on RSolr::Error::Http errors
57
+ payload[:request] = solr.build_request(path, :params => solr_params)[:uri].to_s
58
+ solr.get(path, :params => solr_params)
59
+ end
60
+ payload[:status] = result.response[:status]
61
+ result
62
+ rescue RSolr::Error::Http => error
63
+ payload[:status] = error.response[:status]
64
+ raise
65
+ end
55
66
  end
56
- @rsolr_ext
57
67
  end
58
68
 
59
- # Lower level access to rsolr, for things like autocomplete queries
60
- def rsolr_get(path_suffix, search_params)
61
- rsolr.get "#{@path_prefix}/solr/#{name}/#{path_suffix}", :params => search_params
62
- end
63
-
64
- # Perform a Solr search using RSolr
65
- def search(search_params={})
66
- search_params[:page] ||= 1
67
- search_params[:per_page] ||= 10
68
- resp = rsolr_ext.find "#{@path_prefix}/solr/#{name}/select", search_params
69
- if search_params[:wt] == :xml
70
- data = Nokogiri.XML(resp)
71
- raise "search received bad XML" unless data.root
72
- else
73
- data = resp
74
- end
75
- data
69
+ def document_count
70
+ solr.get('select', :params => {:wt => :ruby, :q => "*:*", :rows => 0})['response']['numFound']
76
71
  end
77
72
 
78
73
  def synonyms
@@ -85,6 +80,22 @@ module LucidWorks
85
80
  Synonym.new(attributes.merge(:collection => self))
86
81
  end
87
82
 
83
+ def elevations
84
+ elevations = []
85
+ settings.elevations.each do |query, elevation_entries|
86
+ elevation_entries.each do |entry|
87
+ elevations << Elevation.new(
88
+ :doc_id => entry['doc'], :collection => self, :query => query, :excluded => entry['exclude'], :persisted => true
89
+ )
90
+ end
91
+ end
92
+ elevations
93
+ end
94
+
95
+ def build_elevation(attributes = {})
96
+ Elevation.new(attributes.merge(:collection => self))
97
+ end
98
+
88
99
  def prime_activities
89
100
  self.activities!.sort!{|a,b|a.id <=> b.id}
90
101
  num_created = 0
@@ -115,7 +126,7 @@ module LucidWorks
115
126
 
116
127
  # URL of Solr's built-in admin page
117
128
  def admin_url
118
- "#{server.host}/solr/#{name}/admin/"
129
+ "#{server.server_uri}/solr/#{name}/admin/"
119
130
  end
120
131
 
121
132
  # We have to handle the jdbcdrivers model in a custom way, as a GET on .../jdbcdrivers returns a list of strings,
@@ -136,59 +147,18 @@ module LucidWorks
136
147
  JSON.parse(RestClient.get(uri + "/components/all.json?handlerName=%2Flucid"))
137
148
  end
138
149
 
139
- def filtering_enabled?
140
- # current core implementation requires exactly one of filterbyrole or adfiltering
141
- assert_components_include_ad_xor_role
142
- return self.components.include?(AD_FILTERING)
143
- end
144
-
145
- def acl_only?
146
- filterer_config = self.filtering_settings['filterer.config']
147
- return false if filterer_config.nil?
148
- return self.filtering_settings['filterer.config'] != MAGIC_ACL_ONLY_FILTER_SETTING #rescue false
150
+ def available_templates
151
+ JSON.parse(RestClient.get(server.uri + "/collectiontemplates"))
149
152
  end
150
153
 
151
- def assert_components_include_ad_xor_role
152
- # require 'ruby-debug'; debugger
153
- raise "conflicting filtering components" if self.components.include?(AD_FILTERING) && self.components.include?(ROLE_FILTERING)
154
- raise "missing filtering components" if ! self.components.include?(AD_FILTERING) && ! self.components.include?(ROLE_FILTERING)
154
+ def request_handler(name)
155
+ RequestHandler.new(self, name)
155
156
  end
156
157
 
157
- def filtering_settings
158
- JSON.parse(RestClient.get(uri + "/filtering"))['adfiltering'] || {}
159
- end
158
+ protected
160
159
 
161
- def compute_component_set(acl_filtering_enabled)
162
- new_component_set = self.components.clone
163
- new_component_set.delete(ROLE_FILTERING)
164
- new_component_set.delete(AD_FILTERING)
165
- new_component_set.unshift acl_filtering_enabled == 'true' ? AD_FILTERING : ROLE_FILTERING
166
- end
167
-
168
- def configure_filtering(opts)
169
- if ! opts[:config]['java.naming.provider.url'].blank? && opts[:config]['java.naming.provider.url'] !~ %r(://)
170
- opts[:config]['java.naming.provider.url'] = "ldap://#{opts[:config]['java.naming.provider.url']}"
160
+ def solr
161
+ @solr ||= RSolr.connect(:url => "#{server.server_uri}/solr/#{name}")
171
162
  end
172
- filtering_settings = STATIC_ACL_CONFIG.merge('provider.config' => opts[:config])
173
- filtering_settings["filterer.config"] = opts[:acl_only] ? {} : MAGIC_ACL_ONLY_FILTER_SETTING
174
-
175
- errors = {}
176
- method = RestClient.send(:get, uri+'/filtering')['adfiltering'] ? :put : :post
177
- begin
178
- response = RestClient.send(method, uri+'/filtering/adfiltering', filtering_settings.to_json, :content_type => :json)
179
- rescue => exception
180
- JSON.parse(exception.response)['errors'].each {|e| errors[e['code']] = e['message']}
181
- end
182
-
183
- new_component_set = compute_component_set(opts[:enabled])
184
- if new_component_set.sort != self.components.sort
185
- begin
186
- response = RestClient.send(:put, uri+'/components/components?handlerName=/lucid', new_component_set.to_json, :content_type => :json)
187
- rescue => exception
188
- JSON.parse(exception.response)['errors'].each {|e| errors[e['code']] = e['message']}
189
- end
190
- end
191
- raise LucidWorks::AclConfigInvalid.new(errors) unless errors.empty?
192
- end
193
163
  end
194
164
  end
@@ -6,7 +6,8 @@ module LucidWorks
6
6
  self.collection_name = 'history' # i.e. not the plural 'histories'
7
7
  schema do
8
8
  attributes :crawl_started, :crawl_stopped, :type => :iso8601
9
- attributes :num_updated, :num_new, :num_unchanged, :type => :integer
9
+ attributes :num_updated, :num_new, :num_unchanged, :num_not_found,
10
+ :num_filter_denied, :num_access_denied, :num_robots_denied, :type => :integer
10
11
  end
11
12
 
12
13
  def doc_count
@@ -0,0 +1,12 @@
1
+ module LucidWorks
2
+ class Datasource
3
+ class Mapping < Base
4
+ self.singleton = true
5
+ belongs_to :datasource
6
+
7
+ schema do
8
+ attribute :datasource_field, :string, :omit_when_blank => true
9
+ end
10
+ end
11
+ end
12
+ end
@@ -31,6 +31,7 @@ module LucidWorks
31
31
  #
32
32
  def frequency
33
33
  case period
34
+ when 1.minute.seconds..59.minutes.seconds then 'every'
34
35
  when 1.weeks.seconds then 'weekly'
35
36
  when 1.days.seconds then 'daily'
36
37
  when 1.hours.seconds then 'hourly'
@@ -44,6 +45,7 @@ module LucidWorks
44
45
  #
45
46
  def frequency=(frequency)
46
47
  self.period = case frequency
48
+ when 'every' then period
47
49
  when 'hourly' then 1.hours.seconds.to_i
48
50
  when 'daily' then 1.days.seconds.to_i
49
51
  when 'weekly' then 1.weeks.seconds.to_i
@@ -57,7 +59,6 @@ module LucidWorks
57
59
  #
58
60
  def next_start
59
61
  return start_time if (now = Time.now) <= start_time
60
- # require 'ruby-debug'; debugger
61
62
  time_since_start = now - start_time
62
63
  last_interval_num = (time_since_start / period).to_i
63
64
  next_interval_num = if (time_since_start % period) == 0
@@ -111,8 +112,10 @@ module LucidWorks
111
112
  self.frequency = all_attributes['frequency']
112
113
  self.start_time =
113
114
  case all_attributes['frequency']
115
+ when 'every'
116
+ self.period = all_attributes['period'].to_i
117
+ now.ceil(period)
114
118
  when 'weekly'
115
- # require 'ruby-debug'; debugger
116
119
  start = now.beginning_of_week.advance(all_attributes['start'])
117
120
  start < now ? start.advance(:weeks => 1) : start
118
121
  when 'daily'
@@ -5,7 +5,7 @@ module LucidWorks
5
5
  self.singleton = true
6
6
  belongs_to :datasource
7
7
 
8
- STOPPED_STATES = %w{ IDLE STOPPED ABORTED EXCEPTION FINISHED }
8
+ STOPPED_STATES = %w{ IDLE STOPPED ABORTED EXCEPTION FINISHED UNKNOWN }
9
9
  POST_PROCESSING_STATES = %w{ STOPPING ABORTING }
10
10
  CRAWLSTATES = STOPPED_STATES + [ 'RUNNING' ] + POST_PROCESSING_STATES
11
11
 
@@ -13,7 +13,8 @@ module LucidWorks
13
13
  attribute :crawl_state, :string, :values => CRAWLSTATES
14
14
  attribute :jobId
15
15
  attributes :crawl_started, :crawl_stopped, :type => :iso8601
16
- attributes :num_unchanged, :num_updated, :num_new, :num_failed, :num_deleted, :num_total, :type => :integer
16
+ attributes :num_unchanged, :num_updated, :num_new, :num_failed, :num_deleted, :num_total,
17
+ :num_not_found, :num_filter_denied, :num_access_denied, :num_robots_denied, :type => :integer
17
18
  end
18
19
 
19
20
  # Create predicate methods for all the crawl states
@@ -4,51 +4,33 @@ module LucidWorks
4
4
  belongs_to :collection
5
5
  has_many :histories, :class_name => :history, :retrieveable_en_masse => true
6
6
  has_one :status, :retrieveable_en_masse => true
7
- has_one :schedule, :crawldata
7
+ has_one :schedule, :crawldata, :mapping
8
8
  has_one :index, :job, :has_content => false
9
9
 
10
- TYPES = %w{ external file ftp hdfs kfs lucidworkslogs web s3 s3n smb solrxml jdbc sharepoint }
11
- SYSTEM_TYPES = %w{ lucidworkslogs }
12
10
  BOUNDS = %w{ tree none }
13
- CRAWLERS = {
14
- # Later we may change these to be arrays if we decide to support more than one choice
15
- # e.g. :web => ['lucid.aperture', 'nutch'], :file => ['lucid.aperture', 'lucid.fs']
16
- :file => 'lucid.aperture',
17
- :lucidworkslogs => 'lucid.logs',
18
- :external => 'lucid.external',
19
- :web => 'lucid.aperture',
20
- :solrxml => 'lucid.solrxml',
21
- :jdbc => 'lucid.jdbc',
22
- :sharepoint => 'lucid.gcm',
23
- :ftp => 'lucid.fs',
24
- :hdfs => 'lucid.fs',
25
- :kfs => 'lucid.fs',
26
- :smb => 'lucid.fs',
27
- :s3n => 'lucid.fs',
28
- :s3 => 'lucid.fs'
29
- }.with_indifferent_access
30
-
11
+
31
12
  schema do
32
13
  # common
33
14
  attributes :name, :crawler
34
- attribute :type, :string, :values => TYPES
15
+ attribute :type, :string
35
16
  attribute :crawl_depth, :integer, :nil_when_blank => true
36
17
  attribute :max_docs, :integer
37
- attributes :max_bytes, :commit_within, :type => :integer, :omit_when_blank => true
38
- attribute :commit_within_min, :custom
18
+ attributes :max_bytes, :commit_within, :type => :integer, :omit_when_blank => true
19
+ attribute :commit_within_sec, :custom
39
20
  attribute :commit_on_finish, :boolean
40
- attributes :include_paths, :exclude_paths, :type => :list, :separator => "\n"
41
- attribute :mapping, :string, :omit_when_blank => true # Hash
21
+ attributes :include_paths, :exclude_paths, :filter_follow, :filter_track, :filter_locations,
22
+ :type => :list, :separator => "\n"
42
23
  attribute :bounds, :string, :values => BOUNDS
43
24
  # web
44
25
  attributes :url, :category
45
26
  attribute :collect_links, :boolean
46
27
  attribute :auth, :string, :omit_when_blank => true # Hash
47
- attributes :proxy_host, :string, :omit_when_blank => true
28
+ attribute :proxy_host, :string, :omit_when_blank => true
48
29
  attribute :proxy_port, :string, :omit_when_blank => true
49
30
  attribute :proxy_username, :string, :omit_when_blank => true
50
31
  attribute :proxy_password, :string, :omit_when_blank => true
51
32
  attribute :ignore_robots, :boolean
33
+ attribute :add_failed_docs, :boolean
52
34
  # file
53
35
  attribute :path
54
36
  attribute :follow_links, :boolean
@@ -67,11 +49,24 @@ module LucidWorks
67
49
  attribute :password
68
50
  attribute :domain
69
51
  attribute :my_site_base_url, :string, :nil_when_blank => true
70
- attribute :included_urls
71
- attribute :excluded_urls
52
+ attributes :included_urls, :excluded_urls, :type => :list, :separator => "\n"
72
53
  attribute :kdcserver
73
54
  attribute :use_sp_search_visibility, :boolean
74
55
  attribute :aliases
56
+ attribute :feed_unpublished_documents, :boolean
57
+ attribute :push_acls, :boolean
58
+ attribute :enable_security_trimming, :boolean
59
+ attribute :username_format_in_ace
60
+ attribute :groupname_format_in_ace
61
+ attribute :ldap_server_host_address
62
+ attribute :ldap_server_port_number
63
+ attribute :ldap_server_use_ssl, :boolean
64
+ attribute :ldap_auth_type
65
+ attribute :ldap_search_base
66
+ attribute :ldap_read_ad_groups_type
67
+ attribute :ldap_cache_groups_membership, :boolean
68
+ attribute :ldap_cache_size
69
+ attribute :ldap_cache_refresh_interval
75
70
  # external
76
71
  attribute :source
77
72
  attribute :source_type
@@ -97,25 +92,22 @@ module LucidWorks
97
92
  validates_numericality_of :max_bytes, :allow_blank => true
98
93
  validates_presence_of :url, :if => lambda { |d| d.type == 'web' }
99
94
 
100
- # Fake attributes to ease UI implementation
101
- def commit_within_min
102
- commit_within.blank? ? nil : commit_within / 1.second.milliseconds / 1.minute.seconds
95
+ # Virtual attributes to ease UI implementation
96
+ def commit_within_sec
97
+ commit_within.blank? ? nil : commit_within / 1.second.milliseconds
103
98
  end
104
- def commit_within_min=(mins)
105
- self.commit_within = mins.blank? ? nil : mins.to_i.minutes.milliseconds
99
+ def commit_within_sec=(seconds)
100
+ self.commit_within = seconds.blank? ? nil : seconds.to_i.seconds.milliseconds
106
101
  end
107
102
  def delete_after_days
108
- delete_after.blank? ? nil : deleteAfter / 1.second.milliseconds / 1.day.seconds
103
+ delete_after.blank? ? nil : delete_after / 1.second.milliseconds / 1.day.seconds
109
104
  end
110
105
  def delete_after_days=(days)
111
106
  self.delete_after = days.blank? ? nil : days.to_i.days.milliseconds
112
107
  end
113
108
 
114
109
  def document_count
115
- collection.rsolr_get(:select,
116
- :wt => :ruby,
117
- :q => (type == 'lucidworkslogs' ? "*:*" : "data_source:#{id}"),
118
- :rows => 0)['response']['numFound']
110
+ collection.search(:wt => :ruby, :q => (type == 'lucidworkslogs' ? "*:*" : "data_source:#{id}"), :rows => 0)['response']['numFound']
119
111
  end
120
112
 
121
113
  def empty!
@@ -132,11 +124,6 @@ module LucidWorks
132
124
  !collection.system?
133
125
  end
134
126
 
135
- def crawlable?
136
- # Don't let user schedule crawl of external datasources
137
- !%w{ external }.include?(type)
138
- end
139
-
140
127
  def start_crawl!
141
128
  build_job.save
142
129
  end
@@ -145,10 +132,6 @@ module LucidWorks
145
132
  build_job.destroy
146
133
  end
147
134
 
148
- def t_type
149
- I18n.t(type, :scope => 'activemodel.models.lucid_works.datasource.type')
150
- end
151
-
152
135
  def progress
153
136
  return nil if status.stopped?
154
137
  return nil unless histories.size > 0
@@ -5,6 +5,7 @@ module LucidWorks
5
5
  attr_reader :description, :name, :allowed_values, :type, :default_value, :required, :read_only, :advanced
6
6
  alias :read_only? :read_only
7
7
  alias :advanced? :advanced
8
+ alias :required? :required
8
9
 
9
10
  def initialize(attributes = {})
10
11
  @description = attributes['description']
@@ -14,7 +15,7 @@ module LucidWorks
14
15
  @default_value = attributes['default_value']
15
16
  @required = attributes['required']
16
17
  @read_only = attributes['read_only']
17
- @advanced = attributes['advanced']
18
+ @advanced = attributes['hints'].include? 'advanced' rescue false
18
19
  end
19
20
  end
20
21
  end
@@ -8,6 +8,8 @@ module LucidWorks
8
8
  "---" # Separator UI hint
9
9
  ]
10
10
 
11
+ SYSTEM_TYPES = %w{ lucidworkslogs }
12
+
11
13
  attr_reader :crawler, :category, :type, :props
12
14
 
13
15
  def initialize(crawler, attributes = {})
@@ -17,9 +19,21 @@ module LucidWorks
17
19
  @props = attributes['props']
18
20
  end
19
21
 
22
+ def system?
23
+ SYSTEM_TYPES.include? type
24
+ end
25
+
20
26
  def properties
21
27
  @properties ||= @props.map do |prop|
22
28
  DatasourceProperty.new(prop)
29
+ end.tap do |properties|
30
+ def properties.regular
31
+ select {|p| !p.advanced? }
32
+ end
33
+
34
+ def properties.advanced
35
+ select &:advanced?
36
+ end
23
37
  end
24
38
  end
25
39
 
@@ -0,0 +1,12 @@
1
+ require 'lucid_works/field_commons'
2
+
3
+ module LucidWorks
4
+
5
+ class Dynamicfield < Base
6
+ include LucidWorks::FieldCommons
7
+
8
+ validates_each :name, :unless => :persisted?, :allow_blank => true do |model, attr, value|
9
+ model.errors.add(attr, 'must be unique') if model.collection.dynamicfields.any? {|f| f.name == value }
10
+ end
11
+ end
12
+ end