lucid_works 0.6.29 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/.rvmrc +1 -1
  2. data/Rakefile +25 -0
  3. data/config/locales/en.yml +171 -83
  4. data/lib/lucid_works/associations/has_many.rb +2 -2
  5. data/lib/lucid_works/associations/has_one.rb +1 -1
  6. data/lib/lucid_works/associations/proxy.rb +3 -3
  7. data/lib/lucid_works/associations.rb +2 -2
  8. data/lib/lucid_works/base.rb +21 -48
  9. data/lib/lucid_works/collection/click.rb +17 -0
  10. data/lib/lucid_works/collection/settings.rb +0 -1
  11. data/lib/lucid_works/collection.rb +22 -3
  12. data/lib/lucid_works/crawler.rb +13 -0
  13. data/lib/lucid_works/datasource/history.rb +5 -9
  14. data/lib/lucid_works/datasource/status.rb +8 -11
  15. data/lib/lucid_works/datasource.rb +67 -32
  16. data/lib/lucid_works/datasource_property.rb +18 -0
  17. data/lib/lucid_works/datasource_type.rb +23 -0
  18. data/lib/lucid_works/exceptions.rb +1 -0
  19. data/lib/lucid_works/field.rb +43 -2
  20. data/lib/lucid_works/fieldtype.rb +28 -0
  21. data/lib/lucid_works/gem_version.rb +1 -1
  22. data/lib/lucid_works/jdbcdriver.rb +30 -0
  23. data/lib/lucid_works/role.rb +59 -0
  24. data/lib/lucid_works/schema/attribute.rb +86 -0
  25. data/lib/lucid_works/schema/boolean_attribute.rb +34 -0
  26. data/lib/lucid_works/schema/custom_attribute.rb +15 -0
  27. data/lib/lucid_works/schema/integer_attribute.rb +32 -0
  28. data/lib/lucid_works/schema/iso8601_attribute.rb +31 -0
  29. data/lib/lucid_works/schema/string_attribute.rb +22 -0
  30. data/lib/lucid_works/schema.rb +66 -97
  31. data/lib/lucid_works/server.rb +14 -0
  32. data/lib/lucid_works.rb +12 -0
  33. data/spec/fixtures/fake_file_ds_to_be_deleted/.gitkeep +0 -0
  34. data/spec/fixtures/fake_file_ds_to_be_updated/.gitkeep +0 -0
  35. data/spec/fixtures/fake_file_ds_to_get_index_of/.gitkeep +0 -0
  36. data/spec/fixtures/fake_file_ds_to_get_schedule_of/.gitkeep +0 -0
  37. data/spec/fixtures/fake_file_ds_to_get_status_of/.gitkeep +0 -0
  38. data/spec/fixtures/fake_file_ds_to_mess_with_job_of/.gitkeep +0 -0
  39. data/spec/fixtures/fake_file_ds_to_test_progress/.gitkeep +0 -0
  40. data/spec/lib/lucid_works/associations/has_many_spec.rb +4 -3
  41. data/spec/lib/lucid_works/associations/has_one_spec.rb +4 -3
  42. data/spec/lib/lucid_works/base_spec.rb +110 -62
  43. data/spec/lib/lucid_works/collection/activity/history_spec.rb +1 -1
  44. data/spec/lib/lucid_works/collection_spec.rb +17 -17
  45. data/spec/lib/lucid_works/datasource/history_spec.rb +4 -4
  46. data/spec/lib/lucid_works/datasource/status_spec.rb +7 -7
  47. data/spec/lib/lucid_works/datasource_spec.rb +9 -8
  48. data/spec/lib/lucid_works/field_spec.rb +101 -2
  49. data/spec/lib/lucid_works/fieldtype_spec.rb +156 -0
  50. data/spec/lib/lucid_works/schema/attribute_spec.rb +136 -0
  51. data/spec/lib/lucid_works/schema_spec.rb +53 -27
  52. data/spec/spec_helper.rb +3 -50
  53. data/spec/support/active_model_lint.rb +21 -0
  54. data/spec/support/lucid_works.rb +52 -0
  55. metadata +36 -2
@@ -0,0 +1,17 @@
1
+ require 'lucid_works/field'
2
+
3
+ module LucidWorks
4
+ class Collection
5
+ class Click < Base
6
+ self.singleton = true
7
+ belongs_to :collection
8
+
9
+ schema do
10
+ attribute :type, :string, :values => %w{ q c }
11
+ attributes :req, :q, :doc, :type => :string
12
+ attributes :qt, :ct, :hits, :pos, :type => :integer
13
+ attribute :buffering, :boolean
14
+ end
15
+ end
16
+ end
17
+ end
@@ -16,7 +16,6 @@ module LucidWorks
16
16
  # Indexing Settings
17
17
  attribute :unknown_type_handling, :string, :values => LucidWorks::Field::TYPES
18
18
  attribute :de_duplication, :string, :values => DEDUP_OPTIONS
19
- attribute :index_time_stopwords, :boolean
20
19
 
21
20
  # Querying Settings
22
21
  attribute :unsupervised_feedback_emphasis, :string, :values => FEEDBACK_EMPHASIS
@@ -3,8 +3,8 @@ module LucidWorks
3
3
  class Collection < Base
4
4
 
5
5
  belongs_to :server
6
- has_many :datasources, :fields, :activities
7
- has_one :info, :settings
6
+ has_many :datasources, :fields, :fieldtypes, :activities, :roles
7
+ has_one :info, :settings, :click
8
8
  has_one :index, :has_content => false
9
9
 
10
10
  schema do
@@ -16,7 +16,6 @@ module LucidWorks
16
16
  LOGS_COLLECTION_NAME = 'LucidWorksLogs'
17
17
 
18
18
  validates_presence_of :name
19
- validates_exclusion_of :name, :in => [LOGS_COLLECTION_NAME]
20
19
 
21
20
  def destroyable?
22
21
  true
@@ -113,5 +112,25 @@ module LucidWorks
113
112
  def autocomplete_activity
114
113
  prime_activities.detect{|act| act.type == 'autocomplete'}
115
114
  end
115
+
116
+ # URL of Solr's built-in admin page
117
+ def admin_url
118
+ "#{server.host}/solr/#{name}/admin/"
119
+ end
120
+
121
+ # We have to handle the jdbcdrivers model in a custom way, as a GET on .../jdbcdrivers returns a list of strings,
122
+ # but then you can actually do a DELETE on .../jdbcdrivers/name.
123
+ # For the DELETE reason we do use a LucidWorks::Base model, but we have to create them by hand.
124
+ def jdbcdrivers
125
+ driverlist = JSON.parse RestClient.get(self.uri + '/jdbcdrivers')
126
+ driverlist.map do |drivername|
127
+ Jdbcdriver.new(:name => drivername, :collection => self)
128
+ end
129
+ end
130
+
131
+ def jdbcdriver_classes
132
+ JSON.parse RestClient.get(uri + "/jdbcdrivers/classes")
133
+ end
134
+
116
135
  end
117
136
  end
@@ -3,6 +3,19 @@ module LucidWorks
3
3
  class Crawler < Base
4
4
  schema do
5
5
  primary_key :name
6
+ attribute :datasource_types, :custom
7
+ end
8
+
9
+ attr_reader :datasource_types
10
+
11
+ def datasource_types=(array_of_hashes)
12
+ @datasource_types ||= array_of_hashes.map do |dt_attrs|
13
+ DatasourceType.new(dt_attrs)
14
+ end
15
+ end
16
+
17
+ def datasource_type(type)
18
+ datasource_types.detect { |t| t.type == type }
6
19
  end
7
20
  end
8
21
  end
@@ -4,17 +4,13 @@ module LucidWorks
4
4
  class History < Base
5
5
  belongs_to :datasource
6
6
  self.collection_name = 'history' # i.e. not the plural 'histories'
7
-
8
- def doc_count
9
- numUpdated + numNew + numUnchanged
7
+ schema do
8
+ attributes :crawl_started, :crawl_stopped, :type => :iso8601
9
+ attributes :num_updated, :num_new, :num_unchanged, :type => :integer
10
10
  end
11
11
 
12
- def crawl_stopped
13
- Time.iso8601 crawlStopped
14
- end
15
-
16
- def crawl_started
17
- Time.iso8601 crawlStarted
12
+ def doc_count
13
+ num_updated + num_new + num_unchanged
18
14
  end
19
15
 
20
16
  def duration
@@ -10,9 +10,10 @@ module LucidWorks
10
10
  CRAWLSTATES = STOPPED_STATES + [ 'RUNNING' ] + POST_PROCESSING_STATES
11
11
 
12
12
  schema do
13
- attribute :crawlState, :string, :values => CRAWLSTATES
14
- attributes :crawlStarted, :crawlStopped, :jobId
15
- attributes :numUnchanged, :numUpdated, :numNew, :numFailed, :numDeleted, :numTotal, :type => :integer
13
+ attribute :crawl_state, :string, :values => CRAWLSTATES
14
+ attribute :jobId
15
+ attributes :crawl_started, :crawl_stopped, :type => :iso8601
16
+ attributes :num_unchanged, :num_updated, :num_new, :num_failed, :num_deleted, :num_total, :type => :integer
16
17
  end
17
18
 
18
19
  # Create predicate methods for all the crawl states
@@ -20,25 +21,21 @@ module LucidWorks
20
21
  method_name = state.downcase + "?"
21
22
  class_eval <<-EOF
22
23
  def #{method_name}
23
- self.crawlState == "#{state}"
24
+ self.crawl_state == "#{state}"
24
25
  end
25
26
  EOF
26
27
  end
27
28
 
28
29
  def stopped?
29
- STOPPED_STATES.include?(crawlState)
30
+ STOPPED_STATES.include?(crawl_state)
30
31
  end
31
32
 
32
33
  def post_processing?
33
- POST_PROCESSING_STATES.include?(crawlState)
34
+ POST_PROCESSING_STATES.include?(crawl_state)
34
35
  end
35
36
 
36
37
  def doc_count
37
- numUpdated + numNew + numUnchanged
38
- end
39
-
40
- def crawl_started
41
- Time.iso8601 crawlStarted
38
+ num_updated + num_new + num_unchanged
42
39
  end
43
40
 
44
41
  def elapsed_time
@@ -2,46 +2,96 @@ module LucidWorks
2
2
 
3
3
  class Datasource < Base
4
4
  belongs_to :collection
5
- has_many :histories, :class_name => :history, :supports_all => true
6
- has_one :status, :supports_all => true
5
+ has_many :histories, :class_name => :history, :retrieveable_en_masse => true
6
+ has_one :status, :retrieveable_en_masse => true
7
7
  has_one :schedule, :crawldata
8
8
  has_one :index, :job, :has_content => false
9
9
 
10
+ TYPES = %w{ external file ftp hdfs kfs lucidworkslogs web s3 s3n smb solrxml jdbc sharepoint }
11
+ SYSTEM_TYPES = %w{ lucidworkslogs }
12
+ BOUNDS = %w{ tree none }
13
+ CRAWLERS = {
14
+ # Later we may change these to be arrays if we decide to support more than one choice
15
+ # e.g. :web => ['lucid.aperture', 'nutch'], :file => ['lucid.aperture', 'lucid.fs']
16
+ :file => 'lucid.aperture',
17
+ :lucidworkslogs => 'lucid.logs',
18
+ :external => 'lucid.external',
19
+ :web => 'lucid.aperture',
20
+ :solrxml => 'lucid.solrxml',
21
+ :jdbc => 'lucid.jdbc',
22
+ :sharepoint => 'lucid.gcm',
23
+ :ftp => 'lucid.fs',
24
+ :hdfs => 'lucid.fs',
25
+ :kfs => 'lucid.fs',
26
+ :smb => 'lucid.fs',
27
+ :s3n => 'lucid.fs',
28
+ :s3 => 'lucid.fs'
29
+ }.with_indifferent_access
30
+
10
31
  schema do
11
32
  # common
12
- attributes :name, :type, :crawler
33
+ attributes :name, :crawler
34
+ attribute :type, :string, :values => TYPES
13
35
  attributes :crawl_depth, :max_docs, :type => :integer
14
- attributes :max_bytes, :commitWithin, :type => :integer, :omit_when_blank => true
36
+ attributes :max_bytes, :commit_within, :type => :integer, :omit_when_blank => true
15
37
  attribute :commit_within_min, :custom
38
+ attribute :commit_on_finish, :boolean
16
39
  attribute :include_paths
17
40
  attribute :exclude_paths
18
- attribute :mapping # Hash
19
- attribute :bounds
41
+ attribute :mapping, :string, :omit_when_blank => true # Hash
42
+ attribute :bounds, :string, :values => BOUNDS
20
43
  # web
21
44
  attributes :url, :category
22
45
  attribute :collect_links, :boolean
46
+ attribute :auth, :string, :omit_when_blank => true # Hash
47
+ attributes :proxy_host, :string, :omit_when_blank => true
48
+ attribute :proxy_port, :string, :omit_when_blank => true
49
+ attribute :proxy_username, :string, :omit_when_blank => true
50
+ attribute :proxy_password, :string, :omit_when_blank => true
51
+ attribute :ignore_robots, :boolean
23
52
  # file
24
53
  attribute :path
25
54
  attribute :follow_links, :boolean
55
+ attribute :parsing, :boolean
56
+ attribute :indexing, :boolean
57
+ attribute :caching, :boolean
58
+ attribute :verify_access, :boolean
59
+ attribute :log_extra_detail, :boolean
60
+ attribute :fail_unsupported_file_types, :boolean
61
+ attribute :warn_unknown_mime_types, :boolean
62
+ attribute :no_duplicates, :boolean
26
63
  # sharepoint
27
- attribute :sharepointUrl
28
- attribute :connectorType
64
+ attribute :sharepoint_url
65
+ attribute :connector_type
29
66
  attribute :authorization
30
67
  attribute :username
31
68
  attribute :password
32
69
  attribute :domain
33
- attribute :mySiteBaseURL, :string, :nil_when_blank => true
34
- attribute :includedURls
35
- attribute :excludedURls
70
+ attribute :my_site_base_url, :string, :nil_when_blank => true
71
+ attribute :included_urls
72
+ attribute :excluded_urls
36
73
  attribute :kdcserver
37
- attribute :useSPSearchVisibility, :boolean
74
+ attribute :use_sp_search_visibility, :boolean
38
75
  attribute :aliases
39
76
  # external
40
77
  attribute :source
41
78
  attribute :source_type
42
79
  # lucidworkslogs
43
- attribute :deleteAfter, :integer, :omit_when_blank => true
80
+ attribute :delete_after, :integer, :omit_when_blank => true
44
81
  attribute :delete_after_days, :custom
82
+ # smb
83
+ attribute :windows_domain
84
+ attributes :parsing, :indexing, :caching, :type => :boolean
85
+ # jdbc
86
+ attribute :driver
87
+ attribute :primary_key
88
+ attribute :sql_select_statement
89
+ attribute :delta_sql_query
90
+ attribute :nested_queries # list
91
+ # solrxml
92
+ attribute :file
93
+ attribute :include_datasource_metadata, :boolean
94
+ attribute :generate_unique_key, :boolean
45
95
  end
46
96
 
47
97
  validates_presence_of :type, :crawler, :name
@@ -49,33 +99,18 @@ module LucidWorks
49
99
  validates_numericality_of :max_bytes, :allow_blank => true
50
100
  validates_presence_of :url, :if => lambda { |d| d.type == 'web' }
51
101
 
52
- TYPES = %w{ external file lucidworkslogs web solrxml jdbc sharepoint }
53
- BOUNDS = %w{ tree none }
54
- CRAWLERS = {
55
- # Later we may change these to be arrays if we decide to support more than one choice
56
- # e.g. :web => ['lucid.aperture', 'nutch']
57
- :file => 'lucid.aperture',
58
- :lucidworkslogs => 'lucid.logs',
59
- :external => 'lucid.external',
60
- :web => 'lucid.aperture',
61
- :solrxml => 'lucid.solrxml',
62
- :jdbc => 'lucid.jdbc',
63
- :sharepoint => 'lucid.gcm'
64
- }.with_indifferent_access
65
-
66
-
67
102
  # Fake attributes to ease UI implementation
68
103
  def commit_within_min
69
- commitWithin.blank? ? nil : commitWithin / 1.second.milliseconds / 1.minute.seconds
104
+ commit_within.blank? ? nil : commit_within / 1.second.milliseconds / 1.minute.seconds
70
105
  end
71
106
  def commit_within_min=(mins)
72
- self.commitWithin = mins.blank? ? nil : mins.to_i.minutes.milliseconds
107
+ self.commit_within = mins.blank? ? nil : mins.to_i.minutes.milliseconds
73
108
  end
74
109
  def delete_after_days
75
- deleteAfter.blank? ? nil : deleteAfter / 1.second.milliseconds / 1.day.seconds
110
+ delete_after.blank? ? nil : deleteAfter / 1.second.milliseconds / 1.day.seconds
76
111
  end
77
112
  def delete_after_days=(days)
78
- self.deleteAfter = days.blank? ? nil : days.to_i.days.milliseconds
113
+ self.delete_after = days.blank? ? nil : days.to_i.days.milliseconds
79
114
  end
80
115
 
81
116
  def document_count
@@ -0,0 +1,18 @@
1
+ module LucidWorks
2
+
3
+ class DatasourceProperty
4
+
5
+ attr_reader :description, :name, :allowed_values, :type, :default_value, :required, :read_only
6
+ alias :read_only? :read_only
7
+
8
+ def initialize(attributes = {})
9
+ @description = attributes['description']
10
+ @name = attributes['name']
11
+ @allowed_values = attributes['allowedValues']
12
+ @type = attributes['type']
13
+ @default_value = attributes['defaultValues']
14
+ @required = attributes['required']
15
+ @read_only = attributes['read_only']
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,23 @@
1
+ module LucidWorks
2
+
3
+ class DatasourceType
4
+
5
+ attr_reader :category, :type, :props
6
+
7
+ def initialize(attributes = {})
8
+ @category = attributes['category']
9
+ @type = attributes['type']
10
+ @props = attributes['props']
11
+ end
12
+
13
+ def properties
14
+ @properties ||= @props.map do |prop|
15
+ DatasourceProperty.new(prop)
16
+ end
17
+ end
18
+
19
+ def property(name)
20
+ properties.detect { |p| p.name == name }
21
+ end
22
+ end
23
+ end
@@ -3,4 +3,5 @@ module LucidWorks
3
3
  end
4
4
 
5
5
  class ResourceNotFound < Exception ; end
6
+ class RecordInvalid < Exception ; end
6
7
  end
@@ -3,12 +3,19 @@ module LucidWorks
3
3
  class Field < Base
4
4
  belongs_to :collection
5
5
 
6
+ INDEXING_OPTIONS = [
7
+ 'document_only',
8
+ 'document_termfreq',
9
+ 'document_termfreq_termpos'
10
+ ]
11
+
6
12
  schema do
7
13
  attribute :name, :string, :primary_key => true, :omit_during_update => true
8
14
  attribute :editable, :boolean, :omit_during_update => true
15
+ attribute :dynamic_base, :string, :omit_during_update => true
9
16
  attributes :indexed, :stored, :facet, :include_in_results,
10
17
  :search_by_default, :highlight, :multi_valued,
11
- :term_vectors, :omit_tf,
18
+ :term_vectors, :omit_tf, :omit_positions,
12
19
  :use_for_deduplication, :synonym_expansion,
13
20
  :index_for_spellcheck, :index_for_autocomplete,
14
21
  :query_time_stopword_handling,
@@ -16,6 +23,8 @@ module LucidWorks
16
23
  :type => :boolean
17
24
  attribute :default_boost, :integer
18
25
  attributes :field_type, :short_field_boost, :term_vectors, :default_value, :copy_fields, :type => :string
26
+
27
+ attribute :index_term_freq_and_pos, :custom, :values => INDEXING_OPTIONS
19
28
  end
20
29
 
21
30
  TYPES = [
@@ -82,6 +91,10 @@ module LucidWorks
82
91
  validates_each :use_in_find_similar do |model, attr, value|
83
92
  model.errors.add(attr, 'a field must be indexed for it to be used for find-similar') if value == true && !model.indexed?
84
93
  end
94
+
95
+ def dynamically_generated?
96
+ !dynamic_base.blank?
97
+ end
85
98
 
86
99
  def t_field_type
87
100
  self.class.t_field_type(self.field_type)
@@ -94,7 +107,34 @@ module LucidWorks
94
107
  def initialize(options)
95
108
  super(options.reverse_merge(:omit_tf => false, :short_field_boost => 'high'))
96
109
  end
97
-
110
+
111
+ # Meta attribute that wraps the omit_tf and omit_positions combinations
112
+ def index_term_freq_and_pos
113
+ return :document_only if !indexed? || omit_tf?
114
+ return omit_positions? ? :document_termfreq : :document_termfreq_termpos
115
+ end
116
+
117
+ def index_term_freq_and_pos=(indexing_options)
118
+ if indexed?
119
+ case indexing_options.to_sym
120
+ when :document_only
121
+ self.omit_tf = true
122
+ self.omit_positions = true
123
+ when :document_termfreq
124
+ self.omit_tf = false
125
+ self.omit_positions = true
126
+ when :document_termfreq_termpos
127
+ self.omit_tf = false
128
+ self.omit_positions = false
129
+ else
130
+ raise "Unknown indexing option: '#{indexing_options}'. Allowed values are: #{INDEXING_OPTIONS.join(', ')}"
131
+ end
132
+ else # !indexed?
133
+ self.omit_tf = true
134
+ self.omit_positions = true
135
+ end
136
+ end
137
+
98
138
  def update_attributes(attrs)
99
139
  attrs = attrs.with_indifferent_access
100
140
  if [false, '0'].include?(attrs[:stored])
@@ -105,6 +145,7 @@ module LucidWorks
105
145
  attrs[:facet] ||= false
106
146
  attrs[:synonym_expansion] ||= false
107
147
  attrs[:omit_tf] ||= false
148
+ attrs[:omit_positions] ||= false
108
149
  attrs[:short_field_boost] ||= 'high'
109
150
  attrs[:search_by_default] ||= false
110
151
  attrs[:use_in_find_similar] ||= false
@@ -0,0 +1,28 @@
1
+ module LucidWorks
2
+
3
+ class Fieldtype < Base
4
+ belongs_to :collection
5
+
6
+ ANALYZER_TYPES = %w{ default index query }
7
+
8
+ schema do
9
+ dynamic_attributes true
10
+ attribute :name, :string, :primary_key => true, :omit_during_update => true
11
+ attribute :class, :string
12
+ attribute :analyzers
13
+ end
14
+
15
+ validates_presence_of :name, :_class
16
+
17
+ # Provide support for adding more attributes during update.
18
+ def update_attributes(attrs)
19
+ attrs.keys.each do |attr|
20
+ unless self.class.schema.has_attribute?(attr)
21
+ self.class.schema.add_attribute(attr, :string) unless self.class.schema.has_attribute?(attr)
22
+ end
23
+ end
24
+
25
+ super
26
+ end
27
+ end
28
+ end